teknoraver / rpms / systemd

Forked from rpms/systemd 2 months ago
Clone

Blame SOURCES/0570-copy-handle-copy_file_range-weirdness-on-procfs-sysf.patch

698723
From 8df650c7c5adc2bb24a0077d8332f5ee342e7fd8 Mon Sep 17 00:00:00 2001
446ea3
From: Lennart Poettering <lennart@poettering.net>
446ea3
Date: Fri, 26 Feb 2021 10:25:24 +0100
446ea3
Subject: [PATCH] copy: handle copy_file_range() weirdness on procfs/sysfs
446ea3
446ea3
This addresses the issue described in https://lwn.net/Articles/846403/
446ea3
and makes sure we will be able to stream bytes from procfs/sysfs via
446ea3
copy_bytes() if people ask us to.
446ea3
446ea3
Based on: ee1aa61c4710ae567a2b844e0f0bb8cb0456ab8c
698723
Related: #1970860
446ea3
---
446ea3
 src/basic/copy.c     | 75 +++++++++++++++++++++++++++++---------------
446ea3
 src/test/test-copy.c | 17 ++++++++++
446ea3
 2 files changed, 66 insertions(+), 26 deletions(-)
446ea3
446ea3
diff --git a/src/basic/copy.c b/src/basic/copy.c
446ea3
index e06a503a29..a48c42c5c6 100644
446ea3
--- a/src/basic/copy.c
446ea3
+++ b/src/basic/copy.c
446ea3
@@ -92,7 +92,7 @@ int copy_bytes_full(
446ea3
                 void **ret_remains,
446ea3
                 size_t *ret_remains_size) {
446ea3
 
446ea3
-        bool try_cfr = true, try_sendfile = true, try_splice = true;
446ea3
+        bool try_cfr = true, try_sendfile = true, try_splice = true, copied_something = false;
446ea3
         int r, nonblock_pipe = -1;
446ea3
         size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
446ea3
 
446ea3
@@ -185,9 +185,20 @@ int copy_bytes_full(
446ea3
 
446ea3
                                 try_cfr = false;
446ea3
                                 /* use fallback below */
446ea3
-                        } else if (n == 0) /* EOF */
446ea3
-                                break;
446ea3
-                        else
446ea3
+                        } else if (n == 0) { /* likely EOF */
446ea3
+
446ea3
+                                if (copied_something)
446ea3
+                                        break;
446ea3
+
446ea3
+                                /* So, we hit EOF immediately, without having copied a single byte. This
446ea3
+                                 * could indicate two things: the file is actually empty, or we are on some
446ea3
+                                 * virtual file system such as procfs/sysfs where the syscall actually
446ea3
+                                 * doesn't work but doesn't return an error. Try to handle that, by falling
446ea3
+                                 * back to simple read()s in case we encounter empty files.
446ea3
+                                 *
446ea3
+                                 * See: https://lwn.net/Articles/846403/ */
446ea3
+                                try_cfr = try_sendfile = try_splice = false;
446ea3
+                        } else
446ea3
                                 /* Success! */
446ea3
                                 goto next;
446ea3
                 }
446ea3
@@ -201,9 +212,14 @@ int copy_bytes_full(
446ea3
 
446ea3
                                 try_sendfile = false;
446ea3
                                 /* use fallback below */
446ea3
-                        } else if (n == 0) /* EOF */
446ea3
+                        } else if (n == 0) { /* likely EOF */
446ea3
+
446ea3
+                                if (copied_something)
446ea3
+                                        break;
446ea3
+
446ea3
+                                try_sendfile = try_splice = false; /* same logic as above for copy_file_range() */
446ea3
                                 break;
446ea3
-                        else
446ea3
+                        } else
446ea3
                                 /* Success! */
446ea3
                                 goto next;
446ea3
                 }
446ea3
@@ -213,14 +229,14 @@ int copy_bytes_full(
446ea3
 
446ea3
                         /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
446ea3
                          * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
446ea3
-                         * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and
446ea3
-                         * check if either of the specified fds are a pipe, and if so, let's pass the flag
446ea3
-                         * automatically, depending on O_NONBLOCK being set.
446ea3
+                         * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour
446ea3
+                         * here, and check if either of the specified fds are a pipe, and if so, let's pass
446ea3
+                         * the flag automatically, depending on O_NONBLOCK being set.
446ea3
                          *
446ea3
-                         * Here's a twist though: when we use it to move data between two pipes of which one has
446ea3
-                         * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK
446ea3
-                         * behaviour. Hence in that case we can't use splice() and still guarantee systematic
446ea3
-                         * O_NONBLOCK behaviour, hence don't. */
446ea3
+                         * Here's a twist though: when we use it to move data between two pipes of which one
446ea3
+                         * has O_NONBLOCK set and the other has not, then we have no individual control over
446ea3
+                         * O_NONBLOCK behaviour. Hence in that case we can't use splice() and still guarantee
446ea3
+                         * systematic O_NONBLOCK behaviour, hence don't. */
446ea3
 
446ea3
                         if (nonblock_pipe < 0) {
446ea3
                                 int a, b;
446ea3
@@ -238,12 +254,13 @@ int copy_bytes_full(
446ea3
                                     (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
446ea3
                                     (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
446ea3
 
446ea3
-                                        /* splice() only works if one of the fds is a pipe. If neither is, let's skip
446ea3
-                                         * this step right-away. As mentioned above, if one of the two fds refers to a
446ea3
-                                         * blocking pipe and the other to a non-blocking pipe, we can't use splice()
446ea3
-                                         * either, hence don't try either. This hence means we can only use splice() if
446ea3
-                                         * either only one of the two fds is a pipe, or if both are pipes with the same
446ea3
-                                         * nonblocking flag setting. */
446ea3
+                                        /* splice() only works if one of the fds is a pipe. If neither is,
446ea3
+                                         * let's skip this step right-away. As mentioned above, if one of the
446ea3
+                                         * two fds refers to a blocking pipe and the other to a non-blocking
446ea3
+                                         * pipe, we can't use splice() either, hence don't try either. This
446ea3
+                                         * hence means we can only use splice() if either only one of the two
446ea3
+                                         * fds is a pipe, or if both are pipes with the same nonblocking flag
446ea3
+                                         * setting. */
446ea3
 
446ea3
                                         try_splice = false;
446ea3
                                 else
446ea3
@@ -259,9 +276,13 @@ int copy_bytes_full(
446ea3
 
446ea3
                                 try_splice = false;
446ea3
                                 /* use fallback below */
446ea3
-                        } else if (n == 0) /* EOF */
446ea3
-                                break;
446ea3
-                        else
446ea3
+                        } else if (n == 0) { /* likely EOF */
446ea3
+
446ea3
+                                if (copied_something)
446ea3
+                                        break;
446ea3
+
446ea3
+                                try_splice = false; /* same logic as above for copy_file_range() + sendfile() */
446ea3
+                        } else
446ea3
                                 /* Success! */
446ea3
                                 goto next;
446ea3
                 }
446ea3
@@ -312,11 +333,13 @@ int copy_bytes_full(
446ea3
                         assert(max_bytes >= (uint64_t) n);
446ea3
                         max_bytes -= n;
446ea3
                 }
446ea3
-                /* sendfile accepts at most SSIZE_MAX-offset bytes to copy,
446ea3
-                 * so reduce our maximum by the amount we already copied,
446ea3
-                 * but don't go below our copy buffer size, unless we are
446ea3
-                 * close the limit of bytes we are allowed to copy. */
446ea3
+
446ea3
+                /* sendfile accepts at most SSIZE_MAX-offset bytes to copy, so reduce our maximum by the
446ea3
+                 * amount we already copied, but don't go below our copy buffer size, unless we are close the
446ea3
+                 * limit of bytes we are allowed to copy. */
446ea3
                 m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
446ea3
+
446ea3
+                copied_something = true;
446ea3
         }
446ea3
 
446ea3
         return 0; /* return 0 if we hit EOF earlier than the size limit */
446ea3
diff --git a/src/test/test-copy.c b/src/test/test-copy.c
446ea3
index 2e8d251ac1..29ac33e47a 100644
446ea3
--- a/src/test/test-copy.c
446ea3
+++ b/src/test/test-copy.c
446ea3
@@ -253,6 +253,22 @@ static void test_copy_atomic(void) {
446ea3
         assert_se(copy_file_atomic("/etc/fstab", q, 0644, 0, COPY_REPLACE) >= 0);
446ea3
 }
446ea3
 
446ea3
+static void test_copy_proc(void) {
446ea3
+        _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
446ea3
+        _cleanup_free_ char *f = NULL, *a = NULL, *b = NULL;
446ea3
+
446ea3
+        /* Check if copying data from /proc/ works correctly, i.e. let's see if https://lwn.net/Articles/846403/ is a problem for us */
446ea3
+
446ea3
+        assert_se(mkdtemp_malloc(NULL, &p) >= 0);
446ea3
+        assert_se(f = path_join(NULL, p, "version"));
446ea3
+        assert_se(copy_file("/proc/version", f, 0, (mode_t) -1, 0, 0) >= 0);
446ea3
+
446ea3
+        assert_se(read_one_line_file("/proc/version", &a) >= 0);
446ea3
+        assert_se(read_one_line_file(f, &b) >= 0);
446ea3
+        assert_se(streq(a, b));
446ea3
+        assert_se(strlen(a) > 0);
446ea3
+}
446ea3
+
446ea3
 int main(int argc, char *argv[]) {
446ea3
         log_set_max_level(LOG_DEBUG);
446ea3
 
446ea3
@@ -267,6 +283,7 @@ int main(int argc, char *argv[]) {
446ea3
         test_copy_bytes_regular_file(argv[0], false, 32000); /* larger than copy buffer size */
446ea3
         test_copy_bytes_regular_file(argv[0], true, 32000);
446ea3
         test_copy_atomic();
446ea3
+        test_copy_proc();
446ea3
 
446ea3
         return 0;
446ea3
 }