04161d
From 5f2dac18054d9d9b3d84e7fba8c2a6e750d2c245 Mon Sep 17 00:00:00 2001
04161d
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
04161d
Date: Wed, 1 Apr 2020 12:51:34 +0100
04161d
Subject: [PATCH 01/12] cp: ensure --attributes-only doesn't remove files
04161d
04161d
* src/copy.c (copy_internal): Ensure we don't unlink the destination
04161d
unless explicitly requested.
04161d
* tests/cp/attr-existing.sh: Add test cases.
04161d
* NEWS: Mention the bug fix.
04161d
Fixes https://bugs.gnu.org/40352
04161d
04161d
Upstream-commit: 7b5f0fa47cd04c84975250d5b5da7c98e097e99f
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c                |  9 +++++----
04161d
 tests/cp/attr-existing.sh | 21 ++++++++++++++++++---
04161d
 2 files changed, 23 insertions(+), 7 deletions(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index 6e5efc7..54601ce 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -2211,10 +2211,11 @@ copy_internal (char const *src_name, char const *dst_name,
04161d
                    /* Never unlink dst_name when in move mode.  */
04161d
                    && ! x->move_mode
04161d
                    && (x->unlink_dest_before_opening
04161d
-                       || (x->preserve_links && 1 < dst_sb.st_nlink)
04161d
-                       || (x->dereference == DEREF_NEVER
04161d
-                           && ! S_ISREG (src_sb.st_mode))
04161d
-                       ))
04161d
+                       || (x->data_copy_required
04161d
+                           && ((x->preserve_links && 1 < dst_sb.st_nlink)
04161d
+                               || (x->dereference == DEREF_NEVER
04161d
+                                   && ! S_ISREG (src_sb.st_mode))))
04161d
+                      ))
04161d
             {
04161d
               if (unlink (dst_name) != 0 && errno != ENOENT)
04161d
                 {
04161d
diff --git a/tests/cp/attr-existing.sh b/tests/cp/attr-existing.sh
04161d
index 59ce641..14fc844 100755
04161d
--- a/tests/cp/attr-existing.sh
04161d
+++ b/tests/cp/attr-existing.sh
04161d
@@ -19,11 +19,26 @@
04161d
 . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
04161d
 print_ver_ cp
04161d
 
04161d
-printf '1' > file1
04161d
-printf '2' > file2
04161d
-printf '2' > file2.exp
04161d
+printf '1' > file1 || framework_failure_
04161d
+printf '2' > file2 || framework_failure_
04161d
+printf '2' > file2.exp || framework_failure_
04161d
 
04161d
 cp --attributes-only file1 file2 || fail=1
04161d
 cmp file2 file2.exp || fail=1
04161d
 
04161d
+# coreutils v8.32 and before would remove destination files
04161d
+# if hardlinked or the source was not a regular file.
04161d
+ln file2 link2 || framework_failure_
04161d
+cp -a --attributes-only file1 file2 || fail=1
04161d
+cmp file2 file2.exp || fail=1
04161d
+
04161d
+ln -s file1 sym1 || framework_failure_
04161d
+returns_ 1 cp -a --attributes-only sym1 file2 || fail=1
04161d
+cmp file2 file2.exp || fail=1
04161d
+
04161d
+# One can still force removal though
04161d
+cp -a --remove-destination --attributes-only sym1 file2 || fail=1
04161d
+test -L file2 || fail=1
04161d
+cmp file1 file2 || fail=1
04161d
+
04161d
 Exit $fail
04161d
-- 
04161d
2.26.3
04161d
04161d
04161d
From c728747b06e71894c96d1f27434f2484af992c75 Mon Sep 17 00:00:00 2001
04161d
From: Paul Eggert <eggert@cs.ucla.edu>
04161d
Date: Tue, 23 Jun 2020 19:18:04 -0700
04161d
Subject: [PATCH 02/12] cp: refactor extent_copy
04161d
04161d
* src/copy.c (extent_copy): New arg SCAN, replacing
04161d
REQUIRE_NORMAL_COPY.  All callers changed.
04161d
(enum scantype): New type.
04161d
(infer_scantype): Rename from is_probably_sparse and return
04161d
the new type.  Add args FD and SCAN.  All callers changed.
04161d
04161d
Upstream-commit: 761ba28400a04ee24eefe9cd4973ec8850cd7a52
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 119 +++++++++++++++++++++++++----------------------------
04161d
 1 file changed, 55 insertions(+), 64 deletions(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index 54601ce..f694f91 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -422,9 +422,8 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
              size_t hole_size, off_t src_total_size,
04161d
              enum Sparse_type sparse_mode,
04161d
              char const *src_name, char const *dst_name,
04161d
-             bool *require_normal_copy)
04161d
+             struct extent_scan *scan)
04161d
 {
04161d
-  struct extent_scan scan;
04161d
   off_t last_ext_start = 0;
04161d
   off_t last_ext_len = 0;
04161d
 
04161d
@@ -432,45 +431,25 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
      We may need this at the end, for a final ftruncate.  */
04161d
   off_t dest_pos = 0;
04161d
 
04161d
-  extent_scan_init (src_fd, &scan);
04161d
-
04161d
-  *require_normal_copy = false;
04161d
   bool wrote_hole_at_eof = true;
04161d
-  do
04161d
+  while (true)
04161d
     {
04161d
-      bool ok = extent_scan_read (&scan);
04161d
-      if (! ok)
04161d
-        {
04161d
-          if (scan.hit_final_extent)
04161d
-            break;
04161d
-
04161d
-          if (scan.initial_scan_failed)
04161d
-            {
04161d
-              *require_normal_copy = true;
04161d
-              return false;
04161d
-            }
04161d
-
04161d
-          error (0, errno, _("%s: failed to get extents info"),
04161d
-                 quotef (src_name));
04161d
-          return false;
04161d
-        }
04161d
-
04161d
       bool empty_extent = false;
04161d
-      for (unsigned int i = 0; i < scan.ei_count || empty_extent; i++)
04161d
+      for (unsigned int i = 0; i < scan->ei_count || empty_extent; i++)
04161d
         {
04161d
           off_t ext_start;
04161d
           off_t ext_len;
04161d
           off_t ext_hole_size;
04161d
 
04161d
-          if (i < scan.ei_count)
04161d
+          if (i < scan->ei_count)
04161d
             {
04161d
-              ext_start = scan.ext_info[i].ext_logical;
04161d
-              ext_len = scan.ext_info[i].ext_length;
04161d
+              ext_start = scan->ext_info[i].ext_logical;
04161d
+              ext_len = scan->ext_info[i].ext_length;
04161d
             }
04161d
           else /* empty extent at EOF.  */
04161d
             {
04161d
               i--;
04161d
-              ext_start = last_ext_start + scan.ext_info[i].ext_length;
04161d
+              ext_start = last_ext_start + scan->ext_info[i].ext_length;
04161d
               ext_len = 0;
04161d
             }
04161d
 
04161d
@@ -498,7 +477,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
                 {
04161d
                   error (0, errno, _("cannot lseek %s"), quoteaf (src_name));
04161d
                 fail:
04161d
-                  extent_scan_free (&scan);
04161d
+                  extent_scan_free (scan);
04161d
                   return false;
04161d
                 }
04161d
 
04161d
@@ -539,7 +518,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
           /* For now, do not treat FIEMAP_EXTENT_UNWRITTEN specially,
04161d
              because that (in combination with no sync) would lead to data
04161d
              loss at least on XFS and ext4 when using 2.6.39-rc3 kernels.  */
04161d
-          if (0 && (scan.ext_info[i].ext_flags & FIEMAP_EXTENT_UNWRITTEN))
04161d
+          if (0 && (scan->ext_info[i].ext_flags & FIEMAP_EXTENT_UNWRITTEN))
04161d
             {
04161d
               empty_extent = true;
04161d
               last_ext_len = 0;
04161d
@@ -571,16 +550,23 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
              extents beyond the apparent size.  */
04161d
           if (dest_pos == src_total_size)
04161d
             {
04161d
-              scan.hit_final_extent = true;
04161d
+              scan->hit_final_extent = true;
04161d
               break;
04161d
             }
04161d
         }
04161d
 
04161d
       /* Release the space allocated to scan->ext_info.  */
04161d
-      extent_scan_free (&scan);
04161d
+      extent_scan_free (scan);
04161d
 
04161d
+      if (scan->hit_final_extent)
04161d
+        break;
04161d
+      if (! extent_scan_read (scan) && ! scan->hit_final_extent)
04161d
+        {
04161d
+          error (0, errno, _("%s: failed to get extents info"),
04161d
+                 quotef (src_name));
04161d
+          return false;
04161d
+        }
04161d
     }
04161d
-  while (! scan.hit_final_extent);
04161d
 
04161d
   /* When the source file ends with a hole, we have to do a little more work,
04161d
      since the above copied only up to and including the final extent.
04161d
@@ -1021,16 +1007,35 @@ fchmod_or_lchmod (int desc, char const *name, mode_t mode)
04161d
 # define HAVE_STRUCT_STAT_ST_BLOCKS 0
04161d
 #endif
04161d
 
04161d
+/* Type of scan being done on the input when looking for sparseness.  */
04161d
+enum scantype
04161d
+  {
04161d
+   /* No fancy scanning; just read and write.  */
04161d
+   PLAIN_SCANTYPE,
04161d
+
04161d
+   /* Read and examine data looking for zero blocks; useful when
04161d
+      attempting to create sparse output.  */
04161d
+   ZERO_SCANTYPE,
04161d
+
04161d
+   /* Extent information is available.  */
04161d
+   EXTENT_SCANTYPE
04161d
+  };
04161d
+
04161d
 /* Use a heuristic to determine whether stat buffer SB comes from a file
04161d
    with sparse blocks.  If the file has fewer blocks than would normally
04161d
    be needed for a file of its size, then at least one of the blocks in
04161d
    the file is a hole.  In that case, return true.  */
04161d
-static bool
04161d
-is_probably_sparse (struct stat const *sb)
04161d
+static enum scantype
04161d
+infer_scantype (int fd, struct stat const *sb, struct extent_scan *scan)
04161d
 {
04161d
-  return (HAVE_STRUCT_STAT_ST_BLOCKS
04161d
-          && S_ISREG (sb->st_mode)
04161d
-          && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE);
04161d
+  if (! (HAVE_STRUCT_STAT_ST_BLOCKS
04161d
+         && S_ISREG (sb->st_mode)
04161d
+         && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE))
04161d
+    return PLAIN_SCANTYPE;
04161d
+
04161d
+  extent_scan_init (fd, scan);
04161d
+  extent_scan_read (scan);
04161d
+  return scan->initial_scan_failed ? ZERO_SCANTYPE : EXTENT_SCANTYPE;
04161d
 }
04161d
 
04161d
 
04161d
@@ -1061,6 +1066,7 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
   mode_t src_mode = src_sb->st_mode;
04161d
   struct stat sb;
04161d
   struct stat src_open_sb;
04161d
+  struct extent_scan scan;
04161d
   bool return_val = true;
04161d
   bool data_copy_required = x->data_copy_required;
04161d
 
04161d
@@ -1260,23 +1266,13 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
       fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL);
04161d
 
04161d
       /* Deal with sparse files.  */
04161d
-      bool make_holes = false;
04161d
-      bool sparse_src = is_probably_sparse (&src_open_sb);
04161d
-
04161d
-      if (S_ISREG (sb.st_mode))
04161d
-        {
04161d
-          /* Even with --sparse=always, try to create holes only
04161d
-             if the destination is a regular file.  */
04161d
-          if (x->sparse_mode == SPARSE_ALWAYS)
04161d
-            make_holes = true;
04161d
-
04161d
-          /* Use a heuristic to determine whether SRC_NAME contains any sparse
04161d
-             blocks.  If the file has fewer blocks than would normally be
04161d
-             needed for a file of its size, then at least one of the blocks in
04161d
-             the file is a hole.  */
04161d
-          if (x->sparse_mode == SPARSE_AUTO && sparse_src)
04161d
-            make_holes = true;
04161d
-        }
04161d
+      enum scantype scantype = infer_scantype (source_desc, &src_open_sb,
04161d
+                                               &scan);
04161d
+      bool make_holes
04161d
+        = (S_ISREG (sb.st_mode)
04161d
+           && (x->sparse_mode == SPARSE_ALWAYS
04161d
+               || (x->sparse_mode == SPARSE_AUTO
04161d
+                   && scantype != PLAIN_SCANTYPE)));
04161d
 
04161d
       /* If not making a sparse file, try to use a more-efficient
04161d
          buffer size.  */
04161d
@@ -1305,10 +1301,8 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
       buf_alloc = xmalloc (buf_size + buf_alignment);
04161d
       buf = ptr_align (buf_alloc, buf_alignment);
04161d
 
04161d
-      if (sparse_src)
04161d
+      if (scantype == EXTENT_SCANTYPE)
04161d
         {
04161d
-          bool normal_copy_required;
04161d
-
04161d
           /* Perform an efficient extent-based copy, falling back to the
04161d
              standard copy only if the initial extent scan fails.  If the
04161d
              '--sparse=never' option is specified, write all data but use
04161d
@@ -1316,14 +1310,11 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
           if (extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
04161d
                            src_open_sb.st_size,
04161d
                            make_holes ? x->sparse_mode : SPARSE_NEVER,
04161d
-                           src_name, dst_name, &normal_copy_required))
04161d
+                           src_name, dst_name, &scan))
04161d
             goto preserve_metadata;
04161d
 
04161d
-          if (! normal_copy_required)
04161d
-            {
04161d
-              return_val = false;
04161d
-              goto close_src_and_dst_desc;
04161d
-            }
04161d
+          return_val = false;
04161d
+          goto close_src_and_dst_desc;
04161d
         }
04161d
 
04161d
       off_t n_read;
04161d
-- 
04161d
2.26.3
04161d
04161d
04161d
From ed7ff81de507bef46991f4caac550f41ab65e3ed Mon Sep 17 00:00:00 2001
04161d
From: Paul Eggert <eggert@cs.ucla.edu>
04161d
Date: Wed, 24 Jun 2020 17:05:20 -0700
04161d
Subject: [PATCH 03/12] cp: avoid copy_reg goto
04161d
04161d
* src/copy.c (copy_reg): Redo to avoid label and goto.
04161d
04161d
Upstream-commit: 2fcd0f3328f5181a2986905fa5469a0152c67279
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 34 +++++++++++-----------------------
04161d
 1 file changed, 11 insertions(+), 23 deletions(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index f694f91..b382cfa 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -1301,29 +1301,18 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
       buf_alloc = xmalloc (buf_size + buf_alignment);
04161d
       buf = ptr_align (buf_alloc, buf_alignment);
04161d
 
04161d
-      if (scantype == EXTENT_SCANTYPE)
04161d
-        {
04161d
-          /* Perform an efficient extent-based copy, falling back to the
04161d
-             standard copy only if the initial extent scan fails.  If the
04161d
-             '--sparse=never' option is specified, write all data but use
04161d
-             any extents to read more efficiently.  */
04161d
-          if (extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
04161d
-                           src_open_sb.st_size,
04161d
-                           make_holes ? x->sparse_mode : SPARSE_NEVER,
04161d
-                           src_name, dst_name, &scan))
04161d
-            goto preserve_metadata;
04161d
-
04161d
-          return_val = false;
04161d
-          goto close_src_and_dst_desc;
04161d
-        }
04161d
-
04161d
       off_t n_read;
04161d
-      bool wrote_hole_at_eof;
04161d
-      if (! sparse_copy (source_desc, dest_desc, buf, buf_size,
04161d
-                         make_holes ? hole_size : 0,
04161d
-                         x->sparse_mode == SPARSE_ALWAYS, src_name, dst_name,
04161d
-                         UINTMAX_MAX, &n_read,
04161d
-                         &wrote_hole_at_eof))
04161d
+      bool wrote_hole_at_eof = false;
04161d
+      if (! (scantype == EXTENT_SCANTYPE
04161d
+             ? extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
04161d
+                            src_open_sb.st_size,
04161d
+                            make_holes ? x->sparse_mode : SPARSE_NEVER,
04161d
+                            src_name, dst_name, &scan)
04161d
+             : sparse_copy (source_desc, dest_desc, buf, buf_size,
04161d
+                            make_holes ? hole_size : 0,
04161d
+                            x->sparse_mode == SPARSE_ALWAYS,
04161d
+                            src_name, dst_name, UINTMAX_MAX, &n_read,
04161d
+                            &wrote_hole_at_eof)))
04161d
         {
04161d
           return_val = false;
04161d
           goto close_src_and_dst_desc;
04161d
@@ -1336,7 +1325,6 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
         }
04161d
     }
04161d
 
04161d
-preserve_metadata:
04161d
   if (x->preserve_timestamps)
04161d
     {
04161d
       struct timespec timespec[2];
04161d
-- 
04161d
2.26.3
04161d
04161d
04161d
From 5631bded3a385ca0bbd77456b50767fe5580240c Mon Sep 17 00:00:00 2001
04161d
From: Paul Eggert <eggert@cs.ucla.edu>
04161d
Date: Thu, 25 Jun 2020 16:31:44 -0700
04161d
Subject: [PATCH 04/12] cp: use SEEK_DATA/SEEK_HOLE if available
04161d
04161d
If it works, prefer lseek with SEEK_DATA and SEEK_HOLE to FIEMAP,
04161d
as lseek is simpler and more portable (will be in next POSIX).
04161d
Problem reported in 2011 by Jeff Liu (Bug#8061).
04161d
* NEWS: Mention this.
04161d
* src/copy.c (lseek_copy) [SEEK_HOLE]: New function.
04161d
(enum scantype): New constants ERROR_SCANTYPE, LSEEK_SCANTYPE.
04161d
(union scan_inference): New type.
04161d
(infer_scantype): Last arg is now union scan_inference *,
04161d
not struct extent_scan *.  All callers changed.
04161d
Prefer SEEK_HOLE to FIEMAP if both work, since
04161d
SEEK_HOLE is simpler and more portable.
04161d
(copy_reg): Do the fdadvise after initial scan, in case the scan
04161d
fails.  Report an error if the initial scan fails.
04161d
(copy_reg) [SEEK_HOLE]: Use lseek_copy if scantype says so.
04161d
04161d
Upstream-commit: a6eaee501f6ec0c152abe88640203a64c390993e
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++++++---
04161d
 1 file changed, 198 insertions(+), 11 deletions(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index b382cfa..d88f8cf 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -416,7 +416,12 @@ write_zeros (int fd, off_t n_bytes)
04161d
    Upon a successful copy, return true.  If the initial extent scan
04161d
    fails, set *NORMAL_COPY_REQUIRED to true and return false.
04161d
    Upon any other failure, set *NORMAL_COPY_REQUIRED to false and
04161d
-   return false.  */
04161d
+   return false.
04161d
+
04161d
+   FIXME: Once we no longer need to support Linux kernel versions
04161d
+   before 3.1 (2011), this function can be retired as it is superseded
04161d
+   by lseek_copy.  That is, we no longer need extent-scan.h and can
04161d
+   remove any of the code that uses it.  */
04161d
 static bool
04161d
 extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
              size_t hole_size, off_t src_total_size,
04161d
@@ -595,6 +600,150 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
   return true;
04161d
 }
04161d
 
04161d
+#ifdef SEEK_HOLE
04161d
+/* Perform an efficient extent copy, if possible.  This avoids
04161d
+   the overhead of detecting holes in hole-introducing/preserving
04161d
+   copy, and thus makes copying sparse files much more efficient.
04161d
+   Copy from SRC_FD to DEST_FD, using BUF (of size BUF_SIZE) for a buffer.
04161d
+   Look for holes of size HOLE_SIZE in the input.
04161d
+   The input file is of size SRC_TOTAL_SIZE.
04161d
+   Use SPARSE_MODE to determine whether to create holes in the output.
04161d
+   SRC_NAME and DST_NAME are the input and output file names.
04161d
+   Return true if successful, false (with a diagnostic) otherwise.  */
04161d
+
04161d
+static bool
04161d
+lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
+            size_t hole_size, off_t ext_start, off_t src_total_size,
04161d
+            enum Sparse_type sparse_mode,
04161d
+            char const *src_name, char const *dst_name)
04161d
+{
04161d
+  off_t last_ext_start = 0;
04161d
+  off_t last_ext_len = 0;
04161d
+  off_t dest_pos = 0;
04161d
+  bool wrote_hole_at_eof = true;
04161d
+
04161d
+  while (0 <= ext_start)
04161d
+    {
04161d
+      off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE);
04161d
+      if (ext_end < 0)
04161d
+        {
04161d
+          if (errno != ENXIO)
04161d
+            goto cannot_lseek;
04161d
+          ext_end = src_total_size;
04161d
+          if (ext_end <= ext_start)
04161d
+            {
04161d
+              /* The input file grew; get its current size.  */
04161d
+              src_total_size = lseek (src_fd, 0, SEEK_END);
04161d
+              if (src_total_size < 0)
04161d
+                goto cannot_lseek;
04161d
+
04161d
+              /* If the input file shrank after growing, stop copying.  */
04161d
+              if (src_total_size <= ext_start)
04161d
+                break;
04161d
+
04161d
+              ext_end = src_total_size;
04161d
+            }
04161d
+        }
04161d
+      /* If the input file must have grown, increase its measured size.  */
04161d
+      if (src_total_size < ext_end)
04161d
+        src_total_size = ext_end;
04161d
+
04161d
+      if (lseek (src_fd, ext_start, SEEK_SET) < 0)
04161d
+        goto cannot_lseek;
04161d
+
04161d
+      wrote_hole_at_eof = false;
04161d
+      off_t ext_hole_size = ext_start - last_ext_start - last_ext_len;
04161d
+
04161d
+      if (ext_hole_size)
04161d
+        {
04161d
+          if (sparse_mode != SPARSE_NEVER)
04161d
+            {
04161d
+              if (! create_hole (dest_fd, dst_name,
04161d
+                                 sparse_mode == SPARSE_ALWAYS,
04161d
+                                 ext_hole_size))
04161d
+                return false;
04161d
+              wrote_hole_at_eof = true;
04161d
+            }
04161d
+          else
04161d
+            {
04161d
+              /* When not inducing holes and when there is a hole between
04161d
+                 the end of the previous extent and the beginning of the
04161d
+                 current one, write zeros to the destination file.  */
04161d
+              if (! write_zeros (dest_fd, ext_hole_size))
04161d
+                {
04161d
+                  error (0, errno, _("%s: write failed"),
04161d
+                         quotef (dst_name));
04161d
+                  return false;
04161d
+                }
04161d
+            }
04161d
+        }
04161d
+
04161d
+      off_t ext_len = ext_end - ext_start;
04161d
+      last_ext_start = ext_start;
04161d
+      last_ext_len = ext_len;
04161d
+
04161d
+      /* Copy this extent, looking for further opportunities to not
04161d
+         bother to write zeros unless --sparse=never, since SEEK_HOLE
04161d
+         is conservative and may miss some holes.  */
04161d
+      off_t n_read;
04161d
+      bool read_hole;
04161d
+      if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size,
04161d
+                          sparse_mode == SPARSE_NEVER ? 0 : hole_size,
04161d
+                          true, src_name, dst_name, ext_len, &n_read,
04161d
+                          &read_hole))
04161d
+        return false;
04161d
+
04161d
+      dest_pos = ext_start + n_read;
04161d
+      if (n_read)
04161d
+        wrote_hole_at_eof = read_hole;
04161d
+      if (n_read < ext_len)
04161d
+        {
04161d
+          /* The input file shrank.  */
04161d
+          src_total_size = dest_pos;
04161d
+          break;
04161d
+        }
04161d
+
04161d
+      ext_start = lseek (src_fd, dest_pos, SEEK_DATA);
04161d
+      if (ext_start < 0)
04161d
+        {
04161d
+          if (errno != ENXIO)
04161d
+            goto cannot_lseek;
04161d
+          break;
04161d
+        }
04161d
+    }
04161d
+
04161d
+  /* When the source file ends with a hole, we have to do a little more work,
04161d
+     since the above copied only up to and including the final extent.
04161d
+     In order to complete the copy, we may have to insert a hole or write
04161d
+     zeros in the destination corresponding to the source file's hole-at-EOF.
04161d
+
04161d
+     In addition, if the final extent was a block of zeros at EOF and we've
04161d
+     just converted them to a hole in the destination, we must call ftruncate
04161d
+     here in order to record the proper length in the destination.  */
04161d
+  if ((dest_pos < src_total_size || wrote_hole_at_eof)
04161d
+      && ! (sparse_mode == SPARSE_NEVER
04161d
+            ? write_zeros (dest_fd, src_total_size - dest_pos)
04161d
+            : ftruncate (dest_fd, src_total_size) == 0))
04161d
+    {
04161d
+      error (0, errno, _("failed to extend %s"), quoteaf (dst_name));
04161d
+      return false;
04161d
+    }
04161d
+
04161d
+  if (sparse_mode == SPARSE_ALWAYS && dest_pos < src_total_size
04161d
+      && punch_hole (dest_fd, dest_pos, src_total_size - dest_pos) < 0)
04161d
+    {
04161d
+      error (0, errno, _("error deallocating %s"), quoteaf (dst_name));
04161d
+      return false;
04161d
+    }
04161d
+
04161d
+  return true;
04161d
+
04161d
+ cannot_lseek:
04161d
+  error (0, errno, _("cannot lseek %s"), quoteaf (src_name));
04161d
+  return false;
04161d
+}
04161d
+#endif
04161d
+
04161d
 /* FIXME: describe */
04161d
 /* FIXME: rewrite this to use a hash table so we avoid the quadratic
04161d
    performance hit that's probably noticeable only on trees deeper
04161d
@@ -1010,6 +1159,9 @@ fchmod_or_lchmod (int desc, char const *name, mode_t mode)
04161d
 /* Type of scan being done on the input when looking for sparseness.  */
04161d
 enum scantype
04161d
   {
04161d
+   /* An error was found when determining scantype.  */
04161d
+   ERROR_SCANTYPE,
04161d
+
04161d
    /* No fancy scanning; just read and write.  */
04161d
    PLAIN_SCANTYPE,
04161d
 
04161d
@@ -1017,22 +1169,44 @@ enum scantype
04161d
       attempting to create sparse output.  */
04161d
    ZERO_SCANTYPE,
04161d
 
04161d
+   /* lseek information is available.  */
04161d
+   LSEEK_SCANTYPE,
04161d
+
04161d
    /* Extent information is available.  */
04161d
    EXTENT_SCANTYPE
04161d
   };
04161d
 
04161d
-/* Use a heuristic to determine whether stat buffer SB comes from a file
04161d
-   with sparse blocks.  If the file has fewer blocks than would normally
04161d
-   be needed for a file of its size, then at least one of the blocks in
04161d
-   the file is a hole.  In that case, return true.  */
04161d
+/* Result of infer_scantype.  */
04161d
+union scan_inference
04161d
+{
04161d
+  /* Used if infer_scantype returns LSEEK_SCANTYPE.  This is the
04161d
+     offset of the first data block, or -1 if the file has no data.  */
04161d
+  off_t ext_start;
04161d
+
04161d
+  /* Used if infer_scantype returns EXTENT_SCANTYPE.  */
04161d
+  struct extent_scan extent_scan;
04161d
+};
04161d
+
04161d
+/* Return how to scan a file with descriptor FD and stat buffer SB.
04161d
+   Store any information gathered into *SCAN.  */
04161d
 static enum scantype
04161d
-infer_scantype (int fd, struct stat const *sb, struct extent_scan *scan)
04161d
+infer_scantype (int fd, struct stat const *sb,
04161d
+                union scan_inference *scan_inference)
04161d
 {
04161d
   if (! (HAVE_STRUCT_STAT_ST_BLOCKS
04161d
          && S_ISREG (sb->st_mode)
04161d
          && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE))
04161d
     return PLAIN_SCANTYPE;
04161d
 
04161d
+#ifdef SEEK_HOLE
04161d
+  scan_inference->ext_start = lseek (fd, 0, SEEK_DATA);
04161d
+  if (0 <= scan_inference->ext_start)
04161d
+    return LSEEK_SCANTYPE;
04161d
+  else if (errno != EINVAL && errno != ENOTSUP)
04161d
+    return errno == ENXIO ? LSEEK_SCANTYPE : ERROR_SCANTYPE;
04161d
+#endif
04161d
+
04161d
+  struct extent_scan *scan = &scan_inference->extent_scan;
04161d
   extent_scan_init (fd, scan);
04161d
   extent_scan_read (scan);
04161d
   return scan->initial_scan_failed ? ZERO_SCANTYPE : EXTENT_SCANTYPE;
04161d
@@ -1066,7 +1240,7 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
   mode_t src_mode = src_sb->st_mode;
04161d
   struct stat sb;
04161d
   struct stat src_open_sb;
04161d
-  struct extent_scan scan;
04161d
+  union scan_inference scan_inference;
04161d
   bool return_val = true;
04161d
   bool data_copy_required = x->data_copy_required;
04161d
 
04161d
@@ -1263,17 +1437,23 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
       size_t buf_size = io_blksize (sb);
04161d
       size_t hole_size = ST_BLKSIZE (sb);
04161d
 
04161d
-      fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL);
04161d
-
04161d
       /* Deal with sparse files.  */
04161d
       enum scantype scantype = infer_scantype (source_desc, &src_open_sb,
04161d
-                                               &scan);
04161d
+                                               &scan_inference);
04161d
+      if (scantype == ERROR_SCANTYPE)
04161d
+        {
04161d
+          error (0, errno, _("cannot lseek %s"), quoteaf (src_name));
04161d
+          return_val = false;
04161d
+          goto close_src_and_dst_desc;
04161d
+        }
04161d
       bool make_holes
04161d
         = (S_ISREG (sb.st_mode)
04161d
            && (x->sparse_mode == SPARSE_ALWAYS
04161d
                || (x->sparse_mode == SPARSE_AUTO
04161d
                    && scantype != PLAIN_SCANTYPE)));
04161d
 
04161d
+      fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL);
04161d
+
04161d
       /* If not making a sparse file, try to use a more-efficient
04161d
          buffer size.  */
04161d
       if (! make_holes)
04161d
@@ -1307,7 +1487,14 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
              ? extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
04161d
                             src_open_sb.st_size,
04161d
                             make_holes ? x->sparse_mode : SPARSE_NEVER,
04161d
-                            src_name, dst_name, &scan)
04161d
+                            src_name, dst_name, &scan_inference.extent_scan)
04161d
+#ifdef SEEK_HOLE
04161d
+             : scantype == LSEEK_SCANTYPE
04161d
+             ? lseek_copy (source_desc, dest_desc, buf, buf_size, hole_size,
04161d
+                           scan_inference.ext_start, src_open_sb.st_size,
04161d
+                           make_holes ? x->sparse_mode : SPARSE_NEVER,
04161d
+                           src_name, dst_name)
04161d
+#endif
04161d
              : sparse_copy (source_desc, dest_desc, buf, buf_size,
04161d
                             make_holes ? hole_size : 0,
04161d
                             x->sparse_mode == SPARSE_ALWAYS,
04161d
-- 
04161d
2.26.3
04161d
04161d
04161d
From be7466be92d779cfbece418d4de33191ae52ab4a Mon Sep 17 00:00:00 2001
04161d
From: Kamil Dudka <kdudka@redhat.com>
04161d
Date: Wed, 24 Mar 2021 16:06:53 +0100
04161d
Subject: [PATCH 05/12] import the copy-file-range module from gnulib
04161d
04161d
---
04161d
 aclocal.m4            |  1 +
04161d
 lib/config.hin        |  3 +++
04161d
 lib/copy-file-range.c | 33 +++++++++++++++++++++++++++++++++
04161d
 lib/gnulib.mk         | 10 ++++++++++
04161d
 m4/copy-file-range.m4 | 36 ++++++++++++++++++++++++++++++++++++
04161d
 m4/gnulib-comp.m4     |  8 ++++++++
04161d
 6 files changed, 91 insertions(+)
04161d
 create mode 100644 lib/copy-file-range.c
04161d
 create mode 100644 m4/copy-file-range.m4
04161d
04161d
diff --git a/aclocal.m4 b/aclocal.m4
04161d
index 713f7c5..09a7ea8 100644
04161d
--- a/aclocal.m4
04161d
+++ b/aclocal.m4
04161d
@@ -1165,6 +1165,7 @@ m4_include([m4/closedir.m4])
04161d
 m4_include([m4/codeset.m4])
04161d
 m4_include([m4/config-h.m4])
04161d
 m4_include([m4/configmake.m4])
04161d
+m4_include([m4/copy-file-range.m4])
04161d
 m4_include([m4/ctype.m4])
04161d
 m4_include([m4/cycle-check.m4])
04161d
 m4_include([m4/d-ino.m4])
04161d
diff --git a/lib/config.hin b/lib/config.hin
04161d
index 9769c39..bf9f9f8 100644
04161d
--- a/lib/config.hin
04161d
+++ b/lib/config.hin
04161d
@@ -370,6 +370,9 @@
04161d
 /* Define to 1 when the gnulib module connect should be tested. */
04161d
 #undef GNULIB_TEST_CONNECT
04161d
 
04161d
+/* Define to 1 when the gnulib module copy-file-range should be tested. */
04161d
+#undef GNULIB_TEST_COPY_FILE_RANGE
04161d
+
04161d
 /* Define to 1 when the gnulib module dirfd should be tested. */
04161d
 #undef GNULIB_TEST_DIRFD
04161d
 
04161d
diff --git a/lib/copy-file-range.c b/lib/copy-file-range.c
04161d
new file mode 100644
04161d
index 0000000..069f144
04161d
--- /dev/null
04161d
+++ b/lib/copy-file-range.c
04161d
@@ -0,0 +1,33 @@
04161d
+/* Stub for copy_file_range
04161d
+   Copyright 2019-2020 Free Software Foundation, Inc.
04161d
+
04161d
+   This program is free software: you can redistribute it and/or modify
04161d
+   it under the terms of the GNU General Public License as published by
04161d
+   the Free Software Foundation; either version 3 of the License, or
04161d
+   (at your option) any later version.
04161d
+
04161d
+   This program is distributed in the hope that it will be useful,
04161d
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
04161d
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
04161d
+   GNU General Public License for more details.
04161d
+
04161d
+   You should have received a copy of the GNU General Public License
04161d
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
04161d
+
04161d
+#include <config.h>
04161d
+
04161d
+#include <unistd.h>
04161d
+
04161d
+#include <errno.h>
04161d
+
04161d
+ssize_t
04161d
+copy_file_range (int infd, off_t *pinoff,
04161d
+                 int outfd, off_t *poutoff,
04161d
+                 size_t length, unsigned int flags)
04161d
+{
04161d
+  /* Always fail with ENOSYS rather than emulate copy_file_range with
04161d
+     read+write: programs that use copy_file_range must fall back on
04161d
+     read+write anyway, so there is little need to emulate it.  */
04161d
+  errno = ENOSYS;
04161d
+  return -1;
04161d
+}
04161d
diff --git a/lib/gnulib.mk b/lib/gnulib.mk
04161d
index b3633b8..86829f3 100644
04161d
--- a/lib/gnulib.mk
04161d
+++ b/lib/gnulib.mk
04161d
@@ -65,6 +65,7 @@
04161d
 #  closeout \
04161d
 #  config-h \
04161d
 #  configmake \
04161d
+#  copy-file-range \
04161d
 #  crypto/md5 \
04161d
 #  crypto/sha1 \
04161d
 #  crypto/sha256 \
04161d
@@ -800,6 +801,15 @@ CLEANFILES += lib/configmake.h lib/configmake.h-t
04161d
 
04161d
 ## end   gnulib module configmake
04161d
 
04161d
+## begin gnulib module copy-file-range
04161d
+
04161d
+
04161d
+EXTRA_DIST += lib/copy-file-range.c
04161d
+
04161d
+EXTRA_lib_libcoreutils_a_SOURCES += lib/copy-file-range.c
04161d
+
04161d
+## end   gnulib module copy-file-range
04161d
+
04161d
 ## begin gnulib module count-leading-zeros
04161d
 
04161d
 lib_libcoreutils_a_SOURCES += lib/count-leading-zeros.c
04161d
diff --git a/m4/copy-file-range.m4 b/m4/copy-file-range.m4
04161d
new file mode 100644
04161d
index 0000000..5c5a274
04161d
--- /dev/null
04161d
+++ b/m4/copy-file-range.m4
04161d
@@ -0,0 +1,36 @@
04161d
+# copy-file-range.m4
04161d
+dnl Copyright 2019-2020 Free Software Foundation, Inc.
04161d
+dnl This file is free software; the Free Software Foundation
04161d
+dnl gives unlimited permission to copy and/or distribute it,
04161d
+dnl with or without modifications, as long as this notice is preserved.
04161d
+
04161d
+AC_DEFUN([gl_FUNC_COPY_FILE_RANGE],
04161d
+[
04161d
+  AC_REQUIRE([gl_UNISTD_H_DEFAULTS])
04161d
+
04161d
+  dnl Persuade glibc <unistd.h> to declare copy_file_range.
04161d
+  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
04161d
+
04161d
+  dnl Use AC_LINK_IFELSE, rather than AC_CHECK_FUNCS or a variant,
04161d
+  dnl since we don't want AC_CHECK_FUNCS's checks for glibc stubs.
04161d
+  dnl Programs that use copy_file_range must fall back on read+write
04161d
+  dnl anyway, and there's little point to substituting the Gnulib stub
04161d
+  dnl for a glibc stub.
04161d
+  AC_CACHE_CHECK([for copy_file_range], [gl_cv_func_copy_file_range],
04161d
+    [AC_LINK_IFELSE(
04161d
+       [AC_LANG_PROGRAM(
04161d
+          [[#include <unistd.h>
04161d
+          ]],
04161d
+          [[ssize_t (*func) (int, off_t *, int, off_t *, size_t, unsigned)
04161d
+              = copy_file_range;
04161d
+            return func (0, 0, 0, 0, 0, 0) & 127;
04161d
+          ]])
04161d
+       ],
04161d
+       [gl_cv_func_copy_file_range=yes],
04161d
+       [gl_cv_func_copy_file_range=no])
04161d
+    ])
04161d
+
04161d
+  if test "$gl_cv_func_copy_file_range" != yes; then
04161d
+    HAVE_COPY_FILE_RANGE=0
04161d
+  fi
04161d
+])
04161d
diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4
04161d
index dead90e..953e7f0 100644
04161d
--- a/m4/gnulib-comp.m4
04161d
+++ b/m4/gnulib-comp.m4
04161d
@@ -129,6 +129,7 @@ AC_DEFUN([gl_EARLY],
04161d
   # Code from module configmake:
04161d
   # Code from module connect:
04161d
   # Code from module connect-tests:
04161d
+  # Code from module copy-file-range:
04161d
   # Code from module count-leading-zeros:
04161d
   # Code from module count-leading-zeros-tests:
04161d
   # Code from module crypto/af_alg:
04161d
@@ -977,6 +978,11 @@ AC_DEFUN([gl_INIT],
04161d
   gl_DIRENT_MODULE_INDICATOR([closedir])
04161d
   gl_CONFIG_H
04161d
   gl_CONFIGMAKE_PREP
04161d
+  gl_FUNC_COPY_FILE_RANGE
04161d
+  if test $HAVE_COPY_FILE_RANGE = 0; then
04161d
+    AC_LIBOBJ([copy-file-range])
04161d
+  fi
04161d
+  gl_UNISTD_MODULE_INDICATOR([copy-file-range])
04161d
   gl_AF_ALG
04161d
   AC_DEFINE([GL_COMPILE_CRYPTO_STREAM], 1, [Compile Gnulib crypto stream ops.])
04161d
   AC_REQUIRE([AC_C_RESTRICT])
04161d
@@ -2746,6 +2752,7 @@ AC_DEFUN([gl_FILE_LIST], [
04161d
   lib/closeout.c
04161d
   lib/closeout.h
04161d
   lib/copy-acl.c
04161d
+  lib/copy-file-range.c
04161d
   lib/count-leading-zeros.c
04161d
   lib/count-leading-zeros.h
04161d
   lib/creat-safer.c
04161d
@@ -3438,6 +3445,7 @@ AC_DEFUN([gl_FILE_LIST], [
04161d
   m4/codeset.m4
04161d
   m4/config-h.m4
04161d
   m4/configmake.m4
04161d
+  m4/copy-file-range.m4
04161d
   m4/ctype.m4
04161d
   m4/cycle-check.m4
04161d
   m4/d-ino.m4
04161d
-- 
04161d
2.26.3
04161d
04161d
04161d
From 48370c95bcf7c25ce021fbd2145062d3d29ae6d5 Mon Sep 17 00:00:00 2001
04161d
From: Paul Eggert <eggert@cs.ucla.edu>
04161d
Date: Thu, 25 Jun 2020 17:34:23 -0700
04161d
Subject: [PATCH 06/12] cp: use copy_file_range if available
04161d
04161d
* NEWS: Mention this.
04161d
* bootstrap.conf (gnulib_modules): Add copy-file-range.
04161d
* src/copy.c (sparse_copy): Try copy_file_range if not
04161d
looking for holes.
04161d
04161d
Upstream-commit: 4b04a0c3b792d27909670a81d21f2a3b3e0ea563
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 bootstrap.conf |  1 +
04161d
 src/copy.c     | 40 ++++++++++++++++++++++++++++++++++++++++
04161d
 2 files changed, 41 insertions(+)
04161d
04161d
diff --git a/bootstrap.conf b/bootstrap.conf
04161d
index 2a342c1..7d53e28 100644
04161d
--- a/bootstrap.conf
04161d
+++ b/bootstrap.conf
04161d
@@ -54,6 +54,7 @@ gnulib_modules="
04161d
   closeout
04161d
   config-h
04161d
   configmake
04161d
+  copy-file-range
04161d
   crypto/md5
04161d
   crypto/sha1
04161d
   crypto/sha256
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index d88f8cf..4050f69 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -265,6 +265,46 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
 {
04161d
   *last_write_made_hole = false;
04161d
   *total_n_read = 0;
04161d
+
04161d
+  /* If not looking for holes, use copy_file_range if available.  */
04161d
+  if (!hole_size)
04161d
+    while (max_n_read)
04161d
+      {
04161d
+        /* Copy at most COPY_MAX bytes at a time; this is min
04161d
+           (PTRDIFF_MAX, SIZE_MAX) truncated to a value that is
04161d
+           surely aligned well.  */
04161d
+        ssize_t ssize_max = TYPE_MAXIMUM (ssize_t);
04161d
+        ptrdiff_t copy_max = MIN (ssize_max, SIZE_MAX) >> 30 << 30;
04161d
+        ssize_t n_copied = copy_file_range (src_fd, NULL, dest_fd, NULL,
04161d
+                                            MIN (max_n_read, copy_max), 0);
04161d
+        if (n_copied == 0)
04161d
+          {
04161d
+            /* copy_file_range incorrectly returns 0 when reading from
04161d
+               the proc file system on the Linux kernel through at
04161d
+               least 5.6.19 (2020), so fall back on 'read' if the
04161d
+               input file seems empty.  */
04161d
+            if (*total_n_read == 0)
04161d
+              break;
04161d
+            return true;
04161d
+          }
04161d
+        if (n_copied < 0)
04161d
+          {
04161d
+            if (errno == ENOSYS || errno == EINVAL
04161d
+                || errno == EBADF || errno == EXDEV)
04161d
+              break;
04161d
+            if (errno == EINTR)
04161d
+              n_copied = 0;
04161d
+            else
04161d
+              {
04161d
+                error (0, errno, _("error copying %s to %s"),
04161d
+                       quoteaf_n (0, src_name), quoteaf_n (1, dst_name));
04161d
+                return false;
04161d
+              }
04161d
+          }
04161d
+        max_n_read -= n_copied;
04161d
+        *total_n_read += n_copied;
04161d
+      }
04161d
+
04161d
   bool make_hole = false;
04161d
   off_t psize = 0;
04161d
 
04161d
-- 
04161d
2.26.3
04161d
04161d
04161d
From 23ea1ba463d33e268f35847059e637a5935e4581 Mon Sep 17 00:00:00 2001
04161d
From: Zorro Lang <zlang@redhat.com>
04161d
Date: Mon, 26 Apr 2021 17:25:18 +0200
04161d
Subject: [PATCH 07/12] copy: do not refuse to copy a swap file
04161d
04161d
* src/copy.c (sparse_copy): Fallback to read() if copy_file_range()
04161d
fails with ETXTBSY.  Otherwise it would be impossible to copy files
04161d
that are being used as swap.  This used to work before introducing
04161d
the support for copy_file_range() in coreutils.  (Bug#48036)
04161d
04161d
Upstream-commit: 785478013b416cde50794be35475c0c4fdbb48b4
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 2 +-
04161d
 1 file changed, 1 insertion(+), 1 deletion(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index 4050f69..1798bb7 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -290,7 +290,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
         if (n_copied < 0)
04161d
           {
04161d
             if (errno == ENOSYS || errno == EINVAL
04161d
-                || errno == EBADF || errno == EXDEV)
04161d
+                || errno == EBADF || errno == EXDEV || errno == ETXTBSY)
04161d
               break;
04161d
             if (errno == EINTR)
04161d
               n_copied = 0;
04161d
-- 
04161d
2.31.1
04161d
04161d
04161d
From cd7c7a6b5ad89ef0a61722552d532901fc1bed05 Mon Sep 17 00:00:00 2001
04161d
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
04161d
Date: Sun, 2 May 2021 21:27:17 +0100
04161d
Subject: [PATCH 08/12] copy: ensure we enforce --reflink=never
04161d
04161d
* src/copy.c (sparse_copy): Don't use copy_file_range()
04161d
with --reflink=never as copy_file_range() may implicitly
04161d
use acceleration techniques like reflinking.
04161d
(extent_copy): Pass through whether we allow reflinking.
04161d
(lseek_copy): Likewise.
04161d
Fixes https://bugs.gnu.org/48164
04161d
04161d
Upstream-commit: ea9af99234031ab8d5169c8a669434e2a6b4f864
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 20 +++++++++++++-------
04161d
 1 file changed, 13 insertions(+), 7 deletions(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index 4050f69..0337538 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -258,7 +258,7 @@ create_hole (int fd, char const *name, bool punch_holes, off_t size)
04161d
    bytes read.  */
04161d
 static bool
04161d
 sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
-             size_t hole_size, bool punch_holes,
04161d
+             size_t hole_size, bool punch_holes, bool allow_reflink,
04161d
              char const *src_name, char const *dst_name,
04161d
              uintmax_t max_n_read, off_t *total_n_read,
04161d
              bool *last_write_made_hole)
04161d
@@ -266,8 +266,9 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
   *last_write_made_hole = false;
04161d
   *total_n_read = 0;
04161d
 
04161d
-  /* If not looking for holes, use copy_file_range if available.  */
04161d
-  if (!hole_size)
04161d
+  /* If not looking for holes, use copy_file_range if available,
04161d
+     but don't use if reflink disallowed as that may be implicit.  */
04161d
+  if ((! hole_size) && allow_reflink)
04161d
     while (max_n_read)
04161d
       {
04161d
         /* Copy at most COPY_MAX bytes at a time; this is min
04161d
@@ -466,6 +467,7 @@ static bool
04161d
 extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
              size_t hole_size, off_t src_total_size,
04161d
              enum Sparse_type sparse_mode,
04161d
+             bool allow_reflink,
04161d
              char const *src_name, char const *dst_name,
04161d
              struct extent_scan *scan)
04161d
 {
04161d
@@ -579,8 +581,8 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
 
04161d
               if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size,
04161d
                                   sparse_mode == SPARSE_ALWAYS ? hole_size: 0,
04161d
-                                  true, src_name, dst_name, ext_len, &n_read,
04161d
-                                  &read_hole))
04161d
+                                  true, allow_reflink, src_name, dst_name,
04161d
+                                  ext_len, &n_read, &read_hole))
04161d
                 goto fail;
04161d
 
04161d
               dest_pos = ext_start + n_read;
04161d
@@ -655,6 +657,7 @@ static bool
04161d
 lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
             size_t hole_size, off_t ext_start, off_t src_total_size,
04161d
             enum Sparse_type sparse_mode,
04161d
+            bool allow_reflink,
04161d
             char const *src_name, char const *dst_name)
04161d
 {
04161d
   off_t last_ext_start = 0;
04161d
@@ -729,8 +732,8 @@ lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
       bool read_hole;
04161d
       if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size,
04161d
                           sparse_mode == SPARSE_NEVER ? 0 : hole_size,
04161d
-                          true, src_name, dst_name, ext_len, &n_read,
04161d
-                          &read_hole))
04161d
+                          true, allow_reflink, src_name, dst_name,
04161d
+                          ext_len, &n_read, &read_hole))
04161d
         return false;
04161d
 
04161d
       dest_pos = ext_start + n_read;
04161d
@@ -1527,17 +1530,20 @@ copy_reg (char const *src_name, char const *dst_name,
04161d
              ? extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
04161d
                             src_open_sb.st_size,
04161d
                             make_holes ? x->sparse_mode : SPARSE_NEVER,
04161d
+                            x->reflink_mode != REFLINK_NEVER,
04161d
                             src_name, dst_name, &scan_inference.extent_scan)
04161d
 #ifdef SEEK_HOLE
04161d
              : scantype == LSEEK_SCANTYPE
04161d
              ? lseek_copy (source_desc, dest_desc, buf, buf_size, hole_size,
04161d
                            scan_inference.ext_start, src_open_sb.st_size,
04161d
                            make_holes ? x->sparse_mode : SPARSE_NEVER,
04161d
+                           x->reflink_mode != REFLINK_NEVER,
04161d
                            src_name, dst_name)
04161d
 #endif
04161d
              : sparse_copy (source_desc, dest_desc, buf, buf_size,
04161d
                             make_holes ? hole_size : 0,
04161d
                             x->sparse_mode == SPARSE_ALWAYS,
04161d
+                            x->reflink_mode != REFLINK_NEVER,
04161d
                             src_name, dst_name, UINTMAX_MAX, &n_read,
04161d
                             &wrote_hole_at_eof)))
04161d
         {
04161d
-- 
04161d
2.30.2
04161d
04161d
04161d
From 7978f1de88dcdb17b67db9268038930e9c71154f Mon Sep 17 00:00:00 2001
04161d
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
04161d
Date: Sat, 8 May 2021 17:18:54 +0100
04161d
Subject: [PATCH 09/12] copy: handle ENOTSUP from copy_file_range()
04161d
04161d
* src/copy.c (sparse_copy): Ensure we fall back to
04161d
a standard copy if copy_file_range() returns ENOTSUP.
04161d
This generally is best checked when checking ENOSYS,
04161d
but it also seems to be a practical concern on Centos 7,
04161d
as a quick search gave https://bugzilla.redhat.com/1840284
04161d
04161d
Upstream-commit: 8ec0d1799e19a079b8a661c6bb69f6c58e52f1aa
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 5 +++--
04161d
 1 file changed, 3 insertions(+), 2 deletions(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index 9977193..e3977cd 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -290,8 +290,9 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
           }
04161d
         if (n_copied < 0)
04161d
           {
04161d
-            if (errno == ENOSYS || errno == EINVAL
04161d
-                || errno == EBADF || errno == EXDEV || errno == ETXTBSY)
04161d
+            if (errno == ENOSYS || is_ENOTSUP (errno)
04161d
+                || errno == EINVAL || errno == EBADF
04161d
+                || errno == EXDEV || errno == ETXTBSY)
04161d
               break;
04161d
             if (errno == EINTR)
04161d
               n_copied = 0;
04161d
-- 
04161d
2.31.1
04161d
04161d
04161d
From d8d3edbfc13ff13c185f23436209b788f906aa41 Mon Sep 17 00:00:00 2001
04161d
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
04161d
Date: Sun, 9 May 2021 21:55:22 +0100
04161d
Subject: [PATCH 10/12] copy: handle EOPNOTSUPP from SEEK_DATA
04161d
04161d
* src/copy.c (infer_scantype): Ensure we don't error out
04161d
if SEEK_DATA returns EOPNOTSUPP, on systems where this value
04161d
is distinct from ENOTSUP.  Generally both of these should be checked.
04161d
04161d
Upstream-commit: 017877bd088284d515753d78b81ca6e6a88c1350
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 2 +-
04161d
 1 file changed, 1 insertion(+), 1 deletion(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index e3977cd..de8030d 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -1246,7 +1246,7 @@ infer_scantype (int fd, struct stat const *sb,
04161d
   scan_inference->ext_start = lseek (fd, 0, SEEK_DATA);
04161d
   if (0 <= scan_inference->ext_start)
04161d
     return LSEEK_SCANTYPE;
04161d
-  else if (errno != EINVAL && errno != ENOTSUP)
04161d
+  else if (errno != EINVAL && !is_ENOTSUP (errno))
04161d
     return errno == ENXIO ? LSEEK_SCANTYPE : ERROR_SCANTYPE;
04161d
 #endif
04161d
 
04161d
-- 
04161d
2.31.1
04161d
04161d
04161d
From 1daf8c0fc9a5766c22b7ea84bea8c88c86a0c495 Mon Sep 17 00:00:00 2001
04161d
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
04161d
Date: Sat, 8 May 2021 19:23:20 +0100
04161d
Subject: [PATCH 11/12] copy: handle system security config issues with
04161d
 copy_file_range()
04161d
04161d
* src/copy.c (sparse_copy): Upon EPERM from copy_file_range(),
04161d
fall back to a standard copy, which will give a more accurate
04161d
error as to whether the issue is with the source or destination.
04161d
Also this will avoid the issue where seccomp or apparmor are
04161d
not configured to handle copy_file_range(), in which case
04161d
the fall back standard copy would succeed without issue.
04161d
This specific issue with seccomp was noticed for example in:
04161d
https://github.com/golang/go/issues/40900
04161d
04161d
Upstream-commit: 2e66e1732fced7af20fa76c60e636d39a1767d48
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 9 +++++++++
04161d
 1 file changed, 9 insertions(+)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index de8030d..62eec7b 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -294,6 +294,15 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
                 || errno == EINVAL || errno == EBADF
04161d
                 || errno == EXDEV || errno == ETXTBSY)
04161d
               break;
04161d
+
04161d
+            /* copy_file_range might not be enabled in seccomp filters,
04161d
+               so retry with a standard copy.  EPERM can also occur
04161d
+               for immutable files, but that would only be in the edge case
04161d
+               where the file is made immutable after creating/truncating,
04161d
+               in which case the (more accurate) error is still shown.  */
04161d
+            if (errno == EPERM && *total_n_read == 0)
04161d
+              break;
04161d
+
04161d
             if (errno == EINTR)
04161d
               n_copied = 0;
04161d
             else
04161d
-- 
04161d
2.31.1
04161d
04161d
04161d
From 42c9e598f61ba6bc27a615e39e40023a676a523b Mon Sep 17 00:00:00 2001
04161d
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
04161d
Date: Wed, 12 May 2021 23:47:38 +0100
04161d
Subject: [PATCH 12/12] copy: disallow copy_file_range() on Linux kernels
04161d
 before 5.3
04161d
04161d
copy_file_range() before Linux kernel release 5.3 had many issues,
04161d
as described at https://lwn.net/Articles/789527/, which was
04161d
referenced from https://lwn.net/Articles/846403/; a more general
04161d
article discussing the generality of copy_file_range().
04161d
Linux kernel 5.3 was released in September 2019, which is new enough
04161d
that we need to actively avoid older kernels.
04161d
04161d
* src/copy.c (functional_copy_file_range): A new function
04161d
that returns false for Linux kernels before version 5.3.
04161d
(sparse_copy): Call this new function to gate use of
04161d
copy_file_range().
04161d
04161d
Upstream-commit: ba5e6885d2c255648cddb87b4e795659c1990374
04161d
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
04161d
---
04161d
 src/copy.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
04161d
 1 file changed, 45 insertions(+), 2 deletions(-)
04161d
04161d
diff --git a/src/copy.c b/src/copy.c
04161d
index 62eec7b..2e1699b 100644
04161d
--- a/src/copy.c
04161d
+++ b/src/copy.c
04161d
@@ -21,6 +21,7 @@
04161d
 #include <assert.h>
04161d
 #include <sys/ioctl.h>
04161d
 #include <sys/types.h>
04161d
+#include <sys/utsname.h>
04161d
 #include <selinux/selinux.h>
04161d
 
04161d
 #if HAVE_HURD_H
04161d
@@ -64,6 +65,7 @@
04161d
 #include "write-any-file.h"
04161d
 #include "areadlink.h"
04161d
 #include "yesno.h"
04161d
+#include "xstrtol.h"
04161d
 #include "selinux.h"
04161d
 
04161d
 #if USE_XATTR
04161d
@@ -244,6 +246,47 @@ create_hole (int fd, char const *name, bool punch_holes, off_t size)
04161d
   return true;
04161d
 }
04161d
 
04161d
+/* copy_file_range() before Linux kernel release 5.3 had many issues,
04161d
+   as described at https://lwn.net/Articles/789527/, so on Linux return
04161d
+   false if uname() fails or its release does not parse as 5.3 or later;
04161d
+   the answer is cached after the first call.  Elsewhere return true.
04161d
+   Remove this when kernels from before Sep 2019 no longer matter.  */
04161d
+
04161d
+static bool
04161d
+functional_copy_file_range (void)
04161d
+{
04161d
+#ifdef __linux__
04161d
+  static int version_allowed = -1;
04161d
+
04161d
+  if (version_allowed == -1)
04161d
+    version_allowed = 0;  /* First call: disallow unless proven OK.  */
04161d
+  else
04161d
+    return version_allowed;
04161d
+
04161d
+  struct utsname name;
04161d
+  if (uname (&name) == -1)
04161d
+    return version_allowed;
04161d
+
04161d
+  char *p = name.release;
04161d
+  uintmax_t ver[2] = {0, 0};  /* Major and minor release numbers.  */
04161d
+  size_t iver = 0;
04161d
+
04161d
+  do
04161d
+    {
04161d
+      strtol_error err = xstrtoumax (p, &p, 10, &ver[iver], NULL);
04161d
+      if (err != LONGINT_OK || *p++ != '.')
04161d
+        break;
04161d
+    }
04161d
+  while (++iver < ARRAY_CARDINALITY (ver));
04161d
+
04161d
+  version_allowed = (ver[0] > 5 || (ver[0] == 5 && ver[1] >= 3));
04161d
+
04161d
+  return version_allowed;
04161d
+#else
04161d
+  return true;
04161d
+#endif
04161d
+
04161d
+}
04161d
 
04161d
 /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME,
04161d
    honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer
04161d
@@ -266,9 +309,9 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
04161d
   *last_write_made_hole = false;
04161d
   *total_n_read = 0;
04161d
 
04161d
-  /* If not looking for holes, use copy_file_range if available,
04161d
+  /* If not looking for holes, use copy_file_range if functional,
04161d
      but don't use if reflink disallowed as that may be implicit.  */
04161d
-  if ((! hole_size) && allow_reflink)
04161d
+  if ((! hole_size) && allow_reflink && functional_copy_file_range ())
04161d
     while (max_n_read)
04161d
       {
04161d
         /* Copy at most COPY_MAX bytes at a time; this is min
04161d
-- 
04161d
2.31.1
04161d