Blob Blame History Raw
From 0be4847cdec9effd6128da03ea42a4953e5a6343 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 17 Aug 2021 22:03:11 +0100
Subject: [PATCH] cow: Make the block size configurable

Commit c1905b0a28 ("cache, cow: Use a 64K block size by default")
changed the nbdkit-cow-filter block size to 64K, but it was still a
fixed size.  In contrast the cache filter allows the block size to be
adjusted.

Allow the block size in this filter to be adjusted up or down with a
new cow-block-size=N parameter.

When using the VDDK plugin, adjusting this setting can make a
difference.  The following timings come from a modified virt-v2v which
sets cow-block-size and was used to convert from a VMware server to
-o null (this is also using cow-on-read=true):

  cow-block-size=64K:   18m18
  cow-block-size=256K:  14m13
  cow-block-size=1M:    14m19
  cow-block-size=4M:    37m33

As you can see it's not obvious how to choose a good block size, but
at least by allowing adjustment we can tune things.

(cherry picked from commit 7182c47d04d2b68005fceadefc0c14bfaa61a533)
---
 filters/cow/blk.c                 |  35 +++----
 filters/cow/blk.h                 |   5 -
 filters/cow/cow.c                 | 150 +++++++++++++++++-------------
 filters/cow/cow.h                 |  39 ++++++++
 filters/cow/nbdkit-cow-filter.pod |   5 +
 tests/Makefile.am                 |   2 +
 tests/test-cow-block-size.sh      |  72 ++++++++++++++
 7 files changed, 221 insertions(+), 87 deletions(-)
 create mode 100644 filters/cow/cow.h
 create mode 100755 tests/test-cow-block-size.sh

diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index c22d5886..f9341dc1 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -99,6 +99,7 @@
 #include "pwrite.h"
 #include "utils.h"
 
+#include "cow.h"
 #include "blk.h"
 
 /* The temporary overlay. */
@@ -137,7 +138,7 @@ blk_init (void)
   size_t len;
   char *template;
 
-  bitmap_init (&bm, BLKSIZE, 2 /* bits per block */);
+  bitmap_init (&bm, blksize, 2 /* bits per block */);
 
   tmpdir = getenv ("TMPDIR");
   if (!tmpdir)
@@ -199,7 +200,7 @@ blk_set_size (uint64_t new_size)
   if (bitmap_resize (&bm, size) == -1)
     return -1;
 
-  if (ftruncate (fd, ROUND_UP (size, BLKSIZE)) == -1) {
+  if (ftruncate (fd, ROUND_UP (size, blksize)) == -1) {
     nbdkit_error ("ftruncate: %m");
     return -1;
   }
@@ -228,7 +229,7 @@ blk_read_multiple (nbdkit_next *next,
                    uint64_t blknum, uint64_t nrblocks,
                    uint8_t *block, bool cow_on_read, int *err)
 {
-  off_t offset = blknum * BLKSIZE;
+  off_t offset = blknum * blksize;
   enum bm_entry state;
   uint64_t b, runblocks;
 
@@ -262,8 +263,8 @@ blk_read_multiple (nbdkit_next *next,
   if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
     unsigned n, tail = 0;
 
-    assert (BLKSIZE * runblocks <= UINT_MAX);
-    n = BLKSIZE * runblocks;
+    assert (blksize * runblocks <= UINT_MAX);
+    n = blksize * runblocks;
 
     if (offset + n > size) {
       tail = offset + n - size;
@@ -288,7 +289,7 @@ blk_read_multiple (nbdkit_next *next,
                       "at offset %" PRIu64 " into the cache",
                       runblocks, offset);
 
-      if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
+      if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
         *err = errno;
         nbdkit_error ("pwrite: %m");
         return -1;
@@ -298,14 +299,14 @@ blk_read_multiple (nbdkit_next *next,
     }
   }
   else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
-    if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
+    if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
       *err = errno;
       nbdkit_error ("pread: %m");
       return -1;
     }
   }
   else /* state == BLOCK_TRIMMED */ {
-    memset (block, 0, BLKSIZE * runblocks);
+    memset (block, 0, blksize * runblocks);
   }
 
   /* If all done, return. */
@@ -316,7 +317,7 @@ blk_read_multiple (nbdkit_next *next,
   return blk_read_multiple (next,
                             blknum + runblocks,
                             nrblocks - runblocks,
-                            block + BLKSIZE * runblocks,
+                            block + blksize * runblocks,
                             cow_on_read, err);
 }
 
@@ -333,9 +334,9 @@ blk_cache (nbdkit_next *next,
 {
   /* XXX Could make this lock more fine-grained with some thought. */
   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
-  off_t offset = blknum * BLKSIZE;
+  off_t offset = blknum * blksize;
   enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
-  unsigned n = BLKSIZE, tail = 0;
+  unsigned n = blksize, tail = 0;
 
   if (offset + n > size) {
     tail = offset + n - size;
@@ -348,7 +349,7 @@ blk_cache (nbdkit_next *next,
 
   if (state == BLOCK_ALLOCATED) {
 #if HAVE_POSIX_FADVISE
-    int r = posix_fadvise (fd, offset, BLKSIZE, POSIX_FADV_WILLNEED);
+    int r = posix_fadvise (fd, offset, blksize, POSIX_FADV_WILLNEED);
     if (r) {
       errno = r;
       nbdkit_error ("posix_fadvise: %m");
@@ -373,7 +374,7 @@ blk_cache (nbdkit_next *next,
   memset (block + n, 0, tail);
 
   if (mode == BLK_CACHE_COW) {
-    if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
+    if (full_pwrite (fd, block, blksize, offset) == -1) {
       *err = errno;
       nbdkit_error ("pwrite: %m");
       return -1;
@@ -386,13 +387,13 @@ blk_cache (nbdkit_next *next,
 int
 blk_write (uint64_t blknum, const uint8_t *block, int *err)
 {
-  off_t offset = blknum * BLKSIZE;
+  off_t offset = blknum * blksize;
 
   if (cow_debug_verbose)
     nbdkit_debug ("cow: blk_write block %" PRIu64 " (offset %" PRIu64 ")",
                   blknum, (uint64_t) offset);
 
-  if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
+  if (full_pwrite (fd, block, blksize, offset) == -1) {
     *err = errno;
     nbdkit_error ("pwrite: %m");
     return -1;
@@ -407,14 +408,14 @@ blk_write (uint64_t blknum, const uint8_t *block, int *err)
 int
 blk_trim (uint64_t blknum, int *err)
 {
-  off_t offset = blknum * BLKSIZE;
+  off_t offset = blknum * blksize;
 
   if (cow_debug_verbose)
     nbdkit_debug ("cow: blk_trim block %" PRIu64 " (offset %" PRIu64 ")",
                   blknum, (uint64_t) offset);
 
   /* XXX As an optimization we could punch a whole in the overlay
-   * here.  However it's not trivial since BLKSIZE is unrelated to the
+   * here.  However it's not trivial since blksize is unrelated to the
    * overlay filesystem block size.
    */
   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index b7e6f092..62fb5416 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -33,11 +33,6 @@
 #ifndef NBDKIT_BLK_H
 #define NBDKIT_BLK_H
 
-/* Size of a block in the overlay.  A 4K block size means that we need
- * 64 MB of memory to store the bitmap for a 1 TB underlying image.
- */
-#define BLKSIZE 65536
-
 /* Initialize the overlay and bitmap. */
 extern int blk_init (void);
 
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index 6efb39f2..1c62c857 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -40,6 +40,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <errno.h>
+#include <limits.h>
 
 #include <pthread.h>
 
@@ -47,9 +48,11 @@
 
 #include "cleanup.h"
 #include "isaligned.h"
+#include "ispowerof2.h"
 #include "minmax.h"
 #include "rounding.h"
 
+#include "cow.h"
 #include "blk.h"
 
 /* Read-modify-write requests are serialized through this global lock.
@@ -58,6 +61,8 @@
  */
 static pthread_mutex_t rmw_lock = PTHREAD_MUTEX_INITIALIZER;
 
+unsigned blksize = 65536;       /* block size */
+
 static bool cow_on_cache;
 
 /* Cache on read ("cow-on-read") mode. */
@@ -69,13 +74,6 @@ extern enum cor_mode {
 enum cor_mode cor_mode = COR_OFF;
 const char *cor_path;
 
-static void
-cow_load (void)
-{
-  if (blk_init () == -1)
-    exit (EXIT_FAILURE);
-}
-
 static void
 cow_unload (void)
 {
@@ -86,7 +84,19 @@ static int
 cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
             const char *key, const char *value)
 {
-  if (strcmp (key, "cow-on-cache") == 0) {
+  if (strcmp (key, "cow-block-size") == 0) {
+    int64_t r = nbdkit_parse_size (value);
+    if (r == -1)
+      return -1;
+    if (r < 4096 || r > UINT_MAX || !is_power_of_2 (r)) { /* 4096 is OK */
+      nbdkit_error ("cow-block-size is out of range (4096..2G) "
+                    "or not a power of 2");
+      return -1;
+    }
+    blksize = r;                /* fits in unsigned: range checked above */
+    return 0;
+  }
+  else if (strcmp (key, "cow-on-cache") == 0) {
     int r;
 
     r = nbdkit_parse_bool (value);
@@ -114,9 +124,19 @@ cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
 }
 
 #define cow_config_help \
+  "cow-block-size=<N>       Set COW block size.\n" \
   "cow-on-cache=<BOOL>      Copy cache (prefetch) requests to the overlay.\n" \
   "cow-on-read=<BOOL>|/PATH Copy read requests to the overlay."
 
+/* Create the overlay here rather than in .load: blk_init sizes the
+ * bitmap from blksize, which the cow-block-size parameter can change.
+ */
+static int
+cow_get_ready (int thread_model)
+{
+  return blk_init () == -1 ? -1 : 0;
+}
+
 /* Decide if cow-on-read is currently on or off. */
 bool
 cow_on_read (void)
@@ -249,8 +269,8 @@ cow_pread (nbdkit_next *next,
   uint64_t blknum, blkoffs, nrblocks;
   int r;
 
-  if (!IS_ALIGNED (count | offset, BLKSIZE)) {
-    block = malloc (BLKSIZE);
+  if (!IS_ALIGNED (count | offset, blksize)) {
+    block = malloc (blksize);
     if (block == NULL) {
       *err = errno;
       nbdkit_error ("malloc: %m");
@@ -258,12 +278,12 @@ cow_pread (nbdkit_next *next,
     }
   }
 
-  blknum = offset / BLKSIZE;  /* block number */
-  blkoffs = offset % BLKSIZE; /* offset within the block */
+  blknum = offset / blksize;  /* block number */
+  blkoffs = offset % blksize; /* offset within the block */
 
   /* Unaligned head */
   if (blkoffs) {
-    uint64_t n = MIN (BLKSIZE - blkoffs, count);
+    uint64_t n = MIN (blksize - blkoffs, count);
 
     assert (block);
     r = blk_read (next, blknum, block, cow_on_read (), err);
@@ -279,15 +299,15 @@ cow_pread (nbdkit_next *next,
   }
 
   /* Aligned body */
-  nrblocks = count / BLKSIZE;
+  nrblocks = count / blksize;
   if (nrblocks > 0) {
     r = blk_read_multiple (next, blknum, nrblocks, buf, cow_on_read (), err);
     if (r == -1)
       return -1;
 
-    buf += nrblocks * BLKSIZE;
-    count -= nrblocks * BLKSIZE;
-    offset += nrblocks * BLKSIZE;
+    buf += nrblocks * blksize;
+    count -= nrblocks * blksize;
+    offset += nrblocks * blksize;
     blknum += nrblocks;
   }
 
@@ -314,8 +334,8 @@ cow_pwrite (nbdkit_next *next,
   uint64_t blknum, blkoffs;
   int r;
 
-  if (!IS_ALIGNED (count | offset, BLKSIZE)) {
-    block = malloc (BLKSIZE);
+  if (!IS_ALIGNED (count | offset, blksize)) {
+    block = malloc (blksize);
     if (block == NULL) {
       *err = errno;
       nbdkit_error ("malloc: %m");
@@ -323,12 +343,12 @@ cow_pwrite (nbdkit_next *next,
     }
   }
 
-  blknum = offset / BLKSIZE;  /* block number */
-  blkoffs = offset % BLKSIZE; /* offset within the block */
+  blknum = offset / blksize;  /* block number */
+  blkoffs = offset % blksize; /* offset within the block */
 
   /* Unaligned head */
   if (blkoffs) {
-    uint64_t n = MIN (BLKSIZE - blkoffs, count);
+    uint64_t n = MIN (blksize - blkoffs, count);
 
     /* Do a read-modify-write operation on the current block.
      * Hold the rmw_lock over the whole operation.
@@ -350,14 +370,14 @@ cow_pwrite (nbdkit_next *next,
   }
 
   /* Aligned body */
-  while (count >= BLKSIZE) {
+  while (count >= blksize) {
     r = blk_write (blknum, buf, err);
     if (r == -1)
       return -1;
 
-    buf += BLKSIZE;
-    count -= BLKSIZE;
-    offset += BLKSIZE;
+    buf += blksize;
+    count -= blksize;
+    offset += blksize;
     blknum++;
   }
 
@@ -397,19 +417,19 @@ cow_zero (nbdkit_next *next,
     return -1;
   }
 
-  block = malloc (BLKSIZE);
+  block = malloc (blksize);
   if (block == NULL) {
     *err = errno;
     nbdkit_error ("malloc: %m");
     return -1;
   }
 
-  blknum = offset / BLKSIZE;  /* block number */
-  blkoffs = offset % BLKSIZE; /* offset within the block */
+  blknum = offset / blksize;  /* block number */
+  blkoffs = offset % blksize; /* offset within the block */
 
   /* Unaligned head */
   if (blkoffs) {
-    uint64_t n = MIN (BLKSIZE - blkoffs, count);
+    uint64_t n = MIN (blksize - blkoffs, count);
 
     /* Do a read-modify-write operation on the current block.
      * Hold the rmw_lock over the whole operation.
@@ -429,9 +449,9 @@ cow_zero (nbdkit_next *next,
   }
 
   /* Aligned body */
-  if (count >= BLKSIZE)
-    memset (block, 0, BLKSIZE);
-  while (count >= BLKSIZE) {
+  if (count >= blksize)
+    memset (block, 0, blksize);
+  while (count >= blksize) {
     /* XXX There is the possibility of optimizing this: since this loop is
      * writing a whole, aligned block, we should use FALLOC_FL_ZERO_RANGE.
      */
@@ -439,8 +459,8 @@ cow_zero (nbdkit_next *next,
     if (r == -1)
       return -1;
 
-    count -= BLKSIZE;
-    offset += BLKSIZE;
+    count -= blksize;
+    offset += blksize;
     blknum++;
   }
 
@@ -471,8 +491,8 @@ cow_trim (nbdkit_next *next,
   uint64_t blknum, blkoffs;
   int r;
 
-  if (!IS_ALIGNED (count | offset, BLKSIZE)) {
-    block = malloc (BLKSIZE);
+  if (!IS_ALIGNED (count | offset, blksize)) {
+    block = malloc (blksize);
     if (block == NULL) {
       *err = errno;
       nbdkit_error ("malloc: %m");
@@ -480,12 +500,12 @@ cow_trim (nbdkit_next *next,
     }
   }
 
-  blknum = offset / BLKSIZE;  /* block number */
-  blkoffs = offset % BLKSIZE; /* offset within the block */
+  blknum = offset / blksize;  /* block number */
+  blkoffs = offset % blksize; /* offset within the block */
 
   /* Unaligned head */
   if (blkoffs) {
-    uint64_t n = MIN (BLKSIZE - blkoffs, count);
+    uint64_t n = MIN (blksize - blkoffs, count);
 
     /* Do a read-modify-write operation on the current block.
      * Hold the lock over the whole operation.
@@ -505,13 +525,13 @@ cow_trim (nbdkit_next *next,
   }
 
   /* Aligned body */
-  while (count >= BLKSIZE) {
+  while (count >= blksize) {
     r = blk_trim (blknum, err);
     if (r == -1)
       return -1;
 
-    count -= BLKSIZE;
-    offset += BLKSIZE;
+    count -= blksize;
+    offset += blksize;
     blknum++;
   }
 
@@ -568,22 +588,22 @@ cow_cache (nbdkit_next *next,
     mode = BLK_CACHE_COW;
 
   assert (!flags);
-  block = malloc (BLKSIZE);
+  block = malloc (blksize);
   if (block == NULL) {
     *err = errno;
     nbdkit_error ("malloc: %m");
     return -1;
   }
 
-  blknum = offset / BLKSIZE;  /* block number */
-  blkoffs = offset % BLKSIZE; /* offset within the block */
+  blknum = offset / blksize;  /* block number */
+  blkoffs = offset % blksize; /* offset within the block */
 
   /* Unaligned head */
   remaining += blkoffs;
   offset -= blkoffs;
 
   /* Unaligned tail */
-  remaining = ROUND_UP (remaining, BLKSIZE);
+  remaining = ROUND_UP (remaining, blksize);
 
   /* Aligned body */
   while (remaining) {
@@ -591,8 +611,8 @@ cow_cache (nbdkit_next *next,
     if (r == -1)
       return -1;
 
-    remaining -= BLKSIZE;
-    offset += BLKSIZE;
+    remaining -= blksize;
+    offset += blksize;
     blknum++;
   }
 
@@ -616,13 +636,13 @@ cow_extents (nbdkit_next *next,
    * value so rounding up is safe here.
    */
   end = offset + count;
-  offset = ROUND_DOWN (offset, BLKSIZE);
-  end = ROUND_UP (end, BLKSIZE);
+  offset = ROUND_DOWN (offset, blksize);
+  end = ROUND_UP (end, blksize);
   count = end - offset;
-  blknum = offset / BLKSIZE;
+  blknum = offset / blksize;
 
-  assert (IS_ALIGNED (offset, BLKSIZE));
-  assert (IS_ALIGNED (count, BLKSIZE));
+  assert (IS_ALIGNED (offset, blksize));
+  assert (IS_ALIGNED (count, blksize));
   assert (count > 0);           /* We must make forward progress. */
 
   while (count > 0) {
@@ -634,7 +654,7 @@ cow_extents (nbdkit_next *next,
     /* Present in the overlay. */
     if (present) {
       e.offset = offset;
-      e.length = BLKSIZE;
+      e.length = blksize;
 
       if (trimmed)
         e.type = NBDKIT_EXTENT_HOLE|NBDKIT_EXTENT_ZERO;
@@ -647,8 +667,8 @@ cow_extents (nbdkit_next *next,
       }
 
       blknum++;
-      offset += BLKSIZE;
-      count -= BLKSIZE;
+      offset += blksize;
+      count -= blksize;
     }
 
     /* Not present in the overlay, but we can ask the plugin. */
@@ -667,12 +687,12 @@ cow_extents (nbdkit_next *next,
          * (range_count), but count is a 64 bit quantity, so don't
          * overflow range_count here.
          */
-        if (range_count >= UINT32_MAX - BLKSIZE + 1) break;
+        if (range_count >= UINT32_MAX - blksize + 1) break;
 
         blknum++;
-        offset += BLKSIZE;
-        count -= BLKSIZE;
-        range_count += BLKSIZE;
+        offset += blksize;
+        count -= blksize;
+        range_count += blksize;
 
         if (count == 0) break;
         blk_status (blknum, &present, &trimmed);
@@ -706,7 +726,7 @@ cow_extents (nbdkit_next *next,
     /* Otherwise assume the block is non-sparse. */
     else {
       e.offset = offset;
-      e.length = BLKSIZE;
+      e.length = blksize;
       e.type = 0;
 
       if (nbdkit_add_extent (extents, e.offset, e.length, e.type) == -1) {
@@ -715,8 +735,8 @@ cow_extents (nbdkit_next *next,
       }
 
       blknum++;
-      offset += BLKSIZE;
-      count -= BLKSIZE;
+      offset += blksize;
+      count -= blksize;
     }
 
     /* If the caller only wanted the first extent, and we've managed
@@ -734,11 +754,11 @@ cow_extents (nbdkit_next *next,
 static struct nbdkit_filter filter = {
   .name              = "cow",
   .longname          = "nbdkit copy-on-write (COW) filter",
-  .load              = cow_load,
   .unload            = cow_unload,
   .open              = cow_open,
   .config            = cow_config,
   .config_help       = cow_config_help,
+  .get_ready         = cow_get_ready,
   .prepare           = cow_prepare,
   .get_size          = cow_get_size,
   .can_write         = cow_can_write,
diff --git a/filters/cow/cow.h b/filters/cow/cow.h
new file mode 100644
index 00000000..d46dbe91
--- /dev/null
+++ b/filters/cow/cow.h
@@ -0,0 +1,39 @@
+/* nbdkit
+ * Copyright (C) 2018-2021 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_COW_H
+#define NBDKIT_COW_H
+
+/* Size of a block in the overlay (cow-block-size=N, default 64K). */
+extern unsigned blksize;
+
+#endif /* NBDKIT_COW_H */
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
index 7f861140..997c9097 100644
--- a/filters/cow/nbdkit-cow-filter.pod
+++ b/filters/cow/nbdkit-cow-filter.pod
@@ -5,6 +5,7 @@ nbdkit-cow-filter - nbdkit copy-on-write (COW) filter
 =head1 SYNOPSIS
 
  nbdkit --filter=cow plugin [plugin-args...]
+                            [cow-block-size=N]
                             [cow-on-cache=false|true]
                             [cow-on-read=false|true|/PATH]
 
@@ -42,6 +43,10 @@ serve the same data to each client.
 
 =over 4
 
+=item B<cow-block-size=>N
+
+Set the block size used by the filter.  N must be a power of 2 and no larger than 2G.  The default is 64K.
+
 =item B<cow-on-cache=false>
 
 Do not save data from cache (prefetch) requests in the overlay.  This
diff --git a/tests/Makefile.am b/tests/Makefile.am
index e61c5829..d93f848f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1404,6 +1404,7 @@ EXTRA_DIST += \
 if HAVE_MKE2FS_WITH_D
 TESTS += \
 	test-cow.sh \
+	test-cow-block-size.sh \
 	test-cow-extents1.sh \
 	test-cow-extents2.sh \
 	test-cow-extents-large.sh \
@@ -1415,6 +1416,7 @@ endif
 TESTS += test-cow-null.sh
 EXTRA_DIST += \
 	test-cow.sh \
+	test-cow-block-size.sh \
 	test-cow-extents1.sh \
 	test-cow-extents2.sh \
 	test-cow-extents-large.sh \
diff --git a/tests/test-cow-block-size.sh b/tests/test-cow-block-size.sh
new file mode 100755
index 00000000..6de1c068
--- /dev/null
+++ b/tests/test-cow-block-size.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin linuxdisk
+requires guestfish --version
+requires nbdcopy --version
+requires qemu-img --version
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="cow-block-size-base.img $sock cow-block-size.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Create a base image which is partitioned with an empty filesystem.
+rm -rf cow-block-size.d
+mkdir cow-block-size.d
+cleanup_fn rm -rf cow-block-size.d
+nbdkit -fv -U - linuxdisk cow-block-size.d size=100M \
+       --run 'nbdcopy "$uri" cow-block-size-base.img'
+lastmod="$(stat -c "%y" cow-block-size-base.img)"
+
+# Run nbdkit with a COW overlay, 4M block size and copy on read.
+start_nbdkit -P cow-block-size.pid -U $sock \
+             --filter=cow file cow-block-size-base.img \
+             cow-block-size=4M cow-on-read=true
+
+# Write some data into the overlay.
+guestfish --format=raw -a "nbd://?socket=$sock" -m /dev/sda1 <<EOF
+  fill-pattern "abcde" 128K /large
+  write /hello "hello, world"
+EOF
+
+# The original file must not be modified.
+currmod="$(stat -c "%y" cow-block-size-base.img)"
+
+if [ "$lastmod" != "$currmod" ]; then
+    echo "$0: FAILED last modified time of base file changed"
+    exit 1
+fi
-- 
2.31.1