Blob Blame History Raw
From 8bfe6512d07caf778fd001425435b048c45513eb Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 14 May 2022 13:46:56 +0100
Subject: [PATCH] New filter: scan

This filter will simply scan across the disk issuing a series of cache
requests to the underlying plugin.  It is similar in scope and usage
to the new nbdkit-readahead-filter.

(cherry picked from commit 65c20a09ceacb4431986a2982f2c2e746df63fcb)
---
 TODO                                          |   8 -
 configure.ac                                  |   2 +
 filters/cache/nbdkit-cache-filter.pod         |   4 +-
 .../nbdkit-cacheextents-filter.pod            |   1 +
 filters/readahead/nbdkit-readahead-filter.pod |   5 +
 filters/scan/Makefile.am                      |  72 +++++
 filters/scan/bgthread.c                       | 131 ++++++++
 filters/scan/nbdkit-scan-filter.pod           | 159 ++++++++++
 filters/scan/scan.c                           | 280 ++++++++++++++++++
 filters/scan/scan.h                           |  64 ++++
 plugins/ssh/nbdkit-ssh-plugin.pod             |   1 +
 plugins/torrent/nbdkit-torrent-plugin.pod     |   1 +
 plugins/vddk/nbdkit-vddk-plugin.pod           |   1 +
 tests/Makefile.am                             |  10 +
 tests/test-scan-copy.sh                       |  42 +++
 tests/test-scan-info.sh                       |  46 +++
 16 files changed, 817 insertions(+), 10 deletions(-)
 create mode 100644 filters/scan/Makefile.am
 create mode 100644 filters/scan/bgthread.c
 create mode 100644 filters/scan/nbdkit-scan-filter.pod
 create mode 100644 filters/scan/scan.c
 create mode 100644 filters/scan/scan.h
 create mode 100755 tests/test-scan-copy.sh
 create mode 100755 tests/test-scan-info.sh

diff --git a/TODO b/TODO
index 0f5dc41d..8600d9e4 100644
--- a/TODO
+++ b/TODO
@@ -182,14 +182,6 @@ Python:
 Suggestions for filters
 -----------------------
 
-* Add scan filter.  This would be placed on top of cache filters and
-  would scan (read) the whole disk in the background, ensuring it is
-  copied into the cache.  Useful if you have a slow plugin, limited
-  size device, and lots of local disk space, especially if you know
-  that the NBD clients will eventually read all of the device.  RWMJ
-  wrote an implementation of this but it doesn't work well without a
-  background thread.
-
 * Add shared filter.  Take advantage of filter context APIs to open a
   single context into the backend shared among multiple client
   connections.  This may even allow a filter to offer a more parallel
diff --git a/configure.ac b/configure.ac
index 1d209f67..466dbd9b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -142,6 +142,7 @@ filters="\
         readahead \
         retry \
         retry-request \
+        scan \
         stats \
         swab \
         tar \
@@ -1403,6 +1404,7 @@ AC_CONFIG_FILES([Makefile
                  filters/readahead/Makefile
                  filters/retry/Makefile
                  filters/retry-request/Makefile
+                 filters/scan/Makefile
                  filters/stats/Makefile
                  filters/swab/Makefile
                  filters/tar/Makefile
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
index f4234e1a..935804b5 100644
--- a/filters/cache/nbdkit-cache-filter.pod
+++ b/filters/cache/nbdkit-cache-filter.pod
@@ -28,8 +28,8 @@ loss, as the name suggests).
 
 This filter only caches image contents.  To cache image metadata, use
 L<nbdkit-cacheextents-filter(1)> between this filter and the plugin.
-To accelerate sequential reads, use L<nbdkit-readahead-filter(1)> on
-top of this filter.
+To accelerate sequential reads, use L<nbdkit-readahead-filter(1)> or
+L<nbdkit-scan-filter(1)> on top of this filter.
 
 =head1 PARAMETERS
 
diff --git a/filters/cacheextents/nbdkit-cacheextents-filter.pod b/filters/cacheextents/nbdkit-cacheextents-filter.pod
index bb2514a4..6464eac2 100644
--- a/filters/cacheextents/nbdkit-cacheextents-filter.pod
+++ b/filters/cacheextents/nbdkit-cacheextents-filter.pod
@@ -54,6 +54,7 @@ L<nbdkit(1)>,
 L<nbdkit-cache-filter(1)>,
 L<nbdkit-extentlist-filter(1)>,
 L<nbdkit-readahead-filter(1)>,
+L<nbdkit-scan-filter(1)>,
 L<nbdkit-vddk-plugin(1)>,
 L<nbdkit-filter(3)>,
 L<qemu-img(1)>.
diff --git a/filters/readahead/nbdkit-readahead-filter.pod b/filters/readahead/nbdkit-readahead-filter.pod
index 630e5924..99d64dfb 100644
--- a/filters/readahead/nbdkit-readahead-filter.pod
+++ b/filters/readahead/nbdkit-readahead-filter.pod
@@ -27,6 +27,10 @@ option.
 The filter uses a simple adaptive algorithm which accelerates
 sequential reads and requires no further configuration.
 
+A similar filter is L<nbdkit-scan-filter(1)> which reads ahead over
+the whole disk, useful if you know that the client will be reading
+sequentially across most or all of the disk.
+
 =head2 Limitations
 
 In a number of significant cases this filter will do nothing.  The
@@ -91,6 +95,7 @@ L<nbdkit-cache-filter(1)>,
 L<nbdkit-cow-filter(1)>,
 L<nbdkit-file-plugin(1)>,
 L<nbdkit-retry-filter(1)>,
+L<nbdkit-scan-filter(1)>,
 L<nbdkit-torrent-plugin(1)>,
 L<nbdkit-vddk-plugin(1)>,
 L<nbdkit-filter(3)>,
diff --git a/filters/scan/Makefile.am b/filters/scan/Makefile.am
new file mode 100644
index 00000000..d4aabfc6
--- /dev/null
+++ b/filters/scan/Makefile.am
@@ -0,0 +1,72 @@
+# nbdkit
+# Copyright (C) 2019-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+include $(top_srcdir)/common-rules.mk
+
+EXTRA_DIST = nbdkit-scan-filter.pod
+
+filter_LTLIBRARIES = nbdkit-scan-filter.la
+
+nbdkit_scan_filter_la_SOURCES = \
+	scan.c \
+	scan.h \
+	bgthread.c \
+	$(top_srcdir)/include/nbdkit-filter.h \
+	$(NULL)
+
+nbdkit_scan_filter_la_CPPFLAGS = \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/common/include \
+	-I$(top_srcdir)/common/utils \
+	$(NULL)
+nbdkit_scan_filter_la_CFLAGS = $(WARNINGS_CFLAGS)
+nbdkit_scan_filter_la_LDFLAGS = \
+	-module -avoid-version -shared $(NO_UNDEFINED_ON_WINDOWS) \
+	-Wl,--version-script=$(top_srcdir)/filters/filters.syms \
+	$(NULL)
+nbdkit_scan_filter_la_LIBADD = \
+	$(top_builddir)/common/utils/libutils.la \
+	$(top_builddir)/common/replacements/libcompat.la \
+	$(IMPORT_LIBRARY_ON_WINDOWS) \
+	$(NULL)
+
+if HAVE_POD
+
+man_MANS = nbdkit-scan-filter.1
+CLEANFILES += $(man_MANS)
+
+nbdkit-scan-filter.1: nbdkit-scan-filter.pod \
+		$(top_builddir)/podwrapper.pl
+	$(PODWRAPPER) --section=1 --man $@ \
+	    --html $(top_builddir)/html/$@.html \
+	    $<
+
+endif HAVE_POD
diff --git a/filters/scan/bgthread.c b/filters/scan/bgthread.c
new file mode 100644
index 00000000..384e79b6
--- /dev/null
+++ b/filters/scan/bgthread.c
@@ -0,0 +1,131 @@
+/* nbdkit
+ * Copyright (C) 2019-2022 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include <nbdkit-filter.h>
+
+#include "scan.h"
+
+#include "cleanup.h"
+#include "minmax.h"
+
+static pthread_mutex_t clock_lock = PTHREAD_MUTEX_INITIALIZER;
+static uint64_t clock_ = 0;     /* Scan position, under clock_lock. */
+
+static void
+adjust_clock (uint64_t offset)
+{
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&clock_lock);
+  if (clock_ < offset)
+    clock_ = offset;
+}
+
+static void
+reset_clock (uint64_t offset)
+{
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&clock_lock);
+  clock_ = 0;
+}
+
+static uint64_t
+get_starting_offset (void)
+{
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&clock_lock);
+  return scan_clock ? clock_ : 0;
+}
+
+void *
+scan_thread (void *vp)
+{
+  struct bgthread_ctrl *ctrl = vp;
+  uint64_t offset, size;
+  int64_t r;
+
+  assert (ctrl->next != NULL);
+
+  /* Get the size of the underlying plugin.  Exit the thread on error
+   * because there's not much we can do without knowing the size.
+   */
+  r = ctrl->next->get_size (ctrl->next);
+  if (r == -1)
+    return NULL;
+  size = r;
+
+  /* Start scanning. */
+ start:
+  for (offset = get_starting_offset (); offset < size; offset += scan_size) {
+    uint64_t n;
+
+    /* Execute any commands in the queue. */
+    {
+      ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&ctrl->lock);
+      struct command cmd;
+
+      while (ctrl->cmds.len) {
+        cmd = ctrl->cmds.ptr[0];
+        command_queue_remove (&ctrl->cmds, 0);
+
+        switch (cmd.type) {
+        case CMD_QUIT:
+          nbdkit_debug ("scan: exiting background thread on connection close");
+          return NULL;
+
+        case CMD_NOTIFY_PREAD:
+          if (offset < cmd.offset)
+            offset = cmd.offset;
+        }
+      }
+    }
+
+    adjust_clock (offset);
+    if (offset >= size)   /* A skip-ahead may land exactly on the end. */
+      continue;
+
+    /* Issue the next prefetch. */
+    n = MIN (scan_size, size - offset);
+    ctrl->next->cache (ctrl->next, n, offset, 0, NULL);
+  }
+
+  if (scan_forever) {
+    reset_clock (offset);
+    goto start;
+  }
+
+  nbdkit_debug ("scan: finished scanning the plugin");
+  return NULL;
+}
diff --git a/filters/scan/nbdkit-scan-filter.pod b/filters/scan/nbdkit-scan-filter.pod
new file mode 100644
index 00000000..4a8d0ef9
--- /dev/null
+++ b/filters/scan/nbdkit-scan-filter.pod
@@ -0,0 +1,159 @@
+=head1 NAME
+
+nbdkit-scan-filter - scan disk prefetching data ahead of sequential reads
+
+=head1 SYNOPSIS
+
+ nbdkit --filter=scan PLUGIN [scan-ahead=false] [scan-clock=false]
+                             [scan-forever=true] [scan-size=NN]
+
+ nbdkit --filter=scan --filter=cache PLUGIN
+
+ nbdkit --filter=scan --filter=cow PLUGIN cow-on-cache=true
+
+=head1 DESCRIPTION
+
+C<nbdkit-scan-filter> is a filter that scans the disk prefetching
+data.  It is sometimes useful if you expect that the client will read
+the disk sequentially.
+
+The basic operation of the filter is that when a client connects, the
+filter will start issuing C<.cache> (prefetch) requests to the plugin
+across the whole disk.  Plugins which support this command will
+prefetch the data, making subsequent reads faster.  For plugins which
+do not support this command, you can inject L<nbdkit-cache-filter(1)>
+below (after) this filter, giving approximately the same effect.
+L<nbdkit-cow-filter(1)> can be used instead of nbdkit-cache-filter, if
+you add the C<cow-on-cache=true> option.
+
+Various C<scan-*> parameters can be used to tune scanning, although
+the defaults should be suitable in most cases.
+
+A similar filter is L<nbdkit-readahead-filter(1)>.
+
+=head2 Limitations
+
+In a number of significant cases this filter will do nothing.  The
+filter will print a warning message if this happens.
+
+=over 4
+
+=item Thread model must be parallel *
+
+For example L<nbdkit-curl-plugin(1)> only supports
+C<serialize_requests>, and so this filter cannot perform prefetches in
+parallel with the read requests.
+
+=item Only scans while clients are connected *
+
+The current filter only scans while there is at least one client
+connected.
+
+=item Only scans the default export *
+
+The current filter only scans the default export and ignores all
+clients connecting to the non-default export name.
+
+* We may be able to lift these restrictions in future.
+
+=item Underlying filters or plugin must support C<.cache> (prefetch)
+
+Very many plugins do not have the concept of prefetching and/or
+do not implement the C<.cache> callback, and so there is no
+way for this filter to issue prefetches.
+
+You can usually get around this by adding I<--filter=cache> after this
+filter as explained above.
+
+=item Prefetching the whole disk may load it all into cache
+
+In particular if you use this filter together with
+L<nbdkit-cache-filter(1)> or L<nbdkit-cow-filter(1)>, they will cache
+the whole content of the plugin into a temporary file.  This may be
+many gigabytes of data, consuming all space in F</var/tmp>.  Of course
+this is the whole point of using this filter, but you should be aware
+of it.
+
+If using the cache filter, the total size of the cache can be limited
+(see L<nbdkit-cache-filter(1)/CACHE MAXIMUM SIZE>).
+
+=back
+
+=head1 PARAMETERS
+
+=over 4
+
+=item B<scan-ahead=false>
+
+By default the filter tries to stay ahead of incoming read requests.
+That is to say, it starts prefetching at the beginning of the disk and
+continues incrementally, but if the client issues a read beyond the
+current prefetch point then the filter skips forward and begins
+prefetching after the read.
+
+However if you set this parameter to false, then this behaviour is
+disabled.  The filter simply prefetches sequentially regardless of
+client requests.
+
+=item B<scan-clock=false>
+
+By default, if all clients disconnect and then another client
+connects, prefetching resumes at the same place in the disk.  (Like
+stopping and starting a clock.)
+
+If you set this parameter to false, then the filter starts prefetching
+from the beginning of the disk again.
+
+=item B<scan-forever=true>
+
+By default the filter scans over the disk once and then stops.
+
+If you set this parameter to true, then after the disk has been
+prefetched completely, the filter goes back to the beginning and
+starts over, repeating this for as long as nbdkit is running and there
+are clients connected.
+
+=item B<scan-size=>NN
+
+This parameter controls the prefetch block size.  The default is
+C<2M>.  This must be a power of 2 between C<512> and C<32M>.  Most
+plugins have their own limits on how much they can prefetch at once.
+
+=back
+
+=head1 FILES
+
+=over 4
+
+=item F<$filterdir/nbdkit-scan-filter.so>
+
+The filter.
+
+Use C<nbdkit --dump-config> to find the location of C<$filterdir>.
+
+=back
+
+=head1 VERSION
+
+C<nbdkit-scan-filter> first appeared in nbdkit 1.32.
+
+=head1 SEE ALSO
+
+L<nbdkit(1)>,
+L<nbdkit-cache-filter(1)>,
+L<nbdkit-cow-filter(1)>,
+L<nbdkit-file-plugin(1)>,
+L<nbdkit-readahead-filter(1)>,
+L<nbdkit-retry-filter(1)>,
+L<nbdkit-torrent-plugin(1)>,
+L<nbdkit-vddk-plugin(1)>,
+L<nbdkit-filter(3)>,
+L<qemu-img(1)>.
+
+=head1 AUTHORS
+
+Richard W.M. Jones
+
+=head1 COPYRIGHT
+
+Copyright (C) 2019-2022 Red Hat Inc.
diff --git a/filters/scan/scan.c b/filters/scan/scan.c
new file mode 100644
index 00000000..ac5b18d2
--- /dev/null
+++ b/filters/scan/scan.c
@@ -0,0 +1,280 @@
+/* nbdkit
+ * Copyright (C) 2019-2022 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include <nbdkit-filter.h>
+
+#include "scan.h"
+
+#include "cleanup.h"
+#include "ispowerof2.h"
+#include "vector.h"
+
+static bool scan_ahead = true;
+bool scan_clock = true;
+bool scan_forever = false;
+unsigned scan_size = 2*1024*1024;
+
+static int thread_model = -1; /* Thread model of the underlying plugin. */
+
+/* Per-connection data. */
+struct scan_handle {
+  bool is_default_export;  /* If exportname == "". */
+  bool running;            /* True if background thread is running. */
+  pthread_t thread;        /* The background thread, one per connection. */
+  struct bgthread_ctrl ctrl;
+};
+
+static int
+scan_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
+             const char *key, const char *value)
+{
+  int r;
+
+  if (strcmp (key, "scan-ahead") == 0) {
+    r = nbdkit_parse_bool (value);
+    if (r == -1)
+      return -1;
+    scan_ahead = r;
+    return 0;
+  }
+  else if (strcmp (key, "scan-clock") == 0) {
+    r = nbdkit_parse_bool (value);
+    if (r == -1)
+      return -1;
+    scan_clock = r;
+    return 0;
+  }
+  else if (strcmp (key, "scan-forever") == 0) {
+    r = nbdkit_parse_bool (value);
+    if (r == -1)
+      return -1;
+    scan_forever = r;
+    return 0;
+  }
+  else if (strcmp (key, "scan-size") == 0) {
+    const int64_t size = nbdkit_parse_size (value);
+    /* Avoid truncating into 'unsigned'; 0 fails the .config_complete check. */
+    scan_size = size >= 0 && size <= 32*1024*1024 ? size : 0;
+    return size == -1 ? -1 : 0;
+  }
+
+  return next (nxdata, key, value);
+}
+
+static int
+scan_config_complete (nbdkit_next_config_complete *next, nbdkit_backend *nxdata)
+{
+  if (scan_size < 512 || scan_size > 32*1024*1024 ||
+      !is_power_of_2 (scan_size)) {
+    nbdkit_error ("scan-size parameter should be [512..32M] "
+                  "and a power of two");
+    return -1;
+  }
+
+  return next (nxdata);
+}
+
+#define scan_config_help \
+  "scan-ahead=false         Skip ahead when client reads faster.\n" \
+  "scan-clock=false         Always start prefetching from beginning.\n" \
+  "scan-forever=true        Scan in a loop while clients connected.\n" \
+  "scan-size=NN             Set scan block size."
+
+/* We need to hook into .get_ready() so we can read the final thread
+ * model (of the whole server).
+ */
+static int
+scan_get_ready (int final_thread_model)
+{
+  thread_model = final_thread_model;
+  return 0;
+}
+
+static int
+send_command_to_background_thread (struct bgthread_ctrl *ctrl,
+                                   const struct command cmd)
+{
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&ctrl->lock);
+  if (command_queue_append (&ctrl->cmds, cmd) == -1)
+    return -1;
+  /* Signal the thread if it could be sleeping on an empty queue. */
+  if (ctrl->cmds.len == 1)
+    pthread_cond_signal (&ctrl->cond);
+  return 0;
+}
+
+static void *
+scan_open (nbdkit_next_open *next, nbdkit_context *nxdata,
+           int readonly, const char *exportname, int is_tls)
+{
+  struct scan_handle *h;
+
+  if (next (nxdata, readonly, exportname) == -1)
+    return NULL;
+
+  h = calloc (1, sizeof *h);    /* Zeroed, so h->running starts false. */
+  if (h == NULL) {
+    nbdkit_error ("calloc: %m");
+    return NULL;
+  }
+
+  h->is_default_export = strcmp (exportname, "") == 0;
+  return h;
+}
+
+/* In prepare we check if it's possible to support the scan filter on
+ * this connection (or print a warning), and start the background
+ * thread.
+ */
+static int
+scan_prepare (nbdkit_next *next, void *handle, int readonly)
+{
+  struct scan_handle *h = handle;
+  int r, err;
+
+  if (!h->is_default_export) {
+    nbdkit_error ("scan: warning: not the default export, not scanning");
+    return 0;
+  }
+
+  if (thread_model != NBDKIT_THREAD_MODEL_PARALLEL) {
+    nbdkit_error ("scan: warning: underlying plugin does not support "
+                  "the PARALLEL thread model, not scanning");
+    return 0;
+  }
+
+  /* Call next->can_cache to read the underlying 'can_cache'. */
+  r = next->can_cache (next);
+  if (r == -1)
+    return -1;
+  if (r != NBDKIT_CACHE_NATIVE) {
+    nbdkit_error ("scan: warning: underlying plugin does not support "
+                  "NBD_CMD_CACHE, not scanning; try adding --filter=cache "
+                  "after this filter");
+    return 0;
+  }
+
+  /* Save the connection in the handle, for the background thread to use. */
+  h->ctrl.next = next;
+
+  /* Create the background thread. */
+  h->ctrl.cmds = (command_queue) empty_vector;
+  pthread_mutex_init (&h->ctrl.lock, NULL);
+  pthread_cond_init (&h->ctrl.cond, NULL);
+
+  err = pthread_create (&h->thread, NULL, scan_thread, &h->ctrl);
+  if (err != 0) {
+    errno = err;
+    nbdkit_error ("pthread_create: %m");
+    pthread_cond_destroy (&h->ctrl.cond);
+    pthread_mutex_destroy (&h->ctrl.lock);
+    return -1;
+  }
+
+  h->running = true;
+
+  return 0;
+}
+
+/* Finalize cleans up the thread if it is running. */
+static int
+scan_finalize (nbdkit_next *next, void *handle)
+{
+  struct scan_handle *h = handle;
+  const struct command quit_cmd = { .type = CMD_QUIT };
+
+  if (!h->running)
+    return 0;
+
+  send_command_to_background_thread (&h->ctrl, quit_cmd);
+  pthread_join (h->thread, NULL);
+  pthread_cond_destroy (&h->ctrl.cond);
+  pthread_mutex_destroy (&h->ctrl.lock);
+  command_queue_reset (&h->ctrl.cmds);
+  h->running = false;
+
+  return 0;
+}
+
+static void
+scan_close (void *handle)
+{
+  struct scan_handle *h = handle;
+
+  free (h);
+}
+
+/* Read data, first telling the background thread where the client is. */
+static int
+scan_pread (nbdkit_next *next,
+            void *handle, void *buf, uint32_t count, uint64_t offset,
+            uint32_t flags, int *err)
+{
+  struct scan_handle *h = handle;
+  const struct command cmd =
+    { .type = CMD_NOTIFY_PREAD, .offset = offset + count };
+
+  if (scan_ahead && h->running &&
+      send_command_to_background_thread (&h->ctrl, cmd) == -1) {
+    *err = ENOMEM;  /* Enqueue failed; .pread must set *err with -1. */
+    return -1;
+  }
+
+  /* Issue the normal read. */
+  return next->pread (next, buf, count, offset, flags, err);
+}
+
+static struct nbdkit_filter filter = {
+  .name              = "scan",
+  .longname          = "nbdkit scan filter",
+  .get_ready         = scan_get_ready,
+  .config            = scan_config,
+  .config_complete   = scan_config_complete,
+  .config_help       = scan_config_help,
+  .open              = scan_open,
+  .prepare           = scan_prepare,
+  .finalize          = scan_finalize,
+  .close             = scan_close,
+  .pread             = scan_pread,
+};
+
+NBDKIT_REGISTER_FILTER(filter)
diff --git a/filters/scan/scan.h b/filters/scan/scan.h
new file mode 100644
index 00000000..7ff39310
--- /dev/null
+++ b/filters/scan/scan.h
@@ -0,0 +1,64 @@
+/* nbdkit
+ * Copyright (C) 2019-2022 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_SCAN_H
+#define NBDKIT_SCAN_H
+
+#include <stdbool.h>
+#include <pthread.h>
+
+#include <nbdkit-filter.h>
+
+#include "vector.h"
+
+extern bool scan_clock;
+extern bool scan_forever;
+extern unsigned scan_size;
+
+/* List of commands issued to the background thread. */
+struct command {
+  enum { CMD_QUIT, CMD_NOTIFY_PREAD } type;
+  uint64_t offset;
+};
+DEFINE_VECTOR_TYPE(command_queue, struct command);
+
+struct bgthread_ctrl {
+  command_queue cmds;           /* Command queue. */
+  pthread_mutex_t lock;         /* Lock for queue. */
+  pthread_cond_t cond;          /* Condition queue size 0 -> 1. */
+  nbdkit_next *next;            /* For sending cache operations. */
+};
+
+/* Start background thread (one per connection). */
+extern void *scan_thread (void *vp);
+
+#endif /* NBDKIT_SCAN_H */
diff --git a/plugins/ssh/nbdkit-ssh-plugin.pod b/plugins/ssh/nbdkit-ssh-plugin.pod
index 2bc2c4a7..214957d6 100644
--- a/plugins/ssh/nbdkit-ssh-plugin.pod
+++ b/plugins/ssh/nbdkit-ssh-plugin.pod
@@ -349,6 +349,7 @@ L<nbdkit-curl-plugin(1)>,
 L<nbdkit-extentlist-filter(1)>,
 L<nbdkit-readahead-filter(1)>,
 L<nbdkit-retry-filter(1)>,
+L<nbdkit-scan-filter(1)>,
 L<nbdkit-plugin(3)>,
 L<ssh(1)>,
 L<ssh-agent(1)>,
diff --git a/plugins/torrent/nbdkit-torrent-plugin.pod b/plugins/torrent/nbdkit-torrent-plugin.pod
index 196ce4e9..f09ac3d2 100644
--- a/plugins/torrent/nbdkit-torrent-plugin.pod
+++ b/plugins/torrent/nbdkit-torrent-plugin.pod
@@ -175,6 +175,7 @@ L<nbdkit-curl-plugin(1)>,
 L<nbdkit-file-plugin(1)>,
 L<nbdkit-iso-plugin(1)>,
 L<nbdkit-readahead-filter(1)>,
+L<nbdkit-scan-filter(1)>,
 L<transmission-show(1)>,
 L<https://en.wikipedia.org/wiki/BitTorrent>,
 L<http://libtorrent.org/>.
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index ea5899dc..3991e86b 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -733,6 +733,7 @@ L<nbdkit-plugin(3)>,
 L<nbdkit-blocksize-filter(1)>,
 L<nbdkit-readahead-filter(1)>,
 L<nbdkit-retry-filter(1)>,
+L<nbdkit-scan-filter(1)>,
 L<virsh(1)>,
 L<https://libvirt.org/drvesx.html>,
 L<https://www.vmware.com/support/developer/vddk/>,
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5585b3b7..799aa6c2 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1754,6 +1754,16 @@ test_retry_request_mirror_LDADD = \
 	$(LIBNBD_LIBS) \
 	$(NULL)
 
+# scan filter test.
+TESTS += \
+	test-scan-copy.sh \
+	test-scan-info.sh \
+	$(NULL)
+EXTRA_DIST += \
+	test-scan-copy.sh \
+	test-scan-info.sh \
+	$(NULL)
+
 # swab filter test.
 TESTS += \
 	test-swab-8.sh \
diff --git a/tests/test-scan-copy.sh b/tests/test-scan-copy.sh
new file mode 100755
index 00000000..227ad7b2
--- /dev/null
+++ b/tests/test-scan-copy.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2022 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires nbdcopy --version
+requires_plugin sparse-random
+requires_filter scan
+
+nbdkit -fv -U - sparse-random 1M --filter=scan --run 'nbdcopy "$uri" "$uri"'
+nbdkit -fv -U - sparse-random 1G --filter=scan --run 'nbdcopy "$uri" "$uri"'
diff --git a/tests/test-scan-info.sh b/tests/test-scan-info.sh
new file mode 100755
index 00000000..6b109ca8
--- /dev/null
+++ b/tests/test-scan-info.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2022 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires nbdinfo --version
+requires_filter scan
+
+# We're just testing that there are no problematic races with the
+# background thread.
+
+nbdkit -fv -U - memory 1 --filter=scan --run 'nbdinfo "$uri"'
+nbdkit -fv -U - memory 1M --filter=scan --run 'nbdinfo "$uri"'
+nbdkit -fv -U - memory 1G --filter=scan --run 'nbdinfo "$uri"'
+nbdkit -fv -U - memory 1G --filter=scan -e test --run 'nbdinfo "$uri"'
-- 
2.31.1