From 8bfe6512d07caf778fd001425435b048c45513eb Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 14 May 2022 13:46:56 +0100
Subject: [PATCH] New filter: scan
This filter will simply scan across the disk issuing a series of cache
requests to the underlying plugin. It is similar in scope and usage
to the new nbdkit-readahead-filter.
(cherry picked from commit 65c20a09ceacb4431986a2982f2c2e746df63fcb)
---
TODO | 8 -
configure.ac | 2 +
filters/cache/nbdkit-cache-filter.pod | 4 +-
.../nbdkit-cacheextents-filter.pod | 1 +
filters/readahead/nbdkit-readahead-filter.pod | 5 +
filters/scan/Makefile.am | 72 +++++
filters/scan/bgthread.c | 131 ++++++++
filters/scan/nbdkit-scan-filter.pod | 159 ++++++++++
filters/scan/scan.c | 280 ++++++++++++++++++
filters/scan/scan.h | 64 ++++
plugins/ssh/nbdkit-ssh-plugin.pod | 1 +
plugins/torrent/nbdkit-torrent-plugin.pod | 1 +
plugins/vddk/nbdkit-vddk-plugin.pod | 1 +
tests/Makefile.am | 10 +
tests/test-scan-copy.sh | 42 +++
tests/test-scan-info.sh | 46 +++
16 files changed, 817 insertions(+), 10 deletions(-)
create mode 100644 filters/scan/Makefile.am
create mode 100644 filters/scan/bgthread.c
create mode 100644 filters/scan/nbdkit-scan-filter.pod
create mode 100644 filters/scan/scan.c
create mode 100644 filters/scan/scan.h
create mode 100755 tests/test-scan-copy.sh
create mode 100755 tests/test-scan-info.sh
diff --git a/TODO b/TODO
index 0f5dc41d..8600d9e4 100644
--- a/TODO
+++ b/TODO
@@ -182,14 +182,6 @@ Python:
Suggestions for filters
-----------------------
-* Add scan filter. This would be placed on top of cache filters and
- would scan (read) the whole disk in the background, ensuring it is
- copied into the cache. Useful if you have a slow plugin, limited
- size device, and lots of local disk space, especially if you know
- that the NBD clients will eventually read all of the device. RWMJ
- wrote an implementation of this but it doesn't work well without a
- background thread.
-
* Add shared filter. Take advantage of filter context APIs to open a
single context into the backend shared among multiple client
connections. This may even allow a filter to offer a more parallel
diff --git a/configure.ac b/configure.ac
index 1d209f67..466dbd9b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -142,6 +142,7 @@ filters="\
readahead \
retry \
retry-request \
+ scan \
stats \
swab \
tar \
@@ -1403,6 +1404,7 @@ AC_CONFIG_FILES([Makefile
filters/readahead/Makefile
filters/retry/Makefile
filters/retry-request/Makefile
+ filters/scan/Makefile
filters/stats/Makefile
filters/swab/Makefile
filters/tar/Makefile
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
index f4234e1a..935804b5 100644
--- a/filters/cache/nbdkit-cache-filter.pod
+++ b/filters/cache/nbdkit-cache-filter.pod
@@ -28,8 +28,8 @@ loss, as the name suggests).
This filter only caches image contents. To cache image metadata, use
L<nbdkit-cacheextents-filter(1)> between this filter and the plugin.
-To accelerate sequential reads, use L<nbdkit-readahead-filter(1)> on
-top of this filter.
+To accelerate sequential reads, use L<nbdkit-readahead-filter(1)> or
+L<nbdkit-scan-filter(1)> on top of this filter.
=head1 PARAMETERS
diff --git a/filters/cacheextents/nbdkit-cacheextents-filter.pod b/filters/cacheextents/nbdkit-cacheextents-filter.pod
index bb2514a4..6464eac2 100644
--- a/filters/cacheextents/nbdkit-cacheextents-filter.pod
+++ b/filters/cacheextents/nbdkit-cacheextents-filter.pod
@@ -54,6 +54,7 @@ L<nbdkit(1)>,
L<nbdkit-cache-filter(1)>,
L<nbdkit-extentlist-filter(1)>,
L<nbdkit-readahead-filter(1)>,
+L<nbdkit-scan-filter(1)>,
L<nbdkit-vddk-plugin(1)>,
L<nbdkit-filter(3)>,
L<qemu-img(1)>.
diff --git a/filters/readahead/nbdkit-readahead-filter.pod b/filters/readahead/nbdkit-readahead-filter.pod
index 630e5924..99d64dfb 100644
--- a/filters/readahead/nbdkit-readahead-filter.pod
+++ b/filters/readahead/nbdkit-readahead-filter.pod
@@ -27,6 +27,10 @@ option.
The filter uses a simple adaptive algorithm which accelerates
sequential reads and requires no further configuration.
+A similar filter is L<nbdkit-scan-filter(1)> which reads ahead over
+the whole disk, useful if you know that the client will be reading
+sequentially across most or all of the disk.
+
=head2 Limitations
In a number of significant cases this filter will do nothing. The
@@ -91,6 +95,7 @@ L<nbdkit-cache-filter(1)>,
L<nbdkit-cow-filter(1)>,
L<nbdkit-file-plugin(1)>,
L<nbdkit-retry-filter(1)>,
+L<nbdkit-scan-filter(1)>,
L<nbdkit-torrent-plugin(1)>,
L<nbdkit-vddk-plugin(1)>,
L<nbdkit-filter(3)>,
diff --git a/filters/scan/Makefile.am b/filters/scan/Makefile.am
new file mode 100644
index 00000000..d4aabfc6
--- /dev/null
+++ b/filters/scan/Makefile.am
@@ -0,0 +1,72 @@
+# nbdkit
+# Copyright (C) 2019-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+include $(top_srcdir)/common-rules.mk
+
+EXTRA_DIST = nbdkit-scan-filter.pod
+
+filter_LTLIBRARIES = nbdkit-scan-filter.la
+
+nbdkit_scan_filter_la_SOURCES = \
+ scan.c \
+ scan.h \
+ bgthread.c \
+ $(top_srcdir)/include/nbdkit-filter.h \
+ $(NULL)
+
+nbdkit_scan_filter_la_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/common/include \
+ -I$(top_srcdir)/common/utils \
+ $(NULL)
+nbdkit_scan_filter_la_CFLAGS = $(WARNINGS_CFLAGS)
+nbdkit_scan_filter_la_LDFLAGS = \
+ -module -avoid-version -shared $(NO_UNDEFINED_ON_WINDOWS) \
+ -Wl,--version-script=$(top_srcdir)/filters/filters.syms \
+ $(NULL)
+nbdkit_scan_filter_la_LIBADD = \
+ $(top_builddir)/common/utils/libutils.la \
+ $(top_builddir)/common/replacements/libcompat.la \
+ $(IMPORT_LIBRARY_ON_WINDOWS) \
+ $(NULL)
+
+if HAVE_POD
+
+man_MANS = nbdkit-scan-filter.1
+CLEANFILES += $(man_MANS)
+
+nbdkit-scan-filter.1: nbdkit-scan-filter.pod \
+ $(top_builddir)/podwrapper.pl
+ $(PODWRAPPER) --section=1 --man $@ \
+ --html $(top_builddir)/html/$@.html \
+ $<
+
+endif HAVE_POD
diff --git a/filters/scan/bgthread.c b/filters/scan/bgthread.c
new file mode 100644
index 00000000..384e79b6
--- /dev/null
+++ b/filters/scan/bgthread.c
@@ -0,0 +1,131 @@
+/* nbdkit
+ * Copyright (C) 2019-2022 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include <nbdkit-filter.h>
+
+#include "scan.h"
+
+#include "cleanup.h"
+#include "minmax.h"
+
+static pthread_mutex_t clock_lock = PTHREAD_MUTEX_INITIALIZER;
+static uint64_t clock_ = 0; /* Scan position shared across connections. */
+
+/* Advance the shared clock to offset; it is never moved backwards here. */
+static void
+adjust_clock (uint64_t offset)
+{
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&clock_lock);
+  clock_ = offset > clock_ ? offset : clock_;
+}
+
+static void
+reset_clock (uint64_t offset) /* NOTE(review): 'offset' is never read. */
+{
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&clock_lock);
+ clock_ = 0; /* Rewind the shared clock to the start of the disk. */
+}
+
+static uint64_t
+get_starting_offset (void)
+{
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&clock_lock);
+ return scan_clock ? clock_ : 0; /* scan-clock=false: always start at 0. */
+}
+
+/* Background thread body (one per connection); vp is the connection's
+ * struct bgthread_ctrl.  Issues .cache requests across the export in
+ * scan_size steps until CMD_QUIT arrives or the scan ends.  Returns NULL. */
+void *
+scan_thread (void *vp)
+{
+  struct bgthread_ctrl *ctrl = vp;
+  uint64_t offset, size;
+  int64_t r;
+
+  assert (ctrl->next != NULL);
+  /* Without the export size there is nothing to scan, so exit on error. */
+  r = ctrl->next->get_size (ctrl->next);
+  if (r == -1)
+    return NULL;
+  size = r;
+
+  /* Start scanning. */
+ start:
+  for (offset = get_starting_offset (); offset < size; offset += scan_size) {
+    uint64_t n;
+
+    /* Execute any commands in the queue. */
+    {
+      ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&ctrl->lock);
+      struct command cmd;
+
+      while (ctrl->cmds.len) {
+        cmd = ctrl->cmds.ptr[0];
+        command_queue_remove (&ctrl->cmds, 0);
+        switch (cmd.type) {
+        case CMD_QUIT:
+          nbdkit_debug ("scan: exiting background thread on connection close");
+          return NULL;
+        case CMD_NOTIFY_PREAD:   /* Skip ahead of the client, never back. */
+          if (offset < cmd.offset)
+            offset = cmd.offset;
+        }
+      }
+    }
+
+    adjust_clock (offset);
+    /* A read ending exactly at the end of the export leaves offset == size;
+     * skip it rather than issue a zero-length cache request. */
+    if (offset >= size)
+      continue;
+    /* Issue the next prefetch. */
+    n = MIN (scan_size, size - offset);
+    ctrl->next->cache (ctrl->next, n, offset, 0, NULL);
+  }
+
+  /* Never loop on an empty export: the loop body above, the only place
+   * CMD_QUIT is noticed, would never run and the thread would spin. */
+  if (scan_forever && size > 0) {
+    reset_clock (offset);
+    goto start;
+  }
+  nbdkit_debug ("scan: finished scanning the plugin");
+  return NULL;
+}
diff --git a/filters/scan/nbdkit-scan-filter.pod b/filters/scan/nbdkit-scan-filter.pod
new file mode 100644
index 00000000..4a8d0ef9
--- /dev/null
+++ b/filters/scan/nbdkit-scan-filter.pod
@@ -0,0 +1,159 @@
+=head1 NAME
+
+nbdkit-scan-filter - scan disk prefetching data ahead of sequential reads
+
+=head1 SYNOPSIS
+
+ nbdkit --filter=scan PLUGIN [scan-ahead=false] [scan-clock=false]
+ [scan-forever=true] [scan-size=NN]
+
+ nbdkit --filter=scan --filter=cache PLUGIN
+
+ nbdkit --filter=scan --filter=cow PLUGIN cow-on-cache=true
+
+=head1 DESCRIPTION
+
+C<nbdkit-scan-filter> is a filter that scans the disk prefetching
+data. It is sometimes useful if you expect that the client will read
+the disk sequentially.
+
+The basic operation of the filter is that when a client connects, the
+filter will start issuing C<.cache> (prefetch) requests to the plugin
+across the whole disk. Plugins which support this command will
+prefetch the data, making subsequent reads faster. For plugins which
+do not support this command, you can inject L<nbdkit-cache-filter(1)>
+below (after) this filter, giving approximately the same effect.
+L<nbdkit-cow-filter(1)> can be used instead of nbdkit-cache-filter, if
+you add the C<cow-on-cache=true> option.
+
+Various C<scan-*> parameters can be used to tune scanning, although
+the defaults should be suitable in most cases.
+
+A similar filter is L<nbdkit-readahead-filter(1)>.
+
+=head2 Limitations
+
+In a number of significant cases this filter will do nothing. The
+filter will print a warning message if this happens.
+
+=over 4
+
+=item Thread model must be parallel *
+
+For example L<nbdkit-curl-plugin(1)> only supports
+C<serialize_requests>, and so this filter cannot perform prefetches in
+parallel with the read requests.
+
+=item Only scans while clients are connected *
+
+The current filter only scans while there is at least one client
+connected.
+
+=item Only scans the default export *
+
+The current filter only scans the default export and ignores all
+clients connecting to the non-default export name.
+
+* We may be able to lift these restrictions in future.
+
+=item Underlying filters or plugin must support C<.cache> (prefetch)
+
+Very many plugins do not have the concept of prefetching and/or
+do not implement the C<.cache> callback, and so there is no
+way for this filter to issue prefetches.
+
+You can usually get around this by adding I<--filter=cache> after this
+filter as explained above.
+
+=item Prefetching the whole disk may load it all into cache
+
+In particular if you use this filter together with
+L<nbdkit-cache-filter(1)> or L<nbdkit-cow-filter(1)>, they will cache
+the whole content of the plugin into a temporary file. This may be
+many gigabytes of data, consuming all space in F</var/tmp>. Of course
+this is the whole point of using this filter, but you should be aware
+of it.
+
+If using the cache filter, the total size of the cache can be limited
+(see L<nbdkit-cache-filter(1)/CACHE MAXIMUM SIZE>).
+
+=back
+
+=head1 PARAMETERS
+
+=over 4
+
+=item B<scan-ahead=false>
+
+By default the filter tries to stay ahead of incoming read requests.
+That is to say, it starts prefetching at the beginning of the disk and
+continues incrementally, but if the client issues a read beyond the
+current prefetch point then the filter skips forward and begins
+prefetching after the read.
+
+However if you set this parameter to false, then this behaviour is
+disabled. The filter simply prefetches sequentially regardless of
+client requests.
+
+=item B<scan-clock=false>
+
+By default, if all clients disconnect and then another client
+connects, prefetching resumes at the same place in the disk. (Like
+stopping and starting a clock.)
+
+If you set this parameter to false, then the filter starts prefetching
+from the beginning of the disk again.
+
+=item B<scan-forever=true>
+
+By default the filter scans over the disk once and then stops.
+
+If you set this parameter to true, then after the disk has been
+prefetched completely, the filter goes back to the beginning and
+starts over, repeating this for as long as nbdkit is running and there
+are clients connected.
+
+=item B<scan-size=>NN
+
+This parameter controls the prefetch block size. The default is
+C<2M>. This must be a power of 2 between 512 and 32M. Most plugins have
+their own limits on the amount of data they can prefetch in one request.
+
+=back
+
+=head1 FILES
+
+=over 4
+
+=item F<$filterdir/nbdkit-scan-filter.so>
+
+The filter.
+
+Use C<nbdkit --dump-config> to find the location of C<$filterdir>.
+
+=back
+
+=head1 VERSION
+
+C<nbdkit-scan-filter> first appeared in nbdkit 1.32.
+
+=head1 SEE ALSO
+
+L<nbdkit(1)>,
+L<nbdkit-cache-filter(1)>,
+L<nbdkit-cow-filter(1)>,
+L<nbdkit-file-plugin(1)>,
+L<nbdkit-readahead-filter(1)>,
+L<nbdkit-retry-filter(1)>,
+L<nbdkit-torrent-plugin(1)>,
+L<nbdkit-vddk-plugin(1)>,
+L<nbdkit-filter(3)>,
+L<qemu-img(1)>.
+
+=head1 AUTHORS
+
+Richard W.M. Jones
+
+=head1 COPYRIGHT
+
+Copyright (C) 2019-2022 Red Hat Inc.
diff --git a/filters/scan/scan.c b/filters/scan/scan.c
new file mode 100644
index 00000000..ac5b18d2
--- /dev/null
+++ b/filters/scan/scan.c
@@ -0,0 +1,280 @@
+/* nbdkit
+ * Copyright (C) 2019-2022 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include <nbdkit-filter.h>
+
+#include "scan.h"
+
+#include "cleanup.h"
+#include "ispowerof2.h"
+#include "vector.h"
+
+static bool scan_ahead = true;
+bool scan_clock = true;
+bool scan_forever = false;
+unsigned scan_size = 2*1024*1024;
+
+static int thread_model = -1; /* Thread model of the underlying plugin. */
+
+/* Per-connection data. */
+struct scan_handle {
+ bool is_default_export; /* If exportname == "". */
+ bool running; /* True if background thread is running. */
+ pthread_t thread; /* The background thread, one per connection. */
+ struct bgthread_ctrl ctrl; /* Command queue and state handed to the thread. */
+};
+
+/* .config callback: consume the scan-* parameters and hand any other key
+ * to the next layer.  Returns 0 on success, -1 on error. */
+static int
+scan_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
+             const char *key, const char *value)
+{
+  bool *flag;
+  int64_t size;
+  int r;
+
+  if (strcmp (key, "scan-ahead") == 0)
+    flag = &scan_ahead;
+  else if (strcmp (key, "scan-clock") == 0)
+    flag = &scan_clock;
+  else if (strcmp (key, "scan-forever") == 0)
+    flag = &scan_forever;
+  else if (strcmp (key, "scan-size") == 0) {
+    /* Parse into 64 bits.  Assigning straight to the 'unsigned' scan_size
+     * truncated large values, so e.g. 4098M was silently accepted as 2M. */
+    size = nbdkit_parse_size (value);
+    if (size == -1)
+      return -1;
+    /* If it does not fit store 0, which scan_config_complete rejects. */
+    scan_size = (unsigned) size == size ? size : 0;
+    return 0;
+  }
+  else
+    return next (nxdata, key, value);
+
+  /* All remaining scan-* parameters are booleans. */
+  r = nbdkit_parse_bool (value);
+  if (r == -1)
+    return -1;
+  *flag = r;
+  return 0;
+}
+
+/* Validate scan-size once all parameters have been parsed. */
+static int
+scan_config_complete (nbdkit_next_config_complete *next, nbdkit_backend *nxdata)
+{
+  const bool in_range = scan_size >= 512 && scan_size <= 32*1024*1024;
+
+  if (in_range && is_power_of_2 (scan_size))
+    return next (nxdata);
+  nbdkit_error ("scan-size parameter should be [512..32M] "
+                "and a power of two");
+  return -1;
+}
+
+#define scan_config_help \
+ "scan-ahead=false Skip ahead when client reads faster.\n" \
+ "scan-clock=false Always start prefetching from beginning.\n" \
+ "scan-forever=true Scan in a loop while clients connected.\n" \
+ "scan-size=NN Set scan block size."
+
+/* We need to hook into .get_ready() so we can read the final thread
+ * model (of the whole server).  scan_prepare checks the saved value.
+ */
+static int
+scan_get_ready (int final_thread_model)
+{
+ thread_model = final_thread_model;
+ return 0;
+}
+
+/* Append cmd to the thread's queue under ctrl->lock; -1 if append fails. */
+static int
+send_command_to_background_thread (struct bgthread_ctrl *ctrl,
+                                   const struct command cmd)
+{
+  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&ctrl->lock);
+  const int r = command_queue_append (&ctrl->cmds, cmd);
+  /* Queue went from empty to one entry: wake a possible sleeper. */
+  if (r != -1 && ctrl->cmds.len == 1)
+    pthread_cond_signal (&ctrl->cond);
+  return r == -1 ? -1 : 0;
+}
+
+/* .open callback: open the next layer, then allocate the per-connection
+ * handle.  Returns the handle, or NULL on error. */
+static void *
+scan_open (nbdkit_next_open *next, nbdkit_context *nxdata,
+           int readonly, const char *exportname, int is_tls)
+{
+  struct scan_handle *h;
+
+  if (next (nxdata, readonly, exportname) == -1)
+    return NULL;
+  h = calloc (1, sizeof *h); /* zeroed, so h->running starts out false */
+  if (h == NULL) {
+    nbdkit_error ("calloc: %m");
+    return NULL;
+  }
+  h->is_default_export = strcmp (exportname, "") == 0;
+  return h;
+}
+
+/* In prepare we check if it's possible to support the scan filter on
+ * this connection (or print a warning), and start the background
+ * thread.  Returns 0 even when scanning is skipped; -1 only on error.
+ */
+static int
+scan_prepare (nbdkit_next *next, void *handle, int readonly)
+{
+ struct scan_handle *h = handle;
+ int r, err;
+
+ if (!h->is_default_export) {
+ nbdkit_error ("scan: warning: not the default export, not scanning");
+ return 0;
+ }
+
+ if (thread_model != NBDKIT_THREAD_MODEL_PARALLEL) {
+ nbdkit_error ("scan: warning: underlying plugin does not support "
+ "the PARALLEL thread model, not scanning");
+ return 0;
+ }
+
+ /* Call next->can_cache to read the underlying 'can_cache'. */
+ r = next->can_cache (next);
+ if (r == -1)
+ return -1;
+ if (r != NBDKIT_CACHE_NATIVE) {
+ nbdkit_error ("scan: warning: underlying plugin does not support "
+ "NBD_CMD_CACHE, not scanning; try adding --filter=cache "
+ "after this filter");
+ return 0;
+ }
+
+ /* Save the connection in the handle, for the background thread to use. */
+ h->ctrl.next = next;
+
+ /* Create the background thread. */
+ h->ctrl.cmds = (command_queue) empty_vector;
+ pthread_mutex_init (&h->ctrl.lock, NULL);
+ pthread_cond_init (&h->ctrl.cond, NULL);
+
+ err = pthread_create (&h->thread, NULL, scan_thread, &h->ctrl);
+ if (err != 0) {
+ errno = err; /* pthread_create returns the code; %m below reads errno. */
+ nbdkit_error ("pthread_create: %m");
+ pthread_cond_destroy (&h->ctrl.cond);
+ pthread_mutex_destroy (&h->ctrl.lock);
+ return -1;
+ }
+
+ h->running = true; /* Tells scan_finalize there is a thread to join. */
+
+ return 0;
+}
+
+/* Finalize cleans up the thread if it is running. */
+static int
+scan_finalize (nbdkit_next *next, void *handle)
+{
+ struct scan_handle *h = handle;
+ const struct command quit_cmd = { .type = CMD_QUIT };
+
+ if (!h->running)
+ return 0;
+ /* NOTE(review): result ignored; if queuing CMD_QUIT fails the join may block. */
+ send_command_to_background_thread (&h->ctrl, quit_cmd);
+ pthread_join (h->thread, NULL);
+ pthread_cond_destroy (&h->ctrl.cond);
+ pthread_mutex_destroy (&h->ctrl.lock);
+ command_queue_reset (&h->ctrl.cmds);
+ h->running = false;
+
+ return 0;
+}
+
+static void
+scan_close (void *handle)
+{
+ struct scan_handle *h = handle;
+
+ free (h); /* Handle was allocated with calloc in scan_open. */
+}
+
+/* Read data.  If scan-ahead is on and the thread is running, first tell it
+ * where this read ends; if that fails, return -1 with *err set. */
+static int
+scan_pread (nbdkit_next *next,
+            void *handle, void *buf, uint32_t count, uint64_t offset,
+            uint32_t flags, int *err)
+{
+  struct scan_handle *h = handle;
+
+  if (scan_ahead && h->running) {
+    const struct command cmd =
+      { .type = CMD_NOTIFY_PREAD, .offset = offset + count };
+    if (send_command_to_background_thread (&h->ctrl, cmd) == -1) {
+      *err = errno ? errno : ENOMEM; /* callers need a non-zero *err */
+      return -1;
+    }
+  }
+  return next->pread (next, buf, count, offset, flags, err);
+}
+
+static struct nbdkit_filter filter = {
+ .name = "scan",
+ .longname = "nbdkit scan filter",
+ .get_ready = scan_get_ready,
+ .config = scan_config,
+ .config_complete = scan_config_complete,
+ .config_help = scan_config_help,
+ .open = scan_open,
+ .prepare = scan_prepare, /* Starts the background thread. */
+ .finalize = scan_finalize, /* Stops and joins it. */
+ .close = scan_close,
+ .pread = scan_pread, /* Hooked to notify the thread of client reads. */
+};
+
+NBDKIT_REGISTER_FILTER(filter)
diff --git a/filters/scan/scan.h b/filters/scan/scan.h
new file mode 100644
index 00000000..7ff39310
--- /dev/null
+++ b/filters/scan/scan.h
@@ -0,0 +1,64 @@
+/* nbdkit
+ * Copyright (C) 2019-2022 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_SCAN_H
+#define NBDKIT_SCAN_H
+
+#include <stdbool.h>
+#include <pthread.h>
+
+#include <nbdkit-filter.h>
+
+#include "vector.h"
+
+extern bool scan_clock;
+extern bool scan_forever;
+extern unsigned scan_size;
+
+/* List of commands issued to the background thread. */
+struct command {
+ enum { CMD_QUIT, CMD_NOTIFY_PREAD } type;
+ uint64_t offset; /* CMD_NOTIFY_PREAD only: end offset of the client read. */
+};
+DEFINE_VECTOR_TYPE(command_queue, struct command);
+
+struct bgthread_ctrl {
+ command_queue cmds; /* Command queue. */
+ pthread_mutex_t lock; /* Lock for queue. */
+ pthread_cond_t cond; /* Condition queue size 0 -> 1. NOTE(review): never waited on? */
+ nbdkit_next *next; /* For sending cache operations. */
+};
+
+/* Start background thread (one per connection). */
+extern void *scan_thread (void *vp);
+
+#endif /* NBDKIT_SCAN_H */
diff --git a/plugins/ssh/nbdkit-ssh-plugin.pod b/plugins/ssh/nbdkit-ssh-plugin.pod
index 2bc2c4a7..214957d6 100644
--- a/plugins/ssh/nbdkit-ssh-plugin.pod
+++ b/plugins/ssh/nbdkit-ssh-plugin.pod
@@ -349,6 +349,7 @@ L<nbdkit-curl-plugin(1)>,
L<nbdkit-extentlist-filter(1)>,
L<nbdkit-readahead-filter(1)>,
L<nbdkit-retry-filter(1)>,
+L<nbdkit-scan-filter(1)>,
L<nbdkit-plugin(3)>,
L<ssh(1)>,
L<ssh-agent(1)>,
diff --git a/plugins/torrent/nbdkit-torrent-plugin.pod b/plugins/torrent/nbdkit-torrent-plugin.pod
index 196ce4e9..f09ac3d2 100644
--- a/plugins/torrent/nbdkit-torrent-plugin.pod
+++ b/plugins/torrent/nbdkit-torrent-plugin.pod
@@ -175,6 +175,7 @@ L<nbdkit-curl-plugin(1)>,
L<nbdkit-file-plugin(1)>,
L<nbdkit-iso-plugin(1)>,
L<nbdkit-readahead-filter(1)>,
+L<nbdkit-scan-filter(1)>,
L<transmission-show(1)>,
L<https://en.wikipedia.org/wiki/BitTorrent>,
L<http://libtorrent.org/>.
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index ea5899dc..3991e86b 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -733,6 +733,7 @@ L<nbdkit-plugin(3)>,
L<nbdkit-blocksize-filter(1)>,
L<nbdkit-readahead-filter(1)>,
L<nbdkit-retry-filter(1)>,
+L<nbdkit-scan-filter(1)>,
L<virsh(1)>,
L<https://libvirt.org/drvesx.html>,
L<https://www.vmware.com/support/developer/vddk/>,
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5585b3b7..799aa6c2 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1754,6 +1754,16 @@ test_retry_request_mirror_LDADD = \
$(LIBNBD_LIBS) \
$(NULL)
+# scan filter test.
+TESTS += \
+ test-scan-copy.sh \
+ test-scan-info.sh \
+ $(NULL)
+EXTRA_DIST += \
+ test-scan-copy.sh \
+ test-scan-info.sh \
+ $(NULL)
+
# swab filter test.
TESTS += \
test-swab-8.sh \
diff --git a/tests/test-scan-copy.sh b/tests/test-scan-copy.sh
new file mode 100755
index 00000000..227ad7b2
--- /dev/null
+++ b/tests/test-scan-copy.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2022 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires nbdcopy --version
+requires_plugin sparse-random
+requires_filter scan
+# Copy each export onto itself through the scan filter, at two disk sizes.
+nbdkit -fv -U - sparse-random 1M --filter=scan --run 'nbdcopy "$uri" "$uri"'
+nbdkit -fv -U - sparse-random 1G --filter=scan --run 'nbdcopy "$uri" "$uri"'
diff --git a/tests/test-scan-info.sh b/tests/test-scan-info.sh
new file mode 100755
index 00000000..6b109ca8
--- /dev/null
+++ b/tests/test-scan-info.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2022 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires nbdinfo --version
+requires_filter scan
+
+# We're just testing that there are no problematic races with the
+# background thread.  The last run passes -e test as the export name.
+# "$uri" is quoted so the URI is never word-split or glob-expanded.
+nbdkit -fv -U - memory 1 --filter=scan --run 'nbdinfo "$uri"'
+nbdkit -fv -U - memory 1M --filter=scan --run 'nbdinfo "$uri"'
+nbdkit -fv -U - memory 1G --filter=scan --run 'nbdinfo "$uri"'
+nbdkit -fv -U - memory 1G --filter=scan -e test --run 'nbdinfo "$uri"'
--
2.31.1