|
|
f954f3 |
From b5dc8577c5c6d1205e2106b629fad327c3a409ea Mon Sep 17 00:00:00 2001
|
|
|
f954f3 |
From: "Richard W.M. Jones" <rjones@redhat.com>
|
|
|
f954f3 |
Date: Mon, 26 Jul 2021 13:55:21 +0100
|
|
|
f954f3 |
Subject: [PATCH] cache, cow: Add blk_read_multiple function
|
|
|
f954f3 |
|
|
|
f954f3 |
Currently the cache and cow filters break up large requests into many
|
|
|
f954f3 |
single block-sized requests to the underlying plugin. For some
|
|
|
f954f3 |
plugins (eg. curl) this is very inefficient and causes huge
|
|
|
f954f3 |
slow-downs.
|
|
|
f954f3 |
|
|
|
f954f3 |
For example I tested nbdkit + curl vs nbdkit + cache + curl against a
|
|
|
f954f3 |
slow, remote VMware server. A simple run of virt-inspector was at
|
|
|
f954f3 |
least 6-7 times slower with the cache filter. (It was so slow that I
|
|
|
f954f3 |
didn't actually let it run to completion - I am estimating the
|
|
|
f954f3 |
slowdown multiple using interim debug messages).
|
|
|
f954f3 |
|
|
|
f954f3 |
Implement a new blk_read_multiple function in the cache filter. It
|
|
|
f954f3 |
does not break up "runs" of blocks which all have the same cache
|
|
|
f954f3 |
state. The cache .pread method uses the new function to read the
|
|
|
f954f3 |
block-aligned part of the request.
|
|
|
f954f3 |
|
|
|
f954f3 |
(cherry picked from commit ab661ccef5b3369fa22c33d0289baddc251b73bf)
|
|
|
f954f3 |
---
|
|
|
f954f3 |
filters/cache/blk.c | 83 ++++++++++++++++++++++++++++++++-----------
|
|
|
f954f3 |
filters/cache/blk.h | 6 ++++
|
|
|
f954f3 |
filters/cache/cache.c | 21 +++++------
|
|
|
f954f3 |
filters/cow/blk.c | 63 +++++++++++++++++++++++---------
|
|
|
f954f3 |
filters/cow/blk.h | 6 ++++
|
|
|
f954f3 |
filters/cow/cow.c | 21 +++++------
|
|
|
f954f3 |
6 files changed, 138 insertions(+), 62 deletions(-)
|
|
|
f954f3 |
|
|
|
f954f3 |
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
|
|
|
f954f3 |
index f52f30e3..f85ada35 100644
|
|
|
f954f3 |
--- a/filters/cache/blk.c
|
|
|
f954f3 |
+++ b/filters/cache/blk.c
|
|
|
f954f3 |
@@ -44,6 +44,7 @@
|
|
|
f954f3 |
#include <string.h>
|
|
|
f954f3 |
#include <unistd.h>
|
|
|
f954f3 |
#include <fcntl.h>
|
|
|
f954f3 |
+#include <limits.h>
|
|
|
f954f3 |
#include <errno.h>
|
|
|
f954f3 |
|
|
|
f954f3 |
#ifdef HAVE_SYS_STATVFS_H
|
|
|
f954f3 |
@@ -193,26 +194,40 @@ blk_set_size (uint64_t new_size)
|
|
|
f954f3 |
return 0;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
-int
|
|
|
f954f3 |
-blk_read (nbdkit_next *next,
|
|
|
f954f3 |
- uint64_t blknum, uint8_t *block, int *err)
|
|
|
f954f3 |
+static int
|
|
|
f954f3 |
+_blk_read_multiple (nbdkit_next *next,
|
|
|
f954f3 |
+ uint64_t blknum, uint64_t nrblocks,
|
|
|
f954f3 |
+ uint8_t *block, int *err)
|
|
|
f954f3 |
{
|
|
|
f954f3 |
off_t offset = blknum * blksize;
|
|
|
f954f3 |
- enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED);
|
|
|
f954f3 |
+ bool not_cached =
|
|
|
f954f3 |
+ bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
|
|
|
f954f3 |
+ uint64_t b, runblocks;
|
|
|
f954f3 |
|
|
|
f954f3 |
reclaim (fd, &bm);
|
|
|
f954f3 |
+ assert (nrblocks > 0);
|
|
|
f954f3 |
|
|
|
f954f3 |
if (cache_debug_verbose)
|
|
|
f954f3 |
- nbdkit_debug ("cache: blk_read block %" PRIu64
|
|
|
f954f3 |
+ nbdkit_debug ("cache: blk_read_multiple block %" PRIu64
|
|
|
f954f3 |
" (offset %" PRIu64 ") is %s",
|
|
|
f954f3 |
blknum, (uint64_t) offset,
|
|
|
f954f3 |
- state == BLOCK_NOT_CACHED ? "not cached" :
|
|
|
f954f3 |
- state == BLOCK_CLEAN ? "clean" :
|
|
|
f954f3 |
- state == BLOCK_DIRTY ? "dirty" :
|
|
|
f954f3 |
- "unknown");
|
|
|
f954f3 |
+ not_cached ? "not cached" : "cached");
|
|
|
f954f3 |
|
|
|
f954f3 |
- if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */
|
|
|
f954f3 |
- unsigned n = blksize, tail = 0;
|
|
|
f954f3 |
+ /* Find out how many of the following blocks form a "run" with the
|
|
|
f954f3 |
+ * same cached/not-cached state. We can process that many blocks in
|
|
|
f954f3 |
+ * one go.
|
|
|
f954f3 |
+ */
|
|
|
f954f3 |
+ for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
|
|
|
f954f3 |
+ bool s =
|
|
|
f954f3 |
+ bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
|
|
|
f954f3 |
+ if (not_cached != s)
|
|
|
f954f3 |
+ break;
|
|
|
f954f3 |
+ }
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+ if (not_cached) { /* Read underlying plugin. */
|
|
|
f954f3 |
+ unsigned n, tail = 0;
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+ assert (blksize * runblocks <= UINT_MAX);
|
|
|
f954f3 |
+ n = blksize * runblocks;
|
|
|
f954f3 |
|
|
|
f954f3 |
if (offset + n > size) {
|
|
|
f954f3 |
tail = offset + n - size;
|
|
|
f954f3 |
@@ -228,32 +243,60 @@ blk_read (nbdkit_next *next,
|
|
|
f954f3 |
*/
|
|
|
f954f3 |
memset (block + n, 0, tail);
|
|
|
f954f3 |
|
|
|
f954f3 |
- /* If cache-on-read, copy the block to the cache. */
|
|
|
f954f3 |
+ /* If cache-on-read, copy the blocks to the cache. */
|
|
|
f954f3 |
if (cache_on_read) {
|
|
|
f954f3 |
if (cache_debug_verbose)
|
|
|
f954f3 |
nbdkit_debug ("cache: cache-on-read block %" PRIu64
|
|
|
f954f3 |
" (offset %" PRIu64 ")",
|
|
|
f954f3 |
blknum, (uint64_t) offset);
|
|
|
f954f3 |
|
|
|
f954f3 |
- if (pwrite (fd, block, blksize, offset) == -1) {
|
|
|
f954f3 |
+ if (pwrite (fd, block, blksize * runblocks, offset) == -1) {
|
|
|
f954f3 |
*err = errno;
|
|
|
f954f3 |
nbdkit_error ("pwrite: %m");
|
|
|
f954f3 |
return -1;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
- bitmap_set_blk (&bm, blknum, BLOCK_CLEAN);
|
|
|
f954f3 |
- lru_set_recently_accessed (blknum);
|
|
|
f954f3 |
+ for (b = 0; b < runblocks; ++b) {
|
|
|
f954f3 |
+ bitmap_set_blk (&bm, blknum + b, BLOCK_CLEAN);
|
|
|
f954f3 |
+ lru_set_recently_accessed (blknum + b);
|
|
|
f954f3 |
+ }
|
|
|
f954f3 |
}
|
|
|
f954f3 |
- return 0;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
else { /* Read cache. */
|
|
|
f954f3 |
- if (pread (fd, block, blksize, offset) == -1) {
|
|
|
f954f3 |
+ if (pread (fd, block, blksize * runblocks, offset) == -1) {
|
|
|
f954f3 |
*err = errno;
|
|
|
f954f3 |
nbdkit_error ("pread: %m");
|
|
|
f954f3 |
return -1;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
- lru_set_recently_accessed (blknum);
|
|
|
f954f3 |
- return 0;
|
|
|
f954f3 |
+ for (b = 0; b < runblocks; ++b)
|
|
|
f954f3 |
+ lru_set_recently_accessed (blknum + b);
|
|
|
f954f3 |
}
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+ /* If all done, return. */
|
|
|
f954f3 |
+ if (runblocks == nrblocks)
|
|
|
f954f3 |
+ return 0;
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+ /* Recurse to read remaining blocks. */
|
|
|
f954f3 |
+ return _blk_read_multiple (next,
|
|
|
f954f3 |
+ blknum + runblocks,
|
|
|
f954f3 |
+ nrblocks - runblocks,
|
|
|
f954f3 |
+ block + blksize * runblocks,
|
|
|
f954f3 |
+ err);
|
|
|
f954f3 |
+}
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+int
|
|
|
f954f3 |
+blk_read_multiple (nbdkit_next *next,
|
|
|
f954f3 |
+ uint64_t blknum, uint64_t nrblocks,
|
|
|
f954f3 |
+ uint8_t *block, int *err)
|
|
|
f954f3 |
+{
|
|
|
f954f3 |
reclaim (fd, &bm);
|
|
|
f954f3 |
+ return _blk_read_multiple (next, blknum, nrblocks, block, err);
|
|
|
f954f3 |
+}
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+int
|
|
|
f954f3 |
+blk_read (nbdkit_next *next,
|
|
|
f954f3 |
+ uint64_t blknum, uint8_t *block, int *err)
|
|
|
f954f3 |
+{
|
|
|
f954f3 |
+ return blk_read_multiple (next, blknum, 1, block, err);
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
int
|
|
|
f954f3 |
diff --git a/filters/cache/blk.h b/filters/cache/blk.h
|
|
|
f954f3 |
index 87c753e2..1ee33ed7 100644
|
|
|
f954f3 |
--- a/filters/cache/blk.h
|
|
|
f954f3 |
+++ b/filters/cache/blk.h
|
|
|
f954f3 |
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
|
|
|
f954f3 |
uint64_t blknum, uint8_t *block, int *err)
|
|
|
f954f3 |
__attribute__((__nonnull__ (1, 3, 4)));
|
|
|
f954f3 |
|
|
|
f954f3 |
+/* As above, but read multiple blocks. */
|
|
|
f954f3 |
+extern int blk_read_multiple (nbdkit_next *next,
|
|
|
f954f3 |
+ uint64_t blknum, uint64_t nrblocks,
|
|
|
f954f3 |
+ uint8_t *block, int *err)
|
|
|
f954f3 |
+ __attribute__((__nonnull__ (1, 4, 5)));
|
|
|
f954f3 |
+
|
|
|
f954f3 |
/* If a single block is not cached, copy it from the plugin. */
|
|
|
f954f3 |
extern int blk_cache (nbdkit_next *next,
|
|
|
f954f3 |
uint64_t blknum, uint8_t *block, int *err)
|
|
|
f954f3 |
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
|
|
|
f954f3 |
index 745f552d..14cc03f2 100644
|
|
|
f954f3 |
--- a/filters/cache/cache.c
|
|
|
f954f3 |
+++ b/filters/cache/cache.c
|
|
|
f954f3 |
@@ -313,7 +313,7 @@ cache_pread (nbdkit_next *next,
|
|
|
f954f3 |
uint32_t flags, int *err)
|
|
|
f954f3 |
{
|
|
|
f954f3 |
CLEANUP_FREE uint8_t *block = NULL;
|
|
|
f954f3 |
- uint64_t blknum, blkoffs;
|
|
|
f954f3 |
+ uint64_t blknum, blkoffs, nrblocks;
|
|
|
f954f3 |
int r;
|
|
|
f954f3 |
|
|
|
f954f3 |
assert (!flags);
|
|
|
f954f3 |
@@ -348,22 +348,17 @@ cache_pread (nbdkit_next *next,
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
/* Aligned body */
|
|
|
f954f3 |
- /* XXX This breaks up large read requests into smaller ones, which
|
|
|
f954f3 |
- * is a problem for plugins which have a large, fixed per-request
|
|
|
f954f3 |
- * overhead (hello, curl). We should try to keep large requests
|
|
|
f954f3 |
- * together as much as possible, but that requires us to be much
|
|
|
f954f3 |
- * smarter here.
|
|
|
f954f3 |
- */
|
|
|
f954f3 |
- while (count >= blksize) {
|
|
|
f954f3 |
+ nrblocks = count / blksize;
|
|
|
f954f3 |
+ if (nrblocks > 0) {
|
|
|
f954f3 |
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
|
|
f954f3 |
- r = blk_read (next, blknum, buf, err);
|
|
|
f954f3 |
+ r = blk_read_multiple (next, blknum, nrblocks, buf, err);
|
|
|
f954f3 |
if (r == -1)
|
|
|
f954f3 |
return -1;
|
|
|
f954f3 |
|
|
|
f954f3 |
- buf += blksize;
|
|
|
f954f3 |
- count -= blksize;
|
|
|
f954f3 |
- offset += blksize;
|
|
|
f954f3 |
- blknum++;
|
|
|
f954f3 |
+ buf += nrblocks * blksize;
|
|
|
f954f3 |
+ count -= nrblocks * blksize;
|
|
|
f954f3 |
+ offset += nrblocks * blksize;
|
|
|
f954f3 |
+ blknum += nrblocks;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
/* Unaligned tail */
|
|
|
f954f3 |
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
|
|
|
f954f3 |
index b7c4d7f1..4ec8d1b8 100644
|
|
|
f954f3 |
--- a/filters/cow/blk.c
|
|
|
f954f3 |
+++ b/filters/cow/blk.c
|
|
|
f954f3 |
@@ -79,6 +79,7 @@
|
|
|
f954f3 |
#include <inttypes.h>
|
|
|
f954f3 |
#include <unistd.h>
|
|
|
f954f3 |
#include <fcntl.h>
|
|
|
f954f3 |
+#include <limits.h>
|
|
|
f954f3 |
#include <errno.h>
|
|
|
f954f3 |
#include <sys/types.h>
|
|
|
f954f3 |
|
|
|
f954f3 |
@@ -219,33 +220,48 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
|
|
|
f954f3 |
*trimmed = state == BLOCK_TRIMMED;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
-/* These are the block operations. They always read or write a single
|
|
|
f954f3 |
- * whole block of size ‘blksize’.
|
|
|
f954f3 |
+/* These are the block operations. They always read or write whole
|
|
|
f954f3 |
+ * blocks of size ‘blksize’.
|
|
|
f954f3 |
*/
|
|
|
f954f3 |
int
|
|
|
f954f3 |
-blk_read (nbdkit_next *next,
|
|
|
f954f3 |
- uint64_t blknum, uint8_t *block, int *err)
|
|
|
f954f3 |
+blk_read_multiple (nbdkit_next *next,
|
|
|
f954f3 |
+ uint64_t blknum, uint64_t nrblocks,
|
|
|
f954f3 |
+ uint8_t *block, int *err)
|
|
|
f954f3 |
{
|
|
|
f954f3 |
off_t offset = blknum * BLKSIZE;
|
|
|
f954f3 |
enum bm_entry state;
|
|
|
f954f3 |
+ uint64_t b, runblocks;
|
|
|
f954f3 |
|
|
|
f954f3 |
- /* The state might be modified from another thread - for example
|
|
|
f954f3 |
- * another thread might write (BLOCK_NOT_ALLOCATED ->
|
|
|
f954f3 |
- * BLOCK_ALLOCATED) while we are reading from the plugin, returning
|
|
|
f954f3 |
- * the old data. However a read issued after the write returns
|
|
|
f954f3 |
- * should always return the correct data.
|
|
|
f954f3 |
+ /* Find out how many of the following blocks form a "run" with the
|
|
|
f954f3 |
+ * same state. We can process that many blocks in one go.
|
|
|
f954f3 |
+ *
|
|
|
f954f3 |
+ * About the locking: The state might be modified from another
|
|
|
f954f3 |
+ * thread - for example another thread might write
|
|
|
f954f3 |
+ * (BLOCK_NOT_ALLOCATED -> BLOCK_ALLOCATED) while we are reading
|
|
|
f954f3 |
+ * from the plugin, returning the old data. However a read issued
|
|
|
f954f3 |
+ * after the write returns should always return the correct data.
|
|
|
f954f3 |
*/
|
|
|
f954f3 |
{
|
|
|
f954f3 |
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
|
|
f954f3 |
state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+ for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
|
|
|
f954f3 |
+ enum bm_entry s = bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_ALLOCATED);
|
|
|
f954f3 |
+ if (state != s)
|
|
|
f954f3 |
+ break;
|
|
|
f954f3 |
+ }
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
if (cow_debug_verbose)
|
|
|
f954f3 |
- nbdkit_debug ("cow: blk_read block %" PRIu64 " (offset %" PRIu64 ") is %s",
|
|
|
f954f3 |
+ nbdkit_debug ("cow: blk_read_multiple block %" PRIu64
|
|
|
f954f3 |
+ " (offset %" PRIu64 ") is %s",
|
|
|
f954f3 |
blknum, (uint64_t) offset, state_to_string (state));
|
|
|
f954f3 |
|
|
|
f954f3 |
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
|
|
|
f954f3 |
- unsigned n = BLKSIZE, tail = 0;
|
|
|
f954f3 |
+ unsigned n, tail = 0;
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+ assert (BLKSIZE * runblocks <= UINT_MAX);
|
|
|
f954f3 |
+ n = BLKSIZE * runblocks;
|
|
|
f954f3 |
|
|
|
f954f3 |
if (offset + n > size) {
|
|
|
f954f3 |
tail = offset + n - size;
|
|
|
f954f3 |
@@ -260,20 +276,35 @@ blk_read (nbdkit_next *next,
|
|
|
f954f3 |
* zeroing the tail.
|
|
|
f954f3 |
*/
|
|
|
f954f3 |
memset (block + n, 0, tail);
|
|
|
f954f3 |
- return 0;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
|
|
|
f954f3 |
- if (pread (fd, block, BLKSIZE, offset) == -1) {
|
|
|
f954f3 |
+ if (pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
|
|
f954f3 |
*err = errno;
|
|
|
f954f3 |
nbdkit_error ("pread: %m");
|
|
|
f954f3 |
return -1;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
- return 0;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
else /* state == BLOCK_TRIMMED */ {
|
|
|
f954f3 |
- memset (block, 0, BLKSIZE);
|
|
|
f954f3 |
- return 0;
|
|
|
f954f3 |
+ memset (block, 0, BLKSIZE * runblocks);
|
|
|
f954f3 |
}
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+ /* If all done, return. */
|
|
|
f954f3 |
+ if (runblocks == nrblocks)
|
|
|
f954f3 |
+ return 0;
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+ /* Recurse to read remaining blocks. */
|
|
|
f954f3 |
+ return blk_read_multiple (next,
|
|
|
f954f3 |
+ blknum + runblocks,
|
|
|
f954f3 |
+ nrblocks - runblocks,
|
|
|
f954f3 |
+ block + BLKSIZE * runblocks,
|
|
|
f954f3 |
+ err);
|
|
|
f954f3 |
+}
|
|
|
f954f3 |
+
|
|
|
f954f3 |
+int
|
|
|
f954f3 |
+blk_read (nbdkit_next *next,
|
|
|
f954f3 |
+ uint64_t blknum, uint8_t *block, int *err)
|
|
|
f954f3 |
+{
|
|
|
f954f3 |
+ return blk_read_multiple (next, blknum, 1, block, err);
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
int
|
|
|
f954f3 |
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
|
|
|
f954f3 |
index e6fd7417..b066c602 100644
|
|
|
f954f3 |
--- a/filters/cow/blk.h
|
|
|
f954f3 |
+++ b/filters/cow/blk.h
|
|
|
f954f3 |
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
|
|
|
f954f3 |
uint64_t blknum, uint8_t *block, int *err)
|
|
|
f954f3 |
__attribute__((__nonnull__ (1, 3, 4)));
|
|
|
f954f3 |
|
|
|
f954f3 |
+/* Read multiple blocks from the overlay or plugin. */
|
|
|
f954f3 |
+extern int blk_read_multiple (nbdkit_next *next,
|
|
|
f954f3 |
+ uint64_t blknum, uint64_t nrblocks,
|
|
|
f954f3 |
+ uint8_t *block, int *err)
|
|
|
f954f3 |
+ __attribute__((__nonnull__ (1, 4, 5)));
|
|
|
f954f3 |
+
|
|
|
f954f3 |
/* Cache mode for blocks not already in overlay */
|
|
|
f954f3 |
enum cache_mode {
|
|
|
f954f3 |
BLK_CACHE_IGNORE, /* Do nothing */
|
|
|
f954f3 |
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
|
|
|
f954f3 |
index f30b7505..78daca22 100644
|
|
|
f954f3 |
--- a/filters/cow/cow.c
|
|
|
f954f3 |
+++ b/filters/cow/cow.c
|
|
|
f954f3 |
@@ -210,7 +210,7 @@ cow_pread (nbdkit_next *next,
|
|
|
f954f3 |
uint32_t flags, int *err)
|
|
|
f954f3 |
{
|
|
|
f954f3 |
CLEANUP_FREE uint8_t *block = NULL;
|
|
|
f954f3 |
- uint64_t blknum, blkoffs;
|
|
|
f954f3 |
+ uint64_t blknum, blkoffs, nrblocks;
|
|
|
f954f3 |
int r;
|
|
|
f954f3 |
|
|
|
f954f3 |
if (!IS_ALIGNED (count | offset, BLKSIZE)) {
|
|
|
f954f3 |
@@ -243,21 +243,16 @@ cow_pread (nbdkit_next *next,
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
/* Aligned body */
|
|
|
f954f3 |
- /* XXX This breaks up large read requests into smaller ones, which
|
|
|
f954f3 |
- * is a problem for plugins which have a large, fixed per-request
|
|
|
f954f3 |
- * overhead (hello, curl). We should try to keep large requests
|
|
|
f954f3 |
- * together as much as possible, but that requires us to be much
|
|
|
f954f3 |
- * smarter here.
|
|
|
f954f3 |
- */
|
|
|
f954f3 |
- while (count >= BLKSIZE) {
|
|
|
f954f3 |
- r = blk_read (next, blknum, buf, err);
|
|
|
f954f3 |
+ nrblocks = count / BLKSIZE;
|
|
|
f954f3 |
+ if (nrblocks > 0) {
|
|
|
f954f3 |
+ r = blk_read_multiple (next, blknum, nrblocks, buf, err);
|
|
|
f954f3 |
if (r == -1)
|
|
|
f954f3 |
return -1;
|
|
|
f954f3 |
|
|
|
f954f3 |
- buf += BLKSIZE;
|
|
|
f954f3 |
- count -= BLKSIZE;
|
|
|
f954f3 |
- offset += BLKSIZE;
|
|
|
f954f3 |
- blknum++;
|
|
|
f954f3 |
+ buf += nrblocks * BLKSIZE;
|
|
|
f954f3 |
+ count -= nrblocks * BLKSIZE;
|
|
|
f954f3 |
+ offset += nrblocks * BLKSIZE;
|
|
|
f954f3 |
+ blknum += nrblocks;
|
|
|
f954f3 |
}
|
|
|
f954f3 |
|
|
|
f954f3 |
/* Unaligned tail */
|
|
|
f954f3 |
--
|
|
|
f954f3 |
2.31.1
|
|
|
f954f3 |
|