97168e
From 192f956f2b0761f270070555f8feb1f0544e5558 Mon Sep 17 00:00:00 2001
97168e
From: Hanna Reitz <hreitz@redhat.com>
97168e
Date: Wed, 9 Nov 2022 17:54:48 +0100
97168e
Subject: [PATCH 01/11] block/mirror: Do not wait for active writes
97168e
97168e
RH-Author: Hanna Czenczek <hreitz@redhat.com>
97168e
RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load
97168e
RH-Bugzilla: 2125119
97168e
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
97168e
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
97168e
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
97168e
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
97168e
RH-Commit: [1/3] 652d1e55b954f13eaec2c86f58735d4942837e16
97168e
97168e
Waiting for all active writes to settle before daring to create a
97168e
background copying operation means that we will never do background
97168e
operations while the guest does anything (in write-blocking mode), and
97168e
therefore cannot converge.  Yes, we also will not diverge, but actually
97168e
converging would be even nicer.
97168e
97168e
It is unclear why we did decide to wait for all active writes to settle
97168e
before creating a background operation, but it just does not seem
97168e
necessary.  Active writes will put themselves into the in_flight bitmap
97168e
and thus properly block actually conflicting background requests.
97168e
97168e
It is important for active requests to wait on overlapping background
97168e
requests, which we do in active_write_prepare().  However, so far it was
97168e
not documented why it is important.  Add such documentation now, and
97168e
also to the other call of mirror_wait_on_conflicts(), so that it becomes
97168e
more clear why and when requests need to actively wait for other
97168e
requests to settle.
97168e
97168e
Another thing to note is that of course we need to ensure that there are
97168e
no active requests when the job completes, but that is done by virtue of
97168e
the BDS being drained anyway, so there cannot be any active requests at
97168e
that point.
97168e
97168e
With this change, we will need to explicitly keep track of how many
97168e
bytes are in flight in active requests so that
97168e
job_progress_set_remaining() in mirror_run() can set the correct number
97168e
of remaining bytes.
97168e
97168e
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2123297
97168e
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
97168e
Message-Id: <20221109165452.67927-2-hreitz@redhat.com>
97168e
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
97168e
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
97168e
(cherry picked from commit d69a879bdf1aed586478eaa161ee064fe1b92f1a)
97168e
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
97168e
---
97168e
 block/mirror.c | 37 ++++++++++++++++++++++++++++++-------
97168e
 1 file changed, 30 insertions(+), 7 deletions(-)
97168e
97168e
diff --git a/block/mirror.c b/block/mirror.c
97168e
index efec2c7674..282f428cb7 100644
97168e
--- a/block/mirror.c
97168e
+++ b/block/mirror.c
97168e
@@ -81,6 +81,7 @@ typedef struct MirrorBlockJob {
97168e
     int max_iov;
97168e
     bool initial_zeroing_ongoing;
97168e
     int in_active_write_counter;
97168e
+    int64_t active_write_bytes_in_flight;
97168e
     bool prepared;
97168e
     bool in_drain;
97168e
 } MirrorBlockJob;
97168e
@@ -493,6 +494,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
97168e
     }
97168e
     bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
97168e
 
97168e
+    /*
97168e
+     * Wait for concurrent requests to @offset.  The next loop will limit the
97168e
+     * copied area based on in_flight_bitmap so we only copy an area that does
97168e
+     * not overlap with concurrent in-flight requests.  Still, we would like to
97168e
+     * copy something, so wait until there are at least no more requests to the
97168e
+     * very beginning of the area.
97168e
+     */
97168e
     mirror_wait_on_conflicts(NULL, s, offset, 1);
97168e
 
97168e
     job_pause_point(&s->common.job);
97168e
@@ -993,12 +1001,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
97168e
         int64_t cnt, delta;
97168e
         bool should_complete;
97168e
 
97168e
-        /* Do not start passive operations while there are active
97168e
-         * writes in progress */
97168e
-        while (s->in_active_write_counter) {
97168e
-            mirror_wait_for_any_operation(s, true);
97168e
-        }
97168e
-
97168e
         if (s->ret < 0) {
97168e
             ret = s->ret;
97168e
             goto immediate_exit;
97168e
@@ -1015,7 +1017,9 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
97168e
         /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
97168e
          * the number of bytes currently being processed; together those are
97168e
          * the current remaining operation length */
97168e
-        job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt);
97168e
+        job_progress_set_remaining(&s->common.job,
97168e
+                                   s->bytes_in_flight + cnt +
97168e
+                                   s->active_write_bytes_in_flight);
97168e
 
97168e
         /* Note that even when no rate limit is applied we need to yield
97168e
          * periodically with no pending I/O so that bdrv_drain_all() returns.
97168e
@@ -1073,6 +1077,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
97168e
 
97168e
             s->in_drain = true;
97168e
             bdrv_drained_begin(bs);
97168e
+
97168e
+            /* Must be zero because we are drained */
97168e
+            assert(s->in_active_write_counter == 0);
97168e
+
97168e
             cnt = bdrv_get_dirty_count(s->dirty_bitmap);
97168e
             if (cnt > 0 || mirror_flush(s) < 0) {
97168e
                 bdrv_drained_end(bs);
97168e
@@ -1306,6 +1314,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
97168e
     }
97168e
 
97168e
     job_progress_increase_remaining(&job->common.job, bytes);
97168e
+    job->active_write_bytes_in_flight += bytes;
97168e
 
97168e
     switch (method) {
97168e
     case MIRROR_METHOD_COPY:
97168e
@@ -1327,6 +1336,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
97168e
         abort();
97168e
     }
97168e
 
97168e
+    job->active_write_bytes_in_flight -= bytes;
97168e
     if (ret >= 0) {
97168e
         job_progress_update(&job->common.job, bytes);
97168e
     } else {
97168e
@@ -1375,6 +1385,19 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
97168e
 
97168e
     s->in_active_write_counter++;
97168e
 
97168e
+    /*
97168e
+     * Wait for concurrent requests affecting the area.  If there are already
97168e
+     * running requests that are copying off now-to-be stale data in the area,
97168e
+     * we must wait for them to finish before we begin writing fresh data to the
97168e
+     * target so that the write operations appear in the correct order.
97168e
+     * Note that background requests (see mirror_iteration()) in contrast only
97168e
+     * wait for conflicting requests at the start of the dirty area, and then
97168e
+     * (based on the in_flight_bitmap) truncate the area to copy so it will not
97168e
+     * conflict with any requests beyond that.  For active writes, however, we
97168e
+     * cannot truncate that area.  The request from our parent must be blocked
97168e
+     * until the area is copied in full.  Therefore, we must wait for the whole
97168e
+     * area to become free of concurrent requests.
97168e
+     */
97168e
     mirror_wait_on_conflicts(op, s, offset, bytes);
97168e
 
97168e
     bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
97168e
-- 
97168e
2.37.3
97168e