Blob Blame Raw
From 045f704f819575af1bf69b59ffe73db5ba0cf19b Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 4 Dec 2013 16:43:44 +0100
Subject: [PATCH 20/37] block: Generalise and optimise COR serialisation

Message-id: <1392117622-28812-21-git-send-email-kwolf@redhat.com>
Patchwork-id: 57185
O-Subject: [RHEL-7.0 qemu-kvm PATCH v2 20/37] block: Generalise and optimise COR serialisation
Bugzilla: 748906
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Max Reitz <mreitz@redhat.com>

Change the API so that specific requests can be marked serialising. Only
these requests are checked for overlaps then.

This means that during a Copy on Read operation, not all requests
overlapping other requests are serialised any more, but only those that
actually overlap with the specific COR request.

Also remove COR from function and variable names because this
functionality can be useful in other contexts.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
(cherry picked from commit 2dbafdc012d3ea81a97fec6226ca82d644539c9a)

Conflicts:
	include/block/block_int.h

Conflicts because in RHEL 7...
- ...write notifiers don't exist (context only)
- ...BdrvTrackRequest is in block.c rather than block_int.h

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c                   | 49 +++++++++++++++++++++++++++++------------------
 include/block/block_int.h |  4 ++--
 2 files changed, 32 insertions(+), 21 deletions(-)
---
 block.c                   |   49 +++++++++++++++++++++++++++-----------------
 include/block/block_int.h |    4 +-
 2 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/block.c b/block.c
index 6317321..7b30bb3 100644
--- a/block.c
+++ b/block.c
@@ -2039,6 +2039,7 @@ struct BdrvTrackedRequest {
     int64_t offset;
     unsigned int bytes;
     bool is_write;
+    bool serialising;
     QLIST_ENTRY(BdrvTrackedRequest) list;
     Coroutine *co; /* owner, used for deadlock detection */
     CoQueue wait_queue; /* coroutines blocked on this request */
@@ -2051,6 +2052,10 @@ struct BdrvTrackedRequest {
  */
 static void tracked_request_end(BdrvTrackedRequest *req)
 {
+    if (req->serialising) {
+        req->bs->serialising_in_flight--;
+    }
+
     QLIST_REMOVE(req, list);
     qemu_co_queue_restart_all(&req->wait_queue);
 }
@@ -2065,10 +2070,11 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
 {
     *req = (BdrvTrackedRequest){
         .bs = bs,
-        .offset = offset,
-        .bytes = bytes,
-        .is_write = is_write,
-        .co = qemu_coroutine_self(),
+        .offset         = offset,
+        .bytes          = bytes,
+        .is_write       = is_write,
+        .co             = qemu_coroutine_self(),
+        .serialising    = false,
     };
 
     qemu_co_queue_init(&req->wait_queue);
@@ -2076,6 +2082,14 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
 }
 
+static void mark_request_serialising(BdrvTrackedRequest *req)
+{
+    if (!req->serialising) {
+        req->bs->serialising_in_flight++;
+        req->serialising = true;
+    }
+}
+
 /**
  * Round a region to cluster boundaries
  */
@@ -2128,26 +2142,31 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
     return true;
 }
 
-static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
-        BdrvTrackedRequest *self, int64_t offset, unsigned int bytes)
+static void coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
 {
+    BlockDriverState *bs = self->bs;
     BdrvTrackedRequest *req;
     int64_t cluster_offset;
     unsigned int cluster_bytes;
     bool retry;
 
+    if (!bs->serialising_in_flight) {
+        return;
+    }
+
     /* If we touch the same cluster it counts as an overlap.  This guarantees
      * that allocating writes will be serialized and not race with each other
      * for the same cluster.  For example, in copy-on-read it ensures that the
      * CoR read and write operations are atomic and guest writes cannot
      * interleave between them.
      */
-    round_bytes_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
+    round_bytes_to_clusters(bs, self->offset, self->bytes,
+                            &cluster_offset, &cluster_bytes);
 
     do {
         retry = false;
         QLIST_FOREACH(req, &bs->tracked_requests, list) {
-            if (req == self) {
+            if (req == self || (!req->serialising && !self->serialising)) {
                 continue;
             }
             if (tracked_request_overlaps(req, cluster_offset, cluster_bytes)) {
@@ -2761,12 +2780,10 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
 
     /* Handle Copy on Read and associated serialisation */
     if (flags & BDRV_REQ_COPY_ON_READ) {
-        bs->copy_on_read_in_flight++;
+        mark_request_serialising(req);
     }
 
-    if (bs->copy_on_read_in_flight) {
-        wait_for_overlapping_requests(bs, req, offset, bytes);
-    }
+    wait_serialising_requests(req);
 
     if (flags & BDRV_REQ_COPY_ON_READ) {
         int pnum;
@@ -2815,10 +2832,6 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
     }
 
 out:
-    if (flags & BDRV_REQ_COPY_ON_READ) {
-        bs->copy_on_read_in_flight--;
-    }
-
     return ret;
 }
 
@@ -3017,9 +3030,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
 
-    if (bs->copy_on_read_in_flight) {
-        wait_for_overlapping_requests(bs, req, offset, bytes);
-    }
+    wait_serialising_requests(req);
 
     if (flags & BDRV_REQ_ZERO_WRITE) {
         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e66bd5f..2ec4bb2 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -280,8 +280,8 @@ struct BlockDriverState {
 
     NotifierList close_notifiers;
 
-    /* number of in-flight copy-on-read requests */
-    unsigned int copy_on_read_in_flight;
+    /* number of in-flight serialising requests */
+    unsigned int serialising_in_flight;
 
     /* the time for latest disk I/O */
     int64_t slice_start;
-- 
1.7.1