9ae3a8
From 045f704f819575af1bf69b59ffe73db5ba0cf19b Mon Sep 17 00:00:00 2001
9ae3a8
From: Kevin Wolf <kwolf@redhat.com>
9ae3a8
Date: Wed, 4 Dec 2013 16:43:44 +0100
9ae3a8
Subject: [PATCH 20/37] block: Generalise and optimise COR serialisation
9ae3a8
9ae3a8
Message-id: <1392117622-28812-21-git-send-email-kwolf@redhat.com>
9ae3a8
Patchwork-id: 57185
9ae3a8
O-Subject: [RHEL-7.0 qemu-kvm PATCH v2 20/37] block: Generalise and optimise COR serialisation
9ae3a8
Bugzilla: 748906
9ae3a8
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
9ae3a8
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
9ae3a8
RH-Acked-by: Max Reitz <mreitz@redhat.com>
9ae3a8
9ae3a8
Change the API so that specific requests can be marked serialising. Only
9ae3a8
these requests are checked for overlaps then.
9ae3a8
9ae3a8
This means that during a Copy on Read operation, not all requests
9ae3a8
overlapping other requests are serialised any more, but only those that
9ae3a8
actually overlap with the specific COR request.
9ae3a8
9ae3a8
Also remove COR from function and variable names because this
9ae3a8
functionality can be useful in other contexts.
9ae3a8
9ae3a8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9ae3a8
Reviewed-by: Max Reitz <mreitz@redhat.com>
9ae3a8
Reviewed-by: Benoit Canet <benoit@irqsave.net>
9ae3a8
(cherry picked from commit 2dbafdc012d3ea81a97fec6226ca82d644539c9a)
9ae3a8
9ae3a8
Conflicts:
9ae3a8
	include/block/block_int.h
9ae3a8
9ae3a8
Conflicts because in RHEL 7...
9ae3a8
- ...write notifiers don't exist (context only)
9ae3a8
- ...BdrvTrackedRequest is in block.c rather than block_int.h
9ae3a8
9ae3a8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9ae3a8
---
9ae3a8
 block.c                   | 49 +++++++++++++++++++++++++++++------------------
9ae3a8
 include/block/block_int.h |  4 ++--
9ae3a8
 2 files changed, 32 insertions(+), 21 deletions(-)
9ae3a8
---
9ae3a8
 block.c                   |   49 +++++++++++++++++++++++++++-----------------
9ae3a8
 include/block/block_int.h |    4 +-
9ae3a8
 2 files changed, 32 insertions(+), 21 deletions(-)
9ae3a8
9ae3a8
diff --git a/block.c b/block.c
9ae3a8
index 6317321..7b30bb3 100644
9ae3a8
--- a/block.c
9ae3a8
+++ b/block.c
9ae3a8
@@ -2039,6 +2039,7 @@ struct BdrvTrackedRequest {
9ae3a8
     int64_t offset;
9ae3a8
     unsigned int bytes;
9ae3a8
     bool is_write;
9ae3a8
+    bool serialising;
9ae3a8
     QLIST_ENTRY(BdrvTrackedRequest) list;
9ae3a8
     Coroutine *co; /* owner, used for deadlock detection */
9ae3a8
     CoQueue wait_queue; /* coroutines blocked on this request */
9ae3a8
@@ -2051,6 +2052,10 @@ struct BdrvTrackedRequest {
9ae3a8
  */
9ae3a8
 static void tracked_request_end(BdrvTrackedRequest *req)
9ae3a8
 {
9ae3a8
+    if (req->serialising) {
9ae3a8
+        req->bs->serialising_in_flight--;
9ae3a8
+    }
9ae3a8
+
9ae3a8
     QLIST_REMOVE(req, list);
9ae3a8
     qemu_co_queue_restart_all(&req->wait_queue);
9ae3a8
 }
9ae3a8
@@ -2065,10 +2070,11 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
9ae3a8
 {
9ae3a8
     *req = (BdrvTrackedRequest){
9ae3a8
         .bs = bs,
9ae3a8
-        .offset = offset,
9ae3a8
-        .bytes = bytes,
9ae3a8
-        .is_write = is_write,
9ae3a8
-        .co = qemu_coroutine_self(),
9ae3a8
+        .offset         = offset,
9ae3a8
+        .bytes          = bytes,
9ae3a8
+        .is_write       = is_write,
9ae3a8
+        .co             = qemu_coroutine_self(),
9ae3a8
+        .serialising    = false,
9ae3a8
     };
9ae3a8
 
9ae3a8
     qemu_co_queue_init(&req->wait_queue);
9ae3a8
@@ -2076,6 +2082,14 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
9ae3a8
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
9ae3a8
 }
9ae3a8
 
9ae3a8
+static void mark_request_serialising(BdrvTrackedRequest *req)
9ae3a8
+{
9ae3a8
+    if (!req->serialising) {
9ae3a8
+        req->bs->serialising_in_flight++;
9ae3a8
+        req->serialising = true;
9ae3a8
+    }
9ae3a8
+}
9ae3a8
+
9ae3a8
 /**
9ae3a8
  * Round a region to cluster boundaries
9ae3a8
  */
9ae3a8
@@ -2128,26 +2142,31 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
9ae3a8
     return true;
9ae3a8
 }
9ae3a8
 
9ae3a8
-static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
9ae3a8
-        BdrvTrackedRequest *self, int64_t offset, unsigned int bytes)
9ae3a8
+static void coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
9ae3a8
 {
9ae3a8
+    BlockDriverState *bs = self->bs;
9ae3a8
     BdrvTrackedRequest *req;
9ae3a8
     int64_t cluster_offset;
9ae3a8
     unsigned int cluster_bytes;
9ae3a8
     bool retry;
9ae3a8
 
9ae3a8
+    if (!bs->serialising_in_flight) {
9ae3a8
+        return;
9ae3a8
+    }
9ae3a8
+
9ae3a8
     /* If we touch the same cluster it counts as an overlap.  This guarantees
9ae3a8
      * that allocating writes will be serialized and not race with each other
9ae3a8
      * for the same cluster.  For example, in copy-on-read it ensures that the
9ae3a8
      * CoR read and write operations are atomic and guest writes cannot
9ae3a8
      * interleave between them.
9ae3a8
      */
9ae3a8
-    round_bytes_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
9ae3a8
+    round_bytes_to_clusters(bs, self->offset, self->bytes,
9ae3a8
+                            &cluster_offset, &cluster_bytes);
9ae3a8
 
9ae3a8
     do {
9ae3a8
         retry = false;
9ae3a8
         QLIST_FOREACH(req, &bs->tracked_requests, list) {
9ae3a8
-            if (req == self) {
9ae3a8
+            if (req == self || (!req->serialising && !self->serialising)) {
9ae3a8
                 continue;
9ae3a8
             }
9ae3a8
             if (tracked_request_overlaps(req, cluster_offset, cluster_bytes)) {
9ae3a8
@@ -2761,12 +2780,10 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
9ae3a8
 
9ae3a8
     /* Handle Copy on Read and associated serialisation */
9ae3a8
     if (flags & BDRV_REQ_COPY_ON_READ) {
9ae3a8
-        bs->copy_on_read_in_flight++;
9ae3a8
+        mark_request_serialising(req);
9ae3a8
     }
9ae3a8
 
9ae3a8
-    if (bs->copy_on_read_in_flight) {
9ae3a8
-        wait_for_overlapping_requests(bs, req, offset, bytes);
9ae3a8
-    }
9ae3a8
+    wait_serialising_requests(req);
9ae3a8
 
9ae3a8
     if (flags & BDRV_REQ_COPY_ON_READ) {
9ae3a8
         int pnum;
9ae3a8
@@ -2815,10 +2832,6 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
9ae3a8
     }
9ae3a8
 
9ae3a8
 out:
9ae3a8
-    if (flags & BDRV_REQ_COPY_ON_READ) {
9ae3a8
-        bs->copy_on_read_in_flight--;
9ae3a8
-    }
9ae3a8
-
9ae3a8
     return ret;
9ae3a8
 }
9ae3a8
 
9ae3a8
@@ -3017,9 +3030,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
9ae3a8
     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
9ae3a8
     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
9ae3a8
 
9ae3a8
-    if (bs->copy_on_read_in_flight) {
9ae3a8
-        wait_for_overlapping_requests(bs, req, offset, bytes);
9ae3a8
-    }
9ae3a8
+    wait_serialising_requests(req);
9ae3a8
 
9ae3a8
     if (flags & BDRV_REQ_ZERO_WRITE) {
9ae3a8
         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
9ae3a8
diff --git a/include/block/block_int.h b/include/block/block_int.h
9ae3a8
index e66bd5f..2ec4bb2 100644
9ae3a8
--- a/include/block/block_int.h
9ae3a8
+++ b/include/block/block_int.h
9ae3a8
@@ -280,8 +280,8 @@ struct BlockDriverState {
9ae3a8
 
9ae3a8
     NotifierList close_notifiers;
9ae3a8
 
9ae3a8
-    /* number of in-flight copy-on-read requests */
9ae3a8
-    unsigned int copy_on_read_in_flight;
9ae3a8
+    /* number of in-flight serialising requests */
9ae3a8
+    unsigned int serialising_in_flight;
9ae3a8
 
9ae3a8
     /* the time for latest disk I/O */
9ae3a8
     int64_t slice_start;
9ae3a8
-- 
9ae3a8
1.7.1
9ae3a8