Blame SOURCES/kvm-block-Generalise-and-optimise-COR-serialisation.patch

0a122b
From 045f704f819575af1bf69b59ffe73db5ba0cf19b Mon Sep 17 00:00:00 2001
0a122b
From: Kevin Wolf <kwolf@redhat.com>
0a122b
Date: Wed, 4 Dec 2013 16:43:44 +0100
0a122b
Subject: [PATCH 20/37] block: Generalise and optimise COR serialisation
0a122b
0a122b
Message-id: <1392117622-28812-21-git-send-email-kwolf@redhat.com>
0a122b
Patchwork-id: 57185
0a122b
O-Subject: [RHEL-7.0 qemu-kvm PATCH v2 20/37] block: Generalise and optimise COR serialisation
0a122b
Bugzilla: 748906
0a122b
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
0a122b
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
0a122b
RH-Acked-by: Max Reitz <mreitz@redhat.com>
0a122b
0a122b
Change the API so that specific requests can be marked serialising. Only
0a122b
these requests are checked for overlaps then.
0a122b
0a122b
This means that during a Copy on Read operation, not all requests
0a122b
overlapping other requests are serialised any more, but only those that
0a122b
actually overlap with the specific COR request.
0a122b
0a122b
Also remove COR from function and variable names because this
0a122b
functionality can be useful in other contexts.
0a122b
0a122b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
0a122b
Reviewed-by: Max Reitz <mreitz@redhat.com>
0a122b
Reviewed-by: Benoit Canet <benoit@irqsave.net>
0a122b
(cherry picked from commit 2dbafdc012d3ea81a97fec6226ca82d644539c9a)
0a122b
0a122b
Conflicts:
0a122b
	include/block/block_int.h
0a122b
0a122b
Conflicts because in RHEL 7...
0a122b
- ...write notifiers don't exist (context only)
0a122b
- ...BdrvTrackedRequest is in block.c rather than block_int.h
0a122b
0a122b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
0a122b
---
0a122b
 block.c                   | 49 +++++++++++++++++++++++++++++------------------
0a122b
 include/block/block_int.h |  4 ++--
0a122b
 2 files changed, 32 insertions(+), 21 deletions(-)
0a122b
---
0a122b
 block.c                   |   49 +++++++++++++++++++++++++++-----------------
0a122b
 include/block/block_int.h |    4 +-
0a122b
 2 files changed, 32 insertions(+), 21 deletions(-)
0a122b
0a122b
diff --git a/block.c b/block.c
0a122b
index 6317321..7b30bb3 100644
0a122b
--- a/block.c
0a122b
+++ b/block.c
0a122b
@@ -2039,6 +2039,7 @@ struct BdrvTrackedRequest {
0a122b
     int64_t offset;
0a122b
     unsigned int bytes;
0a122b
     bool is_write;
0a122b
+    bool serialising;
0a122b
     QLIST_ENTRY(BdrvTrackedRequest) list;
0a122b
     Coroutine *co; /* owner, used for deadlock detection */
0a122b
     CoQueue wait_queue; /* coroutines blocked on this request */
0a122b
@@ -2051,6 +2052,10 @@ struct BdrvTrackedRequest {
0a122b
  */
0a122b
 static void tracked_request_end(BdrvTrackedRequest *req)
0a122b
 {
0a122b
+    if (req->serialising) {
0a122b
+        req->bs->serialising_in_flight--;
0a122b
+    }
0a122b
+
0a122b
     QLIST_REMOVE(req, list);
0a122b
     qemu_co_queue_restart_all(&req->wait_queue);
0a122b
 }
0a122b
@@ -2065,10 +2070,11 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
0a122b
 {
0a122b
     *req = (BdrvTrackedRequest){
0a122b
         .bs = bs,
0a122b
-        .offset = offset,
0a122b
-        .bytes = bytes,
0a122b
-        .is_write = is_write,
0a122b
-        .co = qemu_coroutine_self(),
0a122b
+        .offset         = offset,
0a122b
+        .bytes          = bytes,
0a122b
+        .is_write       = is_write,
0a122b
+        .co             = qemu_coroutine_self(),
0a122b
+        .serialising    = false,
0a122b
     };
0a122b
 
0a122b
     qemu_co_queue_init(&req->wait_queue);
0a122b
@@ -2076,6 +2082,14 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
0a122b
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
0a122b
 }
0a122b
 
0a122b
+static void mark_request_serialising(BdrvTrackedRequest *req)
0a122b
+{
0a122b
+    if (!req->serialising) {
0a122b
+        req->bs->serialising_in_flight++;
0a122b
+        req->serialising = true;
0a122b
+    }
0a122b
+}
0a122b
+
0a122b
 /**
0a122b
  * Round a region to cluster boundaries
0a122b
  */
0a122b
@@ -2128,26 +2142,31 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
0a122b
     return true;
0a122b
 }
0a122b
 
0a122b
-static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
0a122b
-        BdrvTrackedRequest *self, int64_t offset, unsigned int bytes)
0a122b
+static void coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
0a122b
 {
0a122b
+    BlockDriverState *bs = self->bs;
0a122b
     BdrvTrackedRequest *req;
0a122b
     int64_t cluster_offset;
0a122b
     unsigned int cluster_bytes;
0a122b
     bool retry;
0a122b
 
0a122b
+    if (!bs->serialising_in_flight) {
0a122b
+        return;
0a122b
+    }
0a122b
+
0a122b
     /* If we touch the same cluster it counts as an overlap.  This guarantees
0a122b
      * that allocating writes will be serialized and not race with each other
0a122b
      * for the same cluster.  For example, in copy-on-read it ensures that the
0a122b
      * CoR read and write operations are atomic and guest writes cannot
0a122b
      * interleave between them.
0a122b
      */
0a122b
-    round_bytes_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
0a122b
+    round_bytes_to_clusters(bs, self->offset, self->bytes,
0a122b
+                            &cluster_offset, &cluster_bytes);
0a122b
 
0a122b
     do {
0a122b
         retry = false;
0a122b
         QLIST_FOREACH(req, &bs->tracked_requests, list) {
0a122b
-            if (req == self) {
0a122b
+            if (req == self || (!req->serialising && !self->serialising)) {
0a122b
                 continue;
0a122b
             }
0a122b
             if (tracked_request_overlaps(req, cluster_offset, cluster_bytes)) {
0a122b
@@ -2761,12 +2780,10 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
0a122b
 
0a122b
     /* Handle Copy on Read and associated serialisation */
0a122b
     if (flags & BDRV_REQ_COPY_ON_READ) {
0a122b
-        bs->copy_on_read_in_flight++;
0a122b
+        mark_request_serialising(req);
0a122b
     }
0a122b
 
0a122b
-    if (bs->copy_on_read_in_flight) {
0a122b
-        wait_for_overlapping_requests(bs, req, offset, bytes);
0a122b
-    }
0a122b
+    wait_serialising_requests(req);
0a122b
 
0a122b
     if (flags & BDRV_REQ_COPY_ON_READ) {
0a122b
         int pnum;
0a122b
@@ -2815,10 +2832,6 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
0a122b
     }
0a122b
 
0a122b
 out:
0a122b
-    if (flags & BDRV_REQ_COPY_ON_READ) {
0a122b
-        bs->copy_on_read_in_flight--;
0a122b
-    }
0a122b
-
0a122b
     return ret;
0a122b
 }
0a122b
 
0a122b
@@ -3017,9 +3030,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
0a122b
     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
0a122b
     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
0a122b
 
0a122b
-    if (bs->copy_on_read_in_flight) {
0a122b
-        wait_for_overlapping_requests(bs, req, offset, bytes);
0a122b
-    }
0a122b
+    wait_serialising_requests(req);
0a122b
 
0a122b
     if (flags & BDRV_REQ_ZERO_WRITE) {
0a122b
         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
0a122b
diff --git a/include/block/block_int.h b/include/block/block_int.h
0a122b
index e66bd5f..2ec4bb2 100644
0a122b
--- a/include/block/block_int.h
0a122b
+++ b/include/block/block_int.h
0a122b
@@ -280,8 +280,8 @@ struct BlockDriverState {
0a122b
 
0a122b
     NotifierList close_notifiers;
0a122b
 
0a122b
-    /* number of in-flight copy-on-read requests */
0a122b
-    unsigned int copy_on_read_in_flight;
0a122b
+    /* number of in-flight serialising requests */
0a122b
+    unsigned int serialising_in_flight;
0a122b
 
0a122b
     /* the time for latest disk I/O */
0a122b
     int64_t slice_start;
0a122b
-- 
0a122b
1.7.1
0a122b