|
|
9ae3a8 |
From 045f704f819575af1bf69b59ffe73db5ba0cf19b Mon Sep 17 00:00:00 2001
|
|
|
9ae3a8 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
Date: Wed, 4 Dec 2013 16:43:44 +0100
|
|
|
9ae3a8 |
Subject: [PATCH 20/37] block: Generalise and optimise COR serialisation
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Message-id: <1392117622-28812-21-git-send-email-kwolf@redhat.com>
|
|
|
9ae3a8 |
Patchwork-id: 57185
|
|
|
9ae3a8 |
O-Subject: [RHEL-7.0 qemu-kvm PATCH v2 20/37] block: Generalise and optimise COR serialisation
|
|
|
9ae3a8 |
Bugzilla: 748906
|
|
|
9ae3a8 |
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
9ae3a8 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Change the API so that specific requests can be marked serialising. Only
|
|
|
9ae3a8 |
these requests are checked for overlaps then.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
This means that during a Copy on Read operation, not all requests
|
|
|
9ae3a8 |
overlapping other requests are serialised any more, but only those that
|
|
|
9ae3a8 |
actually overlap with the specific COR request.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Also remove COR from function and variable names because this
|
|
|
9ae3a8 |
functionality can be useful in other contexts.
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
|
|
9ae3a8 |
Reviewed-by: Benoit Canet <benoit@irqsave.net>
|
|
|
9ae3a8 |
(cherry picked from commit 2dbafdc012d3ea81a97fec6226ca82d644539c9a)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Conflicts:
|
|
|
9ae3a8 |
include/block/block_int.h
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Conflicts because in RHEL 7...
|
|
|
9ae3a8 |
- ...write notifiers don't exist (context only)
|
|
|
9ae3a8 |
- ...BdrvTrackedRequest is in block.c rather than block_int.h
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
block.c | 49 +++++++++++++++++++++++++++++------------------
|
|
|
9ae3a8 |
include/block/block_int.h | 4 ++--
|
|
|
9ae3a8 |
2 files changed, 32 insertions(+), 21 deletions(-)
|
|
|
9ae3a8 |
---
|
|
|
9ae3a8 |
block.c | 49 +++++++++++++++++++++++++++-----------------
|
|
|
9ae3a8 |
include/block/block_int.h | 4 +-
|
|
|
9ae3a8 |
2 files changed, 32 insertions(+), 21 deletions(-)
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
diff --git a/block.c b/block.c
|
|
|
9ae3a8 |
index 6317321..7b30bb3 100644
|
|
|
9ae3a8 |
--- a/block.c
|
|
|
9ae3a8 |
+++ b/block.c
|
|
|
9ae3a8 |
@@ -2039,6 +2039,7 @@ struct BdrvTrackedRequest {
|
|
|
9ae3a8 |
int64_t offset;
|
|
|
9ae3a8 |
unsigned int bytes;
|
|
|
9ae3a8 |
bool is_write;
|
|
|
9ae3a8 |
+ bool serialising;
|
|
|
9ae3a8 |
QLIST_ENTRY(BdrvTrackedRequest) list;
|
|
|
9ae3a8 |
Coroutine *co; /* owner, used for deadlock detection */
|
|
|
9ae3a8 |
CoQueue wait_queue; /* coroutines blocked on this request */
|
|
|
9ae3a8 |
@@ -2051,6 +2052,10 @@ struct BdrvTrackedRequest {
|
|
|
9ae3a8 |
*/
|
|
|
9ae3a8 |
static void tracked_request_end(BdrvTrackedRequest *req)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
+ if (req->serialising) {
|
|
|
9ae3a8 |
+ req->bs->serialising_in_flight--;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
QLIST_REMOVE(req, list);
|
|
|
9ae3a8 |
qemu_co_queue_restart_all(&req->wait_queue);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
@@ -2065,10 +2070,11 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
*req = (BdrvTrackedRequest){
|
|
|
9ae3a8 |
.bs = bs,
|
|
|
9ae3a8 |
- .offset = offset,
|
|
|
9ae3a8 |
- .bytes = bytes,
|
|
|
9ae3a8 |
- .is_write = is_write,
|
|
|
9ae3a8 |
- .co = qemu_coroutine_self(),
|
|
|
9ae3a8 |
+ .offset = offset,
|
|
|
9ae3a8 |
+ .bytes = bytes,
|
|
|
9ae3a8 |
+ .is_write = is_write,
|
|
|
9ae3a8 |
+ .co = qemu_coroutine_self(),
|
|
|
9ae3a8 |
+ .serialising = false,
|
|
|
9ae3a8 |
};
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
qemu_co_queue_init(&req->wait_queue);
|
|
|
9ae3a8 |
@@ -2076,6 +2082,14 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
|
|
|
9ae3a8 |
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+static void mark_request_serialising(BdrvTrackedRequest *req)
|
|
|
9ae3a8 |
+{
|
|
|
9ae3a8 |
+ if (!req->serialising) {
|
|
|
9ae3a8 |
+ req->bs->serialising_in_flight++;
|
|
|
9ae3a8 |
+ req->serialising = true;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+}
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/**
|
|
|
9ae3a8 |
* Round a region to cluster boundaries
|
|
|
9ae3a8 |
*/
|
|
|
9ae3a8 |
@@ -2128,26 +2142,31 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
|
|
|
9ae3a8 |
return true;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
-static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
|
|
|
9ae3a8 |
- BdrvTrackedRequest *self, int64_t offset, unsigned int bytes)
|
|
|
9ae3a8 |
+static void coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
|
|
|
9ae3a8 |
{
|
|
|
9ae3a8 |
+ BlockDriverState *bs = self->bs;
|
|
|
9ae3a8 |
BdrvTrackedRequest *req;
|
|
|
9ae3a8 |
int64_t cluster_offset;
|
|
|
9ae3a8 |
unsigned int cluster_bytes;
|
|
|
9ae3a8 |
bool retry;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
+ if (!bs->serialising_in_flight) {
|
|
|
9ae3a8 |
+ return;
|
|
|
9ae3a8 |
+ }
|
|
|
9ae3a8 |
+
|
|
|
9ae3a8 |
/* If we touch the same cluster it counts as an overlap. This guarantees
|
|
|
9ae3a8 |
* that allocating writes will be serialized and not race with each other
|
|
|
9ae3a8 |
* for the same cluster. For example, in copy-on-read it ensures that the
|
|
|
9ae3a8 |
* CoR read and write operations are atomic and guest writes cannot
|
|
|
9ae3a8 |
* interleave between them.
|
|
|
9ae3a8 |
*/
|
|
|
9ae3a8 |
- round_bytes_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
|
|
|
9ae3a8 |
+ round_bytes_to_clusters(bs, self->offset, self->bytes,
|
|
|
9ae3a8 |
+ &cluster_offset, &cluster_bytes);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
do {
|
|
|
9ae3a8 |
retry = false;
|
|
|
9ae3a8 |
QLIST_FOREACH(req, &bs->tracked_requests, list) {
|
|
|
9ae3a8 |
- if (req == self) {
|
|
|
9ae3a8 |
+ if (req == self || (!req->serialising && !self->serialising)) {
|
|
|
9ae3a8 |
continue;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
if (tracked_request_overlaps(req, cluster_offset, cluster_bytes)) {
|
|
|
9ae3a8 |
@@ -2761,12 +2780,10 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/* Handle Copy on Read and associated serialisation */
|
|
|
9ae3a8 |
if (flags & BDRV_REQ_COPY_ON_READ) {
|
|
|
9ae3a8 |
- bs->copy_on_read_in_flight++;
|
|
|
9ae3a8 |
+ mark_request_serialising(req);
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- if (bs->copy_on_read_in_flight) {
|
|
|
9ae3a8 |
- wait_for_overlapping_requests(bs, req, offset, bytes);
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
+ wait_serialising_requests(req);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
if (flags & BDRV_REQ_COPY_ON_READ) {
|
|
|
9ae3a8 |
int pnum;
|
|
|
9ae3a8 |
@@ -2815,10 +2832,6 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
out:
|
|
|
9ae3a8 |
- if (flags & BDRV_REQ_COPY_ON_READ) {
|
|
|
9ae3a8 |
- bs->copy_on_read_in_flight--;
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
-
|
|
|
9ae3a8 |
return ret;
|
|
|
9ae3a8 |
}
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
@@ -3017,9 +3030,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
|
|
|
9ae3a8 |
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
|
|
|
9ae3a8 |
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- if (bs->copy_on_read_in_flight) {
|
|
|
9ae3a8 |
- wait_for_overlapping_requests(bs, req, offset, bytes);
|
|
|
9ae3a8 |
- }
|
|
|
9ae3a8 |
+ wait_serialising_requests(req);
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
if (flags & BDRV_REQ_ZERO_WRITE) {
|
|
|
9ae3a8 |
ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
|
|
|
9ae3a8 |
diff --git a/include/block/block_int.h b/include/block/block_int.h
|
|
|
9ae3a8 |
index e66bd5f..2ec4bb2 100644
|
|
|
9ae3a8 |
--- a/include/block/block_int.h
|
|
|
9ae3a8 |
+++ b/include/block/block_int.h
|
|
|
9ae3a8 |
@@ -280,8 +280,8 @@ struct BlockDriverState {
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
NotifierList close_notifiers;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
- /* number of in-flight copy-on-read requests */
|
|
|
9ae3a8 |
- unsigned int copy_on_read_in_flight;
|
|
|
9ae3a8 |
+ /* number of in-flight serialising requests */
|
|
|
9ae3a8 |
+ unsigned int serialising_in_flight;
|
|
|
9ae3a8 |
|
|
|
9ae3a8 |
/* the time for latest disk I/O */
|
|
|
9ae3a8 |
int64_t slice_start;
|
|
|
9ae3a8 |
--
|
|
|
9ae3a8 |
1.7.1
|
|
|
9ae3a8 |
|