|
|
d1681e |
From fb62cbef3fcaa3e2a23a98182edaae332803b3bb Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
d1681e |
Date: Tue, 15 May 2018 11:37:16 +0200
|
|
|
d1681e |
Subject: [PATCH 283/305] cluster/ec: Fix pre-op xattrop management
|
|
|
d1681e |
|
|
|
d1681e |
Multiple pre-op xattrop can be simultaneously being processed. On the cbk
|
|
|
d1681e |
it was checked if the fop was waiting for some specific data (like size and
|
|
|
d1681e |
version) and, if so, it was assumed that this answer should contain that
|
|
|
d1681e |
data.
|
|
|
d1681e |
|
|
|
d1681e |
This is not true, since a fop can be waiting for some data, but it may come
|
|
|
d1681e |
from the xattrop of another fop.
|
|
|
d1681e |
|
|
|
d1681e |
This patch differentiates between needing some information and providing it.
|
|
|
d1681e |
|
|
|
d1681e |
This is related to parallel writes. Disabling them fixed the problem, but
|
|
|
d1681e |
also prevented concurrent reads. A change has been made so that disabling
|
|
|
d1681e |
parallel writes still allows parallel reads.
|
|
|
d1681e |
|
|
|
d1681e |
Upstream patch: https://review.gluster.org/20024
|
|
|
d1681e |
|
|
|
d1681e |
BUG: 1567001
|
|
|
d1681e |
Change-Id: I74772ad6b80b7b37805da93d5ec3ae099e96b041
|
|
|
d1681e |
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/139707
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
xlators/cluster/ec/src/ec-common.c | 70 ++++++++++++++++++++++----------------
|
|
|
d1681e |
xlators/cluster/ec/src/ec-common.h | 28 +++++++++++++--
|
|
|
d1681e |
xlators/cluster/ec/src/ec.c | 1 +
|
|
|
d1681e |
3 files changed, 66 insertions(+), 33 deletions(-)
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
|
|
|
d1681e |
index bd2ae50..b74bce0 100644
|
|
|
d1681e |
--- a/xlators/cluster/ec/src/ec-common.c
|
|
|
d1681e |
+++ b/xlators/cluster/ec/src/ec-common.c
|
|
|
d1681e |
@@ -21,10 +21,6 @@
|
|
|
d1681e |
#include "ec.h"
|
|
|
d1681e |
#include "ec-messages.h"
|
|
|
d1681e |
|
|
|
d1681e |
-#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\
|
|
|
d1681e |
- EC_FLAG_WAITING_DATA_DIRTY |\
|
|
|
d1681e |
- EC_FLAG_WAITING_METADATA_DIRTY)
|
|
|
d1681e |
-
|
|
|
d1681e |
void
|
|
|
d1681e |
ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
|
|
|
d1681e |
int32_t ret_status)
|
|
|
d1681e |
@@ -160,10 +156,16 @@ ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
|
|
|
d1681e |
static gf_boolean_t
|
|
|
d1681e |
ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
|
|
|
d1681e |
{
|
|
|
d1681e |
+ ec_t *ec = l1->fop->xl->private;
|
|
|
d1681e |
+
|
|
|
d1681e |
if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
|
|
|
d1681e |
(l2->fop->flags & EC_FLAG_LOCK_SHARED))
|
|
|
d1681e |
return _gf_false;
|
|
|
d1681e |
|
|
|
d1681e |
+ if (!ec->parallel_writes) {
|
|
|
d1681e |
+ return _gf_true;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
return ec_is_range_conflict (l1, l2);
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
@@ -1118,7 +1120,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
|
|
|
d1681e |
ec_lock_t *lock = NULL;
|
|
|
d1681e |
ec_inode_t *ctx;
|
|
|
d1681e |
gf_boolean_t release = _gf_false;
|
|
|
d1681e |
- uint64_t waiting_flags = 0;
|
|
|
d1681e |
+ uint64_t provided_flags = 0;
|
|
|
d1681e |
uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
|
|
|
d1681e |
|
|
|
d1681e |
lock = parent_link->lock;
|
|
|
d1681e |
@@ -1126,14 +1128,14 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
|
|
|
d1681e |
ctx = lock->ctx;
|
|
|
d1681e |
|
|
|
d1681e |
INIT_LIST_HEAD(&list);
|
|
|
d1681e |
- waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
|
|
|
d1681e |
+ provided_flags = EC_PROVIDED_FLAGS(parent_link->waiting_flags);
|
|
|
d1681e |
|
|
|
d1681e |
LOCK(&lock->loc.inode->lock);
|
|
|
d1681e |
|
|
|
d1681e |
list_for_each_entry(link, &lock->owners, owner_list) {
|
|
|
d1681e |
- if ((link->waiting_flags & waiting_flags) != 0) {
|
|
|
d1681e |
- link->waiting_flags ^= (link->waiting_flags & waiting_flags);
|
|
|
d1681e |
- if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0)
|
|
|
d1681e |
+ if ((link->waiting_flags & provided_flags) != 0) {
|
|
|
d1681e |
+ link->waiting_flags ^= (link->waiting_flags & provided_flags);
|
|
|
d1681e |
+ if (EC_NEEDED_FLAGS(link->waiting_flags) == 0)
|
|
|
d1681e |
list_add_tail(&link->fop->cbk_list, &list);
|
|
|
d1681e |
}
|
|
|
d1681e |
}
|
|
|
d1681e |
@@ -1146,7 +1148,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
|
|
|
d1681e |
goto unlock;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
- if (waiting_flags & EC_FLAG_WAITING_XATTROP) {
|
|
|
d1681e |
+ if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) {
|
|
|
d1681e |
op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
|
|
|
d1681e |
ctx->pre_version,
|
|
|
d1681e |
EC_VERSION_SIZE);
|
|
|
d1681e |
@@ -1207,20 +1209,20 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
|
|
|
d1681e |
|
|
|
d1681e |
ec_set_dirty_flag (fop->data, ctx, dirty);
|
|
|
d1681e |
if (dirty[EC_METADATA_TXN] &&
|
|
|
d1681e |
- (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) {
|
|
|
d1681e |
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) {
|
|
|
d1681e |
GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]);
|
|
|
d1681e |
ctx->dirty[EC_METADATA_TXN] = 1;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
if (dirty[EC_DATA_TXN] &&
|
|
|
d1681e |
- (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) {
|
|
|
d1681e |
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) {
|
|
|
d1681e |
GF_ASSERT (!ctx->dirty[EC_DATA_TXN]);
|
|
|
d1681e |
ctx->dirty[EC_DATA_TXN] = 1;
|
|
|
d1681e |
}
|
|
|
d1681e |
op_errno = 0;
|
|
|
d1681e |
unlock:
|
|
|
d1681e |
|
|
|
d1681e |
- lock->waiting_flags ^= waiting_flags;
|
|
|
d1681e |
+ lock->waiting_flags ^= provided_flags;
|
|
|
d1681e |
|
|
|
d1681e |
if (op_errno == 0) {
|
|
|
d1681e |
/* If the fop fails on any of the good bricks, it is important to mark
|
|
|
d1681e |
@@ -1267,6 +1269,24 @@ unlock:
|
|
|
d1681e |
return 0;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+static gf_boolean_t
|
|
|
d1681e |
+ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag)
|
|
|
d1681e |
+{
|
|
|
d1681e |
+ uint64_t current;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ link->waiting_flags |= EC_FLAG_NEEDS(flag);
|
|
|
d1681e |
+
|
|
|
d1681e |
+ current = EC_NEEDED_FLAGS(lock->waiting_flags);
|
|
|
d1681e |
+ if (!EC_FLAGS_HAVE(current, flag)) {
|
|
|
d1681e |
+ lock->waiting_flags |= EC_FLAG_NEEDS(flag);
|
|
|
d1681e |
+ link->waiting_flags |= EC_FLAG_PROVIDES(flag);
|
|
|
d1681e |
+
|
|
|
d1681e |
+ return _gf_true;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
+ return _gf_false;
|
|
|
d1681e |
+}
|
|
|
d1681e |
+
|
|
|
d1681e |
static uint64_t
|
|
|
d1681e |
ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
|
|
|
d1681e |
uint64_t *dirty)
|
|
|
d1681e |
@@ -1275,31 +1295,25 @@ ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
|
|
|
d1681e |
uint64_t newflags = 0;
|
|
|
d1681e |
ec_inode_t *ctx = lock->ctx;
|
|
|
d1681e |
|
|
|
d1681e |
- oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
|
|
|
d1681e |
+ oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
|
|
|
d1681e |
|
|
|
d1681e |
if (lock->query && !ctx->have_info) {
|
|
|
d1681e |
- lock->waiting_flags |= EC_FLAG_WAITING_XATTROP;
|
|
|
d1681e |
- link->waiting_flags |= EC_FLAG_WAITING_XATTROP;
|
|
|
d1681e |
+ ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
if (dirty[EC_DATA_TXN]) {
|
|
|
d1681e |
- if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) {
|
|
|
d1681e |
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
|
|
|
d1681e |
dirty[EC_DATA_TXN] = 0;
|
|
|
d1681e |
- } else {
|
|
|
d1681e |
- lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
|
|
|
d1681e |
}
|
|
|
d1681e |
- link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
if (dirty[EC_METADATA_TXN]) {
|
|
|
d1681e |
- if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) {
|
|
|
d1681e |
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
|
|
|
d1681e |
dirty[EC_METADATA_TXN] = 0;
|
|
|
d1681e |
- } else {
|
|
|
d1681e |
- lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
|
|
|
d1681e |
}
|
|
|
d1681e |
- link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
|
|
|
d1681e |
}
|
|
|
d1681e |
- newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
|
|
|
d1681e |
+ newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
|
|
|
d1681e |
+
|
|
|
d1681e |
return oldflags ^ newflags;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
@@ -1369,7 +1383,7 @@ void ec_get_size_version(ec_lock_link_t *link)
|
|
|
d1681e |
goto out;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
- if (changed_flags & EC_FLAG_WAITING_XATTROP) {
|
|
|
d1681e |
+ if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) {
|
|
|
d1681e |
/* Once we know that an xattrop will be needed,
|
|
|
d1681e |
* we try to get all available information in a
|
|
|
d1681e |
* single call. */
|
|
|
d1681e |
@@ -1646,10 +1660,6 @@ static gf_boolean_t
|
|
|
d1681e |
ec_link_has_lock_conflict (ec_lock_link_t *link, gf_boolean_t waitlist_check)
|
|
|
d1681e |
{
|
|
|
d1681e |
ec_lock_link_t *trav_link = NULL;
|
|
|
d1681e |
- ec_t *ec = link->fop->xl->private;
|
|
|
d1681e |
-
|
|
|
d1681e |
- if (!ec->parallel_writes)
|
|
|
d1681e |
- return _gf_true;
|
|
|
d1681e |
|
|
|
d1681e |
list_for_each_entry (trav_link, &link->lock->owners, owner_list) {
|
|
|
d1681e |
if (ec_lock_conflict (trav_link, link))
|
|
|
d1681e |
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
|
|
|
d1681e |
index c0ad604..85bf819 100644
|
|
|
d1681e |
--- a/xlators/cluster/ec/src/ec-common.h
|
|
|
d1681e |
+++ b/xlators/cluster/ec/src/ec-common.h
|
|
|
d1681e |
@@ -29,9 +29,31 @@ typedef enum {
|
|
|
d1681e |
|
|
|
d1681e |
#define EC_FLAG_LOCK_SHARED 0x0001
|
|
|
d1681e |
|
|
|
d1681e |
-#define EC_FLAG_WAITING_XATTROP 0x0001
|
|
|
d1681e |
-#define EC_FLAG_WAITING_DATA_DIRTY 0x0002
|
|
|
d1681e |
-#define EC_FLAG_WAITING_METADATA_DIRTY 0x0004
|
|
|
d1681e |
+enum _ec_xattrop_flags {
|
|
|
d1681e |
+ EC_FLAG_XATTROP,
|
|
|
d1681e |
+ EC_FLAG_DATA_DIRTY,
|
|
|
d1681e |
+ EC_FLAG_METADATA_DIRTY,
|
|
|
d1681e |
+
|
|
|
d1681e |
+ /* Add any new flag here, before EC_FLAG_MAX. The maximum number of
|
|
|
d1681e |
+ * flags that can be defined is 16. */
|
|
|
d1681e |
+
|
|
|
d1681e |
+ EC_FLAG_MAX
|
|
|
d1681e |
+};
|
|
|
d1681e |
+
|
|
|
d1681e |
+/* We keep two sets of flags. One to determine what's really providing the
|
|
|
d1681e |
+ * currect xattrop and the other to know what the parent fop of the xattrop
|
|
|
d1681e |
+ * needs to proceed. It might happen that a fop needs some information that
|
|
|
d1681e |
+ * is being already requested by a previous fop. The two sets are stored
|
|
|
d1681e |
+ * contiguously. */
|
|
|
d1681e |
+
|
|
|
d1681e |
+#define EC_FLAG_NEEDS(_flag) (1 << (_flag))
|
|
|
d1681e |
+#define EC_FLAG_PROVIDES(_flag) (1 << ((_flag) + EC_FLAG_MAX))
|
|
|
d1681e |
+
|
|
|
d1681e |
+#define EC_NEEDED_FLAGS(_flags) ((_flags) & ((1 << EC_FLAG_MAX) - 1))
|
|
|
d1681e |
+
|
|
|
d1681e |
+#define EC_PROVIDED_FLAGS(_flags) EC_NEEDED_FLAGS((_flags) >> EC_FLAG_MAX)
|
|
|
d1681e |
+
|
|
|
d1681e |
+#define EC_FLAGS_HAVE(_flags, _flag) (((_flags) & (1 << (_flag))) != 0)
|
|
|
d1681e |
|
|
|
d1681e |
#define EC_SELFHEAL_BIT 62
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
|
|
|
d1681e |
index eb91c4a..0d59efd 100644
|
|
|
d1681e |
--- a/xlators/cluster/ec/src/ec.c
|
|
|
d1681e |
+++ b/xlators/cluster/ec/src/ec.c
|
|
|
d1681e |
@@ -1331,6 +1331,7 @@ int32_t ec_dump_private(xlator_t *this)
|
|
|
d1681e |
gf_proc_dump_write("healers", "%d", ec->healers);
|
|
|
d1681e |
gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
|
|
|
d1681e |
gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
|
|
|
d1681e |
+ gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
|
|
|
d1681e |
|
|
|
d1681e |
return 0;
|
|
|
d1681e |
}
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|