From fb62cbef3fcaa3e2a23a98182edaae332803b3bb Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Tue, 15 May 2018 11:37:16 +0200
Subject: [PATCH 283/305] cluster/ec: Fix pre-op xattrop management
Multiple pre-op xattrop can be simultaneously being processed. On the cbk
it was checked if the fop was waiting for some specific data (like size and
version) and, if so, it was assumed that this answer should contain that
data.
This is not true, since a fop can be waiting for some data, but it may come
from the xattrop of another fop.
This patch differentiates between needing some information and providing it.
This is related to parallel writes. Disabling them fixed the problem, but
also prevented concurrent reads. A change has been made so that disabling
parallel writes still allows parallel reads.
Upstream patch: https://review.gluster.org/20024
BUG: 1567001
Change-Id: I74772ad6b80b7b37805da93d5ec3ae099e96b041
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/139707
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
xlators/cluster/ec/src/ec-common.c | 70 ++++++++++++++++++++++----------------
xlators/cluster/ec/src/ec-common.h | 28 +++++++++++++--
xlators/cluster/ec/src/ec.c | 1 +
3 files changed, 66 insertions(+), 33 deletions(-)
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index bd2ae50..b74bce0 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -21,10 +21,6 @@
#include "ec.h"
#include "ec-messages.h"
-#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\
- EC_FLAG_WAITING_DATA_DIRTY |\
- EC_FLAG_WAITING_METADATA_DIRTY)
-
void
ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
int32_t ret_status)
@@ -160,10 +156,16 @@ ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
static gf_boolean_t
ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
{
+ ec_t *ec = l1->fop->xl->private;
+
if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
(l2->fop->flags & EC_FLAG_LOCK_SHARED))
return _gf_false;
+ if (!ec->parallel_writes) {
+ return _gf_true;
+ }
+
return ec_is_range_conflict (l1, l2);
}
@@ -1118,7 +1120,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
ec_lock_t *lock = NULL;
ec_inode_t *ctx;
gf_boolean_t release = _gf_false;
- uint64_t waiting_flags = 0;
+ uint64_t provided_flags = 0;
uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
lock = parent_link->lock;
@@ -1126,14 +1128,14 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
ctx = lock->ctx;
INIT_LIST_HEAD(&list);
- waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
+ provided_flags = EC_PROVIDED_FLAGS(parent_link->waiting_flags);
LOCK(&lock->loc.inode->lock);
list_for_each_entry(link, &lock->owners, owner_list) {
- if ((link->waiting_flags & waiting_flags) != 0) {
- link->waiting_flags ^= (link->waiting_flags & waiting_flags);
- if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0)
+ if ((link->waiting_flags & provided_flags) != 0) {
+ link->waiting_flags ^= (link->waiting_flags & provided_flags);
+ if (EC_NEEDED_FLAGS(link->waiting_flags) == 0)
list_add_tail(&link->fop->cbk_list, &list);
}
}
@@ -1146,7 +1148,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
goto unlock;
}
- if (waiting_flags & EC_FLAG_WAITING_XATTROP) {
+ if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) {
op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
ctx->pre_version,
EC_VERSION_SIZE);
@@ -1207,20 +1209,20 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
ec_set_dirty_flag (fop->data, ctx, dirty);
if (dirty[EC_METADATA_TXN] &&
- (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) {
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) {
GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]);
ctx->dirty[EC_METADATA_TXN] = 1;
}
if (dirty[EC_DATA_TXN] &&
- (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) {
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) {
GF_ASSERT (!ctx->dirty[EC_DATA_TXN]);
ctx->dirty[EC_DATA_TXN] = 1;
}
op_errno = 0;
unlock:
- lock->waiting_flags ^= waiting_flags;
+ lock->waiting_flags ^= provided_flags;
if (op_errno == 0) {
/* If the fop fails on any of the good bricks, it is important to mark
@@ -1267,6 +1269,24 @@ unlock:
return 0;
}
+static gf_boolean_t
+ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag)
+{
+ uint64_t current;
+
+ link->waiting_flags |= EC_FLAG_NEEDS(flag);
+
+ current = EC_NEEDED_FLAGS(lock->waiting_flags);
+ if (!EC_FLAGS_HAVE(current, flag)) {
+ lock->waiting_flags |= EC_FLAG_NEEDS(flag);
+ link->waiting_flags |= EC_FLAG_PROVIDES(flag);
+
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
static uint64_t
ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
uint64_t *dirty)
@@ -1275,31 +1295,25 @@ ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
uint64_t newflags = 0;
ec_inode_t *ctx = lock->ctx;
- oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
+ oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
if (lock->query && !ctx->have_info) {
- lock->waiting_flags |= EC_FLAG_WAITING_XATTROP;
- link->waiting_flags |= EC_FLAG_WAITING_XATTROP;
+ ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
}
if (dirty[EC_DATA_TXN]) {
- if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
dirty[EC_DATA_TXN] = 0;
- } else {
- lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
}
- link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
}
if (dirty[EC_METADATA_TXN]) {
- if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
dirty[EC_METADATA_TXN] = 0;
- } else {
- lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
}
- link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
}
- newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
+ newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
+
return oldflags ^ newflags;
}
@@ -1369,7 +1383,7 @@ void ec_get_size_version(ec_lock_link_t *link)
goto out;
}
- if (changed_flags & EC_FLAG_WAITING_XATTROP) {
+ if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) {
/* Once we know that an xattrop will be needed,
* we try to get all available information in a
* single call. */
@@ -1646,10 +1660,6 @@ static gf_boolean_t
ec_link_has_lock_conflict (ec_lock_link_t *link, gf_boolean_t waitlist_check)
{
ec_lock_link_t *trav_link = NULL;
- ec_t *ec = link->fop->xl->private;
-
- if (!ec->parallel_writes)
- return _gf_true;
list_for_each_entry (trav_link, &link->lock->owners, owner_list) {
if (ec_lock_conflict (trav_link, link))
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index c0ad604..85bf819 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -29,9 +29,31 @@ typedef enum {
#define EC_FLAG_LOCK_SHARED 0x0001
-#define EC_FLAG_WAITING_XATTROP 0x0001
-#define EC_FLAG_WAITING_DATA_DIRTY 0x0002
-#define EC_FLAG_WAITING_METADATA_DIRTY 0x0004
+enum _ec_xattrop_flags {
+ EC_FLAG_XATTROP,
+ EC_FLAG_DATA_DIRTY,
+ EC_FLAG_METADATA_DIRTY,
+
+ /* Add any new flag here, before EC_FLAG_MAX. The maximum number of
+ * flags that can be defined is 16. */
+
+ EC_FLAG_MAX
+};
+
+/* We keep two sets of flags. One to determine what's really providing the
+ * currect xattrop and the other to know what the parent fop of the xattrop
+ * needs to proceed. It might happen that a fop needs some information that
+ * is being already requested by a previous fop. The two sets are stored
+ * contiguously. */
+
+#define EC_FLAG_NEEDS(_flag) (1 << (_flag))
+#define EC_FLAG_PROVIDES(_flag) (1 << ((_flag) + EC_FLAG_MAX))
+
+#define EC_NEEDED_FLAGS(_flags) ((_flags) & ((1 << EC_FLAG_MAX) - 1))
+
+#define EC_PROVIDED_FLAGS(_flags) EC_NEEDED_FLAGS((_flags) >> EC_FLAG_MAX)
+
+#define EC_FLAGS_HAVE(_flags, _flag) (((_flags) & (1 << (_flag))) != 0)
#define EC_SELFHEAL_BIT 62
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index eb91c4a..0d59efd 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -1331,6 +1331,7 @@ int32_t ec_dump_private(xlator_t *this)
gf_proc_dump_write("healers", "%d", ec->healers);
gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
+ gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
return 0;
}
--
1.8.3.1