d1681e
From fb62cbef3fcaa3e2a23a98182edaae332803b3bb Mon Sep 17 00:00:00 2001
d1681e
From: Xavi Hernandez <xhernandez@redhat.com>
d1681e
Date: Tue, 15 May 2018 11:37:16 +0200
d1681e
Subject: [PATCH 283/305] cluster/ec: Fix pre-op xattrop management
d1681e
d1681e
Multiple pre-op xattrop can be simultaneously being processed. On the cbk
d1681e
it was checked if the fop was waiting for some specific data (like size and
d1681e
version) and, if so, it was assumed that this answer should contain that
d1681e
data.
d1681e
d1681e
This is not true, since a fop can be waiting for some data, but it may come
d1681e
from the xattrop of another fop.
d1681e
d1681e
This patch differentiates between needing some information and providing it.
d1681e
d1681e
This is related to parallel writes. Disabling them fixed the problem, but
d1681e
also prevented concurrent reads. A change has been made so that disabling
d1681e
parallel writes still allows parallel reads.
d1681e
d1681e
Upstream patch: https://review.gluster.org/20024
d1681e
d1681e
BUG: 1567001
d1681e
Change-Id: I74772ad6b80b7b37805da93d5ec3ae099e96b041
d1681e
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/139707
d1681e
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d1681e
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
d1681e
---
d1681e
 xlators/cluster/ec/src/ec-common.c | 70 ++++++++++++++++++++++----------------
d1681e
 xlators/cluster/ec/src/ec-common.h | 28 +++++++++++++--
d1681e
 xlators/cluster/ec/src/ec.c        |  1 +
d1681e
 3 files changed, 66 insertions(+), 33 deletions(-)
d1681e
d1681e
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
d1681e
index bd2ae50..b74bce0 100644
d1681e
--- a/xlators/cluster/ec/src/ec-common.c
d1681e
+++ b/xlators/cluster/ec/src/ec-common.c
d1681e
@@ -21,10 +21,6 @@
d1681e
 #include "ec.h"
d1681e
 #include "ec-messages.h"
d1681e
 
d1681e
-#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\
d1681e
-                                   EC_FLAG_WAITING_DATA_DIRTY |\
d1681e
-                                   EC_FLAG_WAITING_METADATA_DIRTY)
d1681e
-
d1681e
 void
d1681e
 ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
d1681e
                      int32_t ret_status)
d1681e
@@ -160,10 +156,16 @@ ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
d1681e
 static gf_boolean_t
d1681e
 ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
d1681e
 {
d1681e
+        ec_t *ec = l1->fop->xl->private;
d1681e
+
d1681e
         if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
d1681e
             (l2->fop->flags & EC_FLAG_LOCK_SHARED))
d1681e
                 return _gf_false;
d1681e
 
d1681e
+        if (!ec->parallel_writes) {
d1681e
+                return _gf_true;
d1681e
+        }
d1681e
+
d1681e
         return ec_is_range_conflict (l1, l2);
d1681e
 }
d1681e
 
d1681e
@@ -1118,7 +1120,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
d1681e
     ec_lock_t *lock = NULL;
d1681e
     ec_inode_t *ctx;
d1681e
     gf_boolean_t release = _gf_false;
d1681e
-    uint64_t waiting_flags = 0;
d1681e
+    uint64_t provided_flags = 0;
d1681e
     uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
d1681e
 
d1681e
     lock = parent_link->lock;
d1681e
@@ -1126,14 +1128,14 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
d1681e
     ctx = lock->ctx;
d1681e
 
d1681e
     INIT_LIST_HEAD(&list);
d1681e
-    waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
d1681e
+    provided_flags = EC_PROVIDED_FLAGS(parent_link->waiting_flags);
d1681e
 
d1681e
     LOCK(&lock->loc.inode->lock);
d1681e
 
d1681e
     list_for_each_entry(link, &lock->owners, owner_list) {
d1681e
-        if ((link->waiting_flags & waiting_flags) != 0) {
d1681e
-            link->waiting_flags ^= (link->waiting_flags & waiting_flags);
d1681e
-            if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0)
d1681e
+        if ((link->waiting_flags & provided_flags) != 0) {
d1681e
+            link->waiting_flags ^= (link->waiting_flags & provided_flags);
d1681e
+            if (EC_NEEDED_FLAGS(link->waiting_flags) == 0)
d1681e
                     list_add_tail(&link->fop->cbk_list, &list);
d1681e
         }
d1681e
     }
d1681e
@@ -1146,7 +1148,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
d1681e
         goto unlock;
d1681e
     }
d1681e
 
d1681e
-    if (waiting_flags & EC_FLAG_WAITING_XATTROP) {
d1681e
+    if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) {
d1681e
             op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
d1681e
                                           ctx->pre_version,
d1681e
                                           EC_VERSION_SIZE);
d1681e
@@ -1207,20 +1209,20 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
d1681e
 
d1681e
     ec_set_dirty_flag (fop->data, ctx, dirty);
d1681e
     if (dirty[EC_METADATA_TXN] &&
d1681e
-        (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) {
d1681e
+        (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) {
d1681e
             GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]);
d1681e
             ctx->dirty[EC_METADATA_TXN] = 1;
d1681e
     }
d1681e
 
d1681e
     if (dirty[EC_DATA_TXN] &&
d1681e
-        (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) {
d1681e
+        (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) {
d1681e
             GF_ASSERT (!ctx->dirty[EC_DATA_TXN]);
d1681e
             ctx->dirty[EC_DATA_TXN] = 1;
d1681e
     }
d1681e
     op_errno = 0;
d1681e
 unlock:
d1681e
 
d1681e
-    lock->waiting_flags ^= waiting_flags;
d1681e
+    lock->waiting_flags ^= provided_flags;
d1681e
 
d1681e
     if (op_errno == 0) {
d1681e
         /* If the fop fails on any of the good bricks, it is important to mark
d1681e
@@ -1267,6 +1269,24 @@ unlock:
d1681e
     return 0;
d1681e
 }
d1681e
 
d1681e
+static gf_boolean_t
d1681e
+ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag)
d1681e
+{
d1681e
+        uint64_t current;
d1681e
+
d1681e
+        link->waiting_flags |= EC_FLAG_NEEDS(flag);
d1681e
+
d1681e
+        current = EC_NEEDED_FLAGS(lock->waiting_flags);
d1681e
+        if (!EC_FLAGS_HAVE(current, flag)) {
d1681e
+                lock->waiting_flags |= EC_FLAG_NEEDS(flag);
d1681e
+                link->waiting_flags |= EC_FLAG_PROVIDES(flag);
d1681e
+
d1681e
+                return _gf_true;
d1681e
+        }
d1681e
+
d1681e
+        return _gf_false;
d1681e
+}
d1681e
+
d1681e
 static uint64_t
d1681e
 ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
d1681e
                                  uint64_t *dirty)
d1681e
@@ -1275,31 +1295,25 @@ ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
d1681e
         uint64_t        newflags = 0;
d1681e
         ec_inode_t *ctx     = lock->ctx;
d1681e
 
d1681e
-        oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
d1681e
+        oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
d1681e
 
d1681e
         if (lock->query && !ctx->have_info) {
d1681e
-                lock->waiting_flags |= EC_FLAG_WAITING_XATTROP;
d1681e
-                link->waiting_flags |= EC_FLAG_WAITING_XATTROP;
d1681e
+                ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
d1681e
         }
d1681e
 
d1681e
         if (dirty[EC_DATA_TXN]) {
d1681e
-                if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) {
d1681e
+                if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
d1681e
                         dirty[EC_DATA_TXN] = 0;
d1681e
-                } else {
d1681e
-                        lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
d1681e
                 }
d1681e
-                link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
d1681e
         }
d1681e
 
d1681e
         if (dirty[EC_METADATA_TXN]) {
d1681e
-                if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) {
d1681e
+                if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
d1681e
                         dirty[EC_METADATA_TXN] = 0;
d1681e
-                } else {
d1681e
-                        lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
d1681e
                 }
d1681e
-                link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
d1681e
         }
d1681e
-        newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
d1681e
+        newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
d1681e
+
d1681e
         return oldflags ^ newflags;
d1681e
 }
d1681e
 
d1681e
@@ -1369,7 +1383,7 @@ void ec_get_size_version(ec_lock_link_t *link)
d1681e
         goto out;
d1681e
     }
d1681e
 
d1681e
-    if (changed_flags & EC_FLAG_WAITING_XATTROP) {
d1681e
+    if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) {
d1681e
             /* Once we know that an xattrop will be needed,
d1681e
              * we try to get all available information in a
d1681e
              * single call. */
d1681e
@@ -1646,10 +1660,6 @@ static gf_boolean_t
d1681e
 ec_link_has_lock_conflict (ec_lock_link_t *link, gf_boolean_t waitlist_check)
d1681e
 {
d1681e
         ec_lock_link_t *trav_link = NULL;
d1681e
-        ec_t           *ec = link->fop->xl->private;
d1681e
-
d1681e
-        if (!ec->parallel_writes)
d1681e
-                return _gf_true;
d1681e
 
d1681e
         list_for_each_entry (trav_link, &link->lock->owners, owner_list) {
d1681e
                 if (ec_lock_conflict (trav_link, link))
d1681e
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
d1681e
index c0ad604..85bf819 100644
d1681e
--- a/xlators/cluster/ec/src/ec-common.h
d1681e
+++ b/xlators/cluster/ec/src/ec-common.h
d1681e
@@ -29,9 +29,31 @@ typedef enum {
d1681e
 
d1681e
 #define EC_FLAG_LOCK_SHARED             0x0001
d1681e
 
d1681e
-#define EC_FLAG_WAITING_XATTROP         0x0001
d1681e
-#define EC_FLAG_WAITING_DATA_DIRTY      0x0002
d1681e
-#define EC_FLAG_WAITING_METADATA_DIRTY  0x0004
d1681e
+enum _ec_xattrop_flags {
d1681e
+        EC_FLAG_XATTROP,
d1681e
+        EC_FLAG_DATA_DIRTY,
d1681e
+        EC_FLAG_METADATA_DIRTY,
d1681e
+
d1681e
+        /* Add any new flag here, before EC_FLAG_MAX. The maximum number of
d1681e
+         * flags that can be defined is 16. */
d1681e
+
d1681e
+        EC_FLAG_MAX
d1681e
+};
d1681e
+
d1681e
+/* We keep two sets of flags. One to determine what's really providing the
d1681e
+ * currect xattrop and the other to know what the parent fop of the xattrop
d1681e
+ * needs to proceed. It might happen that a fop needs some information that
d1681e
+ * is being already requested by a previous fop. The two sets are stored
d1681e
+ * contiguously. */
d1681e
+
d1681e
+#define EC_FLAG_NEEDS(_flag) (1 << (_flag))
d1681e
+#define EC_FLAG_PROVIDES(_flag) (1 << ((_flag) + EC_FLAG_MAX))
d1681e
+
d1681e
+#define EC_NEEDED_FLAGS(_flags) ((_flags) & ((1 << EC_FLAG_MAX) - 1))
d1681e
+
d1681e
+#define EC_PROVIDED_FLAGS(_flags) EC_NEEDED_FLAGS((_flags) >> EC_FLAG_MAX)
d1681e
+
d1681e
+#define EC_FLAGS_HAVE(_flags, _flag) (((_flags) & (1 << (_flag))) != 0)
d1681e
 
d1681e
 #define EC_SELFHEAL_BIT 62
d1681e
 
d1681e
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
d1681e
index eb91c4a..0d59efd 100644
d1681e
--- a/xlators/cluster/ec/src/ec.c
d1681e
+++ b/xlators/cluster/ec/src/ec.c
d1681e
@@ -1331,6 +1331,7 @@ int32_t ec_dump_private(xlator_t *this)
d1681e
     gf_proc_dump_write("healers", "%d", ec->healers);
d1681e
     gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
d1681e
     gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
d1681e
+    gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
d1681e
 
d1681e
     return 0;
d1681e
 }
d1681e
-- 
d1681e
1.8.3.1
d1681e