a3470f
From fb62cbef3fcaa3e2a23a98182edaae332803b3bb Mon Sep 17 00:00:00 2001
a3470f
From: Xavi Hernandez <xhernandez@redhat.com>
a3470f
Date: Tue, 15 May 2018 11:37:16 +0200
a3470f
Subject: [PATCH 283/305] cluster/ec: Fix pre-op xattrop management
a3470f
a3470f
Multiple pre-op xattrop can be simultaneously being processed. On the cbk
a3470f
it was checked if the fop was waiting for some specific data (like size and
a3470f
version) and, if so, it was assumed that this answer should contain that
a3470f
data.
a3470f
a3470f
This is not true, since a fop can be waiting for some data, but it may come
a3470f
from the xattrop of another fop.
a3470f
a3470f
This patch differentiates between needing some information and providing it.
a3470f
a3470f
This is related to parallel writes. Disabling them fixed the problem, but
a3470f
also prevented concurrent reads. A change has been made so that disabling
a3470f
parallel writes still allows parallel reads.
a3470f
a3470f
Upstream patch: https://review.gluster.org/20024
a3470f
a3470f
BUG: 1567001
a3470f
Change-Id: I74772ad6b80b7b37805da93d5ec3ae099e96b041
a3470f
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/139707
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
a3470f
---
a3470f
 xlators/cluster/ec/src/ec-common.c | 70 ++++++++++++++++++++++----------------
a3470f
 xlators/cluster/ec/src/ec-common.h | 28 +++++++++++++--
a3470f
 xlators/cluster/ec/src/ec.c        |  1 +
a3470f
 3 files changed, 66 insertions(+), 33 deletions(-)
a3470f
a3470f
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
a3470f
index bd2ae50..b74bce0 100644
a3470f
--- a/xlators/cluster/ec/src/ec-common.c
a3470f
+++ b/xlators/cluster/ec/src/ec-common.c
a3470f
@@ -21,10 +21,6 @@
a3470f
 #include "ec.h"
a3470f
 #include "ec-messages.h"
a3470f
 
a3470f
-#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\
a3470f
-                                   EC_FLAG_WAITING_DATA_DIRTY |\
a3470f
-                                   EC_FLAG_WAITING_METADATA_DIRTY)
a3470f
-
a3470f
 void
a3470f
 ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
a3470f
                      int32_t ret_status)
a3470f
@@ -160,10 +156,16 @@ ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
a3470f
 static gf_boolean_t
a3470f
 ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
a3470f
 {
a3470f
+        ec_t *ec = l1->fop->xl->private;
a3470f
+
a3470f
         if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
a3470f
             (l2->fop->flags & EC_FLAG_LOCK_SHARED))
a3470f
                 return _gf_false;
a3470f
 
a3470f
+        if (!ec->parallel_writes) {
a3470f
+                return _gf_true;
a3470f
+        }
a3470f
+
a3470f
         return ec_is_range_conflict (l1, l2);
a3470f
 }
a3470f
 
a3470f
@@ -1118,7 +1120,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
a3470f
     ec_lock_t *lock = NULL;
a3470f
     ec_inode_t *ctx;
a3470f
     gf_boolean_t release = _gf_false;
a3470f
-    uint64_t waiting_flags = 0;
a3470f
+    uint64_t provided_flags = 0;
a3470f
     uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
a3470f
 
a3470f
     lock = parent_link->lock;
a3470f
@@ -1126,14 +1128,14 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
a3470f
     ctx = lock->ctx;
a3470f
 
a3470f
     INIT_LIST_HEAD(&list);
a3470f
-    waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
a3470f
+    provided_flags = EC_PROVIDED_FLAGS(parent_link->waiting_flags);
a3470f
 
a3470f
     LOCK(&lock->loc.inode->lock);
a3470f
 
a3470f
     list_for_each_entry(link, &lock->owners, owner_list) {
a3470f
-        if ((link->waiting_flags & waiting_flags) != 0) {
a3470f
-            link->waiting_flags ^= (link->waiting_flags & waiting_flags);
a3470f
-            if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0)
a3470f
+        if ((link->waiting_flags & provided_flags) != 0) {
a3470f
+            link->waiting_flags ^= (link->waiting_flags & provided_flags);
a3470f
+            if (EC_NEEDED_FLAGS(link->waiting_flags) == 0)
a3470f
                     list_add_tail(&link->fop->cbk_list, &list);
a3470f
         }
a3470f
     }
a3470f
@@ -1146,7 +1148,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
a3470f
         goto unlock;
a3470f
     }
a3470f
 
a3470f
-    if (waiting_flags & EC_FLAG_WAITING_XATTROP) {
a3470f
+    if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) {
a3470f
             op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
a3470f
                                           ctx->pre_version,
a3470f
                                           EC_VERSION_SIZE);
a3470f
@@ -1207,20 +1209,20 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
a3470f
 
a3470f
     ec_set_dirty_flag (fop->data, ctx, dirty);
a3470f
     if (dirty[EC_METADATA_TXN] &&
a3470f
-        (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) {
a3470f
+        (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) {
a3470f
             GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]);
a3470f
             ctx->dirty[EC_METADATA_TXN] = 1;
a3470f
     }
a3470f
 
a3470f
     if (dirty[EC_DATA_TXN] &&
a3470f
-        (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) {
a3470f
+        (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) {
a3470f
             GF_ASSERT (!ctx->dirty[EC_DATA_TXN]);
a3470f
             ctx->dirty[EC_DATA_TXN] = 1;
a3470f
     }
a3470f
     op_errno = 0;
a3470f
 unlock:
a3470f
 
a3470f
-    lock->waiting_flags ^= waiting_flags;
a3470f
+    lock->waiting_flags ^= provided_flags;
a3470f
 
a3470f
     if (op_errno == 0) {
a3470f
         /* If the fop fails on any of the good bricks, it is important to mark
a3470f
@@ -1267,6 +1269,24 @@ unlock:
a3470f
     return 0;
a3470f
 }
a3470f
 
a3470f
+static gf_boolean_t
a3470f
+ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag)
a3470f
+{
a3470f
+        uint64_t current;
a3470f
+
a3470f
+        link->waiting_flags |= EC_FLAG_NEEDS(flag);
a3470f
+
a3470f
+        current = EC_NEEDED_FLAGS(lock->waiting_flags);
a3470f
+        if (!EC_FLAGS_HAVE(current, flag)) {
a3470f
+                lock->waiting_flags |= EC_FLAG_NEEDS(flag);
a3470f
+                link->waiting_flags |= EC_FLAG_PROVIDES(flag);
a3470f
+
a3470f
+                return _gf_true;
a3470f
+        }
a3470f
+
a3470f
+        return _gf_false;
a3470f
+}
a3470f
+
a3470f
 static uint64_t
a3470f
 ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
a3470f
                                  uint64_t *dirty)
a3470f
@@ -1275,31 +1295,25 @@ ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
a3470f
         uint64_t        newflags = 0;
a3470f
         ec_inode_t *ctx     = lock->ctx;
a3470f
 
a3470f
-        oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
a3470f
+        oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
a3470f
 
a3470f
         if (lock->query && !ctx->have_info) {
a3470f
-                lock->waiting_flags |= EC_FLAG_WAITING_XATTROP;
a3470f
-                link->waiting_flags |= EC_FLAG_WAITING_XATTROP;
a3470f
+                ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
a3470f
         }
a3470f
 
a3470f
         if (dirty[EC_DATA_TXN]) {
a3470f
-                if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) {
a3470f
+                if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
a3470f
                         dirty[EC_DATA_TXN] = 0;
a3470f
-                } else {
a3470f
-                        lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
a3470f
                 }
a3470f
-                link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
a3470f
         }
a3470f
 
a3470f
         if (dirty[EC_METADATA_TXN]) {
a3470f
-                if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) {
a3470f
+                if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
a3470f
                         dirty[EC_METADATA_TXN] = 0;
a3470f
-                } else {
a3470f
-                        lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
a3470f
                 }
a3470f
-                link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
a3470f
         }
a3470f
-        newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
a3470f
+        newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
a3470f
+
a3470f
         return oldflags ^ newflags;
a3470f
 }
a3470f
 
a3470f
@@ -1369,7 +1383,7 @@ void ec_get_size_version(ec_lock_link_t *link)
a3470f
         goto out;
a3470f
     }
a3470f
 
a3470f
-    if (changed_flags & EC_FLAG_WAITING_XATTROP) {
a3470f
+    if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) {
a3470f
             /* Once we know that an xattrop will be needed,
a3470f
              * we try to get all available information in a
a3470f
              * single call. */
a3470f
@@ -1646,10 +1660,6 @@ static gf_boolean_t
a3470f
 ec_link_has_lock_conflict (ec_lock_link_t *link, gf_boolean_t waitlist_check)
a3470f
 {
a3470f
         ec_lock_link_t *trav_link = NULL;
a3470f
-        ec_t           *ec = link->fop->xl->private;
a3470f
-
a3470f
-        if (!ec->parallel_writes)
a3470f
-                return _gf_true;
a3470f
 
a3470f
         list_for_each_entry (trav_link, &link->lock->owners, owner_list) {
a3470f
                 if (ec_lock_conflict (trav_link, link))
a3470f
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
a3470f
index c0ad604..85bf819 100644
a3470f
--- a/xlators/cluster/ec/src/ec-common.h
a3470f
+++ b/xlators/cluster/ec/src/ec-common.h
a3470f
@@ -29,9 +29,31 @@ typedef enum {
a3470f
 
a3470f
 #define EC_FLAG_LOCK_SHARED             0x0001
a3470f
 
a3470f
-#define EC_FLAG_WAITING_XATTROP         0x0001
a3470f
-#define EC_FLAG_WAITING_DATA_DIRTY      0x0002
a3470f
-#define EC_FLAG_WAITING_METADATA_DIRTY  0x0004
a3470f
+enum _ec_xattrop_flags {
a3470f
+        EC_FLAG_XATTROP,
a3470f
+        EC_FLAG_DATA_DIRTY,
a3470f
+        EC_FLAG_METADATA_DIRTY,
a3470f
+
a3470f
+        /* Add any new flag here, before EC_FLAG_MAX. The maximum number of
a3470f
+         * flags that can be defined is 16. */
a3470f
+
a3470f
+        EC_FLAG_MAX
a3470f
+};
a3470f
+
a3470f
+/* We keep two sets of flags. One to determine what's really providing the
a3470f
+ * currect xattrop and the other to know what the parent fop of the xattrop
a3470f
+ * needs to proceed. It might happen that a fop needs some information that
a3470f
+ * is being already requested by a previous fop. The two sets are stored
a3470f
+ * contiguously. */
a3470f
+
a3470f
+#define EC_FLAG_NEEDS(_flag) (1 << (_flag))
a3470f
+#define EC_FLAG_PROVIDES(_flag) (1 << ((_flag) + EC_FLAG_MAX))
a3470f
+
a3470f
+#define EC_NEEDED_FLAGS(_flags) ((_flags) & ((1 << EC_FLAG_MAX) - 1))
a3470f
+
a3470f
+#define EC_PROVIDED_FLAGS(_flags) EC_NEEDED_FLAGS((_flags) >> EC_FLAG_MAX)
a3470f
+
a3470f
+#define EC_FLAGS_HAVE(_flags, _flag) (((_flags) & (1 << (_flag))) != 0)
a3470f
 
a3470f
 #define EC_SELFHEAL_BIT 62
a3470f
 
a3470f
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
a3470f
index eb91c4a..0d59efd 100644
a3470f
--- a/xlators/cluster/ec/src/ec.c
a3470f
+++ b/xlators/cluster/ec/src/ec.c
a3470f
@@ -1331,6 +1331,7 @@ int32_t ec_dump_private(xlator_t *this)
a3470f
     gf_proc_dump_write("healers", "%d", ec->healers);
a3470f
     gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
a3470f
     gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
a3470f
+    gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
a3470f
 
a3470f
     return 0;
a3470f
 }
a3470f
-- 
a3470f
1.8.3.1
a3470f