a3470f
From 6c176a6f9743ab0518619f784a1fc5ac9562b991 Mon Sep 17 00:00:00 2001
a3470f
From: Pranith Kumar K <pkarampu@redhat.com>
a3470f
Date: Tue, 18 Jul 2017 18:39:01 +0530
a3470f
Subject: [PATCH 076/128] cluster/ec: Handle parallel get_size_version
a3470f
a3470f
upstream patch: https://review.gluster.org/#/c/17820/
a3470f
a3470f
>Updates #251
a3470f
>Change-Id: I6244014dbc90af3239d63d75a064ae22ec12a054
a3470f
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
a3470f
a3470f
BUG: 1459101
a3470f
Change-Id: I6244014dbc90af3239d63d75a064ae22ec12a054
a3470f
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/123551
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
a3470f
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
a3470f
---
a3470f
 xlators/cluster/ec/src/ec-common.c | 151 +++++++++++++++++++++++--------------
a3470f
 xlators/cluster/ec/src/ec-common.h |   8 +-
a3470f
 xlators/cluster/ec/src/ec-types.h  |   3 +-
a3470f
 3 files changed, 103 insertions(+), 59 deletions(-)
a3470f
a3470f
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
a3470f
index 732d422..6963907 100644
a3470f
--- a/xlators/cluster/ec/src/ec-common.c
a3470f
+++ b/xlators/cluster/ec/src/ec-common.c
a3470f
@@ -21,6 +21,10 @@
a3470f
 #include "ec.h"
a3470f
 #include "ec-messages.h"
a3470f
 
a3470f
+#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\
a3470f
+                                   EC_FLAG_WAITING_DATA_DIRTY |\
a3470f
+                                   EC_FLAG_WAITING_METADATA_DIRTY)
a3470f
+
a3470f
 uint32_t
a3470f
 ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop)
a3470f
 {
a3470f
@@ -882,11 +886,11 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags)
a3470f
 }
a3470f
 
a3470f
 gf_boolean_t
a3470f
-ec_config_check (ec_fop_data_t *fop, ec_config_t *config)
a3470f
+ec_config_check (xlator_t *xl, ec_config_t *config)
a3470f
 {
a3470f
     ec_t *ec;
a3470f
 
a3470f
-    ec = fop->xl->private;
a3470f
+    ec = xl->private;
a3470f
     if ((config->version != EC_CONFIG_VERSION) ||
a3470f
         (config->algorithm != EC_CONFIG_ALGORITHM) ||
a3470f
         (config->gf_word_size != EC_GF_BITS) ||
a3470f
@@ -911,11 +915,11 @@ ec_config_check (ec_fop_data_t *fop, ec_config_t *config)
a3470f
             !ec_is_power_of_2(config->gf_word_size) ||
a3470f
             ((config->chunk_size * 8) % (config->gf_word_size * data_bricks)
a3470f
                                                                        != 0)) {
a3470f
-            gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
a3470f
+            gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
a3470f
                     EC_MSG_INVALID_CONFIG,
a3470f
                     "Invalid or corrupted config");
a3470f
         } else {
a3470f
-            gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
a3470f
+            gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
a3470f
                     EC_MSG_INVALID_CONFIG,
a3470f
                     "Unsupported config "
a3470f
                     "(V=%u, A=%u, W=%u, "
a3470f
@@ -962,24 +966,28 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
a3470f
 {
a3470f
     struct list_head list;
a3470f
     ec_fop_data_t *fop = cookie, *parent, *tmp;
a3470f
-    ec_lock_link_t *link = fop->data;
a3470f
+    ec_lock_link_t *parent_link = fop->data;
a3470f
+    ec_lock_link_t *link = NULL;
a3470f
     ec_lock_t *lock = NULL;
a3470f
     ec_inode_t *ctx;
a3470f
     gf_boolean_t release = _gf_false;
a3470f
+    uint64_t waiting_flags = 0;
a3470f
+    uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
a3470f
 
a3470f
-    lock = link->lock;
a3470f
-    parent = link->fop;
a3470f
+    lock = parent_link->lock;
a3470f
+    parent = parent_link->fop;
a3470f
     ctx = lock->ctx;
a3470f
 
a3470f
     INIT_LIST_HEAD(&list);
a3470f
+    waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
a3470f
 
a3470f
     LOCK(&lock->loc.inode->lock);
a3470f
 
a3470f
     list_for_each_entry(link, &lock->owners, owner_list) {
a3470f
-        if ((link->fop->flags & EC_FLAG_WAITING_XATTROP) != 0) {
a3470f
-            link->fop->flags ^= EC_FLAG_WAITING_XATTROP;
a3470f
-
a3470f
-            list_add_tail(&link->fop->cbk_list, &list);
a3470f
+        if ((link->waiting_flags & waiting_flags) != 0) {
a3470f
+            link->waiting_flags ^= (link->waiting_flags & waiting_flags);
a3470f
+            if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0)
a3470f
+                    list_add_tail(&link->fop->cbk_list, &list);
a3470f
         }
a3470f
     }
a3470f
 
a3470f
@@ -991,8 +999,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
a3470f
         goto unlock;
a3470f
     }
a3470f
 
a3470f
-    if (parent->flags & EC_FLAG_QUERY_METADATA) {
a3470f
-            parent->flags ^= EC_FLAG_QUERY_METADATA;
a3470f
+    if (waiting_flags & EC_FLAG_WAITING_XATTROP) {
a3470f
             op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
a3470f
                                           ctx->pre_version,
a3470f
                                           EC_VERSION_SIZE);
a3470f
@@ -1036,7 +1043,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
a3470f
                         goto unlock;
a3470f
                     }
a3470f
                 } else {
a3470f
-                    if (!ec_config_check(parent, &ctx->config)) {
a3470f
+                    if (!ec_config_check(parent->xl, &ctx->config)) {
a3470f
                         gf_msg (this->name, GF_LOG_ERROR, EINVAL,
a3470f
                                 EC_MSG_CONFIG_XATTR_INVALID,
a3470f
                                 "Invalid config xattr");
a3470f
@@ -1051,12 +1058,22 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
a3470f
             ctx->have_info = _gf_true;
a3470f
     }
a3470f
 
a3470f
-    ec_set_dirty_flag (fop->data, ctx, ctx->dirty);
a3470f
+    ec_set_dirty_flag (fop->data, ctx, dirty);
a3470f
+    if (dirty[EC_METADATA_TXN] &&
a3470f
+        (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) {
a3470f
+            GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]);
a3470f
+            ctx->dirty[EC_METADATA_TXN] = 1;
a3470f
+    }
a3470f
+
a3470f
+    if (dirty[EC_DATA_TXN] &&
a3470f
+        (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) {
a3470f
+            GF_ASSERT (!ctx->dirty[EC_DATA_TXN]);
a3470f
+            ctx->dirty[EC_DATA_TXN] = 1;
a3470f
+    }
a3470f
     op_errno = 0;
a3470f
 unlock:
a3470f
-    lock->getting_xattr = _gf_false;
a3470f
 
a3470f
-    UNLOCK(&lock->loc.inode->lock);
a3470f
+    lock->waiting_flags ^= waiting_flags;
a3470f
 
a3470f
     if (op_errno == 0) {
a3470f
         /* If the fop fails on any of the good bricks, it is important to mark
a3470f
@@ -1066,33 +1083,24 @@ unlock:
a3470f
                 release = _gf_true;
a3470f
         }
a3470f
 
a3470f
-        /* lock->release is a critical field that is checked and modified most
a3470f
-         * of the time inside a locked region. This use here is safe because we
a3470f
-         * are in a modifying fop and we currently don't allow two modifying
a3470f
-         * fops to be processed concurrently, so no one else could be checking
a3470f
-         * or modifying it.*/
a3470f
-        if (link->update[0] && !link->dirty[0]) {
a3470f
+        if (parent_link->update[0] && !parent_link->dirty[0]) {
a3470f
                 lock->release |= release;
a3470f
         }
a3470f
 
a3470f
-        if (link->update[1] && !link->dirty[1]) {
a3470f
+        if (parent_link->update[1] && !parent_link->dirty[1]) {
a3470f
                 lock->release |= release;
a3470f
         }
a3470f
 
a3470f
         /* We don't allow the main fop to be executed on bricks that have not
a3470f
          * succeeded the initial xattrop. */
a3470f
-        parent->mask &= fop->good;
a3470f
         ec_lock_update_good (lock, fop);
a3470f
 
a3470f
         /*As of now only data healing marks bricks as healing*/
a3470f
         lock->healing |= fop->healing;
a3470f
-        if (ec_is_data_fop (parent->id)) {
a3470f
-            parent->healing |= fop->healing;
a3470f
-        }
a3470f
-    } else {
a3470f
-        ec_fop_set_error(parent, op_errno);
a3470f
     }
a3470f
 
a3470f
+    UNLOCK(&lock->loc.inode->lock);
a3470f
+
a3470f
     while (!list_empty(&list)) {
a3470f
         tmp = list_entry(list.next, ec_fop_data_t, cbk_list);
a3470f
         list_del_init(&tmp->cbk_list);
a3470f
@@ -1104,16 +1112,50 @@ unlock:
a3470f
             if (ec_is_data_fop (tmp->id)) {
a3470f
                 tmp->healing |= fop->healing;
a3470f
             }
a3470f
-        } else {
a3470f
-            ec_fop_set_error(tmp, op_errno);
a3470f
         }
a3470f
 
a3470f
-        ec_resume(tmp, 0);
a3470f
+        ec_resume(tmp, op_errno);
a3470f
     }
a3470f
 
a3470f
     return 0;
a3470f
 }
a3470f
 
a3470f
+static uint64_t
a3470f
+ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
a3470f
+                                 uint64_t *dirty)
a3470f
+{
a3470f
+        uint64_t        oldflags = 0;
a3470f
+        uint64_t        newflags = 0;
a3470f
+        ec_inode_t *ctx     = lock->ctx;
a3470f
+
a3470f
+        oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
a3470f
+
a3470f
+        if (lock->query && !ctx->have_info) {
a3470f
+                lock->waiting_flags |= EC_FLAG_WAITING_XATTROP;
a3470f
+                link->waiting_flags |= EC_FLAG_WAITING_XATTROP;
a3470f
+        }
a3470f
+
a3470f
+        if (dirty[EC_DATA_TXN]) {
a3470f
+                if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) {
a3470f
+                        dirty[EC_DATA_TXN] = 0;
a3470f
+                } else {
a3470f
+                        lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
a3470f
+                }
a3470f
+                link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
a3470f
+        }
a3470f
+
a3470f
+        if (dirty[EC_METADATA_TXN]) {
a3470f
+                if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) {
a3470f
+                        dirty[EC_METADATA_TXN] = 0;
a3470f
+                } else {
a3470f
+                        lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
a3470f
+                }
a3470f
+                link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
a3470f
+        }
a3470f
+        newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
a3470f
+        return oldflags ^ newflags;
a3470f
+}
a3470f
+
a3470f
 void ec_get_size_version(ec_lock_link_t *link)
a3470f
 {
a3470f
     loc_t loc;
a3470f
@@ -1124,7 +1166,6 @@ void ec_get_size_version(ec_lock_link_t *link)
a3470f
     dict_t *xdata = NULL;
a3470f
     ec_t   *ec = NULL;
a3470f
     int32_t error = 0;
a3470f
-    gf_boolean_t getting_xattr;
a3470f
     gf_boolean_t set_dirty = _gf_false;
a3470f
     uint64_t allzero[EC_VERSION_SIZE] = {0, 0};
a3470f
     uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
a3470f
@@ -1132,6 +1173,7 @@ void ec_get_size_version(ec_lock_link_t *link)
a3470f
     ctx = lock->ctx;
a3470f
     fop = link->fop;
a3470f
     ec  = fop->xl->private;
a3470f
+    uint64_t changed_flags = 0;
a3470f
 
a3470f
     if (ec->optimistic_changelog &&
a3470f
         !(ec->node_mask & ~link->lock->good_mask) && !ec_is_data_fop (fop->id))
a3470f
@@ -1159,19 +1201,20 @@ void ec_get_size_version(ec_lock_link_t *link)
a3470f
 
a3470f
     LOCK(&lock->loc.inode->lock);
a3470f
 
a3470f
-    getting_xattr = lock->getting_xattr;
a3470f
-    lock->getting_xattr = _gf_true;
a3470f
-    if (getting_xattr) {
a3470f
-        fop->flags |= EC_FLAG_WAITING_XATTROP;
a3470f
-
a3470f
-        ec_sleep(fop);
a3470f
+    changed_flags = ec_set_xattrop_flags_and_params (lock, link, dirty);
a3470f
+    if (link->waiting_flags) {
a3470f
+            /* This fop must wait until all of its waiting flags are cleared;
a3470f
+             * some of them may be cleared by other xattrops that are already
a3470f
+             * wound. */
a3470f
+            ec_sleep(fop);
a3470f
+    } else {
a3470f
+            GF_ASSERT (!changed_flags);
a3470f
     }
a3470f
 
a3470f
     UNLOCK(&lock->loc.inode->lock);
a3470f
 
a3470f
-    if (getting_xattr) {
a3470f
+    if (!changed_flags)
a3470f
         goto out;
a3470f
-    }
a3470f
 
a3470f
     dict = dict_new();
a3470f
     if (dict == NULL) {
a3470f
@@ -1179,17 +1222,7 @@ void ec_get_size_version(ec_lock_link_t *link)
a3470f
         goto out;
a3470f
     }
a3470f
 
a3470f
-    if (lock->loc.inode->ia_type == IA_IFREG ||
a3470f
-        lock->loc.inode->ia_type == IA_INVAL) {
a3470f
-            xdata = dict_new();
a3470f
-            if (xdata == NULL || dict_set_int32 (xdata, GF_GET_SIZE, 1)) {
a3470f
-                error = -ENOMEM;
a3470f
-                goto out;
a3470f
-            }
a3470f
-    }
a3470f
-
a3470f
-    if (lock->query && !ctx->have_info) {
a3470f
-            fop->flags |= EC_FLAG_QUERY_METADATA;
a3470f
+    if (changed_flags & EC_FLAG_WAITING_XATTROP) {
a3470f
             /* Once we know that an xattrop will be needed,
a3470f
              * we try to get all available information in a
a3470f
              * single call. */
a3470f
@@ -1208,9 +1241,17 @@ void ec_get_size_version(ec_lock_link_t *link)
a3470f
                 if (error != 0) {
a3470f
                     goto out;
a3470f
                 }
a3470f
+
a3470f
+                xdata = dict_new();
a3470f
+                if (xdata == NULL || dict_set_int32 (xdata, GF_GET_SIZE, 1)) {
a3470f
+                    error = -ENOMEM;
a3470f
+                    goto out;
a3470f
+                }
a3470f
+
a3470f
             }
a3470f
     }
a3470f
-    if (set_dirty) {
a3470f
+
a3470f
+    if (memcmp (allzero, dirty, sizeof (allzero))) {
a3470f
             error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty,
a3470f
                                       EC_VERSION_SIZE);
a3470f
             if (error != 0) {
a3470f
@@ -1943,7 +1984,7 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
a3470f
             ctx->have_size = _gf_true;
a3470f
         }
a3470f
         if ((ec_dict_del_config(xdata, EC_XATTR_CONFIG, &ctx->config) == 0) &&
a3470f
-            ec_config_check(fop->parent, &ctx->config)) {
a3470f
+            ec_config_check(fop->xl, &ctx->config)) {
a3470f
             ctx->have_config = _gf_true;
a3470f
         }
a3470f
 
a3470f
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
a3470f
index a03a590..8f5d20a 100644
a3470f
--- a/xlators/cluster/ec/src/ec-common.h
a3470f
+++ b/xlators/cluster/ec/src/ec-common.h
a3470f
@@ -27,9 +27,11 @@ typedef enum {
a3470f
 
a3470f
 #define EC_CONFIG_ALGORITHM 0
a3470f
 
a3470f
-#define EC_FLAG_LOCK_SHARED       0x0001
a3470f
-#define EC_FLAG_WAITING_XATTROP   0x0002
a3470f
-#define EC_FLAG_QUERY_METADATA    0x0004
a3470f
+#define EC_FLAG_LOCK_SHARED             0x0001
a3470f
+
a3470f
+#define EC_FLAG_WAITING_XATTROP         0x0001
a3470f
+#define EC_FLAG_WAITING_DATA_DIRTY      0x0002
a3470f
+#define EC_FLAG_WAITING_METADATA_DIRTY  0x0004
a3470f
 
a3470f
 #define EC_SELFHEAL_BIT 62
a3470f
 
a3470f
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
a3470f
index 3e93a1a..5601f96 100644
a3470f
--- a/xlators/cluster/ec/src/ec-types.h
a3470f
+++ b/xlators/cluster/ec/src/ec-types.h
a3470f
@@ -227,8 +227,8 @@ struct _ec_lock {
a3470f
     uintptr_t          healing;
a3470f
     uint32_t           refs_owners;  /* Refs for fops owning the lock */
a3470f
     uint32_t           refs_pending; /* Refs assigned to fops being prepared */
a3470f
+    uint32_t           waiting_flags; /*Track xattrop/dirty marking*/
a3470f
     gf_boolean_t       acquired;
a3470f
-    gf_boolean_t       getting_xattr;
a3470f
     gf_boolean_t       unlock_now;
a3470f
     gf_boolean_t       release;
a3470f
     gf_boolean_t       query;
a3470f
@@ -250,6 +250,7 @@ struct _ec_lock_link {
a3470f
     gf_boolean_t      optimistic_changelog;
a3470f
     loc_t            *base;
a3470f
     uint64_t          size;
a3470f
+    uint32_t          waiting_flags;
a3470f
 };
a3470f
 
a3470f
 struct _ec_fop_data {
a3470f
-- 
a3470f
1.8.3.1
a3470f