3604df
From 0d9aeed1092e7ce206a43e9fa29134de73731306 Mon Sep 17 00:00:00 2001
3604df
From: Ashish Pandey <aspandey@redhat.com>
3604df
Date: Thu, 13 Oct 2016 14:13:51 +0530
3604df
Subject: [PATCH 098/141] cluster/ec: set/unset dirty flag for data/metadata update
3604df
3604df
Currently, for all the update operations, metadata or data,
3604df
we set the dirty flag at the end of the operation only if
3604df
a brick is down. This delays healing and, in some cases,
3604df
prevents it from happening at all.
3604df
In this patch we set (+1) the dirty flag
3604df
at the start of the metadata or data update operations and
3604df
after successful completion of the fop, we unset (-1) it again.
3604df
3604df
>Reviewed-on: http://review.gluster.org/13733
3604df
>Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
3604df
>Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
>Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
3604df
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
3604df
3604df
Change-Id: Ide5668bdec7b937a61c5c840cdc79a967598e1e9
3604df
BUG: 1361513
3604df
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/87008
3604df
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
3604df
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
3604df
---
3604df
 tests/basic/ec/ec-background-heals.t |    1 +
3604df
 tests/basic/ec/ec-new-entry.t        |   14 ++
3604df
 xlators/cluster/ec/src/ec-common.c   |  284 +++++++++++++++++++---------------
3604df
 xlators/cluster/ec/src/ec-common.h   |    3 +-
3604df
 xlators/cluster/ec/src/ec-data.h     |    3 +-
3604df
 5 files changed, 181 insertions(+), 124 deletions(-)
3604df
3604df
diff --git a/tests/basic/ec/ec-background-heals.t b/tests/basic/ec/ec-background-heals.t
3604df
index 726e60d..7ac6c0e 100644
3604df
--- a/tests/basic/ec/ec-background-heals.t
3604df
+++ b/tests/basic/ec/ec-background-heals.t
3604df
@@ -23,6 +23,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
3604df
 EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
3604df
 EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
3604df
 TEST touch $M0/a
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" get_pending_heal_count $V0 #No heal should be pending while all bricks are up
3604df
 TEST kill_brick $V0 $H0 $B0/${V0}2
3604df
 echo abc > $M0/a
3604df
 EXPECT 2 get_pending_heal_count $V0 #One for each active brick
3604df
diff --git a/tests/basic/ec/ec-new-entry.t b/tests/basic/ec/ec-new-entry.t
3604df
index 3a5c2ee..2ba2bf5 100644
3604df
--- a/tests/basic/ec/ec-new-entry.t
3604df
+++ b/tests/basic/ec/ec-new-entry.t
3604df
@@ -12,6 +12,17 @@ function get_md5sum {
3604df
         md5sum $1 | awk '{print $1}'
3604df
 }
3604df
 
3604df
+#Immediately after replace-brick trusted.ec.version will be absent, so if it
3604df
+#is present we can assume that a heal was attempted on root
3604df
+function root_heal_attempted {
3604df
+        if [ -z $(get_hex_xattr trusted.ec.version $1) ];
3604df
+        then
3604df
+                echo "N";
3604df
+        else
3604df
+                echo "Y";
3604df
+        fi
3604df
+}
3604df
+
3604df
 TEST glusterd
3604df
 TEST pidof glusterd
3604df
 TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
3604df
@@ -23,6 +34,9 @@ touch $M0/11
3604df
 for i in {1..10}; do dd if=/dev/zero of=$M0/$i bs=1M count=1; done
3604df
 TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force
3604df
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
3604df
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" root_heal_attempted $B0/${V0}6
3604df
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
 #ls -l gives "Total" line so number of lines will be 1 more
3604df
 EXPECT "^12$" num_entries $B0/${V0}6
3604df
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
3604df
index 2e6759a..6ff87ad 100644
3604df
--- a/xlators/cluster/ec/src/ec-common.c
3604df
+++ b/xlators/cluster/ec/src/ec-common.c
3604df
@@ -886,6 +886,27 @@ ec_config_check (ec_fop_data_t *fop, ec_config_t *config)
3604df
     return _gf_true;
3604df
 }
3604df
 
3604df
+gf_boolean_t
3604df
+ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty)
3604df
+{
3604df
+
3604df
+    gf_boolean_t set_dirty = _gf_false;
3604df
+
3604df
+    if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) {
3604df
+                dirty[EC_DATA_TXN] = 1;
3604df
+    }
3604df
+
3604df
+    if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) {
3604df
+                dirty[EC_METADATA_TXN] = 1;
3604df
+    }
3604df
+
3604df
+    if (dirty[EC_METADATA_TXN] || dirty[EC_DATA_TXN]) {
3604df
+        set_dirty = _gf_true;
3604df
+    }
3604df
+
3604df
+    return set_dirty;
3604df
+}
3604df
+
3604df
 int32_t
3604df
 ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
3604df
                        xlator_t *this, int32_t op_ret, int32_t op_errno,
3604df
@@ -906,8 +927,8 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
3604df
     LOCK(&lock->loc.inode->lock);
3604df
 
3604df
     list_for_each_entry(link, &lock->owners, owner_list) {
3604df
-        if ((link->fop->flags & EC_FLAG_WAITING_SIZE) != 0) {
3604df
-            link->fop->flags ^= EC_FLAG_WAITING_SIZE;
3604df
+        if ((link->fop->flags & EC_FLAG_WAITING_XATTROP) != 0) {
3604df
+            link->fop->flags ^= EC_FLAG_WAITING_XATTROP;
3604df
 
3604df
             list_add_tail(&link->fop->cbk_list, &list);
3604df
         }
3604df
@@ -921,68 +942,70 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
3604df
         goto unlock;
3604df
     }
3604df
 
3604df
-    op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, ctx->pre_version,
3604df
-                                  EC_VERSION_SIZE);
3604df
-    if (op_errno != 0) {
3604df
-        gf_msg (this->name, GF_LOG_ERROR, op_errno,
3604df
-                EC_MSG_VER_XATTR_GET_FAIL,
3604df
-                "Unable to get version xattr");
3604df
-
3604df
-        goto unlock;
3604df
-    }
3604df
-    ctx->post_version[0] += ctx->pre_version[0];
3604df
-    ctx->post_version[1] += ctx->pre_version[1];
3604df
-
3604df
-    ctx->have_version = _gf_true;
3604df
-
3604df
-    if (lock->loc.inode->ia_type == IA_IFREG ||
3604df
-        lock->loc.inode->ia_type == IA_INVAL) {
3604df
-        op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE, &ctx->pre_size);
3604df
-        if (op_errno != 0) {
3604df
-            if (lock->loc.inode->ia_type == IA_IFREG) {
3604df
+    if (parent->flags & EC_FLAG_QUERY_METADATA) {
3604df
+            parent->flags ^= EC_FLAG_QUERY_METADATA;
3604df
+            op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
3604df
+                                          ctx->pre_version,
3604df
+                                          EC_VERSION_SIZE);
3604df
+            if (op_errno != 0) {
3604df
                 gf_msg (this->name, GF_LOG_ERROR, op_errno,
3604df
-                        EC_MSG_SIZE_XATTR_GET_FAIL,
3604df
-                        "Unable to get size xattr");
3604df
-
3604df
+                        EC_MSG_VER_XATTR_GET_FAIL,
3604df
+                        "Unable to get version xattr");
3604df
                 goto unlock;
3604df
             }
3604df
-        } else {
3604df
-            ctx->post_size = ctx->pre_size;
3604df
+            ctx->post_version[0] += ctx->pre_version[0];
3604df
+            ctx->post_version[1] += ctx->pre_version[1];
3604df
 
3604df
-            ctx->have_size = _gf_true;
3604df
-        }
3604df
+            ctx->have_version = _gf_true;
3604df
 
3604df
-        op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG, &ctx->config);
3604df
-        if (op_errno != 0) {
3604df
-            if ((lock->loc.inode->ia_type == IA_IFREG) ||
3604df
-                (op_errno != ENODATA)) {
3604df
-                gf_msg (this->name, GF_LOG_ERROR, op_errno,
3604df
-                        EC_MSG_CONFIG_XATTR_GET_FAIL,
3604df
-                        "Unable to get config xattr");
3604df
+            if (lock->loc.inode->ia_type == IA_IFREG ||
3604df
+                lock->loc.inode->ia_type == IA_INVAL) {
3604df
+                op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE,
3604df
+                                               &ctx->pre_size);
3604df
+                if (op_errno != 0) {
3604df
+                    if (lock->loc.inode->ia_type == IA_IFREG) {
3604df
+                        gf_msg (this->name, GF_LOG_ERROR, op_errno,
3604df
+                                EC_MSG_SIZE_XATTR_GET_FAIL,
3604df
+                                "Unable to get size xattr");
3604df
+                        goto unlock;
3604df
+                    }
3604df
+                } else {
3604df
+                    ctx->post_size = ctx->pre_size;
3604df
 
3604df
-                goto unlock;
3604df
-            }
3604df
-        } else {
3604df
-            if (!ec_config_check(parent, &ctx->config)) {
3604df
-                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
3604df
-                        EC_MSG_CONFIG_XATTR_INVALID,
3604df
-                        "Invalid config xattr");
3604df
+                    ctx->have_size = _gf_true;
3604df
+                }
3604df
 
3604df
-                op_errno = EINVAL;
3604df
+                op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG,
3604df
+                                               &ctx->config);
3604df
+                if (op_errno != 0) {
3604df
+                    if ((lock->loc.inode->ia_type == IA_IFREG) ||
3604df
+                        (op_errno != ENODATA)) {
3604df
+                        gf_msg (this->name, GF_LOG_ERROR, op_errno,
3604df
+                                EC_MSG_CONFIG_XATTR_GET_FAIL,
3604df
+                                "Unable to get config xattr");
3604df
+
3604df
+                        goto unlock;
3604df
+                    }
3604df
+                } else {
3604df
+                    if (!ec_config_check(parent, &ctx->config)) {
3604df
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
3604df
+                                EC_MSG_CONFIG_XATTR_INVALID,
3604df
+                                "Invalid config xattr");
3604df
 
3604df
-                goto unlock;
3604df
-            }
3604df
+                        op_errno = EINVAL;
3604df
 
3604df
-            ctx->have_config = _gf_true;
3604df
-        }
3604df
+                        goto unlock;
3604df
+                    }
3604df
+                    ctx->have_config = _gf_true;
3604df
+                }
3604df
+            }
3604df
+            ctx->have_info = _gf_true;
3604df
     }
3604df
 
3604df
-    ctx->have_info = _gf_true;
3604df
-
3604df
+    ec_set_dirty_flag (fop->data, ctx, ctx->dirty);
3604df
     op_errno = 0;
3604df
-
3604df
 unlock:
3604df
-    lock->getting_size = _gf_false;
3604df
+    lock->getting_xattr = _gf_false;
3604df
 
3604df
     UNLOCK(&lock->loc.inode->lock);
3604df
 
3604df
@@ -1029,16 +1052,19 @@ void ec_get_size_version(ec_lock_link_t *link)
3604df
     ec_inode_t *ctx;
3604df
     ec_fop_data_t *fop;
3604df
     dict_t *dict = NULL;
3604df
-    int32_t error = -ENOMEM;
3604df
-    gf_boolean_t getting_size;
3604df
+    int32_t error = 0;
3604df
+    gf_boolean_t getting_xattr;
3604df
+    gf_boolean_t set_dirty = _gf_false;
3604df
     uint64_t allzero[EC_VERSION_SIZE] = {0, 0};
3604df
-
3604df
+    uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
3604df
     lock = link->lock;
3604df
     ctx = lock->ctx;
3604df
     fop = link->fop;
3604df
 
3604df
+    set_dirty = ec_set_dirty_flag (link, ctx, dirty);
3604df
+
3604df
     /* If ec metadata has already been retrieved, do not try again. */
3604df
-    if (ctx->have_info) {
3604df
+    if (ctx->have_info && (!set_dirty)) {
3604df
         if (ec_is_data_fop (fop->id)) {
3604df
             fop->healing |= lock->healing;
3604df
         }
3604df
@@ -1047,58 +1073,63 @@ void ec_get_size_version(ec_lock_link_t *link)
3604df
 
3604df
     /* Determine if there's something we need to retrieve for the current
3604df
      * operation. */
3604df
-    if (!lock->query &&
3604df
+    if (!set_dirty && !lock->query &&
3604df
         (lock->loc.inode->ia_type != IA_IFREG) &&
3604df
         (lock->loc.inode->ia_type != IA_INVAL)) {
3604df
-        return;
3604df
+            return;
3604df
     }
3604df
 
3604df
     memset(&loc, 0, sizeof(loc));
3604df
 
3604df
     LOCK(&lock->loc.inode->lock);
3604df
 
3604df
-    getting_size = lock->getting_size;
3604df
-    lock->getting_size = _gf_true;
3604df
-    if (getting_size) {
3604df
-        fop->flags |= EC_FLAG_WAITING_SIZE;
3604df
+    getting_xattr = lock->getting_xattr;
3604df
+    lock->getting_xattr = _gf_true;
3604df
+    if (getting_xattr) {
3604df
+        fop->flags |= EC_FLAG_WAITING_XATTROP;
3604df
 
3604df
         ec_sleep(fop);
3604df
     }
3604df
 
3604df
     UNLOCK(&lock->loc.inode->lock);
3604df
 
3604df
-    if (getting_size) {
3604df
-        error = 0;
3604df
-
3604df
+    if (getting_xattr) {
3604df
         goto out;
3604df
     }
3604df
 
3604df
     dict = dict_new();
3604df
     if (dict == NULL) {
3604df
+        error = -ENOMEM;
3604df
         goto out;
3604df
     }
3604df
+    if (lock->query && !ctx->have_info) {
3604df
+            fop->flags |= EC_FLAG_QUERY_METADATA;
3604df
+            /* Once we know that an xattrop will be needed,
3604df
+             * we try to get all available information in a
3604df
+             * single call. */
3604df
+            error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
3604df
+                                      EC_VERSION_SIZE);
3604df
+            if (error != 0) {
3604df
+                goto out;
3604df
+            }
3604df
 
3604df
-    /* Once we know that an xattrop will be needed, we try to get all available
3604df
-     * information in a single call. */
3604df
-    error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
3604df
-                              EC_VERSION_SIZE);
3604df
-    if (error == 0) {
3604df
-        error = ec_dict_set_array(dict, EC_XATTR_DIRTY, allzero,
3604df
-                                  EC_VERSION_SIZE);
3604df
-    }
3604df
-    if (error != 0) {
3604df
-        goto out;
3604df
+            if (lock->loc.inode->ia_type == IA_IFREG ||
3604df
+                lock->loc.inode->ia_type == IA_INVAL) {
3604df
+                error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
3604df
+                if (error == 0) {
3604df
+                    error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
3604df
+                }
3604df
+                if (error != 0) {
3604df
+                    goto out;
3604df
+                }
3604df
+            }
3604df
     }
3604df
-
3604df
-    if (lock->loc.inode->ia_type == IA_IFREG ||
3604df
-        lock->loc.inode->ia_type == IA_INVAL) {
3604df
-        error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
3604df
-        if (error == 0) {
3604df
-            error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
3604df
-        }
3604df
-        if (error != 0) {
3604df
-            goto out;
3604df
-        }
3604df
+    if (set_dirty) {
3604df
+            error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty,
3604df
+                                      EC_VERSION_SIZE);
3604df
+            if (error != 0) {
3604df
+                goto out;
3604df
+            }
3604df
     }
3604df
 
3604df
     fop->frame->root->uid = 0;
3604df
@@ -1634,15 +1665,9 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
3604df
     if ((fop->error == 0) && (cbk != NULL) && (cbk->op_ret >= 0)) {
3604df
         if (link->update[0]) {
3604df
             ctx->post_version[0]++;
3604df
-            if (ec->node_mask & ~fop->good) {
3604df
-                ctx->dirty[0]++;
3604df
-            }
3604df
         }
3604df
         if (link->update[1]) {
3604df
             ctx->post_version[1]++;
3604df
-            if (ec->node_mask & ~fop->good) {
3604df
-                ctx->dirty[1]++;
3604df
-            }
3604df
         }
3604df
     }
3604df
 
3604df
@@ -1764,11 +1789,11 @@ void ec_unlock_lock(ec_lock_link_t *link)
3604df
     lock = link->lock;
3604df
     fop = link->fop;
3604df
 
3604df
+    lock->unlock_now = _gf_false;
3604df
     ec_clear_inode_info(fop, lock->loc.inode);
3604df
 
3604df
     if ((lock->mask != 0) && lock->acquired) {
3604df
         ec_owner_set(fop->frame, lock);
3604df
-
3604df
         lock->flock.l_type = F_UNLCK;
3604df
         ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock,
3604df
                  lock->loc.inode);
3604df
@@ -1791,15 +1816,16 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
3604df
     ec_lock_t *lock;
3604df
     ec_inode_t *ctx;
3604df
 
3604df
+    link = fop->data;
3604df
+    lock = link->lock;
3604df
+    ctx = lock->ctx;
3604df
+
3604df
     if (op_ret < 0) {
3604df
         gf_msg(fop->xl->name, fop_log_level (fop->id, op_errno), op_errno,
3604df
                EC_MSG_SIZE_VERS_UPDATE_FAIL,
3604df
                "Failed to update version and size");
3604df
     } else {
3604df
         fop->parent->good &= fop->good;
3604df
-        link = fop->data;
3604df
-        lock = link->lock;
3604df
-        ctx = lock->ctx;
3604df
 
3604df
         ec_lock_update_good(lock, fop);
3604df
 
3604df
@@ -1822,10 +1848,11 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
3604df
 
3604df
         ctx->have_info = _gf_true;
3604df
     }
3604df
-
3604df
-    if ((fop->parent->id != GF_FOP_FLUSH) &&
3604df
-        (fop->parent->id != GF_FOP_FSYNC) &&
3604df
-        (fop->parent->id != GF_FOP_FSYNCDIR)) {
3604df
+    /* If we are here because of a fop other than an unlock request,
3604df
+     * we are still holding the lock, which guarantees that
3604df
+     * lock->unlock_now cannot be modified.
3604df
+     */
3604df
+    if (lock->unlock_now) {
3604df
         ec_unlock_lock(fop->data);
3604df
     }
3604df
 
3604df
@@ -1843,6 +1870,9 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
3604df
     int32_t err = -ENOMEM;
3604df
 
3604df
     fop = link->fop;
3604df
+    ec_t *ec = fop->xl->private;
3604df
+    lock = link->lock;
3604df
+    ctx = lock->ctx;
3604df
 
3604df
     ec_trace("UPDATE", fop, "version=%ld/%ld, size=%ld, dirty=%ld/%ld",
3604df
              version[0], version[1], size, dirty[0], dirty[1]);
3604df
@@ -1852,9 +1882,6 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
3604df
         goto out;
3604df
     }
3604df
 
3604df
-    lock = link->lock;
3604df
-    ctx = lock->ctx;
3604df
-
3604df
     /* If we don't have version information or it has been modified, we
3604df
      * update it. */
3604df
     if (!ctx->have_version || (version[0] != 0) || (version[1] != 0)) {
3604df
@@ -1866,8 +1893,8 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
3604df
     }
3604df
 
3604df
     if (size != 0) {
3604df
-        /* If size has been changed, we should already know the previous size
3604df
-         * of the file. */
3604df
+        /* If size has been changed, we should already
3604df
+         * know the previous size of the file. */
3604df
         GF_ASSERT(ctx->have_size);
3604df
 
3604df
         err = ec_dict_set_number(dict, EC_XATTR_SIZE, size);
3604df
@@ -1876,13 +1903,12 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
3604df
         }
3604df
     }
3604df
 
3604df
-    /* If we don't have dirty information or it has been modified, we update
3604df
-     * it. */
3604df
-    if ((dirty[0] != 0) || (dirty[1] != 0)) {
3604df
-        err = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
3604df
-        if (err != 0) {
3604df
-            goto out;
3604df
-        }
3604df
+    if (dirty[0] || dirty[1]) {
3604df
+            err = ec_dict_set_array(dict, EC_XATTR_DIRTY,
3604df
+                                    dirty, EC_VERSION_SIZE);
3604df
+            if (err != 0) {
3604df
+                goto out;
3604df
+            }
3604df
     }
3604df
 
3604df
     /* If config information is not known, we request it now. */
3604df
@@ -1922,9 +1948,7 @@ out:
3604df
     gf_msg (fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL,
3604df
             "Unable to update version and size");
3604df
 
3604df
-    if ((fop->parent->id != GF_FOP_FLUSH) &&
3604df
-        (fop->parent->id != GF_FOP_FSYNC) &&
3604df
-        (fop->parent->id != GF_FOP_FSYNCDIR)) {
3604df
+    if (lock->unlock_now) {
3604df
         ec_unlock_lock(fop->data);
3604df
     }
3604df
 
3604df
@@ -1935,28 +1959,36 @@ ec_update_info(ec_lock_link_t *link)
3604df
 {
3604df
     ec_lock_t *lock;
3604df
     ec_inode_t *ctx;
3604df
-    uint64_t version[2];
3604df
-    uint64_t dirty[2];
3604df
+    uint64_t version[2] = {0, 0};
3604df
+    uint64_t dirty[2] = {0, 0};
3604df
     uint64_t size;
3604df
+    ec_t *ec = NULL;
3604df
 
3604df
     lock = link->lock;
3604df
     ctx = lock->ctx;
3604df
+    ec = link->fop->xl->private;
3604df
 
3604df
     /* pre_version[*] will be 0 if have_version is false */
3604df
     version[0] = ctx->post_version[0] - ctx->pre_version[0];
3604df
     version[1] = ctx->post_version[1] - ctx->pre_version[1];
3604df
 
3604df
     size = ctx->post_size - ctx->pre_size;
3604df
-
3604df
-    dirty[0] = ctx->dirty[0];
3604df
-    dirty[1] = ctx->dirty[1];
3604df
-    /*Dirty is not combined so just reset it right here*/
3604df
-    memset(ctx->dirty, 0, sizeof(ctx->dirty));
3604df
-
3604df
+    /* If we set the dirty flag for update fop, we have to unset it.
3604df
+     * If fop has failed on some bricks, leave the dirty as marked. */
3604df
+    if (lock->unlock_now) {
3604df
+            if (!(ec->node_mask & ~lock->good_mask)) {
3604df
+                    if (ctx->dirty[0] != 0) {
3604df
+                        dirty[0] = -1;
3604df
+                    }
3604df
+                    if (ctx->dirty[1] != 0) {
3604df
+                        dirty[1] = -1;
3604df
+                    }
3604df
+            }
3604df
+            memset(ctx->dirty, 0, sizeof(ctx->dirty));
3604df
+    }
3604df
     if ((version[0] != 0) || (version[1] != 0) ||
3604df
         (dirty[0] != 0) || (dirty[1] != 0)) {
3604df
         ec_update_size_version(link, version, size, dirty);
3604df
-
3604df
         return _gf_true;
3604df
     }
3604df
 
3604df
@@ -1966,7 +1998,15 @@ ec_update_info(ec_lock_link_t *link)
3604df
 void
3604df
 ec_unlock_now(ec_lock_link_t *link)
3604df
 {
3604df
+    ec_lock_t *lock;
3604df
+    lock = link->lock;
3604df
+
3604df
     ec_trace("UNLOCK_NOW", link->fop, "lock=%p", link->lock);
3604df
+    /*At this point, lock is not being used by any fop and
3604df
+     *can not be reused by any fop as it is going to be released.
3604df
+     *lock->unlock_now can not be modified at any other place.
3604df
+     */
3604df
+    lock->unlock_now = _gf_true;
3604df
 
3604df
     if (!ec_update_info(link)) {
3604df
         ec_unlock_lock(link);
3604df
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
3604df
index 8e724a8..d720d24 100644
3604df
--- a/xlators/cluster/ec/src/ec-common.h
3604df
+++ b/xlators/cluster/ec/src/ec-common.h
3604df
@@ -28,7 +28,8 @@ typedef enum {
3604df
 #define EC_CONFIG_ALGORITHM 0
3604df
 
3604df
 #define EC_FLAG_LOCK_SHARED       0x0001
3604df
-#define EC_FLAG_WAITING_SIZE      0x0002
3604df
+#define EC_FLAG_WAITING_XATTROP   0x0002
3604df
+#define EC_FLAG_QUERY_METADATA    0x0004
3604df
 
3604df
 #define EC_SELFHEAL_BIT 62
3604df
 
3604df
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
3604df
index 4a2a11f..a207b11 100644
3604df
--- a/xlators/cluster/ec/src/ec-data.h
3604df
+++ b/xlators/cluster/ec/src/ec-data.h
3604df
@@ -163,7 +163,8 @@ struct _ec_lock
3604df
     uint32_t           refs_owners;  /* Refs for fops owning the lock */
3604df
     uint32_t           refs_pending; /* Refs assigned to fops being prepared */
3604df
     gf_boolean_t       acquired;
3604df
-    gf_boolean_t       getting_size;
3604df
+    gf_boolean_t       getting_xattr;
3604df
+    gf_boolean_t       unlock_now;
3604df
     gf_boolean_t       release;
3604df
     gf_boolean_t       query;
3604df
     fd_t              *fd;
3604df
-- 
3604df
1.7.1
3604df