From ca3909276d32698f7011b5ba82580d966b24f9a6 Mon Sep 17 00:00:00 2001
From: CentOS Sources <>
Date: May 20 2020 14:24:16 +0000
Subject: import glusterfs-6.0-29.el7


diff --git a/.glusterfs.metadata b/.glusterfs.metadata
index bd41365..98d5fc3 100644
--- a/.glusterfs.metadata
+++ b/.glusterfs.metadata
@@ -1 +1 @@
-bf1d8624cb45d10cf4ebf43bf7d3dc53dd55485a SOURCES/glusterfs-6.0.tar.gz
+c9d75f37e00502a10f64cd4ba9aafb17552e0800 SOURCES/glusterfs-6.0.tar.gz
diff --git a/README.debrand b/README.debrand
deleted file mode 100644
index 01c46d2..0000000
--- a/README.debrand
+++ /dev/null
@@ -1,2 +0,0 @@
-Warning: This package was configured for automatic debranding, but the changes
-failed to apply.
diff --git a/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch b/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch
new file mode 100644
index 0000000..45dada1
--- /dev/null
+++ b/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch
@@ -0,0 +1,109 @@
+From f40570f2f784dc61edb061a4931dcfc16bf51e7e Mon Sep 17 00:00:00 2001
+From: Aravinda VK <>
+Date: Mon, 5 Aug 2019 19:00:21 +0530
+Subject: [PATCH 277/284] geo-rep: Fix Config Get Race
+When two threads (sync jobs) in a Geo-rep worker call `gconf.get` and
+`gconf.getr` (realtime) at the same time, `getr` resets the conf object
+and the other one gets None. A thread lock is introduced to fix the issue.
+  File "/usr/libexec/glusterfs/python/syncdaemon/",
+  line 368, in twrap
+    tf(*aargs)
+  File "/usr/libexec/glusterfs/python/syncdaemon/", line 1987,
+  in syncjob
+    po = self.sync_engine(pb, self.log_err)
+  File "/usr/libexec/glusterfs/python/syncdaemon/",
+  line 1444, in rsync
+    rconf.ssh_ctl_args + \
+AttributeError: 'NoneType' object has no attribute 'split'
+Backport of:
+ > Patch:
+ > Change-Id: I9c245e5c36338265354e158f5baa32b119eb2da5
+ > Updates: bz#1737484
+ > Signed-off-by: Aravinda VK <>
+Change-Id: I9c245e5c36338265354e158f5baa32b119eb2da5
+BUG: 1729915
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ geo-replication/syncdaemon/ | 27 +++++++++++++++++++++------
+ 1 file changed, 21 insertions(+), 6 deletions(-)
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 1fc451f..38f3594 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -17,6 +17,7 @@ import os
+ import shutil
+ from string import Template
+ from datetime import datetime
++from threading import Lock
+ # Global object which can be used in other modules
+@@ -35,6 +36,7 @@ class GconfInvalidValue(Exception):
+ class Gconf(object):
+     def __init__(self, default_conf_file, custom_conf_file=None,
+                  args={}, extra_tmpl_args={}, override_from_args=False):
++        self.lock = Lock()
+         self.default_conf_file = default_conf_file
+         self.custom_conf_file = custom_conf_file
+         self.tmp_conf_file = None
+@@ -163,6 +165,11 @@ class Gconf(object):
+         if value is not None and not self._is_valid_value(name, value):
+             raise GconfInvalidValue()
++    def _load_with_lock(self):
++        with self.lock:
++            self._load()
+     def _load(self):
+         self.gconf = {}
+         self.template_conf = []
+@@ -230,12 +237,19 @@ class Gconf(object):
+         self._tmpl_substitute()
+         self._do_typecast()
+-    def reload(self):
++    def reload(self, with_lock=True):
+         if self._is_config_changed():
+-            self._load()
++            if with_lock:
++                self._load_with_lock()
++            else:
++                self._load()
+-    def get(self, name, default_value=None):
+-        return self.gconf.get(name, default_value)
++    def get(self, name, default_value=None, with_lock=True):
++        if with_lock:
++            with self.lock:
++                return self.gconf.get(name, default_value)
++        else:
++            return self.gconf.get(name, default_value)
+     def getall(self, show_defaults=False, show_non_configurable=False):
+         cnf = {}
+@@ -276,8 +290,9 @@ class Gconf(object):
+         return cnf
+     def getr(self, name, default_value=None):
+-        self.reload()
+-        return self.get(name, default_value)
++        with self.lock:
++            self.reload(with_lock=False)
++            return self.get(name, default_value, with_lock=False)
+     def get_help(self, name=None):
+         pass
diff --git a/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch b/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch
new file mode 100644
index 0000000..00cb48f
--- /dev/null
+++ b/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch
@@ -0,0 +1,45 @@
+From 924a25990948c9d76001cf4134fc5a2fcbf5c02c Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Fri, 16 Aug 2019 15:38:49 +0530
+Subject: [PATCH 278/284] geo-rep: Fix worker connection issue
+All the workers connect to the primary slave node. They should
+connect to the available slave nodes in round-robin fashion
+and choose a different slave node if the corresponding slave
+node is down. This patch fixes that.
+Thanks Aravinda for the help in root causing this.
+Backport of:
+ > Patch:
+ > Change-Id: I9f8e7744f4adb8a24833cf173681d109710f98cb
+ > Signed-off-by: Kotresh HR <>
+ > Updates: bz#1737484
+Change-Id: I9f8e7744f4adb8a24833cf173681d109710f98cb
+Signed-off-by: Kotresh HR <>
+BUG: 1729915
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ geo-replication/syncdaemon/ | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 4ece7e0..8de7db2 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -73,7 +73,8 @@ def subcmd_worker(args):
+     Popen.init_errhandler()
+     fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+     local = GLUSTER("localhost", args.master)
+-    slavehost, slavevol = args.slave.split("::")
++    slavevol = args.slave.split("::")[-1]
++    slavehost = args.resource_remote
+     remote = SSH(slavehost, slavevol)
+     remote.connect_remote()
+     local.connect()
diff --git a/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch b/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch
new file mode 100644
index 0000000..3bbd56c
--- /dev/null
+++ b/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch
@@ -0,0 +1,253 @@
+From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Tue, 16 Jul 2019 20:36:57 +0530
+Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop
+ volume in loop
+Problem: In a brick_mux environment a brick sometimes crashes while a
+         volume is stopped/started in a loop. The brick crashes in the
+         janitor task when it accesses priv: if the posix priv is cleaned
+         up before the janitor task is called, the janitor task crashes.
+Solution: To avoid the crash in a brick_mux environment, introduce a new
+          flag janitor_task_stop in posix_private and, before sending the
+          CHILD_DOWN event, wait for janitor_task_done to update the flag.
+> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
+> fixes: bz#1730409
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138)
+> (Reviewed on upstream link
+Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4
+fixes: bz#1729971
+Signed-off-by: Mohit Agrawal <>
+Tested-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ libglusterfs/src/glusterfs/xlator.h        |  3 +++
+ xlators/mgmt/glusterd/src/glusterd-utils.c |  5 ++--
+ xlators/protocol/server/src/server.c       |  6 ++++-
+ xlators/storage/posix/src/posix-common.c   | 40 +++++++++++++++++++++++++++++-
+ xlators/storage/posix/src/posix-helpers.c  | 16 ++++++++++++
+ xlators/storage/posix/src/posix.h          |  3 +++
+ 6 files changed, 69 insertions(+), 4 deletions(-)
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index b78daad..da551e9 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -861,6 +861,9 @@ struct _xlator {
+     /* Flag to notify got CHILD_DOWN event for detach brick */
+     uint32_t notify_down;
++    /* Flag to avoid throwing a duplicate PARENT_DOWN event */
++    uint32_t parent_down;
+ };
+ /* This would be the only structure which needs to be exported by
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 2aa975b..812c698 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -4082,8 +4082,9 @@ out:
+     if (msg[0]) {
+         gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
+                msg);
+-        gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
+-                 new_brickinfo->hostname, new_brickinfo->path);
++        if (new_brickinfo)
++            gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
++                     new_brickinfo->hostname, new_brickinfo->path);
+     }
+     gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+     return ret;
+diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
+index 6ae63ba..a5f09fe 100644
+--- a/xlators/protocol/server/src/server.c
++++ b/xlators/protocol/server/src/server.c
+@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data)
+     gf_boolean_t victim_found = _gf_false;
+     xlator_list_t **trav_p = NULL;
+     xlator_t *top = NULL;
++    uint32_t parent_down = 0;
+     GF_ASSERT(data);
+@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data)
+         victim = (*trav_p)->xlator;
+         if (victim->cleanup_starting &&
+             strcmp(victim->name, victim_name) == 0) {
+-            victim_found = _gf_true;
++            parent_down = victim->parent_down;
++            victim->parent_down = 1;
++            if (!parent_down)
++                victim_found = _gf_true;
+             break;
+         }
+     }
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index d738692..69857d9 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -146,10 +146,15 @@ int32_t
+ posix_notify(xlator_t *this, int32_t event, void *data, ...)
+ {
+     xlator_t *victim = data;
++    struct posix_private *priv = this->private;
++    int ret = 0;
++    struct timespec sleep_till = {
++        0,
++    };
+     switch (event) {
+         case GF_EVENT_PARENT_UP: {
+-            /* Tell the parent that posix xlator is up */
++            /* Tell the parent that posix xlator is up */
+             default_notify(this, GF_EVENT_CHILD_UP, data);
+         } break;
+@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
+                 break;
+             gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+                    victim->name);
++            if (priv->janitor) {
++                pthread_mutex_lock(&priv->janitor_mutex);
++                {
++                    priv->janitor_task_stop = _gf_true;
++                    ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
++                                          priv->janitor);
++                    if (!ret) {
++                        clock_gettime(CLOCK_REALTIME, &sleep_till);
++                        sleep_till.tv_sec += 1;
++                        /* Wait for posix_janitor_task_done to reset
++                         * janitor_task_stop to _gf_false */
++                        while (priv->janitor_task_stop) {
++                            (void)pthread_cond_timedwait(&priv->janitor_cond,
++                                                         &priv->janitor_mutex,
++                                                         &sleep_till);
++                            clock_gettime(CLOCK_REALTIME, &sleep_till);
++                            sleep_till.tv_sec += 1;
++                        }
++                    }
++                }
++                pthread_mutex_unlock(&priv->janitor_mutex);
++                GF_FREE(priv->janitor);
++            }
++            priv->janitor = NULL;
+             default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
+         } break;
+         default:
+@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this)
+     pthread_mutex_init(&_private->fsync_mutex, NULL);
+     pthread_cond_init(&_private->fsync_cond, NULL);
++    pthread_mutex_init(&_private->janitor_mutex, NULL);
++    pthread_cond_init(&_private->janitor_cond, NULL);
+     INIT_LIST_HEAD(&_private->fsyncs);
+     ret = posix_spawn_ctx_janitor_thread(this);
+     if (ret)
+@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this)
+         (void)gf_thread_cleanup_xint(priv->disk_space_check);
+         priv->disk_space_check = 0;
+     }
+     if (priv->janitor) {
+         /*TODO: Make sure the synctask is also complete */
+         ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
+@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this)
+             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
+                    "Failed to delete janitor timer");
+         }
++        GF_FREE(priv->janitor);
+         priv->janitor = NULL;
+     }
+     if (priv->fsyncer) {
+         (void)gf_thread_cleanup_xint(priv->fsyncer);
+         priv->fsyncer = 0;
+@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this)
+     GF_FREE(priv->base_path);
+     LOCK_DESTROY(&priv->lock);
+     pthread_mutex_destroy(&priv->fsync_mutex);
++    pthread_cond_destroy(&priv->fsync_cond);
++    pthread_mutex_destroy(&priv->janitor_mutex);
++    pthread_cond_destroy(&priv->janitor_cond);
+     GF_FREE(priv->hostname);
+     GF_FREE(priv->trash_path);
+     GF_FREE(priv);
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 07169b5..ef5bfd5 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
+     this = data;
+     priv = this->private;
++    pthread_mutex_lock(&priv->janitor_mutex);
++    {
++        if (priv->janitor_task_stop) {
++            priv->janitor_task_stop = _gf_false;
++            pthread_cond_signal(&priv->janitor_cond);
++            pthread_mutex_unlock(&priv->janitor_mutex);
++            goto out;
++        }
++    }
++    pthread_mutex_unlock(&priv->janitor_mutex);
+     LOCK(&priv->lock);
+     {
+         __posix_janitor_timer_start(this);
+     }
+     UNLOCK(&priv->lock);
+     return 0;
+ }
+@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data)
+     old_this = THIS;
+     THIS = this;
++    if (!priv)
++        goto out;
+     time(&now);
+     if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
+         if (priv->disable_landfill_purge) {
+@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data)
+     THIS = old_this;
+     return 0;
+ }
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index b0935a7..64288a7 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -203,6 +203,8 @@ struct posix_private {
+     struct list_head fsyncs;
+     pthread_mutex_t fsync_mutex;
+     pthread_cond_t fsync_cond;
++    pthread_mutex_t janitor_mutex;
++    pthread_cond_t janitor_cond;
+     int fsync_queue_count;
+     enum {
+@@ -257,6 +259,7 @@ struct posix_private {
+     gf_boolean_t fips_mode_rchecksum;
+     gf_boolean_t ctime;
++    gf_boolean_t janitor_task_stop;
+ };
+ typedef struct {
diff --git a/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch b/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch
new file mode 100644
index 0000000..38b4d48
--- /dev/null
+++ b/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch
@@ -0,0 +1,153 @@
+From 2d7d9165c6a8619eef553859b4b7136b8e9ccb55 Mon Sep 17 00:00:00 2001
+From: Anoop C S <>
+Date: Sat, 10 Aug 2019 10:30:26 +0530
+Subject: [PATCH 280/284] performance/md-cache: Do not skip caching of null
+ character xattr values
+A null character string is a valid xattr value in a file system. But for
+those xattrs processed by md-cache, it does not update its entries if the
+value is null ('\0'). This results in ENODATA when those xattrs are
+queried afterwards via getxattr(), causing failures in basic operations
+like create, copy etc. in a specially configured Samba setup for Mac OS.
+On the other side, snapview-server internally sets an empty string ("")
+as the value for xattrs received as part of listxattr(); these are not
+intended to be cached. Therefore we try to maintain that behaviour using
+an additional dictionary key to prevent updating entries in the getxattr()
+and fgetxattr() callbacks in md-cache.
+Credits: Poornima G <>
+Backport of
+Change-Id: I7859cbad0a06ca6d788420c2a495e658699c6ff7
+Fixes: bz#1732376
+Signed-off-by: Anoop C S <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tests/bugs/md-cache/bug-1726205.t                  | 22 +++++++++++++++
+ .../features/snapview-server/src/snapview-server.c | 12 ++++++++-
+ xlators/performance/md-cache/src/md-cache.c        | 31 +++++++++-------------
+ 3 files changed, 45 insertions(+), 20 deletions(-)
+ create mode 100644 tests/bugs/md-cache/bug-1726205.t
+diff --git a/tests/bugs/md-cache/bug-1726205.t b/tests/bugs/md-cache/bug-1726205.t
+new file mode 100644
+index 0000000..795130e
+--- /dev/null
++++ b/tests/bugs/md-cache/bug-1726205.t
+@@ -0,0 +1,22 @@
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++TEST glusterd;
++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
++TEST $CLI volume start $V0
++TEST $CLI volume set $V0 group samba
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++TEST touch $M0/file
++TEST "setfattr -n "user.DosStream.Zone.Identifier:\$DATA" -v '\0' $M0/file"
++TEST "getfattr -n "user.DosStream.Zone.Identifier:\$DATA" -e hex $M0/file | grep -q 0x00"
+diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c
+index b4998b8..1d6a5e5 100644
+--- a/xlators/features/snapview-server/src/snapview-server.c
++++ b/xlators/features/snapview-server/src/snapview-server.c
+@@ -828,7 +828,8 @@ out:
+  * back into the dict. But to get the values for those xattrs it has to do the
+  * getxattr operation on each xattr which might turn out to be a costly
+  * operation. So for each of the xattrs present in the list, a 0 byte value
+- * ("") is set into the dict before unwinding. This can be treated as an
++ * ("") is set into the dict before unwinding. Since ("") is also a valid xattr
++ * value(in a file system) we use an extra key in the same dictionary as an
+  * indicator to other xlators which want to cache the xattrs (as of now,
+  * md-cache which caches acl and selinux related xattrs) to not to cache the
+  * values of the xattrs present in the dict.
+@@ -871,6 +872,15 @@ svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size)
+         list_offset += strlen(keybuffer) + 1;
+     } /* while (remaining_size > 0) */
++    /* Add an additional key to indicate that we don't need to cache these
++     * xattrs(with value "") */
++    ret = dict_set_str(dict, "glusterfs.skip-cache", "");
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED,
++               "dict set operation for the key glusterfs.skip-cache failed.");
++        goto out;
++    }
+     ret = 0;
+ out:
+diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
+index 6e0468f..a6b363f 100644
+--- a/xlators/performance/md-cache/src/md-cache.c
++++ b/xlators/performance/md-cache/src/md-cache.c
+@@ -698,25 +698,6 @@ updatefn(dict_t *dict, char *key, data_t *value, void *data)
+             }
+         }
+-        /* posix xlator as part of listxattr will send both names
+-         * and values of the xattrs in the dict. But as per man page
+-         * listxattr is mainly supposed to send names of the all the
+-         * xattrs. gfapi, as of now will put all the keys it obtained
+-         * in the dict (sent by posix) into a buffer provided by the
+-         * caller (thus the values of those xattrs are lost). If some
+-         * xlator makes gfapi based calls (ex: snapview-server), then
+-         * it has to unwind the calls by putting those names it got
+-         * in the buffer again into the dict. But now it would not be
+-         * having the values for those xattrs. So it might just put
+-         * a 0 byte value ("") into the dict for each xattr and unwind
+-         * the call. So the xlators which cache the xattrs (as of now
+-         * md-cache caches the acl and selinux related xattrs), should
+-         * not update their cache if the value of a xattr is a 0 byte
+-         * data (i.e. "").
+-         */
+-        if (value->len == 1 && value->data[0] == '\0')
+-            return 0;
+         if (dict_set(u->dict, key, value) < 0) {
+             u->ret = -1;
+             return -1;
+@@ -2406,6 +2387,12 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+         goto out;
+     }
++    if (dict_get(xattr, "glusterfs.skip-cache")) {
++        gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
++               "Skipping xattr update due to empty value");
++        goto out;
++    }
+     mdc_inode_xatt_set(this, local->loc.inode, xdata);
+ out:
+@@ -2488,6 +2475,12 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+         goto out;
+     }
++    if (dict_get(xattr, "glusterfs.skip-cache")) {
++        gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
++               "Skipping xattr update due to empty value");
++        goto out;
++    }
+     mdc_inode_xatt_set(this, local->fd->inode, xdata);
+ out:
diff --git a/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch b/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch
new file mode 100644
index 0000000..5af12d1
--- /dev/null
+++ b/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch
@@ -0,0 +1,105 @@
+From fa3cc9971bf1bf4ea52edfedc0cea67a0d6990d1 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Tue, 20 Aug 2019 15:49:40 +0530
+Subject: [PATCH 281/284] ctime: Fix incorrect realtime passed to
+ frame->root->ctime
+On systems that don't support "timespec_get"(e.g., centos6), it
+was using "clock_gettime" with "CLOCK_MONOTONIC" to get unix epoch
+time which is incorrect. This patch introduces "timespec_now_realtime"
+which uses "clock_gettime" with "CLOCK_REALTIME" which fixes
+the issue.
+Backport of:
+ > Patch:
+ > Change-Id: I57be35ce442d7e05319e82112b687eb4f28d7612
+ > Signed-off-by: Kotresh HR <>
+ > fixes: bz#1743652
+Change-Id: I57be35ce442d7e05319e82112b687eb4f28d7612
+Signed-off-by: Kotresh HR <>
+BUG: 1743611
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ libglusterfs/src/glusterfs/timespec.h      |  2 ++
+ libglusterfs/src/libglusterfs.sym          |  1 +
+ libglusterfs/src/timespec.c                | 22 ++++++++++++++++++++++
+ xlators/features/utime/src/utime-helpers.c |  2 +-
+ 4 files changed, 26 insertions(+), 1 deletion(-)
+diff --git a/libglusterfs/src/glusterfs/timespec.h b/libglusterfs/src/glusterfs/timespec.h
+index 871871d..bb9ab44 100644
+--- a/libglusterfs/src/glusterfs/timespec.h
++++ b/libglusterfs/src/glusterfs/timespec.h
+@@ -21,6 +21,8 @@
+ void
+ timespec_now(struct timespec *ts);
+ void
++timespec_now_realtime(struct timespec *ts);
+ timespec_adjust_delta(struct timespec *ts, struct timespec delta);
+ void
+ timespec_sub(const struct timespec *begin, const struct timespec *end,
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index b161380..467a1b7 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1073,6 +1073,7 @@ sys_accept
+ tbf_init
+ tbf_throttle
+ timespec_now
+ timespec_sub
+ timespec_adjust_delta
+ timespec_cmp
+diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c
+index c01527f..d0d5005 100644
+--- a/libglusterfs/src/timespec.c
++++ b/libglusterfs/src/timespec.c
+@@ -71,6 +71,28 @@ timespec_now(struct timespec *ts)
+ }
+ void
++timespec_now_realtime(struct timespec *ts)
++#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS ||                  \
++    defined GF_BSD_HOST_OS
++    if (0 == clock_gettime(CLOCK_REALTIME, ts)) {
++        return;
++    }
++    /* Fall back to gettimeofday()*/
++    struct timeval tv = {
++        0,
++    };
++    if (0 == gettimeofday(&tv, NULL)) {
++        TIMEVAL_TO_TIMESPEC(&tv, ts);
++        return;
++    }
++    return;
+ timespec_adjust_delta(struct timespec *ts, struct timespec delta)
+ {
+     ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000);
+diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c
+index 79cc014..29d9ad9 100644
+--- a/xlators/features/utime/src/utime-helpers.c
++++ b/xlators/features/utime/src/utime-helpers.c
+@@ -17,7 +17,7 @@ gl_timespec_get(struct timespec *ts)
+ #ifdef TIME_UTC
+     timespec_get(ts, TIME_UTC);
+ #else
+-    timespec_now(ts);
++    timespec_now_realtime(ts);
+ #endif
+ }
diff --git a/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch b/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch
new file mode 100644
index 0000000..37a0f12
--- /dev/null
+++ b/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch
@@ -0,0 +1,116 @@
+From 98c9fc8d774ae153ca6b44d3337cf5d9f7a030e2 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Fri, 16 Aug 2019 16:07:03 +0530
+Subject: [PATCH 282/284] geo-rep: Fix the name of changelog archive file
+The processed changelogs are archived each month in a single tar file.
+The default format is "archive_YYYYMM.tar" which is specified as "%%Y%%m"
+in configuration file.
+The created changelog archive file didn't have the corresponding year
+and month. It was created as "archive_%Y%m.tar" on python2-only systems.
+Cause and Fix:
+Geo-rep expects "%Y%m" after the ConfigParser reads it from config file.
+Since it was "%%Y%%m" in config file, geo-rep used to get correct value
+"%Y%m" in python3 and "%%Y%%m" in python2 which is incorrect.
+The fix can be to use "%Y%m" in config file but that fails in python3.
+So the fix is to use "RawConfigParser" in geo-rep and use "%Y%m". This
+works both in python2 and python3.
+Backport of:
+ > Patch:
+ > Change-Id: Ie5b7d2bc04d0d53cd1769e064c2d67aaf95d557c
+ > fixes: bz#1741890
+ > Signed-off-by: Kotresh HR <>
+Change-Id: Ie5b7d2bc04d0d53cd1769e064c2d67aaf95d557c
+BUG: 1743634
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ geo-replication/             |  2 +-
+ geo-replication/syncdaemon/ | 14 +++++++-------
+ 2 files changed, 8 insertions(+), 8 deletions(-)
+diff --git a/geo-replication/ b/geo-replication/
+index c2e4f0d..5ebd57a 100644
+--- a/geo-replication/
++++ b/geo-replication/
+@@ -109,7 +109,7 @@ type=int
+ help=Minimum time interval in seconds for passive worker to become Active
+ [changelog-archive-format]
+ help=Processed changelogs will be archived in working directory. Pattern for archive file
+ [use-meta-volume]
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 38f3594..f823311 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -10,9 +10,9 @@
+ #
+ try:
+-    from ConfigParser import ConfigParser, NoSectionError
++    from ConfigParser import RawConfigParser, NoSectionError
+ except ImportError:
+-    from configparser import ConfigParser, NoSectionError
++    from configparser import RawConfigParser, NoSectionError
+ import os
+ import shutil
+ from string import Template
+@@ -94,7 +94,7 @@ class Gconf(object):
+         if name != "all" and not self._is_configurable(name):
+             raise GconfNotConfigurable()
+-        cnf = ConfigParser()
++        cnf = RawConfigParser()
+         with open(self.custom_conf_file) as f:
+             cnf.readfp(f)
+@@ -138,7 +138,7 @@ class Gconf(object):
+         if curr_val == value:
+             return True
+-        cnf = ConfigParser()
++        cnf = RawConfigParser()
+         with open(self.custom_conf_file) as f:
+             cnf.readfp(f)
+@@ -178,7 +178,7 @@ class Gconf(object):
+         self.session_conf_items = []
+         self.default_values = {}
+-        conf = ConfigParser()
++        conf = RawConfigParser()
+         # Default Template config file
+         with open(self.default_conf_file) as f:
+             conf.readfp(f)
+@@ -342,7 +342,7 @@ class Gconf(object):
+         return False
+ def is_config_file_old(config_file, mastervol, slavevol):
+-    cnf = ConfigParser()
++    cnf = RawConfigParser()
+     session_section = "peers %s %s" % (mastervol, slavevol)
+     try:
+@@ -357,7 +357,7 @@ def config_upgrade(config_file, ret):
+     shutil.copyfile(config_file, config_file_backup)
+     #write a new config file
+-    config = ConfigParser()
++    config = RawConfigParser()
+     config.add_section('vars')
+     for key, value in ret.items():
diff --git a/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch b/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch
new file mode 100644
index 0000000..eb9d8f8
--- /dev/null
+++ b/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch
@@ -0,0 +1,285 @@
+From 55eb2e7642e3428eaa1b2d833c0daa1d34b98324 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Thu, 8 Aug 2019 10:05:12 +0530
+Subject: [PATCH 283/284] ctime: Fix ctime issue with utime family of syscalls
+When atime|mtime is updated via utime family of syscalls,
+ctime is not updated. This patch fixes the same.
+Backport of:
+ > Patch:
+ > Change-Id: I7f86d8f8a1e06a332c3449b5bbdbf128c9690f25
+ > fixes: bz#1738786
+ > Signed-off-by: Kotresh HR <>
+Change-Id: I7f86d8f8a1e06a332c3449b5bbdbf128c9690f25
+BUG: 1743627
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/features/utime/src/ | 13 +++-
+ xlators/storage/posix/src/posix-inode-fd-ops.c |  8 +--
+ xlators/storage/posix/src/posix-metadata.c     | 96 ++++++++++++++------------
+ xlators/storage/posix/src/posix-metadata.h     |  3 +-
+ 4 files changed, 68 insertions(+), 52 deletions(-)
+diff --git a/xlators/features/utime/src/ b/xlators/features/utime/src/
+index a8637ff..8730a51 100755
+--- a/xlators/features/utime/src/
++++ b/xlators/features/utime/src/
+@@ -82,7 +82,18 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+              @LONG_ARGS@)
+ {
+         gl_timespec_get(&frame->root->ctime);
+-        frame->root->flags |= MDATA_CTIME;
++        if (!valid) {
++                frame->root->flags |= MDATA_CTIME;
++        }
++        if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
++                frame->root->flags |= MDATA_CTIME;
++        }
++        if (valid & GF_SET_ATTR_MODE) {
++                frame->root->flags |= MDATA_CTIME;
++        }
+         STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+                     FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index d22bbc2..e0ea85b 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -425,8 +425,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    real_path);
+             goto out;
+         }
+-        posix_update_utime_in_mdata(this, real_path, -1, loc->inode, stbuf,
+-                                    valid);
++        posix_update_utime_in_mdata(this, real_path, -1, loc->inode,
++                                    &frame->root->ctime, stbuf, valid);
+     }
+     if (valid & GF_SET_ATTR_CTIME && !priv->ctime) {
+@@ -652,8 +652,8 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    fd);
+             goto out;
+         }
+-        posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, stbuf,
+-                                    valid);
++        posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode,
++                                    &frame->root->ctime, stbuf, valid);
+     }
+     if (!valid) {
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 5cbdc98..532daa2 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -432,8 +432,10 @@ out:
+  */
+ static int
+ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+-                      inode_t *inode, struct timespec *time, struct iatt *stbuf,
+-                      posix_mdata_flag_t *flag, gf_boolean_t update_utime)
++                      inode_t *inode, struct timespec *time,
++                      struct timespec *u_atime, struct timespec *u_mtime,
++                      struct iatt *stbuf, posix_mdata_flag_t *flag,
++                      gf_boolean_t update_utime)
+ {
+     posix_mdata_t *mdata = NULL;
+     int ret = -1;
+@@ -443,6 +445,10 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+     GF_VALIDATE_OR_GOTO(this->name, inode, out);
+     GF_VALIDATE_OR_GOTO(this->name, time, out);
++    if (update_utime && (!u_atime || !u_mtime)) {
++        goto out;
++    }
+     LOCK(&inode->lock);
+     {
+         ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
+@@ -506,32 +512,30 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+             }
+         }
+-        /* Earlier, mdata was updated only if the existing time is less
+-         * than the time to be updated. This would fail the scenarios
+-         * where mtime can be set to any time using the syscall. Hence
+-         * just updating without comparison. But the ctime is not
+-         * allowed to changed to older date.
+-         */
+-        if (flag->ctime && posix_compare_timespec(time, &mdata->ctime) > 0) {
+-            mdata->ctime = *time;
+-        }
+         /* In distributed systems, there could be races with fops
+          * updating mtime/atime which could result in different
+          * mtime/atime for same file. So this makes sure, only the
+          * highest time is retained. If the mtime/atime update comes
+          * from the explicit utime syscall, it is allowed to set to
+-         * previous time
++         * previous or future time but the ctime is always set to
++         * current time.
+          */
+         if (update_utime) {
++            if (flag->ctime &&
++                posix_compare_timespec(time, &mdata->ctime) > 0) {
++                mdata->ctime = *time;
++            }
+             if (flag->mtime) {
+-                mdata->mtime = *time;
++                mdata->mtime = *u_mtime;
+             }
+             if (flag->atime) {
+-                mdata->atime = *time;
++                mdata->atime = *u_atime;
+             }
+         } else {
++            if (flag->ctime &&
++                posix_compare_timespec(time, &mdata->ctime) > 0) {
++                mdata->ctime = *time;
++            }
+             if (flag->mtime &&
+                 posix_compare_timespec(time, &mdata->mtime) > 0) {
+                 mdata->mtime = *time;
+@@ -584,15 +588,22 @@ out:
+  */
+ void
+ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+-                            inode_t *inode, struct iatt *stbuf, int valid)
++                            inode_t *inode, struct timespec *ctime,
++                            struct iatt *stbuf, int valid)
+ {
+     int32_t ret = 0;
+ #if defined(HAVE_UTIMENSAT)
+-    struct timespec tv = {
++    struct timespec tv_atime = {
++        0,
++    };
++    struct timespec tv_mtime = {
+         0,
+     };
+ #else
+-    struct timeval tv = {
++    struct timeval tv_atime = {
++        0,
++    };
++    struct timeval tv_mtime = {
+         0,
+     };
+ #endif
+@@ -611,35 +622,28 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+      */
+     if (inode && priv->ctime) {
+         if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+-            tv.tv_sec = stbuf->ia_atime;
+-            SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec);
++            tv_atime.tv_sec = stbuf->ia_atime;
++            SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_atime, stbuf->ia_atime_nsec);
+-            flag.ctime = 0;
+-            flag.mtime = 0;
++            flag.ctime = 1;
+             flag.atime = 1;
+-            ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL,
+-                                        &flag, _gf_true);
+-            if (ret) {
+-                gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+-                       "posix set mdata atime failed on file:"
+-                       " %s gfid:%s",
+-                       real_path, uuid_utoa(inode->gfid));
+-            }
+         }
+         if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+-            tv.tv_sec = stbuf->ia_mtime;
+-            SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec);
++            tv_mtime.tv_sec = stbuf->ia_mtime;
++            SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_mtime, stbuf->ia_mtime_nsec);
+-            flag.ctime = 0;
++            flag.ctime = 1;
+             flag.mtime = 1;
+-            flag.atime = 0;
++        }
+-            ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL,
+-                                        &flag, _gf_true);
++        if (flag.mtime || flag.atime) {
++            ret = posix_set_mdata_xattr(this, real_path, -1, inode, ctime,
++                                        &tv_atime, &tv_mtime, NULL, &flag,
++                                        _gf_true);
+             if (ret) {
+                 gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+-                       "posix set mdata mtime failed on file:"
++                       "posix set mdata atime failed on file:"
+                        " %s gfid:%s",
+                        real_path, uuid_utoa(inode->gfid));
+             }
+@@ -702,8 +706,8 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+             goto out;
+         }
+         ret = posix_set_mdata_xattr(this, real_path, fd, inode,
+-                                    &frame->root->ctime, stbuf, &flag,
+-                                    _gf_false);
++                                    &frame->root->ctime, NULL, NULL, stbuf,
++                                    &flag, _gf_false);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                    "posix set mdata failed on file: %s gfid:%s", real_path,
+@@ -733,8 +737,8 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
+             goto out;
+         }
+         ret = posix_set_mdata_xattr(this, real_path, fd, inode,
+-                                    &frame->root->ctime, stbuf, &flag,
+-                                    _gf_false);
++                                    &frame->root->ctime, NULL, NULL, stbuf,
++                                    &flag, _gf_false);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                    "posix set mdata failed on file: %s gfid:%s", real_path,
+@@ -792,8 +796,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+             flag_dup.atime = 0;
+         ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out,
+-                                    &frame->root->ctime, stbuf_out, &flag_dup,
+-                                    _gf_false);
++                                    &frame->root->ctime, NULL, NULL, stbuf_out,
++                                    &flag_dup, _gf_false);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                    "posix set mdata failed on file: %s gfid:%s", real_path_out,
+@@ -811,8 +815,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+         flag_dup.ctime = 0;
+         ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out,
+-                                    &frame->root->ctime, stbuf_out, &flag_dup,
+-                                    _gf_false);
++                                    &frame->root->ctime, NULL, NULL, stbuf_out,
++                                    &flag_dup, _gf_false);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                    "posix set mdata failed on file: %s gfid:%s", real_path_in,
+diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
+index dc25e59..c176699 100644
+--- a/xlators/storage/posix/src/posix-metadata.h
++++ b/xlators/storage/posix/src/posix-metadata.h
+@@ -40,7 +40,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
+                         inode_t *inode, struct iatt *stbuf);
+ void
+ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+-                            inode_t *inode, struct iatt *stbuf, int valid);
++                            inode_t *inode, struct timespec *ctime,
++                            struct iatt *stbuf, int valid);
+ void
+ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+                 int fd, inode_t *inode, struct iatt *stbuf);
diff --git a/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch b/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch
new file mode 100644
index 0000000..4078bfc
--- /dev/null
+++ b/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch
@@ -0,0 +1,61 @@
+From 243075b593c6fccbffb3e82ffcfdb58acfd68269 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Thu, 22 Aug 2019 15:51:43 +0530
+Subject: [PATCH 284/284] posix: log aio_error return codes in
+ posix_fs_health_check
+Problem: Sometimes a brick goes down because the health check thread
+         fails, without logging the error codes returned by aio system calls.
+         As per the aio_error man page, it returns a positive error number
+         if the asynchronous I/O operation failed.
+Solution: log aio_error return codes in error message
+> Change-Id: I2496b1bc16e602b0fd3ad53e211de11ec8c641ef
+> Fixes: bz#1744519
+> Signed-off-by: Mohit Agrawal <>
+> Reviewed on upstream link
+Change-Id: I2496b1bc16e602b0fd3ad53e211de11ec8c641ef
+BUG: 1744518
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/storage/posix/src/posix-helpers.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index ef5bfd5..d143d4c 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2025,7 +2025,6 @@ posix_fs_health_check(xlator_t *this)
+     if (ret != 0) {
+         op_errno = errno;
+         op = "aio_write_error";
+-        ret = -1;
+         goto out;
+     }
+@@ -2064,7 +2063,6 @@ posix_fs_health_check(xlator_t *this)
+     if (ret != 0) {
+         op_errno = errno;
+         op = "aio_read_error";
+-        ret = -1;
+         goto out;
+     }
+@@ -2089,7 +2087,8 @@ out:
+     }
+     if (ret && file_path[0]) {
+         gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED,
+-               "%s() on %s returned", op, file_path);
++               "%s() on %s returned ret is %d error is %s", op, file_path, ret,
++               ret != -1 ? strerror(ret) : strerror(op_errno));
+                  "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
+                  file_path, strerror(op_errno), priv->hostname, priv->base_path,
diff --git a/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch b/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch
new file mode 100644
index 0000000..12549e7
--- /dev/null
+++ b/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch
@@ -0,0 +1,43 @@
+From 49cd9ef7487ba88796315b897823837a9cbd535e Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Wed, 28 Aug 2019 09:05:20 +0530
+Subject: [PATCH 285/297] glusterd: glusterd service is getting timed out on
+ scaled setup
+Problem: On a three node cluster with 2000 replica volumes systemctl is getting
+         timed out for glusterd service.
+Solution: Configure TimeoutSec 300 to wait for glusterd startup.
+> Change-Id: Idb3f3f3e56e6216a0ebd754cbb9e8e37ce9e636d
+> Fixes: bz#1746228
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry picked from commit c90dc63ec9eee0f43ba8e489876fdf8b8810bbdc)
+> (Reviewed on upstream link
+Change-Id: Idb3f3f3e56e6216a0ebd754cbb9e8e37ce9e636d
+BUG: 1746027
+Signed-off-by: Mohit Agrawal <>
+Tested-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ extras/systemd/ | 1 +
+ 1 file changed, 1 insertion(+)
+diff --git a/extras/systemd/ b/extras/systemd/
+index c33351c..f604160 100644
+--- a/extras/systemd/
++++ b/extras/systemd/
+@@ -13,6 +13,7 @@ Environment="LOG_LEVEL=INFO"
+ EnvironmentFile=-@sysconfdir@/sysconfig/glusterd
+ ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/  --log-level $LOG_LEVEL $GLUSTERD_OPTIONS
+ KillMode=process
+ SuccessExitStatus=15
+ [Install]
diff --git a/SOURCES/ b/SOURCES/
new file mode 100644
index 0000000..415a07b
--- /dev/null
+++ b/SOURCES/
@@ -0,0 +1,162 @@
+From 2a905a8ae6b4737e84543ad76b55f3346fa0f32c Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <>
+Date: Tue, 27 Aug 2019 14:12:31 +0530
+Subject: [PATCH 286/297] added script files for machine /
+ component stats
+Have added the file (extras/ to the code base.
+And included the following to be packaged:
+Quota Accounting issue:
+extras/quota/ (made available only on server)
+extras/quota/ (made available only on server)
+Debugging Statedumps:
+Note: rest of the files were already included.
+Change-Id: I2efb959865c3f381166c6a25c6eef613d13dd5ee
+fixes: bz#1719171
+Signed-off-by: Hari Gowtham <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Atin Mukherjee <>
+ extras/       |  9 +++++++-
+ extras/ | 53 ++++++++++++++++++++++++++++++++++++++++++++++++
+        |  8 ++++++++
+ 3 files changed, 69 insertions(+), 1 deletion(-)
+ create mode 100644 extras/
+diff --git a/extras/ b/extras/
+index 983f014..8cbfda1 100644
+--- a/extras/
++++ b/extras/
+@@ -30,9 +30,14 @@ endif
+ scriptsdir = $(datadir)/glusterfs/scripts
+ scripts_SCRIPTS = thin-arbiter/
++scripts_SCRIPTS += quota/
++scripts_SCRIPTS +=
++scripts_SCRIPTS +=
+ scripts_SCRIPTS += \
++scripts_SCRIPTS += quota/
++scripts_SCRIPTS += quota/
+ scripts_SCRIPTS +=
+ scripts_SCRIPTS +=
+@@ -50,7 +55,9 @@ EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf
+ 	command-completion/Makefile command-completion/README \
+ \
+ group-distributed-virt \
+-	thin-arbiter/thin-arbiter.vol thin-arbiter/
++	thin-arbiter/thin-arbiter.vol thin-arbiter/ \
++	quota/ quota/ quota/ \
+ install-data-local:
+diff --git a/extras/ b/extras/
+new file mode 100644
+index 0000000..ebc6bf1
+--- /dev/null
++++ b/extras/
+@@ -0,0 +1,53 @@
++function get_statedump_fnames_without_timestamps
++    ls | grep -E "[.]dump[.][0-9][0-9]*" | cut -f1-3 -d'.' | sort -u
++function get_non_uniq_fields
++    local statedump_fname_prefix=$1
++    print_stack_lkowner_unique_in_one_line "$statedump_fname_prefix" | sort | uniq -c | grep -vE "^\s*1 " | awk '{$1="repeats="$1; print $0}'
++function print_stack_lkowner_unique_in_one_line
++    local statedump_fname_prefix=$1
++    sed -e '/./{H;$!d;}' -e 'x;/unique=/!d;/stack=/!d;/lk-owner=/!d;/pid=/!d;' "${statedump_fname_prefix}"* | grep -E "(stack|lk-owner|unique|pid)=" | paste -d " " - - - -
++function get_stacks_that_appear_in_multiple_statedumps
++    #If a stack with same 'unique/lk-owner/stack' appears in multiple statedumps
++    #print the stack
++    local statedump_fname_prefix=$1
++    while read -r non_uniq_stack;
++    do
++        if [ -z "$printed" ];
++        then
++            printed="1"
++        fi
++        echo "$statedump_fname_prefix" "$non_uniq_stack"
++    done < <(get_non_uniq_fields "$statedump_fname_prefix")
++if [ -z "$statedumpdir" ];
++    echo "Usage: $0 <statedump-dir>"
++    exit 1
++if [ ! -d "$statedumpdir" ];
++    echo "$statedumpdir: Is not a directory"
++    echo "Usage: $0 <statedump-dir>"
++    exit 1
++cd "$statedumpdir" || exit 1
++for statedump_fname_prefix in $(get_statedump_fnames_without_timestamps);
++    get_stacks_that_appear_in_multiple_statedumps "$statedump_fname_prefix"
++done | column -t
++echo "NOTE: stacks with lk-owner=\"\"/lk-owner=0000000000000000/unique=0 may not be hung frames and need further inspection" >&2
+diff --git a/ b/
+index 00603ec..3c2e2dc 100644
+--- a/
++++ b/
+@@ -1107,6 +1107,9 @@ exit 0
+      %{_datadir}/glusterfs/scripts/
+      %{_datadir}/glusterfs/scripts/
+ %endif
+ # xlators that are needed on the client- and on the server-side
+ %dir %{_libdir}/glusterfs
+ %dir %{_libdir}/glusterfs/%{version}%{?prereltag}
+@@ -1352,6 +1355,8 @@ exit 0
+ %if ( 0%{!?_without_server:1} )
+ %files server
+ %doc extras/
+ # sysconf
+ %config(noreplace) %{_sysconfdir}/glusterfs
+ %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
+@@ -1942,6 +1947,9 @@ fi
+ %endif
+ %changelog
++* Tue Aug 27 2019 Hari Gowtham <>
++- Added scripts to collect machine stats and component stats (#1719171)
+ * Tue Jun 18  2019 Jiffin Tony Thottan <>
+ - build glusterfs-ganesha for rhel 7 and above (#1720551)
diff --git a/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch b/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch
new file mode 100644
index 0000000..93bd3c9
--- /dev/null
+++ b/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch
@@ -0,0 +1,372 @@
+From 546f412c155dd5aca2b3cd4202f80c9977b215dc Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <>
+Date: Wed, 4 Sep 2019 12:06:34 +0530
+Subject: [PATCH 287/297] cluster/ec: Fail fsync/flush for files on update
+ size/version failure
+If update size/version is not successful on the file, updates on the
+same stripe could lead to data corruptions if the earlier un-aligned
+write is not successful on all the bricks. Application won't have
+any knowledge of this because update size/version happens in the
+Fail fsync/flush on fds that are opened before update-size-version
+went bad.
+fixes: bz#1745107
+Change-Id: I9d323eddcda703bd27d55f340c4079d76e06e492
+Signed-off-by: Pranith Kumar K <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Ashish Pandey <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tests/basic/ec/ec-badfd.c            | 124 +++++++++++++++++++++++++++++++++++
+ tests/basic/ec/ec-badfd.t            |  26 ++++++++
+ xlators/cluster/ec/src/ec-common.c   |  23 +++++++
+ xlators/cluster/ec/src/ec-generic.c  |  47 +++++++++++++
+ xlators/cluster/ec/src/ec-helpers.c  |   7 ++
+ xlators/cluster/ec/src/ec-messages.h |   2 +-
+ xlators/cluster/ec/src/ec-types.h    |   2 +
+ 7 files changed, 230 insertions(+), 1 deletion(-)
+ create mode 100644 tests/basic/ec/ec-badfd.c
+ create mode 100755 tests/basic/ec/ec-badfd.t
+diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c
+new file mode 100644
+index 0000000..8be23c1
+--- /dev/null
++++ b/tests/basic/ec/ec-badfd.c
+@@ -0,0 +1,124 @@
++#include <stdio.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <time.h>
++#include <limits.h>
++#include <string.h>
++#include <stdlib.h>
++#include <errno.h>
++#include <glusterfs/api/glfs.h>
++#include <glusterfs/api/glfs-handles.h>
++fill_iov(struct iovec *iov, char fillchar, int count)
++    int ret = -1;
++    iov->iov_base = malloc(count + 1);
++    if (iov->iov_base == NULL) {
++        return ret;
++    } else {
++        iov->iov_len = count;
++        ret = 0;
++    }
++    memset(iov->iov_base, fillchar, count);
++    memset(iov->iov_base + count, '\0', 1);
++    return ret;
++write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count)
++    ssize_t ret = -1;
++    int flags = O_RDWR;
++    struct iovec iov = {0};
++    ret = fill_iov(&iov, 'a', char_count);
++    if (ret) {
++        fprintf(stderr, "failed to create iov");
++        goto out;
++    }
++    ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
++    if (ret < 0) {
++        fprintf(stderr, "glfs_pwritev failed, %d", errno);
++    }
++    return ret;
++main(int argc, char *argv[])
++    glfs_t *fs = NULL;
++    glfs_fd_t *fd = NULL;
++    int ret = 1;
++    char volume_cmd[4096] = {0};
++    if (argc != 4) {
++        fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
++        return 1;
++    }
++    fs = glfs_new(argv[2]);
++    if (!fs) {
++        fprintf(stderr, "glfs_new: returned NULL\n");
++        return 1;
++    }
++    ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
++    if (ret != 0) {
++        fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
++        goto out;
++    }
++    ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7);
++    if (ret != 0) {
++        fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
++        goto out;
++    }
++    ret = glfs_init(fs);
++    if (ret != 0) {
++        fprintf(stderr, "glfs_init: returned %d\n", ret);
++        goto out;
++    }
++    fd = glfs_open(fs, argv[3], O_RDWR);
++    if (fd == NULL) {
++        fprintf(stderr, "glfs_open: returned NULL\n");
++        goto out;
++    }
++    ret = write_sync(fs, fd, 16);
++    if (ret < 0) {
++        fprintf(stderr, "write_sync failed\n");
++    }
++    snprintf(volume_cmd, sizeof(volume_cmd),
++             "gluster --mode=script volume stop %s", argv[2]);
++    /*Stop the volume so that update-size-version fails*/
++    system(volume_cmd);
++    sleep(8); /* 3 seconds more than eager-lock-timeout*/
++    snprintf(volume_cmd, sizeof(volume_cmd),
++             "gluster --mode=script volume start %s", argv[2]);
++    system(volume_cmd);
++    sleep(8); /*wait for bricks to come up*/
++    ret = glfs_fsync(fd, NULL, NULL);
++    if (ret == 0) {
++        fprintf(stderr, "fsync succeeded on a BADFD\n");
++        exit(1);
++    }
++    ret = glfs_close(fd);
++    if (ret == 0) {
++        fprintf(stderr, "flush succeeded on a BADFD\n");
++        exit(1);
++    }
++    ret = 0;
++    unlink("/tmp/ec-badfd.log");
++    glfs_fini(fs);
++    return ret;
+diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t
+new file mode 100755
+index 0000000..56feb47
+--- /dev/null
++++ b/tests/basic/ec/ec-badfd.t
+@@ -0,0 +1,26 @@
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
++TEST $CLI volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status'
++TEST $GFS -s $H0 --volfile-id $V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++TEST touch $M0/file
++TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2
++TEST $(dirname $0)/ec-badfd $H0 $V0 /file
++cleanup_tester $(dirname ${0})/ec-badfd
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 5fb4610..92d4e5d 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -2255,6 +2255,23 @@ ec_unlock_lock(ec_lock_link_t *link)
+     }
+ }
++ec_inode_bad_inc(inode_t *inode, xlator_t *xl)
++    ec_inode_t *ctx = NULL;
++    LOCK(&inode->lock);
++    {
++        ctx = __ec_inode_get(inode, xl);
++        if (ctx == NULL) {
++            goto unlock;
++        }
++        ctx->bad_version++;
++    }
++    UNLOCK(&inode->lock);
+ int32_t
+ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno, dict_t *xattr,
+@@ -2270,6 +2287,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
+     ctx = lock->ctx;
+     if (op_ret < 0) {
++        if (link->lock->fd == NULL) {
++            ec_inode_bad_inc(link->lock->loc.inode, this);
++        } else {
++            ec_inode_bad_inc(link->lock->fd->inode, this);
++        }
+         gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
+                EC_MSG_SIZE_VERS_UPDATE_FAIL,
+                "Failed to update version and size. %s", ec_msg_str(fop));
+diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
+index acc16b5..b019050 100644
+--- a/xlators/cluster/ec/src/ec-generic.c
++++ b/xlators/cluster/ec/src/ec-generic.c
+@@ -150,6 +150,37 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state)
+     }
+ }
++static int32_t
++ec_validate_fd(fd_t *fd, xlator_t *xl)
++    uint64_t iversion = 0;
++    uint64_t fversion = 0;
++    ec_inode_t *inode_ctx = NULL;
++    ec_fd_t *fd_ctx = NULL;
++    LOCK(&fd->lock);
++    {
++        fd_ctx = __ec_fd_get(fd, xl);
++        if (fd_ctx) {
++            fversion = fd_ctx->bad_version;
++        }
++    }
++    UNLOCK(&fd->lock);
++    LOCK(&fd->inode->lock);
++    {
++        inode_ctx = __ec_inode_get(fd->inode, xl);
++        if (inode_ctx) {
++            iversion = inode_ctx->bad_version;
++        }
++    }
++    UNLOCK(&fd->inode->lock);
++    if (fversion < iversion) {
++        return EBADF;
++    }
++    return 0;
+ void
+ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+          uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+@@ -165,6 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+     GF_VALIDATE_OR_GOTO(this->name, frame, out);
+     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
++    error = ec_validate_fd(fd, this);
++    if (error) {
++        gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
++               "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
++               fd->inode ? uuid_utoa(fd->inode->gfid) : "");
++        goto out;
++    }
+     fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
+                                ec_wind_flush, ec_manager_flush, callback, data);
+     if (fop == NULL) {
+@@ -381,6 +420,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+     GF_VALIDATE_OR_GOTO(this->name, frame, out);
+     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
++    error = ec_validate_fd(fd, this);
++    if (error) {
++        gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
++               "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
++               fd->inode ? uuid_utoa(fd->inode->gfid) : "");
++        goto out;
++    }
+     fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
+                                ec_wind_fsync, ec_manager_fsync, callback, data);
+     if (fop == NULL) {
+diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
+index 43f6e3b..baac001 100644
+--- a/xlators/cluster/ec/src/ec-helpers.c
++++ b/xlators/cluster/ec/src/ec-helpers.c
+@@ -753,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
+ {
+     int i = 0;
+     ec_fd_t *ctx = NULL;
++    ec_inode_t *ictx = NULL;
+     uint64_t value = 0;
+     ec_t *ec = xl->private;
+@@ -775,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
+                 GF_FREE(ctx);
+                 return NULL;
+             }
++            /* Only refering bad-version so no need for lock
++             * */
++            ictx = __ec_inode_get(fd->inode, xl);
++            if (ictx) {
++                ctx->bad_version = ictx->bad_version;
++            }
+         }
+     } else {
+         ctx = (ec_fd_t *)(uintptr_t)value;
+diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
+index 7c28808..be86b37 100644
+--- a/xlators/cluster/ec/src/ec-messages.h
++++ b/xlators/cluster/ec/src/ec-messages.h
+ #endif /* !_EC_MESSAGES_H_ */
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index 1c295c0..f27f2ec 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -150,6 +150,7 @@ struct _ec_fd {
+     loc_t loc;
+     uintptr_t open;
+     int32_t flags;
++    uint64_t bad_version;
+     ec_fd_status_t fd_status[0];
+ };
+@@ -180,6 +181,7 @@ struct _ec_inode {
+     uint64_t dirty[2];
+     struct list_head heal;
+     ec_stripe_list_t stripe_cache;
++    uint64_t bad_version;
+ };
+ typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
diff --git a/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch b/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch
new file mode 100644
index 0000000..8dd3fca
--- /dev/null
+++ b/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch
@@ -0,0 +1,77 @@
+From ccf7775760dd923e21341438725946737eb8d8af Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <>
+Date: Sat, 7 Sep 2019 20:18:01 +0530
+Subject: [PATCH 288/297] cluster/ec: Fix coverity issues
+Fixed the following coverity issue in both flush/fsync
+>>>     CID 1404964:  Null pointer dereferences  (REVERSE_INULL)
+>>>     Null-checking "fd" suggests that it may be null, but it has already
+been dereferenced on all paths leading to the check.
+>>>         if (fd != NULL) {
+>>>           fop->fd = fd_ref(fd);
+>>>             if (fop->fd == NULL) {
+>>>                 gf_msg(this->name, GF_LOG_ERROR, 0,
+>>>                        "Failed to reference a "
+>>>                        "file descriptor.");
+fixes: bz#1745107
+Change-Id: I19c05d585e23f8fbfbc195d1f3775ec528eed671
+Signed-off-by: Pranith Kumar K <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Ashish Pandey <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/cluster/ec/src/ec-generic.c | 28 ++++++++++++++++------------
+ 1 file changed, 16 insertions(+), 12 deletions(-)
+diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
+index b019050..192bb02 100644
+--- a/xlators/cluster/ec/src/ec-generic.c
++++ b/xlators/cluster/ec/src/ec-generic.c
+@@ -196,12 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+     GF_VALIDATE_OR_GOTO(this->name, frame, out);
+     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+-    error = ec_validate_fd(fd, this);
+-    if (error) {
+-        gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+-               "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
+-               fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+-        goto out;
++    if (fd) {
++        error = ec_validate_fd(fd, this);
++        if (error) {
++            gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
++                   "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
++                   fd->inode ? uuid_utoa(fd->inode->gfid) : "");
++            goto out;
++        }
+     }
+     fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
+@@ -420,12 +422,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+     GF_VALIDATE_OR_GOTO(this->name, frame, out);
+     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+-    error = ec_validate_fd(fd, this);
+-    if (error) {
+-        gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+-               "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
+-               fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+-        goto out;
++    if (fd) {
++        error = ec_validate_fd(fd, this);
++        if (error) {
++            gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
++                   "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
++                   fd->inode ? uuid_utoa(fd->inode->gfid) : "");
++            goto out;
++        }
+     }
+     fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
diff --git a/SOURCES/0289-cluster-ec-quorum-count-implementation.patch b/SOURCES/0289-cluster-ec-quorum-count-implementation.patch
new file mode 100644
index 0000000..6d24813
--- /dev/null
+++ b/SOURCES/0289-cluster-ec-quorum-count-implementation.patch
@@ -0,0 +1,721 @@
+From 0d54bb417e982a100ceefb5eab2a61a17e840f39 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <>
+Date: Thu, 5 Sep 2019 16:12:39 +0530
+Subject: [PATCH 289/297] cluster/ec: quorum-count implementation
+upstream-issue: #721
+fixes: bz#1748688
+Change-Id: I5333540e3c635ccf441cf1f4696e4c8986e38ea8
+Signed-off-by: Pranith Kumar K <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Ashish Pandey <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ libglusterfs/src/glusterfs/globals.h             |   4 +-
+ tests/basic/ec/ec-quorum-count-partial-failure.t |  50 +++++++
+ tests/basic/ec/ec-quorum-count.t                 | 165 +++++++++++++++++++++++
+ tests/ec.rc                                      |   9 ++
+ xlators/cluster/ec/src/ec-common.c               |  13 ++
+ xlators/cluster/ec/src/ec-common.h               |  24 ++++
+ xlators/cluster/ec/src/ec-dir-write.c            |  57 ++++----
+ xlators/cluster/ec/src/ec-inode-write.c          |  61 ++++-----
+ xlators/cluster/ec/src/ec-types.h                |   1 +
+ xlators/cluster/ec/src/ec.c                      |  13 ++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c  |  46 +++++++
+ 11 files changed, 383 insertions(+), 60 deletions(-)
+ create mode 100755 tests/basic/ec/ec-quorum-count-partial-failure.t
+ create mode 100644 tests/basic/ec/ec-quorum-count.t
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 55476f6..bdc8b3d 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -50,7 +50,7 @@
+     1 /* MIN is the fresh start op-version, mostly                             \
+          should not change */
+ #define GD_OP_VERSION_MAX                                                      \
+-    GD_OP_VERSION_7_0 /* MAX VERSION is the maximum                            \
++    GD_OP_VERSION_8_0 /* MAX VERSION is the maximum                            \
+                          count in VME table, should                            \
+                          keep changing with                                    \
+                          introduction of newer                                 \
+@@ -136,6 +136,8 @@
+ #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
++#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+diff --git a/tests/basic/ec/ec-quorum-count-partial-failure.t b/tests/basic/ec/ec-quorum-count-partial-failure.t
+new file mode 100755
+index 0000000..79f5825
+--- /dev/null
++++ b/tests/basic/ec/ec-quorum-count-partial-failure.t
+@@ -0,0 +1,50 @@
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++#This test checks that partial failure of fop results in main fop failure only
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
++TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
++TEST $CLI volume set $V0 performance.flush-behind off
++TEST $CLI volume start $V0
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=/$V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++TEST dd if=/dev/urandom of=$M0/a bs=12347 count=1
++TEST dd if=/dev/urandom of=$M0/b bs=12347 count=1
++TEST cp $M0/b $M0/c
++TEST fallocate -p -l 101 $M0/c
++TEST $CLI volume stop $V0
++TEST $CLI volume set $V0 debug.delay-gen posix;
++TEST $CLI volume set $V0 delay-gen.delay-duration 10000000;
++TEST $CLI volume set $V0 delay-gen.enable WRITE;
++TEST $CLI volume set $V0 delay-gen.delay-percentage 100
++TEST $CLI volume set $V0 disperse.quorum-count 6
++TEST $CLI volume start $V0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++cksum=$(dd if=$M0/a bs=12345 count=1 | md5sum | awk '{print $1}')
++truncate -s 12345 $M0/a & #While write is waiting for 5 seconds, introduce failure
++fallocate -p -l 101 $M0/b &
++sleep 1
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST wait
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
++EXPECT "12345" stat --format=%s $M0/a
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST kill_brick $V0 $H0 $B0/${V0}2
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
++cksum_after_heal=$(dd if=$M0/a | md5sum | awk '{print $1}')
++TEST [[ $cksum == $cksum_after_heal ]]
++cksum=$(dd if=$M0/c | md5sum | awk '{print $1}')
++cksum_after_heal=$(dd if=$M0/b | md5sum | awk '{print $1}')
++TEST [[ $cksum == $cksum_after_heal ]]
+diff --git a/tests/basic/ec/ec-quorum-count.t b/tests/basic/ec/ec-quorum-count.t
+new file mode 100644
+index 0000000..56b5329
+--- /dev/null
++++ b/tests/basic/ec/ec-quorum-count.t
+@@ -0,0 +1,165 @@
++ #!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../ec.rc
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
++TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
++TEST $CLI volume set $V0 performance.flush-behind off
++#Should fail on non-disperse volume
++TEST ! $CLI volume set $V1 disperse.quorum-count 5
+#Should fail on invalid or out-of-range values and succeed only on a valid range
++TEST ! $CLI volume set $V0 disperse.quorum-count 0
++TEST ! $CLI volume set $V0 disperse.quorum-count -0
++TEST ! $CLI volume set $V0 disperse.quorum-count abc
++TEST ! $CLI volume set $V0 disperse.quorum-count 10abc
++TEST ! $CLI volume set $V0 disperse.quorum-count 1
++TEST ! $CLI volume set $V0 disperse.quorum-count 2
++TEST ! $CLI volume set $V0 disperse.quorum-count 3
++TEST $CLI volume set $V0 disperse.quorum-count 4
++TEST $CLI volume start $V0
++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++#Test that the option is reflected in the mount
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^4$" ec_option_value $V0 $M0 0 quorum-count
++TEST $CLI volume reset $V0 disperse.quorum-count
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
++TEST $CLI volume set $V0 disperse.quorum-count 6
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^6$" ec_option_value $V0 $M0 0 quorum-count
++TEST touch $M0/a
++TEST touch $M0/data
++TEST setfattr -n trusted.def -v def $M0/a
++TEST touch $M0/src
++TEST touch $M0/del-me
++TEST mkdir $M0/dir1
++TEST dd if=/dev/zero of=$M0/read-file bs=1M count=1 oflag=direct
++TEST dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
++TEST gf_rm_file_and_gfid_link $B0/${V0}0 del-file
++#modify operations should fail as the file is not in quorum
++TEST ! dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
++TEST kill_brick $V0 $H0 $B0/${V0}0
++#Read should succeed even when quorum-count is not met
++TEST dd if=$M0/read-file of=/dev/null iflag=direct
++TEST ! touch $M0/a2
++TEST ! mkdir $M0/dir2
++TEST ! mknod  $M0/b2 b 4 5
++TEST ! ln -s $M0/a $M0/symlink
++TEST ! ln $M0/a $M0/link
++TEST ! mv $M0/src $M0/dst
++TEST ! rm -f $M0/del-me
++TEST ! rmdir $M0/dir1
++TEST ! dd if=/dev/zero of=$M0/a bs=1M count=1 conv=notrunc
++TEST ! dd if=/dev/zero of=$M0/data bs=1M count=1 conv=notrunc
++TEST ! truncate -s 0 $M0/a
++TEST ! setfattr -n -v abc $M0/a
++TEST ! setfattr -x trusted.def $M0/a
++TEST ! chmod +x $M0/a
++TEST ! fallocate -l 2m -n $M0/a
++TEST ! fallocate -p -l 512k $M0/a
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
++# reset the option and check whether the default redundancy count is
++# accepted or not.
++TEST $CLI volume reset $V0 disperse.quorum-count
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
++TEST touch $M0/a1
++TEST touch $M0/data1
++TEST setfattr -n trusted.def -v def $M0/a1
++TEST touch $M0/src1
++TEST touch $M0/del-me1
++TEST mkdir $M0/dir11
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST touch $M0/a21
++TEST mkdir $M0/dir21
++TEST mknod  $M0/b21 b 4 5
++TEST ln -s $M0/a1 $M0/symlink1
++TEST ln $M0/a1 $M0/link1
++TEST mv $M0/src1 $M0/dst1
++TEST rm -f $M0/del-me1
++TEST rmdir $M0/dir11
++TEST dd if=/dev/zero of=$M0/a1 bs=1M count=1 conv=notrunc
++TEST dd if=/dev/zero of=$M0/data1 bs=1M count=1 conv=notrunc
++TEST truncate -s 0 $M0/a1
++TEST setfattr -n -v abc $M0/a1
++TEST setfattr -x trusted.def $M0/a1
++TEST chmod +x $M0/a1
++TEST fallocate -l 2m -n $M0/a1
++TEST fallocate -p -l 512k $M0/a1
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++TEST touch $M0/a2
++TEST touch $M0/data2
++TEST setfattr -n trusted.def -v def $M0/a1
++TEST touch $M0/src2
++TEST touch $M0/del-me2
++TEST mkdir $M0/dir12
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST kill_brick $V0 $H0 $B0/${V0}2
++TEST ! touch $M0/a22
++TEST ! mkdir $M0/dir22
++TEST ! mknod  $M0/b22 b 4 5
++TEST ! ln -s $M0/a2 $M0/symlink2
++TEST ! ln $M0/a2 $M0/link2
++TEST ! mv $M0/src2 $M0/dst2
++TEST ! rm -f $M0/del-me2
++TEST ! rmdir $M0/dir12
++TEST ! dd if=/dev/zero of=$M0/a2 bs=1M count=1 conv=notrunc
++TEST ! dd if=/dev/zero of=$M0/data2 bs=1M count=1 conv=notrunc
++TEST ! truncate -s 0 $M0/a2
++TEST ! setfattr -n -v abc $M0/a2
++TEST ! setfattr -x trusted.def $M0/a2
++TEST ! chmod +x $M0/a2
++TEST ! fallocate -l 2m -n $M0/a2
++TEST ! fallocate -p -l 512k $M0/a2
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
++# Set quorum-count to 5 and kill 1 brick and the fops should pass
++TEST $CLI volume set $V0 disperse.quorum-count 5
++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^5$" ec_option_value $V0 $M0 0 quorum-count
++TEST touch $M0/a3
++TEST touch $M0/data3
++TEST setfattr -n trusted.def -v def $M0/a3
++TEST touch $M0/src3
++TEST touch $M0/del-me3
++TEST mkdir $M0/dir13
++TEST kill_brick $V0 $H0 $B0/${V0}0
++TEST touch $M0/a31
++TEST mkdir $M0/dir31
++TEST mknod  $M0/b31 b 4 5
++TEST ln -s $M0/a3 $M0/symlink3
++TEST ln $M0/a3 $M0/link3
++TEST mv $M0/src3 $M0/dst3
++TEST rm -f $M0/del-me3
++TEST rmdir $M0/dir13
++TEST dd if=/dev/zero of=$M0/a3 bs=1M count=1 conv=notrunc
++TEST dd if=/dev/zero of=$M0/data3 bs=1M count=1 conv=notrunc
++TEST truncate -s 0 $M0/a3
++TEST setfattr -n -v abc $M0/a3
++TEST setfattr -x trusted.def $M0/a3
++TEST chmod +x $M0/a3
++TEST fallocate -l 2m -n $M0/a3
++TEST fallocate -p -l 512k $M0/a3
++TEST dd if=/dev/urandom of=$M0/heal-file bs=1M count=1 oflag=direct
++cksum_before_heal="$(md5sum $M0/heal-file | awk '{print $1}')"
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
++TEST kill_brick $V0 $H0 $B0/${V0}4
++TEST kill_brick $V0 $H0 $B0/${V0}5
++cksum_after_heal=$(dd if=$M0/heal-file iflag=direct | md5sum | awk '{print $1}')
++TEST [[ $cksum_before_heal == $cksum_after_heal ]]
+diff --git a/tests/ec.rc b/tests/ec.rc
+index 04405ec..f18752f 100644
+--- a/tests/ec.rc
++++ b/tests/ec.rc
+@@ -7,3 +7,12 @@ function ec_up_status()
+         local ec_id=$3
+         grep -E "^up =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='
+ }
++function ec_option_value()
++    local v=$1
++    local m=$2
++    local ec_id=$3
++    local opt=$4
++    grep -E "^$opt =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='| awk '{print $1}'
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 92d4e5d..2e59180 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -707,6 +707,19 @@ ec_child_select(ec_fop_data_t *fop)
+         return 0;
+     }
++    if (!fop->parent && fop->lock_count &&
++        (fop->locks[0].update[EC_DATA_TXN] ||
++         fop->locks[0].update[EC_METADATA_TXN])) {
++        if (ec->quorum_count && (num < ec->quorum_count)) {
++            gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
++                   "Insufficient available children "
++                   "for this request (have %d, need "
++                   "%d). %s",
++                   num, ec->quorum_count, ec_msg_str(fop));
++            return 0;
++        }
++    }
+     return 1;
+ }
+diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
+index 3c69471..eab86ee 100644
+--- a/xlators/cluster/ec/src/ec-common.h
++++ b/xlators/cluster/ec/src/ec-common.h
+@@ -26,6 +26,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
+ #define EC_FLAG_LOCK_SHARED 0x0001
++#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...)  \
++    do {                                                                       \
++        ec_t *__ec = fop->xl->private;                                         \
++        int32_t __op_ret = 0;                                                  \
++        int32_t __op_errno = 0;                                                \
++        int32_t __success_count = gf_bits_count(fop->good);                    \
++                                                                               \
++        __op_ret = op_ret;                                                     \
++        __op_errno = op_errno;                                                 \
++        if (!fop->parent && frame &&                                           \
++            (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) &&                  \
++            __ec->quorum_count && (__success_count < __ec->quorum_count) &&    \
++            op_ret >= 0) {                                                     \
++            __op_ret = -1;                                                     \
++            __op_errno = EIO;                                                  \
++            gf_msg(__ec->xl->name, GF_LOG_ERROR, 0,                            \
++                   EC_MSG_CHILDS_INSUFFICIENT,                                 \
++                   "Insufficient available children for this request "         \
++                   "(have %d, need %d). %s",                                   \
++                   __success_count, __ec->quorum_count, ec_msg_str(fop));      \
++        }                                                                      \
++        fn(frame, cookie, this, __op_ret, __op_errno, params);                 \
++    } while (0)
+ enum _ec_xattrop_flags {
+diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
+index 0b8ee21..8192462 100644
+--- a/xlators/cluster/ec/src/ec-dir-write.c
++++ b/xlators/cluster/ec/src/ec-dir-write.c
+@@ -218,10 +218,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.create != NULL) {
+-                fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                 cbk->op_errno, fop->fd, fop->loc[0].inode,
+-                                 &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+-                                 cbk->xdata);
++                QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, fop->fd,
++                           fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1],
++                           &cbk->iatt[2], cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -390,9 +390,10 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop-> != NULL) {
+-                fop->>req_frame, fop, fop->xl, cbk->op_ret,
+-                               cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+-                               &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
++                QUORUM_CBK(fop->, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
++                           &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
++                           cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -569,9 +570,10 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.mkdir != NULL) {
+-                fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+-                                &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
++                QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
++                           &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
++                           cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -773,9 +775,10 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.mknod != NULL) {
+-                fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
+-                                &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
++                QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
++                           &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
++                           cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -931,10 +934,10 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.rename != NULL) {
+-                fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                 cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+-                                 &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4],
+-                                 cbk->xdata);
++                QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++                           &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3],
++                           &cbk->iatt[4], cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -1083,9 +1086,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.rmdir != NULL) {
+-                fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+-                                cbk->xdata);
++                QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++                           &cbk->iatt[1], cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -1237,10 +1240,10 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.symlink != NULL) {
+-                fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                  cbk->op_errno, fop->loc[0].inode,
+-                                  &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+-                                  cbk->xdata);
++                QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
++                           &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
++                           cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -1392,9 +1395,9 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.unlink != NULL) {
+-                fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                 cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+-                                 cbk->xdata);
++                QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++                           &cbk->iatt[1], cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
+index 8bfa3b4..2dbb4db 100644
+--- a/xlators/cluster/ec/src/ec-inode-write.c
++++ b/xlators/cluster/ec/src/ec-inode-write.c
+@@ -185,26 +185,26 @@ ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+     switch (fop->id) {
+         case GF_FOP_SETXATTR:
+             if (fop->cbks.setxattr) {
+-                fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno,
+-                                   xdata);
++                QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
++                           op_errno, xdata);
+             }
+             break;
+         case GF_FOP_REMOVEXATTR:
+             if (fop->cbks.removexattr) {
+-                fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno,
+-                                      xdata);
++                QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
++                           op_ret, op_errno, xdata);
+             }
+             break;
+         case GF_FOP_FSETXATTR:
+             if (fop->cbks.fsetxattr) {
+-                fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno,
+-                                    xdata);
++                QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
++                           op_ret, op_errno, xdata);
+             }
+             break;
+         case GF_FOP_FREMOVEXATTR:
+             if (fop->cbks.fremovexattr) {
+-                fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno,
+-                                       xdata);
++                QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
++                           op_ret, op_errno, xdata);
+             }
+             break;
+     }
+@@ -494,16 +494,15 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
+             if (fop->id == GF_FOP_SETATTR) {
+                 if (fop->cbks.setattr != NULL) {
+-                    fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                      cbk->op_errno, &cbk->iatt[0],
+-                                      &cbk->iatt[1], cbk->xdata);
++                    QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
++                               fop->xl, cbk->op_ret, cbk->op_errno,
++                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+                 }
+             } else {
+                 if (fop->cbks.fsetattr != NULL) {
+-                    fop->cbks.fsetattr(fop->req_frame, fop, fop->xl,
+-                                       cbk->op_ret, cbk->op_errno,
+-                                       &cbk->iatt[0], &cbk->iatt[1],
+-                                       cbk->xdata);
++                    QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
++                               fop->xl, cbk->op_ret, cbk->op_errno,
++                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+                 }
+             }
+@@ -994,9 +993,9 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.fallocate != NULL) {
+-                fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                    cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+-                                    cbk->xdata);
++                QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
++                           fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++                           &cbk->iatt[1], cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -1247,9 +1246,9 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.discard != NULL) {
+-                fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                  cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+-                                  cbk->xdata);
++                QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++                           &cbk->iatt[1], cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+@@ -1477,17 +1476,15 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
+             if (fop->id == GF_FOP_TRUNCATE) {
+                 if (fop->cbks.truncate != NULL) {
+-                    fop->cbks.truncate(fop->req_frame, fop, fop->xl,
+-                                       cbk->op_ret, cbk->op_errno,
+-                                       &cbk->iatt[0], &cbk->iatt[1],
+-                                       cbk->xdata);
++                    QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
++                               fop->xl, cbk->op_ret, cbk->op_errno,
++                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+                 }
+             } else {
+                 if (fop->cbks.ftruncate != NULL) {
+-                    fop->cbks.ftruncate(fop->req_frame, fop, fop->xl,
+-                                        cbk->op_ret, cbk->op_errno,
+-                                        &cbk->iatt[0], &cbk->iatt[1],
+-                                        cbk->xdata);
++                    QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
++                               fop->xl, cbk->op_ret, cbk->op_errno,
++                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+                 }
+             }
+@@ -2245,9 +2242,9 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
+             GF_ASSERT(cbk != NULL);
+             if (fop->cbks.writev != NULL) {
+-                fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret,
+-                                 cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+-                                 cbk->xdata);
++                QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
++                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
++                           &cbk->iatt[1], cbk->xdata);
+             }
+             return EC_STATE_LOCK_REUSE;
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index f27f2ec..ea4f6ad 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -654,6 +654,7 @@ struct _ec {
+     gf_boolean_t optimistic_changelog;
+     gf_boolean_t parallel_writes;
+     uint32_t stripe_cache;
++    uint32_t quorum_count;
+     uint32_t background_heals;
+     uint32_t heal_wait_qlen;
+     uint32_t self_heal_window_size; /* max size of read/writes */
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 3c8013e..19094c4 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -285,6 +285,7 @@ reconfigure(xlator_t *this, dict_t *options)
+     GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool,
+                      failed);
+     GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed);
++    GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed);
+     ret = 0;
+     if (ec_assign_read_policy(ec, read_policy)) {
+         ret = -1;
+@@ -720,6 +721,7 @@ init(xlator_t *this)
+                    failed);
+     GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);
+     GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
++    GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
+     this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
+     if (!this->itable)
+@@ -1402,6 +1404,7 @@ ec_dump_private(xlator_t *this)
+     gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
+     gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
+     gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
++    gf_proc_dump_write("quorum-count", "%u", ec->quorum_count);
+     snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
+              this->type, this->name);
+@@ -1672,6 +1675,16 @@ struct volume_options options[] = {
+                     "lead to extra memory consumption, maximum "
+                     "(cache size * stripe size) Bytes per open file."},
+     {
++        .key = {"quorum-count"},
++        .type = GF_OPTION_TYPE_INT,
++        .default_value = "0",
++        .description =
++            "This option can be used to define how many successes on"
++            "the bricks constitute a success to the application. This"
++            " count should be in the range"
++            "[disperse-data-count,  disperse-count] (inclusive)",
++    },
++    {
+         .key = {NULL},
+     },
+ };
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 8ce338e..7ca47a6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -1128,6 +1128,42 @@ out:
+ }
+ static int
++validate_disperse_quorum_count(glusterd_volinfo_t *volinfo, dict_t *dict,
++                               char *key, char *value, char **op_errstr)
++    int ret = -1;
++    int quorum_count = 0;
++    int data_count = 0;
++    ret = gf_string2int(value, &quorum_count);
++    if (ret) {
++        gf_asprintf(op_errstr,
++                    "%s is not an integer. %s expects a "
++                    "valid integer value.",
++                    value, key);
++        goto out;
++    }
++    if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE) {
++        gf_asprintf(op_errstr, "Cannot set %s for a non-disperse volume.", key);
++        ret = -1;
++        goto out;
++    }
++    data_count = volinfo->disperse_count - volinfo->redundancy_count;
++    if (quorum_count < data_count || quorum_count > volinfo->disperse_count) {
++        gf_asprintf(op_errstr, "%d for %s is out of range [%d - %d]",
++                    quorum_count, key, data_count, volinfo->disperse_count);
++        ret = -1;
++        goto out;
++    }
++    ret = 0;
++    return ret;
++static int
+ validate_parallel_readdir(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                           char *value, char **op_errstr)
+ {
+@@ -3663,6 +3699,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+      .type = NO_DOC,
+      .op_version = GD_OP_VERSION_3_13_0,
+      .flags = VOLOPT_FLAG_CLIENT_OPT},
++    {.key = "disperse.quorum-count",
++     .voltype = "cluster/disperse",
++     .type = NO_DOC,
++     .op_version = GD_OP_VERSION_8_0,
++     .validate_fn = validate_disperse_quorum_count,
++     .description = "This option can be used to define how many successes on"
++                    "the bricks constitute a success to the application. This"
++                    " count should be in the range"
++                    "[disperse-data-count,  disperse-count] (inclusive)",
++     .flags = VOLOPT_FLAG_CLIENT_OPT},
+     {
+         .key = "features.sdfs",
+         .voltype = "features/sdfs",
diff --git a/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch b/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch
new file mode 100644
index 0000000..01ea8c2
--- /dev/null
+++ b/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch
@@ -0,0 +1,84 @@
+From 312da653ac80b537af06139f8d83a63180c72461 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <>
+Date: Tue, 10 Sep 2019 14:04:17 +0530
+Subject: [PATCH 290/297] glusterd: tag disperse.quorum-count for 31306
+In upstream disperse.quorum-count is marked for release-8; here the
+latest new op-version is 31306, so tag the option with that instead.
+fixes: bz#1748688
+Change-Id: I88fdbd56ce3b8475b5ec670659adaa9d11c01d97
+Signed-off-by: Pranith Kumar K <>
+Reviewed-by: Ashish Pandey <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ libglusterfs/src/glusterfs/globals.h            | 12 ++++++------
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c |  2 +-
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index bdc8b3d..e218285 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -50,19 +50,19 @@
+     1 /* MIN is the fresh start op-version, mostly                             \
+          should not change */
+ #define GD_OP_VERSION_MAX                                                      \
+-    GD_OP_VERSION_8_0 /* MAX VERSION is the maximum                            \
++    GD_OP_VERSION_7_0 /* MAX VERSION is the maximum                            \
+                          count in VME table, should                            \
+                          keep changing with                                    \
+                          introduction of newer                                 \
+                          versions */
+-#define GD_OP_VERSION_RHS_3_0    30000 /* Op-Version of RHS 3.0 */
++#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */
+-#define GD_OP_VERSION_RHS_2_1_5  20105 /* RHS 2.1 update 5 */
++#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */
+-#define GD_OP_VERSION_RHS_3_0_4  30004 /* Op-Version of RHS 3.0.4 */
++#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */
+ #define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */
+@@ -124,6 +124,8 @@
+ #define GD_OP_VERSION_3_13_5 31305 /* Op-version for GlusterFS 3.13.5 */
++#define GD_OP_VERSION_3_13_6 31306 /* Op-version for GlusterFS 3.13.6 */
+ #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
+ #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */
+@@ -136,8 +138,6 @@
+ #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
+-#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 7ca47a6..16601a2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3702,7 +3702,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+     {.key = "disperse.quorum-count",
+      .voltype = "cluster/disperse",
+      .type = NO_DOC,
+-     .op_version = GD_OP_VERSION_8_0,
++     .op_version = GD_OP_VERSION_3_13_6,
+      .validate_fn = validate_disperse_quorum_count,
+      .description = "This option can be used to define how many successes on"
+                     "the bricks constitute a success to the application. This"
diff --git a/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch b/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch
new file mode 100644
index 0000000..efdbc23
--- /dev/null
+++ b/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch
@@ -0,0 +1,106 @@
+From 87d8070f80487322a1736846a78725fd88f8de34 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <>
+Date: Tue, 20 Aug 2019 13:27:24 +0530
+Subject: [PATCH 291/297] cluster/ec: Mark release only when it is acquired
+Mount-1                                Mount-2
+1)Tries to acquire lock on 'dir1'   1)Tries to acquire lock on 'dir1'
+2)Lock is granted on brick-0        2)Lock gets EAGAIN on brick-0 and
+				      leads to blocking lock on brick-0
+3)Gets a lock-contention            3) Doesn't matter what happens on mount-2
+  notification, marks lock->release    from here on.
+  to true.
+4)New fop comes on 'dir1' which will
+  be put in frozen list as lock->release
+  is set to true.
+5) Lock acquisition from step-2 fails because
+3 bricks went down in 4+2 setup.
+Fop on mount-1 which is put in frozen list will hang because no codepath will
+move it from frozen list to any other list and the lock will not be retried.
+Don't set lock->release to true if the lock is not acquired at the time of the lock-contention notification.
+fixes: bz#1731896
+Change-Id: Ie6630db8735ccf372cc54b873a3a3aed7a6082b7
+Signed-off-by: Pranith Kumar K <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Ashish Pandey <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/cluster/ec/src/ec-common.c | 20 ++++++++++++++++++--
+ xlators/cluster/ec/src/ec-types.h  |  1 +
+ 2 files changed, 19 insertions(+), 2 deletions(-)
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index 2e59180..5cae37b 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -1867,6 +1867,10 @@ ec_lock_acquired(ec_lock_link_t *link)
+     LOCK(&lock->loc.inode->lock);
+     lock->acquired = _gf_true;
++    if (lock->contention) {
++        lock->release = _gf_true;
++        lock->contention = _gf_false;
++    }
+     ec_lock_update_fd(lock, fop);
+     ec_lock_wake_shared(lock, &list);
+@@ -1892,15 +1896,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+     ec_lock_link_t *link = NULL;
+     ec_lock_t *lock = NULL;
++    link = fop->data;
++    lock = link->lock;
+     if (op_ret >= 0) {
+-        link = fop->data;
+-        lock = link->lock;
+         lock->mask = lock->good_mask = fop->good;
+         lock->healing = 0;
+         ec_lock_acquired(link);
+         ec_lock(fop->parent);
+     } else {
++        LOCK(&lock->loc.inode->lock);
++        {
++            lock->contention = _gf_false;
++        }
++        UNLOCK(&lock->loc.inode->lock);
+         gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
+                "Failed to complete preop lock");
+     }
+@@ -2547,6 +2556,13 @@ ec_lock_release(ec_t *ec, inode_t *inode)
+     gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
+                  inode);
++    if (!lock->acquired) {
++        /* This happens if some bricks already got the lock while inodelk is in
++         * progress.  Set release to true after lock is acquired*/
++        lock->contention = _gf_true;
++        goto done;
++    }
+     /* The lock is not marked to be released, so the frozen list should be
+      * empty. */
+     GF_ASSERT(list_empty(&lock->frozen));
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index ea4f6ad..34a9768 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -267,6 +267,7 @@ struct _ec_lock {
+     uint32_t refs_pending;  /* Refs assigned to fops being prepared */
+     uint32_t waiting_flags; /*Track xattrop/dirty marking*/
+     gf_boolean_t acquired;
++    gf_boolean_t contention;
+     gf_boolean_t unlock_now;
+     gf_boolean_t release;
+     gf_boolean_t query;
diff --git a/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch b/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch
new file mode 100644
index 0000000..07fc8f4
--- /dev/null
+++ b/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch
@@ -0,0 +1,72 @@
+From 769263ad422e3c1069de0994ff2274044982b242 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Sun, 1 Sep 2019 12:01:09 +0530
+Subject: [PATCH 292/297] rpc: Update address family if it is not provide in
+ cmd-line arguments
+Problem: After setting transport-type to inet6 and passing an IPv6
+         transport.socket.bind-address in glusterd.vol, clients are
+         not started.
+Solution: Need to update address-family based on remote-address for
+          all gluster client processes
+> Change-Id: Iaa3588cd87cebc45231bfd675745c1a457dc9b31
+> Fixes: bz#1747746
+> Credits: Amgad Saleh <>
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry picked from commit 80b8cfe3f1386606bada97a76a0cad7acdf6b877)
+> (Reviewed on upstream link
+Change-Id: Iaa3588cd87cebc45231bfd675745c1a457dc9b31
+BUG: 1750241
+Signed-off-by: Mohit Agrawal <>
+Tested-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ rpc/rpc-transport/socket/src/name.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
+index 7f18cc4..b473f3b 100644
+--- a/rpc/rpc-transport/socket/src/name.c
++++ b/rpc/rpc-transport/socket/src/name.c
+@@ -214,6 +214,7 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
+     uint16_t remote_port = 0;
+     struct addrinfo *addr_info = NULL;
+     int32_t ret = 0;
++    struct in6_addr serveraddr;
+     remote_host_data = dict_get(options, "remote-host");
+     if (remote_host_data == NULL) {
+@@ -249,6 +250,13 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
+         goto err;
+     }
++    /* Need to update transport-address family if address-family is not
++       provided in command-line arguments
++    */
++    if (inet_pton(AF_INET6, remote_host, &serveraddr)) {
++        sockaddr->sa_family = AF_INET6;
++    }
+     /* TODO: gf_resolve is a blocking call. kick in some
+        non blocking dns techniques */
+     ret = gf_resolve_ip6(remote_host, remote_port, sockaddr->sa_family,
+@@ -522,7 +530,10 @@ socket_client_get_remote_sockaddr(rpc_transport_t *this,
+             ret = -1;
+     }
+-    if (*sa_family == AF_UNSPEC) {
++    /* Address-family is updated based on remote_host in
++       af_inet_client_get_remote_sockaddr
++    */
++    if (*sa_family != sockaddr->sa_family) {
+         *sa_family = sockaddr->sa_family;
+     }
diff --git a/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch b/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch
new file mode 100644
index 0000000..23120cb
--- /dev/null
+++ b/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch
@@ -0,0 +1,69 @@
+From 8f89aef9691b0806d7487525c6a54a1a615c8bc1 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Mon, 2 Sep 2019 10:46:10 +0530
+Subject: [PATCH 293/297] glusterd: IPV6 hostname address is not parsed
+ correctly
+Problem: IPV6 hostname address is not parsed correctly in function
+         glusterd_check_brick_order
+Solution: Update the code to parse hostname address
+> Change-Id: Ifb2f83f9c6e987b2292070e048e97eeb51b728ab
+> Fixes: bz#1747746
+> Credits: Amgad Saleh <>
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry picked from commit 6563ffb04d7ba51a89726e7c5bbb85c7dbc685b5)
+> (Reviewed on upstream link
+Change-Id: Ifb2f83f9c6e987b2292070e048e97eeb51b728ab
+BUG: 1750241
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 1ea8ba6..076bc80 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -95,6 +95,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str)
+     int32_t type = GF_CLUSTER_TYPE_NONE;
+     int32_t sub_count = 0;
+     struct addrinfo *ai_info = NULL;
++    char brick_addr[128] = {
++        0,
++    };
++    int addrlen = 0;
+     const char failed_string[2048] =
+         "Failed to perform brick order "
+@@ -182,15 +186,17 @@ glusterd_check_brick_order(dict_t *dict, char *err_str)
+         brick_list_dup = tmpptr;
+         if (brick == NULL)
+             goto check_failed;
+-        brick = strtok_r(brick, ":", &tmpptr);
+-        if (brick == NULL)
++        tmpptr = strrchr(brick, ':');
++        if (tmpptr == NULL)
+             goto check_failed;
+-        ret = getaddrinfo(brick, NULL, NULL, &ai_info);
++        addrlen = strlen(brick) - strlen(tmpptr);
++        strncpy(brick_addr, brick, addrlen);
++        brick_addr[addrlen] = '\0';
++        ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info);
+         if (ret != 0) {
+             ret = 0;
+             gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL,
+-                   "unable to resolve "
+-                   "host name");
++                   "unable to resolve host name for addr %s", brick_addr);
+             goto out;
+         }
+         ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t));
diff --git a/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch b/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch
new file mode 100644
index 0000000..1665185
--- /dev/null
+++ b/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch
@@ -0,0 +1,59 @@
+From 2fa5476b95d4547bdde50f2281bf58b7db24e37a Mon Sep 17 00:00:00 2001
+From: Aravinda VK <>
+Date: Mon, 16 Sep 2019 10:04:26 +0530
+Subject: [PATCH 294/297] eventsapi: Set IPv4/IPv6 family based on input IP
+server.sin_family was set to AF_INET while creating socket connection,
+this was failing if the input address is IPv6(`::1`).
+With this patch, sin_family is set by reading the ai_family of
+`getaddrinfo` result.
+> upstream patch :
+>Fixes: bz#1752330
+>Change-Id: I499f957b432842fa989c698f6e5b25b7016084eb
+>Signed-off-by: Aravinda VK <>
+BUG: 1732443
+Change-Id: I499f957b432842fa989c698f6e5b25b7016084eb
+Signed-off-by: Aravinda VK <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ libglusterfs/src/events.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
+index 2509767..9d33783 100644
+--- a/libglusterfs/src/events.c
++++ b/libglusterfs/src/events.c
+@@ -42,6 +42,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+     struct addrinfo hints;
+     struct addrinfo *result = NULL;
+     xlator_t *this = THIS;
++    int sin_family = AF_INET;
+     /* Global context */
+     ctx = THIS->ctx;
+@@ -75,13 +76,15 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+             ret = EVENT_ERROR_RESOLVE;
+             goto out;
+         }
++        sin_family = result->ai_family;
+     } else {
+         /* Localhost, Use the defined IP for localhost */
+         host = gf_strdup(EVENT_HOST);
+     }
+     /* Socket Configurations */
+-    server.sin_family = AF_INET;
++    server.sin_family = sin_family;
+     server.sin_port = htons(EVENT_PORT);
+     ret = inet_pton(server.sin_family, host, &server.sin_addr);
+     if (ret <= 0) {
diff --git a/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch b/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch
new file mode 100644
index 0000000..9d3820d
--- /dev/null
+++ b/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch
@@ -0,0 +1,1164 @@
+From d5ce2300f77c25b38a076d4dd6a5521e82c56172 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Mon, 29 Jul 2019 18:30:42 +0530
+Subject: [PATCH 295/297] ctime/rebalance: Heal ctime xattr on directory during
+ rebalance
+After add-brick and rebalance, the ctime xattr is not present
+on rebalanced directories on the new brick. This patch fixes that.
+Note that ctime still doesn't support consistent time across
+distribute sub-volume.
+This patch also fixes the in-memory inconsistency of time attributes
+when metadata is self healed.
+Backport of:
+ > Patch:
+ > Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df
+ > fixes: bz#1734026
+ > Signed-off-by: Kotresh HR <>
+Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df
+BUG: 1728673
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Atin Mukherjee <>
+ tests/basic/afr/split-brain-healing-ctime.t        | 253 +++++++++++++++++++++
+ tests/basic/afr/split-brain-healing.t              |   1 +
+ tests/basic/ctime/ctime-ec-heal.t                  |  71 ++++++
+ tests/basic/ctime/ctime-ec-rebalance.t             |  44 ++++
+ tests/basic/ctime/ctime-rep-heal.t                 |  71 ++++++
+ tests/basic/ctime/ctime-rep-rebalance.t            |  42 ++++
+ .../bug-1734370-entry-heal-restore-time.t          |  84 +++++++
+ tests/volume.rc                                    |  15 +-
+ xlators/cluster/afr/src/afr-self-heal-common.c     |   3 +-
+ xlators/cluster/afr/src/afr-self-heal-entry.c      |   2 +
+ xlators/cluster/dht/src/dht-common.c               |   1 +
+ xlators/cluster/ec/src/ec-heal.c                   |   7 +-
+ xlators/storage/posix/src/posix-entry-ops.c        |   8 +-
+ xlators/storage/posix/src/posix-helpers.c          |  31 ++-
+ xlators/storage/posix/src/posix-inode-fd-ops.c     |  57 ++---
+ xlators/storage/posix/src/posix-metadata.c         |  65 +++++-
+ xlators/storage/posix/src/posix-metadata.h         |   7 +
+ xlators/storage/posix/src/posix.h                  |   5 +-
+ 18 files changed, 714 insertions(+), 53 deletions(-)
+ create mode 100644 tests/basic/afr/split-brain-healing-ctime.t
+ create mode 100644 tests/basic/ctime/ctime-ec-heal.t
+ create mode 100644 tests/basic/ctime/ctime-ec-rebalance.t
+ create mode 100644 tests/basic/ctime/ctime-rep-heal.t
+ create mode 100644 tests/basic/ctime/ctime-rep-rebalance.t
+ create mode 100644 tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
+diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t
+new file mode 100644
+index 0000000..1ca18e3
+--- /dev/null
++++ b/tests/basic/afr/split-brain-healing-ctime.t
+@@ -0,0 +1,253 @@
++#Test the split-brain resolution CLI commands.
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++function get_replicate_subvol_number {
++        local filename=$1
++        #get_backend_paths
++        if [ -f $B0/${V0}1/$filename ]
++        then
++                echo 0
++        elif [ -f $B0/${V0}3/$filename ]
++        then    echo 1
++        else
++                echo -1
++        fi
++AREQUAL_PATH=$(dirname $0)/../../utils
++GET_MDATA_PATH=$(dirname $0)/../../utils
++test "`uname -s`" != "Linux" && {
++    CFLAGS="$CFLAGS -lintl";
++build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
++build_tester $GET_MDATA_PATH/get-mdata-xattr.c
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
++TEST $CLI volume set $V0 cluster.self-heal-daemon off
++TEST $CLI volume set $V0 off
++TEST $CLI volume set $V0 cluster.metadata-self-heal off
++TEST $CLI volume set $V0 cluster.entry-self-heal off
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++cd $M0
++for i in {1..10}
++        echo "Initial content">>file$i
++replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`)
++replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`)
++############ Create data split-brain in the files. ###########################
++TEST kill_brick $V0 $H0 $B0/${V0}1
++for file in ${!replica_0_files_list[*]}
++        echo "B1 is down">>${replica_0_files_list[$file]}
++TEST kill_brick $V0 $H0 $B0/${V0}3
++for file in ${!replica_1_files_list[*]}
++        echo "B3 is down">>${replica_1_files_list[$file]}
++SMALLER_FILE_SIZE=$(stat -c %s file1)
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++TEST kill_brick $V0 $H0 $B0/${V0}2
++for file in ${!replica_0_files_list[*]}
++        echo "B2 is down">>${replica_0_files_list[$file]}
++        echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]}
++TEST kill_brick $V0 $H0 $B0/${V0}4
++for file in ${!replica_1_files_list[*]}
++        echo "B4 is down">>${replica_1_files_list[$file]}
++        echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]}
++BIGGER_FILE_SIZE=$(stat -c %s file1)
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
++############### Accessing the files should now give EIO. ###############################
++TEST ! cat file1
++TEST ! cat file2
++TEST ! cat file3
++TEST ! cat file4
++TEST ! cat file5
++TEST ! cat file6
++TEST ! cat file7
++TEST ! cat file8
++TEST ! cat file9
++TEST ! cat file10
++TEST $CLI volume set $V0 cluster.self-heal-daemon on
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
++################ Heal file1 using the bigger-file option  ##############
++$CLI volume heal $V0 split-brain bigger-file /file1
++EXPECT "0" echo $?
++EXPECT $BIGGER_FILE_SIZE stat -c %s file1
++################ Heal file2 using the bigger-file option and its gfid ##############
++subvolume=$(get_replicate_subvol_number file2)
++if [ $subvolume == 0 ]
++        GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2)
++elif [ $subvolume == 1 ]
++        GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2)
++GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++$CLI volume heal $V0 split-brain bigger-file $GFIDSTR
++EXPECT "0" echo $?
++################ Heal file3 using the source-brick option  ##############
++################ Use the brick having smaller file size as source #######
++subvolume=$(get_replicate_subvol_number file3)
++if [ $subvolume == 0 ]
++        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3
++elif [ $subvolume == 1 ]
++        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
++EXPECT "0" echo $?
++EXPECT $SMALLER_FILE_SIZE stat -c %s file3
++################ Heal file4 using the source-brick option and it's gfid ##############
++################ Use the brick having smaller file size as source #######
++subvolume=$(get_replicate_subvol_number file4)
++if [ $subvolume == 0 ]
++        GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4)
++        GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR
++elif [ $subvolume == 1 ]
++        GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4)
++        GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR
++EXPECT "0" echo $?
++EXPECT $SMALLER_FILE_SIZE stat -c %s file4
++# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed
++# as part of metadata heal. So mtime would be same, hence it can't be healed
++# using 'latest-mtime' policy, use 'source-brick' option instead.
++################ Heal file5 using the source-brick option  ##############
++subvolume=$(get_replicate_subvol_number file5)
++if [ $subvolume == 0 ]
++        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5
++elif [ $subvolume == 1 ]
++        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5
++EXPECT "0" echo $?
++if [ $subvolume == 0 ]
++        mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
++        mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
++elif [ $subvolume == 1 ]
++        mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
++        mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
++#TODO: Do the comparisons below at full sub-second resolution
++TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
++mtime_mount_after_heal=$(stat -c %Y file5)
++TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
++################ Heal file6 using the source-brick option and its gfid  ##############
++subvolume=$(get_replicate_subvol_number file6)
++if [ $subvolume == 0 ]
++        GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
++        GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
++elif [ $subvolume == 1 ]
++        GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
++        GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
++        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR
++EXPECT "0" echo $?
++if [ $subvolume == 0 ]
++        mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
++        mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
++elif [ $subvolume == 1 ]
++        mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
++        mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
++#TODO: Do the comparisons below at full sub-second resolution
++TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
++mtime_mount_after_heal=$(stat -c %Y file6)
++TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
++################ Heal remaining SB'ed files of replica_0 using B1 as source ##############
++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1
++EXPECT "0" echo $?
++################ Heal remaining SB'ed files of replica_1 using B3 as source ##############
++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3
++EXPECT "0" echo $?
++############### Reading the files should now succeed. ###############################
++TEST  cat file1
++TEST  cat file2
++TEST  cat file3
++TEST  cat file4
++TEST  cat file5
++TEST  cat file6
++TEST  cat file7
++TEST  cat file8
++TEST  cat file9
++TEST  cat file10
++################ File contents on the bricks must be same. ################################
++TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs)
++TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs)
++############### Trying to heal files not in SB should fail. ###############################
++$CLI volume heal $V0 split-brain bigger-file /file1
++EXPECT "1" echo $?
++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
++EXPECT "1" echo $?
++cd -
++TEST rm $AREQUAL_PATH/arequal-checksum
++TEST rm $GET_MDATA_PATH/get-mdata-xattr
+diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
+index 78553e6..315e815 100644
+--- a/tests/basic/afr/split-brain-healing.t
++++ b/tests/basic/afr/split-brain-healing.t
+@@ -35,6 +35,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
+ TEST $CLI volume set $V0 off
+ TEST $CLI volume set $V0 cluster.metadata-self-heal off
+ TEST $CLI volume set $V0 cluster.entry-self-heal off
++TEST $CLI volume set $V0 ctime off
+ TEST $CLI volume start $V0
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+diff --git a/tests/basic/ctime/ctime-ec-heal.t b/tests/basic/ctime/ctime-ec-heal.t
+new file mode 100644
+index 0000000..1cb4516
+--- /dev/null
++++ b/tests/basic/ctime/ctime-ec-heal.t
+@@ -0,0 +1,71 @@
++# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++#create and start volume
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{1..3}
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++#Mount the volume
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++# Create files
++mkdir $M0/dir1
++echo "Initial content" > $M0/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++# Kill brick
++TEST kill_brick $V0 $H0 $B0/${V0}3
++echo "B3 is down" >> $M0/file1
++echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
++echo "Entry heal file" > $M0/entry_heal_file1
++mkdir $M0/entry_heal_dir1
++# Check xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
++TEST $CLI volume start $V0 force
++$CLI volume heal $V0
++# Check xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+diff --git a/tests/basic/ctime/ctime-ec-rebalance.t b/tests/basic/ctime/ctime-ec-rebalance.t
+new file mode 100644
+index 0000000..caccdc1
+--- /dev/null
++++ b/tests/basic/ctime/ctime-ec-rebalance.t
+@@ -0,0 +1,44 @@
++# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fallocate.rc
++#create and start volume
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++#Mount the volume
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
++# Create files
++mkdir $M0/dir1
++echo "test data" > $M0/dir1/file1
++# Add brick
++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
++#Trigger rebalance
++TEST $CLI volume rebalance $V0 start force
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
++#Verify ctime xattr heal on directory
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
++b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
+diff --git a/tests/basic/ctime/ctime-rep-heal.t b/tests/basic/ctime/ctime-rep-heal.t
+new file mode 100644
+index 0000000..ba8b08a
+--- /dev/null
++++ b/tests/basic/ctime/ctime-rep-heal.t
+@@ -0,0 +1,71 @@
++# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++#create and start volume
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++#Mount the volume
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++# Create files
++mkdir $M0/dir1
++echo "Initial content" > $M0/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++# Kill brick
++TEST kill_brick $V0 $H0 $B0/${V0}3
++echo "B3 is down" >> $M0/file1
++echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
++echo "Entry heal file" > $M0/entry_heal_file1
++mkdir $M0/entry_heal_dir1
++# Check xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
++TEST $CLI volume start $V0 force
++$CLI volume heal $V0
++# Check xattr
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+diff --git a/tests/basic/ctime/ctime-rep-rebalance.t b/tests/basic/ctime/ctime-rep-rebalance.t
+new file mode 100644
+index 0000000..dd9743e
+--- /dev/null
++++ b/tests/basic/ctime/ctime-rep-rebalance.t
+@@ -0,0 +1,42 @@
++# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++#create and start volume
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5}
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0
++#Mount the volume
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++# Create files
++mkdir $M0/dir1
++# Add brick
++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
++#Trigger rebalance
++TEST $CLI volume rebalance $V0 start force
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
++#Verify ctime xattr heal on directory
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
++b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
+diff --git a/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
+new file mode 100644
+index 0000000..298d6ed
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
+@@ -0,0 +1,84 @@
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++function time_stamps_match {
++        path=$1
++        mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
++        atime_source_b0=$(get_atime $B0/${V0}0/$path)
++        mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
++        atime_source_b2=$(get_atime $B0/${V0}2/$path)
++        mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
++        atime_sink_b1=$(get_atime $B0/${V0}1/$path)
++        #The same brick must be the source of heal for both atime and mtime.
++        if [[ ( $mtime_source_b0 -eq $mtime_sink_b1 && $atime_source_b0 -eq $atime_sink_b1 ) || \
++              ( $mtime_source_b2 -eq $mtime_sink_b1 && $atime_source_b2 -eq $atime_sink_b1 ) ]]
++        then
++            echo "Y"
++        else
++            echo "N"
++        fi
++# Test that the parent dir's timestamps are restored during entry-heal.
++GET_MDATA_PATH=$(dirname $0)/../../utils
++build_tester $GET_MDATA_PATH/get-mdata-xattr.c
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume set $V0 ctime on
++TEST $CLI volume start $V0;
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++TEST mkdir $M0/DIR
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST touch $M0/DIR/FILE
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
++EXPECT "Y" time_stamps_match DIR
++ctime_source1=$(get_ctime $B0/${V0}0/$path)
++ctime_source2=$(get_ctime $B0/${V0}2/$path)
++ctime_sink=$(get_ctime $B0/${V0}1/$path)
++TEST [ $ctime_source1 -eq $ctime_sink ]
++TEST [ $ctime_source2 -eq $ctime_sink ]
++# Repeat the test with ctime feature disabled.
++TEST $CLI volume set $V0 features.ctime off
++TEST mkdir $M0/DIR2
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST touch $M0/DIR2/FILE
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
++EXPECT "Y" time_stamps_match DIR2
++TEST rm $GET_MDATA_PATH/get-mdata-xattr
+diff --git a/tests/volume.rc b/tests/volume.rc
+index 76a8fd4..9a002d9 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -371,6 +371,19 @@ function get_gfid2path {
+         getfattr -h --only-values -n glusterfs.gfidtopath $path 2>/dev/null
+ }
++function get_mdata {
++        local path=$1
++        getfattr -h -e hex -n trusted.glusterfs.mdata $path 2>/dev/null | grep "trusted.glusterfs.mdata" | cut -f2 -d'='
++function get_mdata_count {
++    getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | wc -l
++function get_mdata_uniq_count {
++    getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | uniq | wc -l
+ function get_xattr_key {
+         local key=$1
+         local path=$2
+@@ -925,7 +938,7 @@ function get_ctime {
+     local time=$(get-mdata-xattr -c $1)
+     if [ $time == "-1" ];
+     then
+-        echo $(stat -c %Z $2)
++        echo $(stat -c %Z $1)
+     else
+         echo $time
+     fi
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index b38085a..81ef38a 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -513,7 +513,8 @@ afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+     AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
+                &replies[source].poststat,
+-               (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME), NULL);
++               NULL);
+     loc_wipe(&loc);
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index e07b521..35b600f 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -1032,6 +1032,8 @@ unlock:
+             goto postop_unlock;
+         }
++        afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
++                                  locked_replies);
+         ret = afr_selfheal_undo_pending(
+             frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+             AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 219b072..99cccd6 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -115,6 +115,7 @@ char *xattrs_to_heal[] = {"user.",
+                           QUOTA_LIMIT_KEY,
+                           QUOTA_LIMIT_OBJECTS_KEY,
+                           GF_SELINUX_XATTR_KEY,
++                          GF_XATTR_MDATA_KEY,
+                           NULL};
+ char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 0f0f398..06a7016 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2301,9 +2301,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
+         loc.inode = inode_ref(fd->inode);
+         gf_uuid_copy(loc.gfid, fd->inode->gfid);
+-        ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
+-                              output, frame, ec->xl, &loc, &source_buf,
+-                              GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
++        ret = cluster_setattr(
++            ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
++            ec->xl, &loc, &source_buf,
+         EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
+         if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+             ret = -ENOTCONN;
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index 34ee2b8..283b305 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -500,7 +500,7 @@ post_op:
+         posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+     }
+-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+     if (op_ret) {
+         if (errno != EEXIST)
+             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+@@ -828,7 +828,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+                "setting ACLs on %s failed ", real_path);
+     }
+-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+     if (op_ret) {
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                "setting xattrs on %s failed", real_path);
+@@ -1529,7 +1529,7 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+     }
+ ignore:
+-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+     if (op_ret) {
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                "setting xattrs on %s failed ", real_path);
+@@ -2167,7 +2167,7 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+         posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+     }
+ ignore:
+-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+     if (op_ret) {
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                "setting xattrs on %s failed ", real_path);
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index d143d4c..6a1a35c 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1188,11 +1188,15 @@ posix_dump_buffer(xlator_t *this, const char *real_path, const char *key,
+ #endif
+ int
+-posix_handle_pair(xlator_t *this, const char *real_path, char *key,
++posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
+                   data_t *value, int flags, struct iatt *stbuf)
+ {
+     int sys_ret = -1;
+     int ret = 0;
++    int op_errno = 0;
++    struct mdata_iatt mdata_iatt = {
++        0,
++    };
+     const int error_code = EINVAL;
+ #else
+@@ -1216,6 +1220,23 @@ posix_handle_pair(xlator_t *this, const char *real_path, char *key,
+         /* ignore this key value pair */
+         ret = 0;
+         goto out;
++    } else if (!strncmp(key, GF_XATTR_MDATA_KEY, strlen(key))) {
++        /* This is either by rebalance or self heal. Create the xattr if it's
++         * not present. Compare and update the larger value if the xattr is
++         * already present.
++         */
++        if (loc == NULL) {
++            ret = -EINVAL;
++            goto out;
++        }
++        posix_mdata_iatt_from_disk(&mdata_iatt,
++                                   (posix_mdata_disk_t *)value->data);
++        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
++                                                 &mdata_iatt, &op_errno);
++        if (ret != 0) {
++            ret = -op_errno;
++        }
++        goto out;
+     } else {
+         sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags);
+@@ -1810,8 +1831,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+         return 0;
+     }
+-    ret = posix_handle_pair(filler->this, filler->real_path, k, v, XATTR_CREATE,
+-                            filler->stbuf);
++    ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
++                            XATTR_CREATE, filler->stbuf);
+     if (ret < 0) {
+         errno = -ret;
+         return -1;
+@@ -1820,7 +1841,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+ }
+ int
+-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict)
++posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
++                             dict_t *dict)
+ {
+     int ret = -1;
+@@ -1834,6 +1856,7 @@ posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict)
+     filler.this = this;
+     filler.real_path = path;
+     filler.stbuf = NULL;
++    filler.loc = loc;
+     ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler);
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index e0ea85b..a2a518f 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -429,22 +429,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                                     &frame->root->ctime, stbuf, valid);
+     }
+-    if (valid & GF_SET_ATTR_CTIME && !priv->ctime) {
+-        /*
+-         * If ctime is not enabled, we have no means to associate an
+-         * arbitrary ctime with the file, so as a fallback, we ignore
+-         * the ctime payload and update the file ctime to current time
+-         * (which is possible directly with the POSIX API).
+-         */
+-        op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL(real_path, NULL);
+-        if (op_ret == -1) {
+-            op_errno = errno;
+-            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED,
+-                   "setattr (utimes) on %s "
+-                   "failed",
+-                   real_path);
+-            goto out;
+-        }
++    if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
++        posix_update_ctime_in_mdata(this, real_path, -1, loc->inode,
++                                    &frame->root->ctime, stbuf, valid);
+     }
+     if (!valid) {
+@@ -469,14 +456,6 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+         goto out;
+     }
+-    if (valid & GF_SET_ATTR_CTIME && priv->ctime) {
+-        /*
+-         * If we got ctime payload, we override
+-         * the ctime of statpost with that.
+-         */
+-        statpost.ia_ctime = stbuf->ia_ctime;
+-        statpost.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+-    }
+     posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost);
+     if (xdata)
+@@ -592,6 +571,7 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+     struct iatt statpost = {
+         0,
+     };
++    struct posix_private *priv = NULL;
+     struct posix_fd *pfd = NULL;
+     dict_t *xattr_rsp = NULL;
+     int32_t ret = -1;
+@@ -604,6 +584,9 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+     VALIDATE_OR_GOTO(this, out);
+     VALIDATE_OR_GOTO(fd, out);
++    priv = this->private;
++    VALIDATE_OR_GOTO(priv, out);
+     ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+     if (ret < 0) {
+         gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+@@ -656,6 +639,11 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                                     &frame->root->ctime, stbuf, valid);
+     }
++    if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
++        posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode,
++                                    &frame->root->ctime, stbuf, valid);
++    }
+     if (!valid) {
+         op_ret = sys_fchown(pfd->fd, -1, -1);
+         if (op_ret == -1) {
+@@ -2578,7 +2566,7 @@ _handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+     filler = tmp;
+-    return posix_handle_pair(filler->this, filler->real_path, k, v,
++    return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
+                              filler->flags, filler->stbuf);
+ }
+@@ -2641,27 +2629,27 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+     priv = this->private;
+     DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
++    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
++    if (!real_path) {
++        op_ret = -1;
++        op_errno = ESTALE;
++        goto out;
++    }
+     ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt);
+     if (ret == 0) {
+         /* This is initiated by lookup when ctime feature is enabled to create
+          * "trusted.glusterfs.mdata" xattr if not present. These are the files
+          * which were created when ctime feature is disabled.
+          */
+-        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, &mdata_iatt,
+-                                                 &op_errno);
++        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
++                                                 &mdata_iatt, &op_errno);
+         if (ret != 0) {
+             op_ret = -1;
+         }
+         goto out;
+     }
+-    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+-    if (!real_path) {
+-        op_ret = -1;
+-        op_errno = ESTALE;
+-        goto out;
+-    }
+     posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false);
+     op_ret = -1;
+@@ -2796,6 +2784,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+     filler.real_path = real_path;
+     filler.this = this;
+     filler.stbuf = &preop;
++    filler.loc = loc;
+     filler.flags = map_xattr_flags(flags);
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 532daa2..9efaf99 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -56,6 +56,19 @@ posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
+     out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
+ }
++posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in)
++    out->ia_ctime = be64toh(in->ctime.tv_sec);
++    out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec);
++    out->ia_mtime = be64toh(in->mtime.tv_sec);
++    out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec);
++    out->ia_atime = be64toh(in->atime.tv_sec);
++    out->ia_atime_nsec = be64toh(in->atime.tv_nsec);
+ /* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
+ static int
+ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
+@@ -341,6 +354,7 @@ posix_compare_timespec(struct timespec *first, struct timespec *second)
+ int
+ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
++                                   const char *realpath,
+                                    struct mdata_iatt *mdata_iatt, int *op_errno)
+ {
+     posix_mdata_t *mdata = NULL;
+@@ -369,8 +383,8 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+                 goto unlock;
+             }
+-            ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
+-                                          op_errno);
++            ret = posix_fetch_mdata_xattr(this, realpath, -1, inode,
++                                          (void *)mdata, op_errno);
+             if (ret == 0) {
+                 /* Got mdata from disk. This is a race, another client
+                  * has healed the xattr during lookup. So set it in inode
+@@ -412,7 +426,7 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+             }
+         }
+-        ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
++        ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED,
+                    "gfid: %s key:%s ", uuid_utoa(inode->gfid),
+@@ -445,7 +459,8 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+     GF_VALIDATE_OR_GOTO(this->name, inode, out);
+     GF_VALIDATE_OR_GOTO(this->name, time, out);
+-    if (update_utime && (!u_atime || !u_mtime)) {
++    if (update_utime && (flag->ctime && !time) && (flag->atime && !u_atime) &&
++        (flag->mtime && !u_mtime)) {
+         goto out;
+     }
+@@ -652,6 +667,48 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+     return;
+ }
++/* posix_update_ctime_in_mdata updates the posix_mdata_t when ctime needs
++ * to be modified
++ */
++posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
++                            inode_t *inode, struct timespec *ctime,
++                            struct iatt *stbuf, int valid)
++    int32_t ret = 0;
++#if defined(HAVE_UTIMENSAT)
++    struct timespec tv_ctime = {
++        0,
++    };
++    struct timeval tv_ctime = {
++        0,
++    };
++    posix_mdata_flag_t flag = {
++        0,
++    };
++    struct posix_private *priv = NULL;
++    priv = this->private;
++    if (inode && priv->ctime) {
++        tv_ctime.tv_sec = stbuf->ia_ctime;
++        SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_ctime, stbuf->ia_ctime_nsec);
++        flag.ctime = 1;
++        ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv_ctime, NULL,
++                                    NULL, NULL, &flag, _gf_true);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
++                   "posix set mdata atime failed on file:"
++                   " %s gfid:%s",
++                   real_path, uuid_utoa(inode->gfid));
++        }
++    }
++    return;
+ static void
+ posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag)
+ {
+diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
+index c176699..63e8771 100644
+--- a/xlators/storage/posix/src/posix-metadata.h
++++ b/xlators/storage/posix/src/posix-metadata.h
+@@ -43,6 +43,10 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+                             inode_t *inode, struct timespec *ctime,
+                             struct iatt *stbuf, int valid);
+ void
++posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
++                            inode_t *inode, struct timespec *ctime,
++                            struct iatt *stbuf, int valid);
+ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+                 int fd, inode_t *inode, struct iatt *stbuf);
+ void
+@@ -56,7 +60,10 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+                     int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
+ int
+ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
++                                   const char *realpath,
+                                    struct mdata_iatt *mdata_iatt,
+                                    int *op_errno);
++posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in);
+ #endif /* _POSIX_METADATA_H */
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 64288a7..dd51062 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -339,7 +339,7 @@ dict_t *
+ posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd,
+                  int fdnum, dict_t *xattr, struct iatt *buf);
+ int
+-posix_handle_pair(xlator_t *this, const char *real_path, char *key,
++posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
+                   data_t *value, int flags, struct iatt *stbuf);
+ int
+ posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key,
+@@ -352,7 +352,8 @@ int
+ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc,
+                 dict_t *xattr_req);
+ int
+-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict);
++posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
++                             dict_t *dict);
+ int
+ posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd,
diff --git a/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch b/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch
new file mode 100644
index 0000000..9f43ff8
--- /dev/null
+++ b/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch
@@ -0,0 +1,62 @@
+From bfb64a0e685eb5755ceda6c54690335564e135c9 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <>
+Date: Mon, 16 Sep 2019 14:22:34 +0530
+Subject: [PATCH 296/297] glusterfind: pre command failure on a modify
+On upstream we have gfid_to_all_paths_using_gfid2path instead of
+gfid_to_path_using_pgfid and so we do not hit this in upstream.
+Problem: On a modify, the pre command runs through the find function,
+where the number of arguments passed mismatches and causes an error on stderr.
+The mismatch occurs because both changelog and brickfind use find(),
+but only brickfind was handled.
+Fix: Handle the additional argument on the changelog side as well,
+receiving it as a dummy variable for changelog.
+Change-Id: I5eecdd993e477b68a0e486db2ad7e56ba94bbf02
+fixes: bz#1733970
+Signed-off-by: Hari Gowtham <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <>
+Reviewed-by: Rinku Kothiya <>
+ tools/glusterfind/src/ | 5 +++--
+ tools/glusterfind/src/     | 2 +-
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+diff --git a/tools/glusterfind/src/ b/tools/glusterfind/src/
+index 40c381b..ef982db 100644
+--- a/tools/glusterfind/src/
++++ b/tools/glusterfind/src/
+@@ -141,8 +141,9 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
+     # Length of brick path, to remove from output path
+     brick_path_len = len(brick)
+-    def output_callback(path, inode):
+    # is_dir is a dummy variable to make it compatible with the find
++    # used in brickfind
++    def output_callback(path, inode, is_dir):
+         # For each path found, encodes it and updates path1
+         # Also updates converted flag in inodegfid table as 1
+         path = path.strip()
+diff --git a/tools/glusterfind/src/ b/tools/glusterfind/src/
+index cc09903..e226c5a 100644
+--- a/tools/glusterfind/src/
++++ b/tools/glusterfind/src/
+@@ -70,7 +70,7 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
+             else:
+                 filter_result = filter_func(full_path)
+                 if filter_result is not None:
+-                    callback_func(full_path, filter_result)
++                    callback_func(full_path, filter_result, None)
+         else:
+             filter_result = filter_func(full_path)
+             if filter_result is not None:
diff --git a/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch b/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
new file mode 100644
index 0000000..47b5da0
--- /dev/null
+++ b/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
@@ -0,0 +1,89 @@
+From 37555b6c83d3a979033111a754ee1728dab254f5 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <>
+Date: Wed, 18 Sep 2019 17:38:52 +0530
+Subject: [PATCH 297/297] rpmbuild: fixing the build errors with 2a905a8ae
+Have added a Makefile inside extras/quota to remove the
+No rule to make target error for quota/
+Change-Id: Ia3f6b3fa21a0de7eb3bdb31b3d205139df412aca
+fixes: bz#1719171
+Signed-off-by: Hari Gowtham <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <>
+Reviewed-by: Atin Mukherjee <>
+             | 1 +
+ extras/       | 6 +-----
+ extras/quota/ | 8 ++++++++
+ 3 files changed, 10 insertions(+), 5 deletions(-)
+ create mode 100644 extras/quota/
+diff --git a/ b/
+index f597b86..327733e 100644
+--- a/
++++ b/
+@@ -232,6 +232,7 @@ AC_CONFIG_FILES([Makefile
+                 extras/hook-scripts/reset/pre/Makefile
+                 extras/python/Makefile
+                 extras/snap_scheduler/Makefile
++                extras/quota/Makefile
+                 events/Makefile
+                 events/src/Makefile
+                 events/src/
+diff --git a/extras/ b/extras/
+index 8cbfda1..31ccdf5 100644
+--- a/extras/
++++ b/extras/
+@@ -12,7 +12,7 @@ EditorMode_DATA = glusterfs-mode.el glusterfs.vim
+ SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \
+           $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \
+-		  ganesha
++		  ganesha quota
+ confdir = $(sysconfdir)/glusterfs
+@@ -30,14 +30,11 @@ endif
+ scriptsdir = $(datadir)/glusterfs/scripts
+ scripts_SCRIPTS = thin-arbiter/
+-scripts_SCRIPTS += quota/
+ scripts_SCRIPTS +=
+ scripts_SCRIPTS +=
+ scripts_SCRIPTS += \
+-scripts_SCRIPTS += quota/
+-scripts_SCRIPTS += quota/
+ scripts_SCRIPTS +=
+ scripts_SCRIPTS +=
+@@ -56,7 +53,6 @@ EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf
+ \
+ group-distributed-virt \
+ 	thin-arbiter/thin-arbiter.vol thin-arbiter/ \
+-	quota/ quota/ quota/ \
+diff --git a/extras/quota/ b/extras/quota/
+new file mode 100644
+index 0000000..cdb6be1
+--- /dev/null
++++ b/extras/quota/
+@@ -0,0 +1,8 @@
++scriptsdir = $(datadir)/glusterfs/scripts
++scripts_SCRIPTS =
++scripts_SCRIPTS +=
diff --git a/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch b/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch
new file mode 100644
index 0000000..72daa15
--- /dev/null
+++ b/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch
@@ -0,0 +1,56 @@
+From f65f4739914cf317da7e5eaa3b5a06fe64f338c2 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <>
+Date: Sat, 21 Sep 2019 01:07:30 +0530
+Subject: [PATCH 298/302] geo-rep : fix sub-command during worker connection
+Geo-rep session for non-root going faulty.
+During worker start we do not construct slave url and use 'args.resource_remote'
+which is basically just slave-hostname.
+This works better for root session but fails in non-root session during
+ssh command.
+Using slave url solves this issue.
+Backport of:
+    >fixes: bz#1753928
+    >Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
+    >Signed-off-by: Sunny Kumar <>
+Upstream patch:
+BUG: 1754407
+Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
+Signed-off-by: Sunny Kumar <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ geo-replication/syncdaemon/ | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 8de7db2..f8515f2 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -73,8 +73,11 @@ def subcmd_worker(args):
+     Popen.init_errhandler()
+     fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+     local = GLUSTER("localhost", args.master)
+-    slavevol = args.slave.split("::")[-1]
+-    slavehost = args.resource_remote
++    slave_url, slavevol = args.slave.split("::")
++    if "@" not in slave_url:
++        slavehost = args.resource_remote
++    else:
++        slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
+     remote = SSH(slavehost, slavevol)
+     remote.connect_remote()
+     local.connect()
diff --git a/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch b/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch
new file mode 100644
index 0000000..9dea8cc
--- /dev/null
+++ b/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch
@@ -0,0 +1,156 @@
+From f293f7ac2f75c58d81da1229b484eb530b7083b5 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <>
+Date: Fri, 20 Sep 2019 09:39:12 +0530
+Subject: [PATCH 299/302] geo-rep: performance improvement while syncing
+ renames with existing gfid
+The bug[1] addresses issue of data inconsistency when handling RENAME with
+existing destination. This fix requires some performance tuning considering
+this issue occurs in heavy rename workload.
+If distribution count for master volume is one do not verify op's on
+master and go ahead with rename.
+The performance improvement with this patch can only be observed if
+master volume has distribution count one.
+Backport of:
+    >fixes: bz#1753857
+    >Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
+    >Signed-off-by: Sunny Kumar <>
+Upstream Patch:
+BUG: 1726000
+Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
+Signed-off-by: Sunny Kumar <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Atin Mukherjee <>
+ geo-replication/           |  5 +++++
+ geo-replication/syncdaemon/     |  2 ++
+ geo-replication/syncdaemon/    |  2 ++
+ geo-replication/syncdaemon/   | 13 +++++++++++--
+ geo-replication/syncdaemon/ | 11 +++++++++++
+ 5 files changed, 31 insertions(+), 2 deletions(-)
+diff --git a/geo-replication/ b/geo-replication/
+index 5ebd57a..9155cd8 100644
+--- a/geo-replication/
++++ b/geo-replication/
+@@ -23,6 +23,11 @@ configurable=false
+ type=int
+ value=1
+ [glusterd-workdir]
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index a4c6f32..6ae5269 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -134,6 +134,8 @@ def main():
+                    help="Directory where Gluster binaries exist on slave")
+     p.add_argument("--slave-access-mount", action="store_true",
+                    help="Do not lazy umount the slave volume")
++    p.add_argument("--master-dist-count", type=int,
++                   help="Master Distribution count")
+     # Status
+     p = sp.add_parser("status")
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 234f3f1..236afe7 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -37,6 +37,8 @@ def get_subvol_num(brick_idx, vol, hot):
+     tier = vol.is_tier()
+     disperse_count = vol.disperse_count(tier, hot)
+     replica_count = vol.replica_count(tier, hot)
++    distribute_count = vol.distribution_count(tier, hot)
++    gconf.setconfig("master-distribution-count", distribute_count)
+     if (tier and not hot):
+         brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index b16db60..189d8a1 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -377,6 +377,7 @@ class Server(object):
+     def entry_ops(cls, entries):
+         pfx = gauxpfx()
+         logging.debug('entries: %s' % repr(entries))
++        dist_count = rconf.args.master_dist_count
+         def entry_purge(op, entry, gfid, e, uid, gid):
+             # This is an extremely racy code and needs to be fixed ASAP.
+@@ -686,9 +687,15 @@ class Server(object):
+                                             raise
+                                 else:
+                                     raise
+-                        elif not matching_disk_gfid(gfid, en):
++                        elif not matching_disk_gfid(gfid, en) and dist_count > 1:
+                             collect_failure(e, EEXIST, uid, gid, True)
+                         else:
++                            # We are here which means matching_disk_gfid for
++                            # both source and destination has returned false
++                            # and distribution count for master vol is greater
+                            # than one. Which basically says both the source and
++                            # destination exist and not hardlinks.
++                            # So we are safe to go ahead with rename here.
+                             rename_with_disk_gfid_confirmation(gfid, entry, en,
+                                                                uid, gid)
+             if blob:
+@@ -1409,7 +1416,9 @@ class SSH(object):
+                 '--slave-gluster-log-level',
+                 gconf.get("slave-gluster-log-level"),
+                 '--slave-gluster-command-dir',
+-                gconf.get("slave-gluster-command-dir")]
++                gconf.get("slave-gluster-command-dir"),
++                '--master-dist-count',
++                str(gconf.get("master-distribution-count"))]
+         if gconf.get("slave-access-mount"):
+             args_to_slave.append('--slave-access-mount')
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 2ee10ac..aadaebd 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -926,6 +926,14 @@ class Volinfo(object):
+         else:
+             return int(self.get('disperseCount')[0].text)
++    def distribution_count(self, tier, hot):
++        if (tier and hot):
++            return int(self.get('hotBricks/hotdistCount')[0].text)
++        elif (tier and not hot):
++            return int(self.get('coldBricks/colddistCount')[0].text)
++        else:
++            return int(self.get('distCount')[0].text)
+     @property
+     @memoize
+     def hot_bricks(self):
+@@ -994,6 +1002,9 @@ class VolinfoFromGconf(object):
+     def disperse_count(self, tier, hot):
+         return gconf.get("master-disperse-count")
++    def distribution_count(self, tier, hot):
++        return gconf.get("master-distribution-count")
+     @property
+     @memoize
+     def hot_bricks(self):
diff --git a/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch b/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
new file mode 100644
index 0000000..62bac41
--- /dev/null
+++ b/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
@@ -0,0 +1,70 @@
+From 039a3f81209706261fc809eac94564e81a3377da Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <>
+Date: Wed, 25 Sep 2019 14:55:19 +0530
+Subject: [PATCH 300/302] cli: remove the warning displayed when remove brick
+ start issued
+remove-brick start command displays the below warning:
+It is recommended that remove-brick be run with cluster.force-migration
+option disabled to prevent possible data corruption. Doing so will ensure
+that files that receive writes during migration will not be migrated and
+will need to be manually copied after the remove-brick commit operation.
+Please check the value of the option and update accordingly.
+Do you want to continue with your current cluster.force-migration settings? (y/n)
+As we are not qualifying cluster.force-migration for 3.5.0,
+we should not display this message. So, removing it.
+BUG: 1755227
+Change-Id: I409f2059d43c5e867788f19d2ccb8d6d839520f7
+fixes: bz#1755227
+Signed-off-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Atin Mukherjee <>
+ cli/src/cli-cmd-parser.c |  2 --
+ cli/src/cli-cmd-volume.c | 11 -----------
+ 2 files changed, 13 deletions(-)
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 92ceb8e..4456a7b 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -2101,8 +2101,6 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+         wordcount--;
+         if (!strcmp("start", w)) {
+             command = GF_OP_CMD_START;
+-            if (question)
+-                *question = 1;
+         } else if (!strcmp("commit", w)) {
+             command = GF_OP_CMD_COMMIT;
+         } else if (!strcmp("stop", w)) {
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index a42e663..6b958bd 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -2088,17 +2088,6 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
+             "Remove-brick force will not migrate files from the "
+             "removed bricks, so they will no longer be available"
+             " on the volume.\nDo you want to continue?";
+-    } else if (command == GF_OP_CMD_START) {
+-        question =
+-            "It is recommended that remove-brick be run with"
+-            " cluster.force-migration option disabled to prevent"
+-            " possible data corruption. Doing so will ensure that"
+-            " files that receive writes during migration will not"
+-            " be migrated and will need to be manually copied"
+-            " after the remove-brick commit operation. Please"
+-            " check the value of the option and update accordingly."
+-            " \nDo you want to continue with your current"
+-            " cluster.force-migration settings?";
+     }
+     if (!brick_count) {
diff --git a/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch b/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch
new file mode 100644
index 0000000..270a0d7
--- /dev/null
+++ b/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch
@@ -0,0 +1,61 @@
+From 913a0dc8f1eaa2fb18a6ebd6fcf66f46b48039f1 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Wed, 18 Sep 2019 19:11:33 +0530
+Subject: [PATCH 301/302] posix: Brick is going down unexpectedly
+Problem: In brick_mux environment, while multiple volumes are
+         created (1-1000) sometimes brick is going down due to
+         health_check thread failure
+Solution: Ignore EAGAIN error in health_check thread code to
+          avoid the issue
+> Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
+> Fixes: bz#1751907
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry picked from commit c4d926900dc36f71c04b3f65ceca5150ce0e8c81)
+> (Reviewed on upstream link
+Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
+BUG: 1731826
+Signed-off-by: Mohit Agrawal <>
+Tested-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/storage/posix/src/posix-helpers.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 6a1a35c..35dd3b6 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2108,14 +2108,20 @@ out:
+     if (fd != -1) {
+         sys_close(fd);
+     }
+     if (ret && file_path[0]) {
+         gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED,
+                "%s() on %s returned ret is %d error is %s", op, file_path, ret,
+                ret != -1 ? strerror(ret) : strerror(op_errno));
+-                 "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
+-                 file_path, strerror(op_errno), priv->hostname, priv->base_path,
+-                 timeout);
++        if ((op_errno == EAGAIN) || (ret == EAGAIN)) {
++            ret = 0;
++        } else {
++            gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
++                     "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
++                     file_path, strerror(op_errno), priv->hostname,
++                     priv->base_path, timeout);
++        }
+     }
+     return ret;
+ }
diff --git a/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch b/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
new file mode 100644
index 0000000..7972767
--- /dev/null
+++ b/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
@@ -0,0 +1,67 @@
+From fb1d503791c874296afab0cd7be59b6865340d72 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <>
+Date: Wed, 25 Sep 2019 11:56:35 +0200
+Subject: [PATCH 302/302] cluster/ec: prevent filling shd log with "table not
+ found" messages
+When self-heal daemon receives an inodelk contention notification, it tries
+to locate the related inode using inode_find() and the inode table owned by
+top-most xlator, which in this case doesn't have any inode table. This causes
+many messages to be logged by inode_find() function because the inode table
+passed is NULL.
+This patch prevents this by making sure the inode table is not NULL before
+calling inode_find().
+Upstream patch:
+> Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
+> Upstream patch link:
+> Fixes: bz#1755344
+> Signed-off-by: Xavi Hernandez <>
+Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
+BUG: 1754790
+Signed-off-by: Xavi Hernandez <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/cluster/ec/src/ec.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 19094c4..3f31c74 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -463,6 +463,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+     struct gf_upcall_cache_invalidation *ci = NULL;
+     struct gf_upcall_inodelk_contention *lc = NULL;
+     inode_t *inode;
++    inode_table_t *table;
+     switch (upcall->event_type) {
+@@ -476,8 +477,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+                 /* The lock is not owned by EC, ignore it. */
+                 return _gf_true;
+             }
+-            inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
+-                               upcall->gfid);
++            table = ((xlator_t *)ec->xl->graph->top)->itable;
++            if (table == NULL) {
++                /* Self-heal daemon doesn't have an inode table on the top
++                 * xlator because it doesn't need it. In this case we should
++                 * use the inode table managed by EC itself where all inodes
++                 * being healed should be present. However self-heal doesn't
++                 * use eager-locking and inodelk's are already released as
++                 * soon as possible. In this case we can safely ignore these
++                 * notifications. */
++                return _gf_false;
++            }
++            inode = inode_find(table, upcall->gfid);
+             /* If inode is not found, it means that it's already released,
+              * so we can ignore it. Probably it has been released and
+              * destroyed while the contention notification was being sent.
diff --git a/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch b/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch
new file mode 100644
index 0000000..8641353
--- /dev/null
+++ b/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch
@@ -0,0 +1,45 @@
+From ae4f538065d26a277e38810c6eef18c0312cd1f3 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Thu, 26 Sep 2019 17:52:30 +0530
+Subject: [PATCH 303/304] posix: heketidbstorage bricks go down during PVC
+ creation
+Problem: In OCS environment heketidbstorage is detached due
+         to health_check thread failure. Sometimes aio_write
+         does not finish successfully within the default health-check-timeout
+         limit and the brick is detached.
+Solution: To avoid the issue increase default timeout to 20s
+> Change-Id: Idff283d5713da571f9d20a6b296274f69c3e5b7b
+> Fixes: bz#1755900
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry picked from commit c6df9e962483bac5bfcd8916318b19040387ce81)
+> (Reviewed on upstream link
+Change-Id: Idff283d5713da571f9d20a6b296274f69c3e5b7b
+BUG: 1752713
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Atin Mukherjee <>
+ xlators/storage/posix/src/posix-common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index 69857d9..2cb58ba 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -1257,7 +1257,7 @@ struct volume_options posix_options[] = {
+     {.key = {"health-check-timeout"},
+      .type = GF_OPTION_TYPE_INT,
+      .min = 0,
+-     .default_value = "10",
++     .default_value = "20",
+      .validate = GF_OPT_VALIDATE_MIN,
+      .description =
+          "Interval in seconds to wait aio_write finish for health check, "
diff --git a/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch b/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch
new file mode 100644
index 0000000..5f16e0a
--- /dev/null
+++ b/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch
@@ -0,0 +1,194 @@
+From ad233c1b3abdfe2bdfd1eacc83b5f84b7afa6b46 Mon Sep 17 00:00:00 2001
+From: N Balachandran <>
+Date: Tue, 1 Oct 2019 17:37:15 +0530
+Subject: [PATCH 304/304] cluster/dht: Correct fd processing loop
+The fd processing loops in the
+dht_migration_complete_check_task and the
+dht_rebalance_inprogress_task functions were unsafe
+and could cause an open to be sent on an already freed
+fd. This has been fixed.
+> Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540
+> Fixes: bz#1757399
+> Signed-off-by: N Balachandran <>
+> (Cherry picked from commit 9b15867070b0cc241ab165886292ecffc3bc0aed)
+> (Reviewed on upstream link
+Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540
+BUG: 1756325
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/cluster/dht/src/dht-helper.c | 84 ++++++++++++++++++++++++++----------
+ 1 file changed, 62 insertions(+), 22 deletions(-)
+diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
+index 4c57e0d..1e9fee0 100644
+--- a/xlators/cluster/dht/src/dht-helper.c
++++ b/xlators/cluster/dht/src/dht-helper.c
+@@ -1261,6 +1261,7 @@ dht_migration_complete_check_task(void *data)
+     fd_t *tmp = NULL;
+     uint64_t tmp_miginfo = 0;
+     dht_migrate_info_t *miginfo = NULL;
++    gf_boolean_t skip_open = _gf_false;
+     int open_failed = 0;
+     this = THIS;
+@@ -1399,24 +1400,34 @@ dht_migration_complete_check_task(void *data)
+      * the loop will cause the destruction of the fd. So we need to
+      * iterate the list safely because iter_fd cannot be trusted.
+      */
+-    list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
+-    {
+-        if (fd_is_anonymous(iter_fd))
+-            continue;
+-        if (dht_fd_open_on_dst(this, iter_fd, dst_node))
+-            continue;
++    iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
++    while (&iter_fd->inode_list != (&inode->fd_list)) {
++        if (fd_is_anonymous(iter_fd) ||
++            (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
++            if (!tmp) {
++                iter_fd = list_entry(iter_fd->, typeof(*iter_fd),
++                                     inode_list);
++                continue;
++            }
++            skip_open = _gf_true;
++        }
+         /* We need to release the inode->lock before calling
+          * syncop_open() to avoid possible deadlocks. However this
+          * can cause the iter_fd to be released by other threads.
+          * To avoid this, we take a reference before releasing the
+          * lock.
+          */
+-        __fd_ref(iter_fd);
++        fd_ref(iter_fd);
+         UNLOCK(&inode->lock);
++        if (tmp) {
++            fd_unref(tmp);
++            tmp = NULL;
++        }
++        if (skip_open)
++            goto next;
+         /* flags for open are stripped down to allow following the
+          * new location of the file, otherwise we can get EEXIST or
+          * truncate the file again as rebalance is moving the data */
+@@ -1438,9 +1449,11 @@ dht_migration_complete_check_task(void *data)
+             dht_fd_ctx_set(this, iter_fd, dst_node);
+         }
+-        fd_unref(iter_fd);
++    next:
+         LOCK(&inode->lock);
++        skip_open = _gf_false;
++        tmp = iter_fd;
++        iter_fd = list_entry(tmp->, typeof(*tmp), inode_list);
+     }
+     SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+@@ -1453,6 +1466,10 @@ dht_migration_complete_check_task(void *data)
+ unlock:
+     UNLOCK(&inode->lock);
++    if (tmp) {
++        fd_unref(tmp);
++        tmp = NULL;
++    }
+ out:
+     if (dict) {
+@@ -1534,6 +1551,7 @@ dht_rebalance_inprogress_task(void *data)
+     int open_failed = 0;
+     uint64_t tmp_miginfo = 0;
+     dht_migrate_info_t *miginfo = NULL;
++    gf_boolean_t skip_open = _gf_false;
+     this = THIS;
+     frame = data;
+@@ -1654,24 +1672,40 @@ dht_rebalance_inprogress_task(void *data)
+      * the loop will cause the destruction of the fd. So we need to
+      * iterate the list safely because iter_fd cannot be trusted.
+      */
+-    list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
+-    {
+-        if (fd_is_anonymous(iter_fd))
+-            continue;
+-        if (dht_fd_open_on_dst(this, iter_fd, dst_node))
+-            continue;
++    iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
++    while (&iter_fd->inode_list != (&inode->fd_list)) {
+         /* We need to release the inode->lock before calling
+          * syncop_open() to avoid possible deadlocks. However this
+          * can cause the iter_fd to be released by other threads.
+          * To avoid this, we take a reference before releasing the
+          * lock.
+          */
+-        __fd_ref(iter_fd);
++        if (fd_is_anonymous(iter_fd) ||
++            (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
++            if (!tmp) {
++                iter_fd = list_entry(iter_fd->, typeof(*iter_fd),
++                                     inode_list);
++                continue;
++            }
++            skip_open = _gf_true;
++        }
++        /* Yes, this is ugly but there isn't a cleaner way to do this
++         * the fd_ref is an atomic increment so not too bad. We want to
++         * reduce the number of inode locks and unlocks.
++         */
++        fd_ref(iter_fd);
+         UNLOCK(&inode->lock);
++        if (tmp) {
++            fd_unref(tmp);
++            tmp = NULL;
++        }
++        if (skip_open)
++            goto next;
+         /* flags for open are stripped down to allow following the
+          * new location of the file, otherwise we can get EEXIST or
+          * truncate the file again as rebalance is moving the data */
+@@ -1692,9 +1726,11 @@ dht_rebalance_inprogress_task(void *data)
+             dht_fd_ctx_set(this, iter_fd, dst_node);
+         }
+-        fd_unref(iter_fd);
++    next:
+         LOCK(&inode->lock);
++        skip_open = _gf_false;
++        tmp = iter_fd;
++        iter_fd = list_entry(tmp->, typeof(*tmp), inode_list);
+     }
+     SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+@@ -1702,6 +1738,10 @@ dht_rebalance_inprogress_task(void *data)
+ unlock:
+     UNLOCK(&inode->lock);
++    if (tmp) {
++        fd_unref(tmp);
++        tmp = NULL;
++    }
+     if (open_failed) {
+         ret = -1;
+         goto out;
diff --git a/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch b/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch
new file mode 100644
index 0000000..a1e77c6
--- /dev/null
+++ b/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch
@@ -0,0 +1,56 @@
+From 90e52f3b44da0ed05e35ebd474e284d45794b0d6 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <>
+Date: Thu, 10 Oct 2019 20:40:49 +0530
+Subject: [PATCH 305/307] glusterd: rebalance start should fail when quorum is
+ not met
+rebalance start should not succeed if quorum is not met.
+this patch adds a condition to check whether quorum is met
+in pre-validation stage.
+> fixes: bz#1760467
+> Change-Id: Ic7d0d08f69e4bc6d5e7abae713ec1881531c8ad4
+> Signed-off-by: Sanju Rakonde <>
+upstream patch:
+BUG: 1760261
+Change-Id: Ic7d0d08f69e4bc6d5e7abae713ec1881531c8ad4
+Signed-off-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tests/bugs/glusterd/quorum-validation.t   | 2 ++
+ xlators/mgmt/glusterd/src/glusterd-mgmt.c | 3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t
+index ff46729..3cc3351 100644
+--- a/tests/bugs/glusterd/quorum-validation.t
++++ b/tests/bugs/glusterd/quorum-validation.t
+@@ -34,6 +34,8 @@ TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2
+ TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start
+ TEST ! $CLI_1 volume set $V0 barrier enable
++#quorum is not met, rebalance/profile start should fail
++TEST ! $CLI_1 volume rebalance $V0 start
+ TEST ! $CLI_1 volume profile $V0 start
+ #bug-1690753 - Volume stop when quorum not met is successful
+diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+index ec78913..a4915f3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+@@ -1059,7 +1059,8 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
+         goto out;
+     }
+-    if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME) {
++    if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME ||
++        op == GD_OP_REBALANCE) {
+         ret = glusterd_validate_quorum(this, op, req_dict, op_errstr);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
diff --git a/SOURCES/0306-cli-fix-distCount-value.patch b/SOURCES/0306-cli-fix-distCount-value.patch
new file mode 100644
index 0000000..0e8b9f2
--- /dev/null
+++ b/SOURCES/0306-cli-fix-distCount-value.patch
@@ -0,0 +1,43 @@
+From 167980565e1ab56989b25fe6aa0203aeb7970c8b Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <>
+Date: Sun, 6 Oct 2019 19:05:28 +0530
+Subject: [PATCH 306/307] cli: fix distCount value
+gluster volume info --xml is displaying wrong distCount
+value. This patch addresses it.
+> fixes: bz#1758878
+> Change-Id: I64081597e06018361e6524587b433b0c4b2a0260
+> Signed-off-by: Sanju Rakonde <>
+upstream patch:
+BUG: 1758618
+Change-Id: I64081597e06018361e6524587b433b0c4b2a0260
+Signed-off-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ cli/src/cli-xml-output.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
+index b417bb8..006e2fb 100644
+--- a/cli/src/cli-xml-output.c
++++ b/cli/src/cli-xml-output.c
+@@ -2548,8 +2548,9 @@ cli_xml_output_vol_info(cli_local_t *local, dict_t *dict)
+         ret = dict_get_int32(dict, key, &dist_count);
+         if (ret)
+             goto out;
+-        ret = xmlTextWriterWriteFormatElement(
+-            local->writer, (xmlChar *)"distCount", "%d", dist_count);
++        ret = xmlTextWriterWriteFormatElement(local->writer,
++                                              (xmlChar *)"distCount", "%d",
++                                              (brick_count / dist_count));
+         XML_RET_CHECK_AND_GOTO(ret, out);
+         snprintf(key, sizeof(key), "volume%d.stripe_count", i);
diff --git a/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch b/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch
new file mode 100644
index 0000000..7a85b50
--- /dev/null
+++ b/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch
@@ -0,0 +1,42 @@
+From be9695391f39fe6eb1d157f6bfd018116d1ee42b Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <>
+Date: Mon, 30 Sep 2019 13:14:06 +0530
+Subject: [PATCH 307/307] ssl: fix RHEL8 regression failure
+This test is failing with
+"SSL routines:SSL_CTX_use_certificate:ee key too small"
+in RHEL8. This change is made according to
+> updates: bz#1756900
+> Change-Id: Ib436372c3bd94bcf7324976337add7da4088b3d5
+> Signed-off-by: Sanju Rakonde <>
+upstream patch:
+BUG: 1704562
+Change-Id: Ib436372c3bd94bcf7324976337add7da4088b3d5
+Signed-off-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tests/bugs/cli/bug-1320388.t | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
+index f5ffcbe..8e5d77b 100755
+--- a/tests/bugs/cli/bug-1320388.t
++++ b/tests/bugs/cli/bug-1320388.t
+@@ -21,7 +21,7 @@ cleanup;
+ rm -f $SSL_BASE/glusterfs.*
+ touch "$GLUSTERD_WORKDIR"/secure-access
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 3072
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
diff --git a/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch b/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch
new file mode 100644
index 0000000..adbeb43
--- /dev/null
+++ b/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch
@@ -0,0 +1,347 @@
+From 27f799563c1c2c1986662ed4a3a83d834c04fd98 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Mon, 14 Oct 2019 15:42:31 +0530
+Subject: [PATCH 308/308] dht: Rebalance causing IO Error - File descriptor in
+ bad state
+Problem : When a file is migrated, dht attempts to re-open all open
+          fds on the new cached subvol. Earlier, if dht had not opened the fd,
+          the client xlator would be unable to find the remote fd and would
+          fall back to using an anon fd for the fop. That behavior changed with
+, causing fops to fail
+          with EBADFD if the fd was not available on the cached subvol.
+          The client xlator returns EBADFD if the remote fd is not found but
+          dht only checks for EBADF before re-opening fds on the new cached subvol.
+Solution: Handle EBADFD at dht code path to avoid the issue
+> Change-Id: I43c51995cdd48d05b12e4b2889c8dbe2bb2a72d8
+> Fixes: bz#1758579
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry pick from commit 9314a9fbf487614c736cf6c4c1b93078d37bb9df)
+> (Reviewed on upstream link
+Change-Id: I43c51995cdd48d05b12e4b2889c8dbe2bb2a72d8
+BUG: 1758432
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/cluster/dht/src/dht-common.c      | 27 +++++++++++++++++---
+ xlators/cluster/dht/src/dht-common.h      | 19 ++++++++++++++
+ xlators/cluster/dht/src/dht-helper.c      | 29 +++++++++++++++++++++
+ xlators/cluster/dht/src/dht-inode-read.c  | 42 +++++++++++++++++++++++++++----
+ xlators/cluster/dht/src/dht-inode-write.c | 16 ++++++------
+ 5 files changed, 116 insertions(+), 17 deletions(-)
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 99cccd6..37952ba 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -53,6 +53,17 @@ dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+ int
+ dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc);
++/* Check whether the fop failed with EBADF/EBADFD and the fd is not yet re-checked */
++dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
++    if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
++        !(local->fd_checked)) {
++        return 1;
++    }
++    return 0;
+ /* Sets the blocks and size values to fixed values. This is to be called
+  * only for dirs. The caller is responsible for checking the type
+  */
+@@ -4529,6 +4540,7 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     int this_call_cnt = 0;
+     dht_local_t *local = NULL;
+     dht_conf_t *conf = NULL;
++    int ret = 0;
+     VALIDATE_OR_GOTO(frame, err);
+     VALIDATE_OR_GOTO(frame->local, err);
+@@ -4537,6 +4549,13 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     conf = this->private;
+     local = frame->local;
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++        ret = dht_check_and_open_fd_on_subvol(this, frame);
++        if (ret)
++            goto err;
++        return 0;
++    }
+     LOCK(&frame->lock);
+     {
+         if (!xattr || (op_ret == -1)) {
+@@ -5204,8 +5223,8 @@ dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     local->op_errno = op_errno;
+-    if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 &&
+-        (op_errno == EBADF) && !(local->fd_checked)) {
++    if ((local->fop == GF_FOP_FSETXATTR) &&
++        dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -5929,8 +5948,8 @@ dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     local->op_errno = op_errno;
+-    if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) &&
+-        (op_errno == EBADF) && !(local->fd_checked)) {
++    if ((local->fop == GF_FOP_FREMOVEXATTR) &&
++        dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index c516271..ce11f02 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -1230,6 +1230,22 @@ dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                 struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+ int
++dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                 int32_t op_ret, int32_t op_errno, dict_t *xdata);
++dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++                 int op_errno, dict_t *xattr, dict_t *xdata);
++dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                       int32_t op_ret, int32_t op_errno, dict_t *dict,
++                       dict_t *xdata);
++dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                 int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
+ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
+ void
+@@ -1525,4 +1541,7 @@ int
+ dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+               dict_t *xdata);
++dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+ #endif /* _DHT_H */
+diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
+index 1e9fee0..4f7370d 100644
+--- a/xlators/cluster/dht/src/dht-helper.c
++++ b/xlators/cluster/dht/src/dht-helper.c
+@@ -366,6 +366,23 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
+             break;
++        case GF_FOP_FXATTROP:
++            STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
++                       subvol->fops->fxattrop, local->fd,
++                       local->rebalance.flags, local->rebalance.xattr,
++                       local->xattr_req);
++            break;
++        case GF_FOP_FGETXATTR:
++            STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
++                       local->fd, local->key, NULL);
++            break;
++        case GF_FOP_FINODELK:
++            STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
++                       local->key, local->fd, local->rebalance.lock_cmd,
++                       &local->rebalance.flock, local->xattr_req);
++            break;
+         default:
+             gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
+                    "Unknown FOP on fd (%p) on file %s @ %s", fd,
+@@ -429,6 +446,18 @@ handle_err:
+             DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+             break;
++        case GF_FOP_FXATTROP:
++            DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
++            break;
++        case GF_FOP_FGETXATTR:
++            DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
++            break;
++        case GF_FOP_FINODELK:
++            DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
++            break;
+         default:
+             gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
+                    "Unknown FOP on fd (%p) on file %s @ %s", fd,
+diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
+index cacfe35..0c209a5 100644
+--- a/xlators/cluster/dht/src/dht-inode-read.c
++++ b/xlators/cluster/dht/src/dht-inode-read.c
+@@ -162,8 +162,8 @@ dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     local = frame->local;
+     prev = cookie;
+-    if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) && (op_errno == EBADF) &&
+-        !(local->fd_checked)) {
++    if ((local->fop == GF_FOP_FSTAT) &&
++        dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -431,7 +431,7 @@ dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     if (local->call_cnt != 1)
+         goto out;
+-    if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -703,7 +703,7 @@ dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     if (local->call_cnt != 1)
+         goto out;
+-    if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -820,7 +820,7 @@ dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     local->op_errno = op_errno;
+-    if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -1223,6 +1223,13 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     if (local->call_cnt != 1)
+         goto out;
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++        ret = dht_check_and_open_fd_on_subvol(this, frame);
++        if (ret)
++            goto out;
++        return 0;
++    }
+     ret = dht_read_iatt_from_xdata(this, xdata, &stbuf);
+     if ((!op_ret) && (ret)) {
+@@ -1535,8 +1542,26 @@ dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ {
++    dht_local_t *local = NULL;
++    int ret = 0;
++    GF_VALIDATE_OR_GOTO("dht", frame, out);
++    GF_VALIDATE_OR_GOTO("dht", this, out);
++    GF_VALIDATE_OR_GOTO("dht", frame->local, out);
++    local = frame->local;
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++        ret = dht_check_and_open_fd_on_subvol(this, frame);
++        if (ret)
++            goto out;
++        return 0;
++    }
+     dht_lk_inode_unref(frame, op_ret);
+     DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+     return 0;
+ }
+@@ -1574,6 +1599,13 @@ dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+             if (ret)
+                     goto err;
+     */
++    local->rebalance.flock = *lock;
++    local->rebalance.lock_cmd = cmd;
++    local->key = gf_strdup(volume);
++    if (xdata)
++        local->xattr_req = dict_ref(xdata);
+     STACK_WIND(frame, dht_finodelk_cbk, lock_subvol,
+                lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata);
+diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
+index b26b705..b6b349d 100644
+--- a/xlators/cluster/dht/src/dht-inode-write.c
++++ b/xlators/cluster/dht/src/dht-inode-write.c
+@@ -49,7 +49,7 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+      * We only check once as this could be a valid bad fd error.
+      */
+-    if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -262,8 +262,8 @@ dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+      * We only check once as this could actually be a valid error.
+      */
+-    if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) &&
+-        ((op_errno == EBADF) || (op_errno == EINVAL)) && !(local->fd_checked)) {
++    if ((local->fop == GF_FOP_FTRUNCATE) &&
++        dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -489,7 +489,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+      * We only check once as this could actually be a valid error.
+      */
+-    if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -666,7 +666,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+      * and a lookup updated the cached subvol in the inode ctx.
+      * We only check once as this could actually be a valid error.
+      */
+-    if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -838,7 +838,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+      * and a lookup updated the cached subvol in the inode ctx.
+      * We only check once as this could actually be a valid error.
+      */
+-    if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -1005,8 +1005,8 @@ dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     local->op_errno = op_errno;
+-    if ((local->fop == GF_FOP_FSETATTR) && (op_ret == -1) &&
+-        (op_errno == EBADF) && !(local->fd_checked)) {
++    if ((local->fop == GF_FOP_FSETATTR) &&
++        dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
diff --git a/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch b/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch
new file mode 100644
index 0000000..6ae359e
--- /dev/null
+++ b/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch
@@ -0,0 +1,240 @@
+From 2b1738402276f43d7cb64542b74cb50145e46d77 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Wed, 16 Oct 2019 14:25:47 +0530
+Subject: [PATCH 309/309] geo-rep: Fix config upgrade on non-participating node
+After upgrade, if the config files are in the old format, they
+get migrated to the new format. The monitor process migrates them.
+Since the monitor doesn't run on nodes where bricks are not
+hosted, the files don't get migrated there. So this patch fixes
+the config upgrade on nodes which don't host bricks.
+The upgrade now also happens on config get, set, or reset.
+Backport of:
+ > Patch:
+ > Change-Id: Ibade2f2310b0f3affea21a3baa1ae0eb71162cba
+ > Signed-off-by: Kotresh HR <>
+ > fixes: bz#1762220
+Change-Id: Ibade2f2310b0f3affea21a3baa1ae0eb71162cba
+Signed-off-by: Kotresh HR <>
+BUG: 1760939
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ geo-replication/syncdaemon/     |   3 +-
+ tests/00-geo-rep/georep-config-upgrade.t | 132 +++++++++++++++++++++++++++++++
+ tests/00-geo-rep/gsyncd.conf.old         |  47 +++++++++++
+ 3 files changed, 181 insertions(+), 1 deletion(-)
+ create mode 100644 tests/00-geo-rep/georep-config-upgrade.t
+ create mode 100644 tests/00-geo-rep/gsyncd.conf.old
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 6ae5269..7b48d82 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -255,7 +255,8 @@ def main():
+     if args.subcmd == "slave":
+         override_from_args = True
+-    if args.subcmd == "monitor":
++    if config_file is not None and \
++       args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]:
+         ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"])
+         if ret is not None:
+            gconf.config_upgrade(config_file, ret)
+diff --git a/tests/00-geo-rep/georep-config-upgrade.t b/tests/00-geo-rep/georep-config-upgrade.t
+new file mode 100644
+index 0000000..557461c
+--- /dev/null
++++ b/tests/00-geo-rep/georep-config-upgrade.t
+@@ -0,0 +1,132 @@
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../geo-rep.rc
++. $(dirname $0)/../env.rc
++OLD_CONFIG_PATH=$(dirname $0)/gsyncd.conf.old
++##Cleanup and start glusterd
++TEST glusterd;
++TEST pidof glusterd
++GEOREP_CLI="$CLI volume geo-replication"
++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
++TEST $CLI volume start $GMV0
++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
++TEST $CLI volume start $GSV0
++##Create, start and mount meta_volume
++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
++TEST $CLI volume start $META_VOL
++TEST mkdir -p $META_MNT
++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
++##Mount master
++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
++##Mount slave
++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
++#Create geo-rep session
++TEST create_georep_session $master $slave
++#Config gluster-command-dir
++TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
++#Config slave-gluster-command-dir
++TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
++TEST $GEOREP_CLI $master $slave config use_meta_volume true
++#Wait for common secret pem file to be created
++EXPECT_WITHIN $GEO_REP_TIMEOUT  0 check_common_secret_file
++#Verify the keys are distributed
++EXPECT_WITHIN $GEO_REP_TIMEOUT  0 check_keys_distributed
++TEST $GEOREP_CLI $master $slave start
++EXPECT_WITHIN $GEO_REP_TIMEOUT  2 check_status_num_rows "Active"
++EXPECT_WITHIN $GEO_REP_TIMEOUT  2 check_status_num_rows "Passive"
++TEST $GEOREP_CLI $master $slave config sync-method tarssh
++#Stop Geo-rep
++TEST $GEOREP_CLI $master $slave stop
++#Copy old config file
++mv -f $WORKING_DIR/gsyncd.conf $WORKING_DIR/
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++#Check if config get all updates config_file
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave config
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++#Check if config get updates config_file
++rm -f $WORKING_DIR/gsyncd.conf
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave config sync-method
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++#Check if config set updates config_file
++rm -f $WORKING_DIR/gsyncd.conf
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave config sync-xattrs false
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++#Check if config reset updates config_file
++rm -f $WORKING_DIR/gsyncd.conf
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave config \!sync-xattrs
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++#Check if geo-rep start updates config_file
++rm -f $WORKING_DIR/gsyncd.conf
++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
++TEST $GEOREP_CLI $master $slave start
++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
++#Stop geo-rep
++TEST $GEOREP_CLI $master $slave stop
++#Delete Geo-rep
++TEST $GEOREP_CLI $master $slave delete
++#Cleanup authorized keys
++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+diff --git a/tests/00-geo-rep/gsyncd.conf.old b/tests/00-geo-rep/gsyncd.conf.old
+new file mode 100644
+index 0000000..519acaf
+--- /dev/null
++++ b/tests/00-geo-rep/gsyncd.conf.old
+@@ -0,0 +1,47 @@
++version = 2.0
++[peersrx . .]
++remote_gsyncd = /usr/local/libexec/glusterfs/gsyncd
++georep_session_working_dir = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/
++ssh_command_tar = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/tar_ssh.pem
++changelog_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}-changes.log
++working_dir = /var/lib/misc/glusterfsd/${mastervol}/${eSlave}
++ignore_deletes = false
++pid_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/
++state_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
++gluster_command_dir = /usr/local/sbin/
++gluster_params = aux-gfid-mount acl
++ssh_command = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/secret.pem
++state_detail_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status
++state_socket_unencoded = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket
++socketdir = /var/run/gluster
++log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}.log
++gluster_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}.gluster.log
++special_sync_mode = partial
++change_detector = changelog
++pid-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/
++state-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
++peersrx . . = 0
++peersrx . %5essh%3a = 2
++peersrx . = 3
++peers master slave = 4
++[peersrx . %5Essh%3A]
++remote_gsyncd = /nonexistent/gsyncd
++[peersrx .]
++gluster_command_dir = /usr/local/sbin/
++gluster_params = aux-gfid-mount acl
++log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log
++log_file_mbr = /var/log/glusterfs/geo-replication-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log
++gluster_log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log
++[peers master slave]
++session_owner = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
++master.stime_xattr_name = trusted.glusterfs.0732cbd1-3ec5-4920-ab0d-aa5a896d5214.07a9005c-ace4-4f67-b3c0-73938fb236c4.stime
++volume_id = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
++use_tarssh = true
diff --git a/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch b/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch
new file mode 100644
index 0000000..a38a4aa
--- /dev/null
+++ b/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch
@@ -0,0 +1,284 @@
+From c2decfb59bd1be7cd2b0d792fd2ca2627913638a Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <>
+Date: Tue, 24 Sep 2019 18:22:13 +0530
+Subject: [PATCH 310/313] tests : test case for non-root geo-rep setup
+Added test case for non-root geo-rep setup.
+Backport of:
+ > Patch:
+ > Change-Id: Ib6ebee79949a9f61bdc5c7b5e11b51b262750e98
+ > fixes: bz#1717827
+ > Signed-off-by: Sunny Kumar <>
+Change-Id: Ib6ebee79949a9f61bdc5c7b5e11b51b262750e98
+BUG: 1763412
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tests/00-geo-rep/00-georep-verify-non-root-setup.t | 251 +++++++++++++++++++++
+ 1 file changed, 251 insertions(+)
+ create mode 100644 tests/00-geo-rep/00-georep-verify-non-root-setup.t
+diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+new file mode 100644
+index 0000000..e753c1f
+--- /dev/null
++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+@@ -0,0 +1,251 @@
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../geo-rep.rc
++. $(dirname $0)/../env.rc
++### Basic Non-root geo-rep setup test with Distribute Replicate volumes
++##Cleanup and start glusterd
++TEST glusterd;
++TEST pidof glusterd
++GEOREP_CLI="$CLI volume geo-replication"
++##User and group to be used for non-root geo-rep setup
++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
++TEST $CLI volume start $GMV0
++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
++TEST $CLI volume start $GSV0
++##Mount master
++#TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
++##Mount slave
++#TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
++function distribute_key_non_root()
++    ${GLUSTER_LIBEXECDIR}/ $usr $master $slave_vol
++    echo $?
++function check_status_non_root()
++    local search_key=$1
++    $GEOREP_CLI $master $slave_url status | grep -F "$search_key" | wc -l
++function check_and_clean_group()
++        if [ $(getent group $grp) ]
++        then
++                groupdel $grp;
++                echo $?
++        else
++                echo 0
++        fi
++function clean_lock_files()
++        if [ ! -f /etc/passwd.lock ];
++        then
++                rm -rf /etc/passwd.lock;
++        fi
++        if [ ! -f /etc/group.lock ];
++        then
++                rm -rf /etc/group.lock;
++        fi
++        if [ ! -f /etc/shadow.lock ];
++        then
++                rm -rf /etc/shadow.lock;
++        fi
++        if [ ! -f /etc/gshadow.lock ];
++        then
++                rm -rf /etc/gshadow.lock;
++        fi
++##Create ggroup group
++##First test if group exists and then create new one
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
++##cleanup *.lock files
++TEST /usr/sbin/groupadd $grp
++##Create non-root user and assign it to newly created group
++TEST /usr/sbin/useradd -G $grp $usr
++##Modify password for non-root user to have control over distributing ssh-key
++echo "$usr:pass" | chpasswd
++##Set up mountbroker root
++TEST gluster-mountbroker setup /var/mountbroker-root $grp
++##Associate volume and non-root user to the mountbroker
++TEST gluster-mountbroker add $slave_vol $usr
++##Check ssh setting for clear text passwords
++sed '/^PasswordAuthentication /{s/no/yes/}' -i /etc/ssh/sshd_config && grep '^PasswordAuthentication ' /etc/ssh/sshd_config && service sshd restart
++##Restart glusterd to reflect mountbroker changes
++TEST killall_gluster;
++TEST glusterd;
++TEST pidof glusterd;
++##Create, start and mount meta_volume
++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
++TEST $CLI volume start $META_VOL
++TEST mkdir -p $META_MNT
++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
++##Mount master
++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
++##Mount slave
++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
++## Check status of mount-broker
++TEST gluster-mountbroker status
++##Setup password-less ssh for non-root user
++#sshpass -p "pass" ssh-copy-id -i ~/.ssh/ $ssh_url
++##Run ssh agent
++eval "$(ssh-agent -s)"
++##Create a temp script to echo the SSH password, used by SSH_ASKPASS
++echo "${PASS}"
++chmod u+x ${SSH_ASKPASS_SCRIPT}
++##set no display, necessary for ssh to use with setsid and SSH_ASKPASS
++#export DISPLAY=:0
++DISPLAY=: setsid ssh-copy-id -i ~/.ssh/ $ssh_url
++##Setting up PATH for gluster binaries in case of source installation
++##ssh -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $ssh_url "echo "export PATH=$PATH:/usr/local/sbin" >> ~/.bashrc"
++##Creating secret pem pub file
++TEST gluster-georep-sshkey generate
++##Create geo-rep non-root setup
++TEST $GEOREP_CLI $master $slave_url create push-pem
++#Config gluster-command-dir
++TEST $GEOREP_CLI $master $slave_url config gluster-command-dir ${GLUSTER_CMD_DIR}
++#Config slave-gluster-command-dir
++TEST $GEOREP_CLI $master $slave_url config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
++## Test for key distribution
++EXPECT_WITHIN $GEO_REP_TIMEOUT  0 distribute_key_non_root
++##Wait for common secret pem file to be created
++EXPECT_WITHIN $GEO_REP_TIMEOUT  0 check_common_secret_file
++TEST $GEOREP_CLI $master $slave config use_meta_volume true
++TEST $GEOREP_CLI $master $slave_url start
++## Meta volume is enabled so looking for 2 Active and 2 Passive sessions
++EXPECT_WITHIN $GEO_REP_TIMEOUT  2 check_status_non_root "Active"
++EXPECT_WITHIN $GEO_REP_TIMEOUT  2 check_status_non_root "Passive"
++#Pause geo-replication session
++TEST $GEOREP_CLI  $master $slave_url pause
++#Resume geo-replication session
++TEST $GEOREP_CLI  $master $slave_url resume
++#Validate failure of volume stop when geo-rep is running
++TEST ! $CLI volume stop $GMV0
++#Stop Geo-rep
++TEST $GEOREP_CLI $master $slave_url stop
++#Delete Geo-rep
++TEST $GEOREP_CLI $master $slave_url delete
++#Cleanup authorized_keys
++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
++#clear mountbroker
++gluster-mountbroker remove --user $usr
++gluster-mountbroker remove --volume $slave_vol
++#delete group and user created for non-root setup
++TEST userdel -r -f $usr
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
++##password script cleanup
++rm -rf /tmp/ssh-askpass-script
diff --git a/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch b/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch
new file mode 100644
index 0000000..af0206a
--- /dev/null
+++ b/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch
@@ -0,0 +1,186 @@
+From 4a2441e76f4240568093080769ede07bb7fb2016 Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Sun, 20 Oct 2019 01:01:39 +0530
+Subject: [PATCH 311/313] geo-rep: Fix Permission denied traceback on non root
+ setup
+While syncing rename of directory in hybrid crawl, geo-rep
+crashes as below.
+Traceback (most recent call last):
+  File "/usr/local/libexec/glusterfs/python/syncdaemon/", line 118, in worker
+    res = getattr(self.obj, rmeth)(*in_data[2:])
+  File "/usr/local/libexec/glusterfs/python/syncdaemon/", line 588, in entry_ops
+    src_entry = get_slv_dir_path(slv_host, slv_volume, gfid)
+  File "/usr/local/libexec/glusterfs/python/syncdaemon/", line 687, in get_slv_dir_path
+    [ENOENT], [ESTALE])
+  File "/usr/local/libexec/glusterfs/python/syncdaemon/", line 546, in errno_wrap
+    return call(*arg)
+PermissionError: [Errno 13] Permission denied: '/bricks/brick1/b1/.glusterfs/8e/c0/8ec0fcd4-d50f-4a6e-b473-a7943ab66640'
+Conversion of gfid to path for a directory uses readlink on backend
+.glusterfs gfid path. But this fails for non root user with
+permission denied.
+Use gfid2path interface to get the path from gfid
+Backport of:
+ > Patch:
+ > Change-Id: I9d40c713a1b32cea95144cbc0f384ada82972222
+ > fixes: bz#1763439
+ > Signed-off-by: Kotresh HR <>
+Change-Id: I9d40c713a1b32cea95144cbc0f384ada82972222
+BUG: 1763412
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ geo-replication/syncdaemon/               |  3 +-
+ geo-replication/syncdaemon/           | 35 ++++++++++++++++------
+ tests/00-geo-rep/00-georep-verify-non-root-setup.t | 30 +++++++++++++++----
+ 3 files changed, 52 insertions(+), 16 deletions(-)
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 7b48d82..8940384 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -231,7 +231,8 @@ def main():
+     # Set default path for config file in that case
+     # If an subcmd accepts config file then it also accepts
+     # master and Slave arguments.
+-    if config_file is None and hasattr(args, "config_file"):
++    if config_file is None and hasattr(args, "config_file") \
++        and args.subcmd != "slave":
+         config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
+             GLUSTERD_WORKDIR,
+             args.master,
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index aadaebd..b08098e 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -57,6 +57,7 @@ from hashlib import sha256 as sha256
+ # auxiliary gfid based access prefix
+ _CL_AUX_GFID_PFX = ".gfid/"
++ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+ GX_GFID_CANONICAL_LEN = 37  # canonical gfid len + '\0'
+@@ -670,6 +671,7 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
+     global slv_bricks
+     dir_path = ENOENT
++    pfx = gauxpfx()
+     if not slv_bricks:
+         slv_info = Volinfo(slv_volume, slv_host, master=False)
+@@ -683,15 +685,30 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
+                                gfid[2:4],
+                                gfid], [ENOENT], [ESTALE])
+         if dir_path != ENOENT:
+-            realpath = errno_wrap(os.readlink, [dir_path],
+-                                  [ENOENT], [ESTALE])
+-            if not isinstance(realpath, int):
+-                realpath_parts = realpath.split('/')
+-                pargfid = realpath_parts[-2]
+-                basename = realpath_parts[-1]
+-                pfx = gauxpfx()
+-                dir_entry = os.path.join(pfx, pargfid, basename)
+-                return dir_entry
++            try:
++                realpath = errno_wrap(os.readlink, [dir_path],
++                                      [ENOENT], [ESTALE])
++                if not isinstance(realpath, int):
++                    realpath_parts = realpath.split('/')
++                    pargfid = realpath_parts[-2]
++                    basename = realpath_parts[-1]
++                    dir_entry = os.path.join(pfx, pargfid, basename)
++                    return dir_entry
++            except OSError:
++                # .gfid/GFID
++                gfidpath = unescape_space_newline(os.path.join(pfx, gfid))
++                realpath = errno_wrap(Xattr.lgetxattr_buf,
++                      [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE])
++                if not isinstance(realpath, int):
++                    basename = os.path.basename(realpath).rstrip('\x00')
++                    dirpath = os.path.dirname(realpath)
++                    if dirpath is "/":
++                        pargfid = ROOT_GFID
++                    else:
++                        dirpath = dirpath.strip("/")
++                        pargfid = get_gfid_from_mnt(dirpath)
++                    dir_entry = os.path.join(pfx, pargfid, basename)
++                    return dir_entry
+     return None
+diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+index e753c1f..c9fd8b2 100644
+--- a/tests/00-geo-rep/00-georep-verify-non-root-setup.t
++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
+@@ -118,8 +118,8 @@ clean_lock_files
+ TEST /usr/sbin/groupadd $grp
+ clean_lock_files
+-##Create non-root user and assign it to newly created group
++##Del if exists and create non-root user and assign it to newly created group
++userdel -r -f $usr
+ TEST /usr/sbin/useradd -G $grp $usr
+ ##Modify password for non-root user to have control over distributing ssh-key
+@@ -140,8 +140,6 @@ TEST killall_gluster;
+ TEST glusterd;
+ TEST pidof glusterd;
+ ##Create, start and mount meta_volume
+ TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+ TEST $CLI volume start $META_VOL
+@@ -225,6 +223,26 @@ TEST $GEOREP_CLI  $master $slave_url resume
+ #Validate failure of volume stop when geo-rep is running
+ TEST ! $CLI volume stop $GMV0
++#Hybrid directory rename test BZ#1763439
++TEST $GEOREP_CLI $master $slave_url config change_detector xsync
++mkdir ${master_mnt}/dir1
++mkdir ${master_mnt}/dir1/dir2
++mkdir ${master_mnt}/dir1/dir3
++mkdir ${master_mnt}/hybrid_d1
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/dir2
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/dir3
++mv ${master_mnt}/hybrid_d1 ${master_mnt}/hybrid_rn_d1
++mv ${master_mnt}/dir1/dir2 ${master_mnt}/rn_dir2
++mv ${master_mnt}/dir1/dir3 ${master_mnt}/dir1/rn_dir3
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_rn_d1
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/rn_dir2
++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/rn_dir3
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave_url stop
+@@ -232,8 +250,8 @@ TEST $GEOREP_CLI $master $slave_url stop
+ TEST $GEOREP_CLI $master $slave_url delete
+ #Cleanup authorized_keys
+-sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+-sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' /home/$usr/.ssh/authorized_keys
++sed -i '/^command=.*gsyncd.*/d' /home/$usr/.ssh/authorized_keys
+ #clear mountbroker
+ gluster-mountbroker remove --user $usr
diff --git a/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch b/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch
new file mode 100644
index 0000000..bf8c820
--- /dev/null
+++ b/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch
@@ -0,0 +1,59 @@
+From b1d8a5ee8b2e320aaaf9b2a145fbc285178d07bb Mon Sep 17 00:00:00 2001
+From: hari gowtham <>
+Date: Tue, 22 Oct 2019 15:11:03 +0530
+Subject: [PATCH 312/313] Scripts: quota_fsck script KeyError: 'contri_size'
+    back-port of:
+Problem: In a certain code flow, we weren't handling the
+unavailability of the contri value in the dict. Trying to print
+without the value resulted in erroring out.
+Fix: Print the whole dictionary, as its values will be
+helpful in understanding the state of the file/dir
+>Fixes: bz#1764129
+>Change-Id: I99c538adb712f281ca10e4e0088f404f515b9725
+>Signed-off-by: hari gowtham <>
+BUG: 1719171
+Change-Id: I99c538adb712f281ca10e4e0088f404f515b9725
+Signed-off-by: hari gowtham <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ extras/quota/ | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+diff --git a/extras/quota/ b/extras/quota/
+index f03895d..485a37a 100755
+--- a/extras/quota/
++++ b/extras/quota/
+@@ -52,17 +52,17 @@ epilog_msg='''
+ def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
+     if log_type == QUOTA_VERBOSE:
+-        print('%-24s %-60s\nxattr_values: %s\n%s\n' % {"Verbose", path,  xattr_dict, stbuf})
++        print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf))
+     elif log_type == QUOTA_META_ABSENT:
+-        print('%-24s %-60s\n%s\n' % {"Quota-Meta Absent", path, xattr_dict})
++        print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict))
+     elif log_type == QUOTA_SIZE_MISMATCH:
+         print("mismatch")
+         if dir_size is not None:
+-            print('%24s %60s %12s %12s' % {"Size Mismatch", path, xattr_dict['contri_size'],
+-                   dir_size})
++            print('%24s %60s %12s %12s' % ("Size Mismatch", path, 
++                xattr_dict, dir_size))
+         else:
+-            print('%-24s %-60s %-12i %-12i' % {"Size Mismatch", path, xattr_dict['contri_size'],
+-                   stbuf.st_size})
++            print('%-24s %-60s %-12i %-12i' % ("Size Mismatch", path, xattr_dict,
++                   stbuf.st_size))
+ def size_differs_lot(s1, s2):
+     '''
diff --git a/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch b/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch
new file mode 100644
index 0000000..e4887b8
--- /dev/null
+++ b/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch
@@ -0,0 +1,60 @@
+From 23091d24d34102c7938ae2890930b73c89c5a8e7 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Tue, 22 Oct 2019 18:52:25 +0530
+Subject: [PATCH 313/313] extras: Cgroup(CPU/Mem) restriction are not working
+ on gluster process
+Problem: After configuring a Cgroup (CPU/MEM) limit for a gluster process,
+         the resource (CPU/MEM) limits are not applied to the gluster
+         process. The limits are not applied because not all threads are
+         moved into the newly created cgroup where the restriction applies.
+Solution: To move every gluster thread to the newly created cgroup, change
+          the condition in the script.
+> Change-Id: I8ad81c69200e4ec43a74f6052481551cf835354c
+> Fixes: bz#1764208
+> (Cherry pick from commit 38de02012948013a88597545cf49380ce97f6fa7)
+> (Reviewed on upstream link
+> Signed-off-by: Mohit Agrawal <>
+Change-Id: I8ad81c69200e4ec43a74f6052481551cf835354c
+BUG: 1764202
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ extras/ | 2 +-
+ extras/      | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+diff --git a/extras/ b/extras/
+index b739c82..52dcf62 100755
+--- a/extras/
++++ b/extras/
+@@ -104,7 +104,7 @@ echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup."
+ echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us
+ if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+-  for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
++  for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+     do
+       echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+     done
+diff --git a/extras/ b/extras/
+index 38aa2a0..91b36f8 100755
+--- a/extras/
++++ b/extras/
+@@ -116,7 +116,7 @@ else
+ fi
+ if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+-  for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
++  for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+     do
+       echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+     done
diff --git a/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch b/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
new file mode 100644
index 0000000..adde426
--- /dev/null
+++ b/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
@@ -0,0 +1,38 @@
+From 2a4f19df70276ba41db19938507297f7580286fa Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <>
+Date: Fri, 25 Oct 2019 18:07:27 +0530
+Subject: [PATCH 314/314] glusterd/tier: is_tier_enabled inserted causing
+ checksum mismatch
+The volfile entry is_tier_enabled is checked for version 3.7.6 while it was
+supposed to be checked for 3.10. This fixes it downstream only, by changing
+the version of the check to 3.13.1.
+BUG: 1765555
+Change-Id: Id631f3ba520b3e7b126c7607dca1bb7874532e81
+Signed-off-by: Atin Mukherjee <>
+Reviewed-by: Sanju Rakonde <>
+Tested-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+ xlators/mgmt/glusterd/src/glusterd-store.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 4889217..8a10eb8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1036,7 +1036,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+         if (ret)
+             goto out;
+     }
+-    if (conf->op_version >= GD_OP_VERSION_3_10_0) {
++    if (conf->op_version >= GD_OP_VERSION_3_13_1) {
+         snprintf(buf, sizeof(buf), "%d", volinfo->is_tier_enabled);
+         ret = gf_store_save_value(fd, GF_TIER_ENABLED, buf);
+         if (ret)
diff --git a/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch b/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch
new file mode 100644
index 0000000..a0448cc
--- /dev/null
+++ b/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch
@@ -0,0 +1,52 @@
+From 4a04e1b5540921db22f1894f71eb30342127192d Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Tue, 12 Nov 2019 21:53:20 +0530
+Subject: [PATCH 315/316] geo-rep: Fix py2/py3 compatibility in repce
+Geo-rep fails to start on a python2-only machine like
+centos6. It fails with "ImportError no module named _io".
+This patch fixes that.
+Backport of:
+ > Patch:
+ > fixes: bz#1771577
+ > Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec
+ > Signed-off-by: Kotresh HR <>
+BUG: 1771524
+Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunny Kumar <>
+ geo-replication/syncdaemon/ | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index 6065b82..c622afa 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -8,7 +8,6 @@
+ # cases as published by the Free Software Foundation.
+ #
+-import _io
+ import os
+ import sys
+ import time
+@@ -58,9 +57,9 @@ def recv(inf):
+     """load an object from input stream
+     python2 and python3 compatibility, inf is sys.stdin
+     and is opened as text stream by default. Hence using the
+-    buffer attribute
++    buffer attribute in python3
+     """
+-    if isinstance(inf, _io.TextIOWrapper):
++    if hasattr(inf, "buffer"):
+         return pickle.load(inf.buffer)
+     else:
+         return pickle.load(inf)
diff --git a/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch b/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch
new file mode 100644
index 0000000..c2045a0
--- /dev/null
+++ b/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch
@@ -0,0 +1,51 @@
+From b9a19aef5de94eb91162448ad687f2d2d194f82c Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <>
+Date: Thu, 14 Nov 2019 09:55:15 +0000
+Subject: [PATCH 316/316] spec: fixed python-prettytable dependency for rhel6
+Installing glusterfs on rhel6 was failing with python-prettytable
+dependency as it required python2-prettytable for glusterfs-events.
+This patch conditionally sets the python version for rhel7 and
+fixes the problem.
+BUG: 1771614
+Change-Id: I6288daa5d8c2d82a6d73a0d9722786a2a99b9db5
+fixes: bz#1771614
+Signed-off-by: Rinku Kothiya <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+diff --git a/ b/
+index 3c2e2dc..eeadb65 100644
+--- a/
++++ b/
+@@ -706,7 +706,7 @@ This package provides the translators needed on any GlusterFS client.
+ %package events
+ Summary:          GlusterFS Events
+ Requires:         %{name}-server%{?_isa} = %{version}-%{release}
+-Requires:         python%{_pythonver} python%{_pythonver}-prettytable
++Requires:         python%{_pythonver}
+ Requires:         python%{_pythonver}-gluster = %{version}-%{release}
+ %if ( 0%{?rhel} && 0%{?rhel} < 8 )
+ Requires:         python-requests
+@@ -714,7 +714,10 @@ Requires:         python-requests
+ Requires:         python%{_pythonver}-requests
+ %endif
+ %if ( 0%{?rhel} && 0%{?rhel} < 7 )
++Requires:         python-prettytable
+ Requires:         python-argparse
++Requires:         python%{_pythonver}-prettytable
+ %endif
+ %if ( 0%{?_with_systemd:1} )
+ %{?systemd_requires}
diff --git a/SOURCES/ b/SOURCES/
new file mode 100644
index 0000000..eccf2e3
--- /dev/null
+++ b/SOURCES/
@@ -0,0 +1,43 @@
+From 985ef94c63859907339c11b158e4540a5568d638 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <>
+Date: Mon, 18 Nov 2019 02:25:25 -0500
+Subject: [PATCH 317/335] Update to rhgs-3.5.1
+Signed-off-by: Rinku Kothiya <>
+ README | 9 +++++++++
+ | 2 +-
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+ create mode 100644 README
+diff --git a/README b/README
+new file mode 100644
+index 0000000..44a118b
+--- /dev/null
++++ b/README
+@@ -0,0 +1,9 @@
++'master' branch is just dummy branch in downstream. Any reference to 'upstream'
++will point to
++You can checkout the release specific branch by running below command
++ bash$ git checkout -t -b rhs-x.y origin/rhs-x.y
++Happy Hacking!!
+diff --git a/ b/
+index 94c92ef..69ddd2b 100755
+--- a/
++++ b/
+@@ -18,7 +18,7 @@ done
+ shift $((OPTIND-1))
+ set_hooks_commit_msg()
+ {
diff --git a/SOURCES/ b/SOURCES/
new file mode 100644
index 0000000..e65ae38
--- /dev/null
+++ b/SOURCES/
@@ -0,0 +1,114 @@
+From 1f03327887645be2500cd29f69f7a77a4f5d0164 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <>
+Date: Mon, 18 Nov 2019 14:25:12 -0500
+Subject: [PATCH 318/335] Update to rhgs-3.5.1
+Removed the checks for updates and fixes from
+Change-Id: I436c959aa3b3366cd313b29f41c2466c4072efd7
+Signed-off-by: Rinku Kothiya <>
+ | 47 ++++++++---------------------------------------
+ 1 file changed, 8 insertions(+), 39 deletions(-)
+diff --git a/ b/
+index 69ddd2b..918fb11 100755
+--- a/
++++ b/
+@@ -129,13 +129,8 @@ editor_mode()
+     if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then
+         # see note above function warn_reference_missing for regex elaboration
+-        # Lets first check for github issues
+-        ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?#[[:digit:]]+" | awk -F '#' '{print $2}');
+-        if [ "x${ref}" = "x" ]; then
+-            # if not found, check for bugs
+-            ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+bz#[[:digit:]]+" | awk -F '#' '{print $2}');
+-        fi
++        ref=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+")
+         if [ "x${ref}" != "x" ]; then
+             return;
+         fi
+@@ -157,16 +152,6 @@ editor_mode()
+                 bz_string=""
+             fi
+-            echo "Select yes '(y)' if this patch fixes the bug/feature completely,"
+-            echo -n "or is the last of the patchset which brings feature (Y/n): "
+-            read fixes
+-            fixes_string="fixes"
+-            if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then
+-                fixes_string="updates"
+-            fi
+-            sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: ${bz_string}#${bug}/;}" $1 > $ && \
+-                mv $ $1;
+             return;
+         done
+     fi
+@@ -234,8 +219,8 @@ check_patches_for_coding_style()
+ #   IOW, the above helps us find the pattern with leading or training spaces
+ #   or non word consituents like , or ;
+ #
+-#   [fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])
+-#      Finds 'fixes' OR 'updates' in any case combination
++#   [bB][uU][gG]
++#      Finds 'bug' in any case
+ #
+ #   (:)?
+ #      Followed by an optional : (colon)
+@@ -256,28 +241,11 @@ warn_reference_missing()
+     echo ""
+     echo "=== Missing a reference in commit! ==="
+     echo ""
+-    echo "Gluster commits are made with a reference to a bug or a github issue"
+-    echo ""
+-    echo "Submissions that are enhancements (IOW, not functional"
+-    echo "bug fixes, but improvements of any nature to the code) are tracked"
+-    echo "using github issues [1]."
++    echo "You must give BUG: <bugid>"
+     echo ""
+-    echo "Submissions that are bug fixes are tracked using Bugzilla [2]."
++    echo "for example:"
+     echo ""
+-    echo "A check on the commit message, reveals that there is no bug or"
+-    echo "github issue referenced in the commit message"
+-    echo ""
+-    echo "[1]"
+-    echo "[2]"
+-    echo ""
+-    echo "Please file an issue or a bug report and reference the same in the"
+-    echo "commit message using the following tags:"
+-    echo "GitHub Issues:"
+-    echo "\"Fixes: gluster/glusterfs#n\" OR \"Updates: gluster/glusterfs#n\","
+-    echo "\"Fixes: #n\" OR \"Updates: #n\","
+-    echo "Bugzilla ID:"
+-    echo "\"Fixes: bz#n\" OR \"Updates: bz#n\","
+-    echo "where n is the issue or bug number"
++    echo "BUG: 1234567"
+     echo ""
+     echo "You may abort the submission choosing 'N' below and use"
+     echo "'git commit --amend' to add the issue reference before posting"
+@@ -312,7 +280,7 @@ main()
+     assert_diverge;
+     # see note above function warn_reference_missing for regex elaboration
+-    reference=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?(bz)?#[[:digit:]]+" | awk -F '#' '{print $2}');
++    reference=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+" | awk  '{print $2}')
+     # If this is a commit against master and does not have a bug ID or a github
+     # issue reference. Warn the contributor that one of the 2 is required
+@@ -320,6 +288,7 @@ main()
+         warn_reference_missing;
+     fi
+     # TODO: add clang-format command here. It will after the changes are done everywhere else
+     clang_format=$(clang-format --version)
+     if [ ! -z "${clang_format}" ]; then
diff --git a/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch b/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch
new file mode 100644
index 0000000..d37efaf
--- /dev/null
+++ b/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch
@@ -0,0 +1,48 @@
+From 659bd2a0fde9ba0cb8fc3905bcdb63d91e3dfa9d Mon Sep 17 00:00:00 2001
+From: Raghavendra Bhat <>
+Date: Tue, 2 Jul 2019 16:50:23 -0400
+Subject: [PATCH 319/335] features/snapview-server: obtain the list of
+ snapshots inside the lock
+The current list of snapshots from priv->dirents is obtained outside
+the lock.
+Upstream patch:
+> Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177
+> Fixes: bz#1726783
+> Signed-off-by: Raghavendra Bhat <>
+> patch:
+BUG: 1731513
+Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177
+Signed-off-by: Raghavendra Bhat <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/features/snapview-server/src/snapview-server-mgmt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c
+index bc415ef..3d64383 100644
+--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c
++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c
+@@ -256,7 +256,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
+     this = frame->this;
+     ctx = frame->this->ctx;
+     priv = this->private;
+-    old_dirents = priv->dirents;
+     if (!ctx) {
+         errno = EINVAL;
+@@ -388,6 +387,7 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
+     LOCK(&priv->snaplist_lock);
+     {
+         oldcount = priv->num_snaps;
++        old_dirents = priv->dirents;
+         for (i = 0; i < priv->num_snaps; i++) {
+             for (j = 0; j < snapcount; j++) {
+                 if ((!strcmp(old_dirents[i].name, dirents[j].name)) &&
diff --git a/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch b/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch
new file mode 100644
index 0000000..48a9cad
--- /dev/null
+++ b/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch
@@ -0,0 +1,58 @@
+From 7e5d8dcb4f557eaca259e8d81cf34d651907396c Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <>
+Date: Thu, 24 Oct 2019 12:24:35 +0530
+Subject: [PATCH 320/335] gf-event: Handle unix volfile-servers
+glfsheal program uses unix-socket-based volfile server.
+volfile server will be the path to socket in this case.
+gf_event expects this to be hostname in all cases. So getaddrinfo
+will fail on the unix-socket path, events won't be sent in this case.
+In case of unix sockets, default to localhost
+BUG: 1758923
+Change-Id: I60d27608792c29d83fb82beb5fde5ef4754bece8
+Signed-off-by: Pranith Kumar K <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ libglusterfs/src/events.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
+index 9d33783..4e2f8f9 100644
+--- a/libglusterfs/src/events.c
++++ b/libglusterfs/src/events.c
+@@ -43,6 +43,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+     struct addrinfo *result = NULL;
+     xlator_t *this = THIS;
+     int sin_family = AF_INET;
++    char *volfile_server_transport = NULL;
+     /* Global context */
+     ctx = THIS->ctx;
+@@ -62,8 +63,16 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
+     memset(&hints, 0, sizeof(hints));
+     hints.ai_family = AF_UNSPEC;
++    if (ctx) {
++        volfile_server_transport = ctx->cmd_args.volfile_server_transport;
++    }
++    if (!volfile_server_transport) {
++        volfile_server_transport = "tcp";
++    }
+     /* Get Host name to send message */
+-    if (ctx && ctx->cmd_args.volfile_server) {
++    if (ctx && ctx->cmd_args.volfile_server &&
++        (strcmp(volfile_server_transport, "unix"))) {
+         /* If it is client code then volfile_server is set
+            use that information to push the events. */
+         if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints,
diff --git a/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch b/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch
new file mode 100644
index 0000000..8dec96f
--- /dev/null
+++ b/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch
@@ -0,0 +1,55 @@
+From 5e7a2ad35a174d6d0ee5ed58a3e27955e85aa47c Mon Sep 17 00:00:00 2001
+From: kshithijiyer <>
+Date: Mon, 24 Jun 2019 20:08:48 +0530
+Subject: [PATCH 321/335] Adding white spaces to description of set group.
+The description of set group is missing spaces, which
+makes the description look like:
+volume set <VOLNAME> group  <GROUP> - This option can be used for
+setting multiple pre-defined volume optionswhere group_name is a
+file under /var/lib/glusterd/groups containing onekey, value pair
+per line
+Instead of:
+volume set <VOLNAME> group <GROUP> - This option can be used for
+setting multiple pre-defined volume options where group_name is a
+file under /var/lib/glusterd/groups containing one key value
+pair per line
+> upstream patch:
+> Fixes: bz#1723455
+> Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f
+> Signed-off-by: kshithijiyer <>
+BUG: 1724021
+Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f
+Signed-off-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ cli/src/cli-cmd-volume.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 6b958bd..66beb1b 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -3393,10 +3393,10 @@ struct cli_cmd volume_cmds[] = {
+     {"volume set <VOLNAME> <KEY> <VALUE>", cli_cmd_volume_set_cbk,
+      "set options for volume <VOLNAME>"},
+-    {"volume set <VOLNAME> group  <GROUP>", cli_cmd_volume_set_cbk,
+-     "This option can be used for setting multiple pre-defined volume options"
+-     "where group_name is a file under /var/lib/glusterd/groups containing one"
+-     "key, value pair per line"},
++    {"volume set <VOLNAME> group <GROUP>", cli_cmd_volume_set_cbk,
++     "This option can be used for setting multiple pre-defined volume options "
++     "where group_name is a file under /var/lib/glusterd/groups containing one "
++     "key value pair per line"},
+     {"volume log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk,
+      "rotate the log file for corresponding volume/brick"},
diff --git a/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch b/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch
new file mode 100644
index 0000000..35a234b
--- /dev/null
+++ b/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch
@@ -0,0 +1,65 @@
+From 9be255f76c78fcbbda1e3a72eb2e99d3aface53e Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <>
+Date: Wed, 16 Oct 2019 23:26:03 +0530
+Subject: [PATCH 322/335] glusterd: display correct rebalance data size after
+ glusterd restart
+Problem: After completion of rebalance, if glusterd is restarted,
+rebalance status displays wrong rebalance data size in its output.
+Cause: While glusterd is restoring the information from /var/lib/glusterd/
+into its memory, glusterd fetches rebalance_data from
+/var/lib/glusterd/vols/volname/ This value is
+converted into an integer using atoi(), which is returning
+incorrect value for larger values.
+Solution: use sscanf() instead of atoi() to convert string to
+integer(in this case it is unsigned long)
+> upstream patch:
+> fixes: bz#1762438
+> Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e
+> Signed-off-by: Sanju Rakonde <>
+BUG: 1761486
+Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e
+Signed-off-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/mgmt/glusterd/src/glusterd-store.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 8a10eb8..b3b5ee9 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -2974,19 +2974,19 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
+             volinfo->rebal.op = atoi(value);
+         } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES,
+                             SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) {
+-            volinfo->rebal.rebalance_files = atoi(value);
++            sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files);
+         } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE,
+                             SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) {
+-            volinfo->rebal.rebalance_data = atoi(value);
++            sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data);
+         } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED,
+                             SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) {
+-            volinfo->rebal.lookedup_files = atoi(value);
++            sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files);
+         } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES,
+                             SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) {
+-            volinfo->rebal.rebalance_failures = atoi(value);
++            sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures);
+         } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED,
+                             SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) {
+-            volinfo->rebal.skipped_files = atoi(value);
++            sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files);
+         } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME,
+                             SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) {
+             volinfo->rebal.rebalance_time = atoi(value);
diff --git a/SOURCES/0323-cli-display-detailed-rebalance-info.patch b/SOURCES/0323-cli-display-detailed-rebalance-info.patch
new file mode 100644
index 0000000..a00faf8
--- /dev/null
+++ b/SOURCES/0323-cli-display-detailed-rebalance-info.patch
@@ -0,0 +1,101 @@
+From 852c475040a599ed35798dbb388c6b59c1d0a820 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <>
+Date: Tue, 22 Oct 2019 15:06:29 +0530
+Subject: [PATCH 323/335] cli: display detailed rebalance info
+Problem: When one of the nodes in the cluster is down,
+rebalance status does not display detailed information.
+Cause: In glusterd_volume_rebalance_use_rsp_dict()
+we are aggregating rsp from all the nodes into a
+dictionary and sending it to cli for printing. While
+assigning an index to keys we are considering all the
+peers instead of considering only the peers which are
+up. Because of this, the index does not reach 1.
+While parsing the rsp, the cli is unable to find the status-1
+key in the dictionary and exits without printing
+any information.
+Solution: The simplest fix for this without much
+code change is to continue to look for other keys
+when status-1 key is not found.
+> upstream patch:
+> fixes: bz#1764119
+> Change-Id: I0062839933c9706119eb85416256eade97e976dc
+> Signed-off-by: Sanju Rakonde <>
+BUG: 1761326
+Change-Id: I0062839933c9706119eb85416256eade97e976dc
+Signed-off-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ cli/src/cli-rpc-ops.c                      | 21 ++++++++++++++-------
+ tests/bugs/glusterd/rebalance-in-cluster.t |  9 +++++++++
+ 2 files changed, 23 insertions(+), 7 deletions(-)
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index b167e26..4e91265 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -1597,13 +1597,20 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type,
+         goto out;
+     }
+-    snprintf(key, sizeof(key), "status-1");
+-    ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
+-    if (ret) {
+-        gf_log("cli", GF_LOG_TRACE, "count %d %d", count, 1);
+-        gf_log("cli", GF_LOG_TRACE, "failed to get status");
+-        goto out;
++    for (i = 1; i <= count; i++) {
++        snprintf(key, sizeof(key), "status-%d", i);
++        ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
++        /* If information from a node is missing we should skip
++         * the node and try to fetch information of other nodes.
++         * If information is not found for all nodes, we should
++         * error out.
++         */
++        if (!ret)
++            break;
++        if (ret && i == count) {
++            gf_log("cli", GF_LOG_TRACE, "failed to get status");
++            goto out;
++        }
+     }
+     /* Fix layout will be sent to all nodes for the volume
+diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t
+index 9565fae..469ec6c 100644
+--- a/tests/bugs/glusterd/rebalance-in-cluster.t
++++ b/tests/bugs/glusterd/rebalance-in-cluster.t
+@@ -4,6 +4,10 @@
+ . $(dirname $0)/../../cluster.rc
+ . $(dirname $0)/../../volume.rc
++function rebalance_status_field_1 {
++        $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p
+ cleanup;
+ TEST launch_cluster 2;
+ TEST $CLI_1 peer probe $H2;
+@@ -29,6 +33,11 @@ TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
+ TEST $CLI_1 volume rebalance $V0  start
+ EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1  $V0
++#bug - 1764119 - rebalance status should display detailed info when any of the nodes is down
++TEST kill_glusterd 2
++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0
++TEST start_glusterd 2
+ #bug-1245142
+ $CLI_1 volume rebalance $V0  start &
diff --git a/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch b/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch
new file mode 100644
index 0000000..26e1577
--- /dev/null
+++ b/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch
@@ -0,0 +1,128 @@
+From dcf3f74fa7e812dfe89667bd6219f70a8457f755 Mon Sep 17 00:00:00 2001
+From: Anoop C S <>
+Date: Thu, 6 Jun 2019 18:33:19 +0530
+Subject: [PATCH 324/335] extras/hooks: Add SELinux label on new bricks during
+ add-brick
+Backport of
+Change-Id: Ifd8ae5eeb91b968cc1a9a9b5d15844c5233d56db
+BUG: 1686800
+Signed-off-by: Anoop C S <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ .../add-brick/post/       | 100 +++++++++++++++++++++
+ 1 file changed, 100 insertions(+)
+ create mode 100755 extras/hook-scripts/add-brick/post/
+diff --git a/extras/hook-scripts/add-brick/post/ b/extras/hook-scripts/add-brick/post/
+new file mode 100755
+index 0000000..4a17c99
+--- /dev/null
++++ b/extras/hook-scripts/add-brick/post/
+@@ -0,0 +1,100 @@
++# Install to hooks/<HOOKS_VER>/add-brick/post
++# Add an SELinux file context for each brick using the glusterd_brick_t type.
++# This ensures that the brick is relabeled correctly on an SELinux restart or
++# restore. Subsequently, run a restore on the brick path to set the selinux
++# labels.
++parse_args () {
++  ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
++  eval set -- "${ARGS}"
++  while true; do
++    case ${1} in
++      --volname)
++        shift
++        VOL=${1}
++        ;;
++      --gd-workdir)
++          shift
++          GLUSTERD_WORKDIR=$1
++          ;;
++      --version)
++          shift
++          ;;
++      --volume-op)
++          shift
++          ;;
++      *)
++          shift
++          break
++          ;;
++    esac
++    shift
++  done
++  local volname="${1}"
++  local fctx
++  local list=()
++  fctx="$(semanage fcontext --list -C)"
++  # wait for new brick path to be updated under
++  # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/
++  sleep 5
++  # grab the path for each local brick
++  brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/"
++  brickdirs=$(
++    find "${brickpath}" -type f -exec grep '^path=' {} \; | \
++    cut -d= -f 2 | \
++    sort -u
++  )
++  # create a list of bricks for which custom SELinux
++  # label doesn't exist
++  for b in ${brickdirs}; do
++    pattern="${b}(/.*)?"
++    echo "${fctx}" | grep "^${pattern}\s" >/dev/null
++    if [[ $? -ne 0 ]]; then
++      list+=("${pattern}")
++    fi
++  done
++  # Add a file context for each brick path in the list and associate with the
++  # glusterd_brick_t SELinux type.
++  for p in ${list[@]}
++  do
++    semanage fcontext --add -t glusterd_brick_t -r s0 "${p}"
++  done
++  # Set the labels for which SELinux label was added above
++  for b in ${brickdirs}
++  do
++    echo "${list[@]}" | grep "${b}" >/dev/null
++    if [[ $? -eq 0 ]]; then
++      restorecon -R "${b}"
++    fi
++  done
++SELINUX_STATE=$(which getenforce && getenforce)
++[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
++parse_args "$@"
++[ -z "${VOL}" ] && exit 1
++set_brick_labels "${VOL}"
++exit 0
diff --git a/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch b/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch
new file mode 100644
index 0000000..8e5a5fa
--- /dev/null
+++ b/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch
@@ -0,0 +1,52 @@
+From 27d69d8927a946562aef08a6edfee38b9998f96d Mon Sep 17 00:00:00 2001
+From: Anoop C S <>
+Date: Wed, 12 Jun 2019 15:41:27 +0530
+Subject: [PATCH 325/335] extras/hooks: Install and package newly added post
+ add-brick hook script
+Previously a new SELinux hook script was added as a post add-brick
+operation to label new brick paths. But the change failed to install
+and package new script. Therefore making necessary changes to Makefile
+and spec file to get it installed and packaged.
+Backport of
+Change-Id: I67b8f4982c2783c34a4bc749fb4387c19a038225
+BUG: 1686800
+Signed-off-by: Anoop C S <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ extras/hook-scripts/add-brick/post/ | 4 ++--
+                              | 1 +
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+diff --git a/extras/hook-scripts/add-brick/post/ b/extras/hook-scripts/add-brick/post/
+index bfc0c1c..9b236df 100644
+--- a/extras/hook-scripts/add-brick/post/
++++ b/extras/hook-scripts/add-brick/post/
+@@ -1,6 +1,6 @@
+ hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/
+-hook_SCRIPTS =
++hook_SCRIPTS =
+ endif
+diff --git a/ b/
+index eeadb65..91180db 100644
+--- a/
++++ b/
+@@ -1447,6 +1447,7 @@ exit 0
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+             %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/
++            %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/
+             %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+             %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/
diff --git a/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch b/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch
new file mode 100644
index 0000000..b0afcc7
--- /dev/null
+++ b/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch
@@ -0,0 +1,51 @@
+From a4f01ad90a0c0dfd0655da509c5ed2a11a507cc3 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Mon, 17 Jun 2019 11:10:42 +0530
+Subject: [PATCH 326/335] tests: subdir-mount.t is failing for brick_mux
+ regression
+To avoid the failure, wait for the hook script to run
+after the test case executes the add-brick command.
+This is required as a dependency for the bz referenced below.
+Backport of
+Change-Id: I063b6d0f86a550ed0a0527255e4dfbe8f0a8c02e
+BUG: 1686800
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tests/features/subdir-mount.t | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t
+index 8401946..a02bd6b 100644
+--- a/tests/features/subdir-mount.t
++++ b/tests/features/subdir-mount.t
+@@ -85,12 +85,17 @@ TEST $CLI volume start $V0
+ TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2
+ TEST stat $M2
++initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log  | wc -l`
+ # mount shouldn't fail even after add-brick
+ TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6};
+-# Give time for client process to get notified and use the new
+-# volfile after add-brick
+-sleep 1
++# Wait to execute script by glusterd
++newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log  | wc -l`
++while [ $newcnt -eq $initcnt ]
++   newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log  | wc -l`
++   sleep 1
+ # Existing mount should still be active
+ mount_inode=$(stat --format "%i" "$M2")
diff --git a/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch b/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch
new file mode 100644
index 0000000..e3e42fa
--- /dev/null
+++ b/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch
@@ -0,0 +1,93 @@
+From f89242132dc4756c827113154cc6ad18ad6bde88 Mon Sep 17 00:00:00 2001
+From: Milind Changire <>
+Date: Tue, 19 Feb 2019 12:49:12 +0530
+Subject: [PATCH 327/335] glusterfind: integrate with gfid2path
+Integration with gfid2path helps avoid file-system crawl and saves
+precious time. Extended attributes starting with "trusted.gfid2path."
+are read and the <PGFID>/<BN> values are extracted and the <PGFID> is
+iteratively resolved from the brick backend to arrive at the full path.
+>Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0
+>fixes: #529
+>Signed-off-by: Milind Changire <>
+>Signed-off-by: Shwetha K Acharya <>
+backport of
+BUG: 1599802
+Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0
+Signed-off-by: Milind Changire <>
+Signed-off-by: Shwetha K Acharya <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tools/glusterfind/src/ | 45 ++++++++++++++++++++++++++++++++++----
+ 1 file changed, 41 insertions(+), 4 deletions(-)
+diff --git a/tools/glusterfind/src/ b/tools/glusterfind/src/
+index ef982db..d8f97e0 100644
+--- a/tools/glusterfind/src/
++++ b/tools/glusterfind/src/
+@@ -114,6 +114,43 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
+                 continue
++def enum_hard_links_using_gfid2path(brick, gfid, args):
++    hardlinks = []
++    p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
++    if not os.path.isdir(p):
++        # we have a symlink or a normal file
++        try:
++            file_xattrs = xattr.list(p)
++            for x in file_xattrs:
++                if x.startswith("trusted.gfid2path."):
++                    # get the value for the xattr i.e. <PGFID>/<BN>
++                    v = xattr.getxattr(p, x)
++                    pgfid, bn = v.split(os.sep)
++                    try:
++                        path = symlink_gfid_to_path(brick, pgfid)
++                        fullpath = os.path.join(path, bn)
++                        fullpath = output_path_prepare(fullpath, args)
++                        hardlinks.append(fullpath)
++                    except (IOError, OSError) as e:
++                        logger.warn("Error converting to path: %s" % e)
++                        continue
++        except (IOError, OSError):
++            pass
++    return hardlinks
++def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args):
++    path = ""
++    for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
++        gfid = row[3].strip()
++        logger.debug("Processing gfid %s" % gfid)
++        hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args)
++        path = ",".join(hardlinks)
++        changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
+ def gfid_to_path_using_pgfid(brick, changelog_data, args):
+     """
+     For all the pgfids collected, Converts to Path and
+@@ -314,11 +351,11 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
+     changelog_data.commit()
+"[2/4] Finished 'pgfid to path' conversions.")
+-    # Convert all GFIDs for which no other additional details available
+-"[3/4] Starting 'gfid to path using pgfid' conversions ...")
+-    gfid_to_path_using_pgfid(brick, changelog_data, args)
++    # Convert all gfids recorded for data and metadata to all hardlink paths
++"[3/4] Starting 'gfid2path' conversions ...")
++    gfid_to_all_paths_using_gfid2path(brick, changelog_data, args)
+     changelog_data.commit()
+-"[3/4] Finished 'gfid to path using pgfid' conversions.")
++"[3/4] Finished 'gfid2path' conversions.")
+     # If some GFIDs fail to get converted from previous step,
+     # convert using find
diff --git a/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch b/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch
new file mode 100644
index 0000000..0d12daa
--- /dev/null
+++ b/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch
@@ -0,0 +1,55 @@
+From a8d8fc91af226fbf49e9dd1d7d91ad287707c4fe Mon Sep 17 00:00:00 2001
+From: Vishal Pandey <>
+Date: Wed, 7 Aug 2019 12:53:06 +0530
+Subject: [PATCH 328/335] glusterd: Add warning and abort in case of failures
+ in migration during remove-brick commit
+Problem -
+Currently remove-brick commit goes through even though there were files
+that failed to migrate or were skipped. There is no warning raised to the user.
+Add a check in the remove brick staging phase to verify if the status of the
+rebalance process is complete but there have been failures or some skipped files
+during migration. In this case the user will be given a warning and the remove-brick
+commit will be aborted. User will need to use the force option to remove the bricks.
+> Upstream Path Link:
+> Fixes: bz#1514683
+> Signed-offby- Vishal Pandey <>
+> Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5
+BUG: 1344758
+Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5
+Signed-off-by: Vishal Pandey <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+index ad9a572..c5141de 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+@@ -2191,6 +2191,17 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
+                 goto out;
+             }
++            if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) {
++                if (volinfo->rebal.rebalance_failures > 0 ||
++                    volinfo->rebal.skipped_files > 0) {
++                    errstr = gf_strdup(
++                        "use 'force' option as migration "
++                        "of some files might have been skipped or "
++                        "has failed");
++                    goto out;
++                }
++            }
+             ret = glusterd_remove_brick_validate_bricks(
+                 cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
+             if (ret)
diff --git a/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch b/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch
new file mode 100644
index 0000000..935824d
--- /dev/null
+++ b/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch
@@ -0,0 +1,165 @@
+From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
+From: karthik-us <>
+Date: Wed, 20 Nov 2019 12:26:11 +0530
+Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
+ healed_sinks
+Backport of:
+In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
+anything for entry heal, heal will not complete even though we have
+clear source and sinks. This will happen because while doing
+afr_selfheal_find_direction() only the bricks which are blamed by
+non-accused bricks are considered as sinks. Later in
+__afr_selfheal_entry_finalize_source() when it tries to mark all the
+non-sources as sinks it fails to do so because there won't be any
+healed_sinks marked, no witness present and there will be a source.
+If there is a source and no healed_sinks, then reset all the locked
+sources to 0 and healed sinks to 1 to do conservative merge.
+Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
+Signed-off-by: karthik-us <>
+BUG: 1764095
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ .../bug-1749322-entry-heal-not-happening.t         | 89 ++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-self-heal-entry.c      | 15 ++++
+ 2 files changed, 104 insertions(+)
+ create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+new file mode 100644
+index 0000000..9627908
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
+@@ -0,0 +1,89 @@
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++function check_gfid_and_link_count
++        local file=$1
++        file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
++        TEST [ ! -z $file_gfid_b0 ]
++        file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
++        file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
++        EXPECT $file_gfid_b0 echo $file_gfid_b1
++        EXPECT $file_gfid_b0 echo $file_gfid_b2
++        EXPECT "2" stat -c %h $B0/${V0}0/$file
++        EXPECT "2" stat -c %h $B0/${V0}1/$file
++        EXPECT "2" stat -c %h $B0/${V0}2/$file
++## Start and create a volume
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
++TEST $CLI volume start $V0;
++TEST $CLI volume set $V0 cluster.heal-timeout 5
++TEST $CLI volume heal $V0 disable
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++TEST mkdir $M0/dir
++TEST `echo "File 1 " > $M0/dir/file1`
++TEST touch $M0/dir/file{2..4}
++# Remove file2 from 1st & 3rd bricks
++TEST rm -f $B0/$V0"0"/dir/file2
++TEST rm -f $B0/$V0"2"/dir/file2
++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
++TEST rm -f $B0/$V0"0"/dir/file3
++TEST rm -f $B0/$V0"1"/dir/file3
++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
++# B0 and B2 blame each other
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
++# Add entry to xattrop dir on first brick.
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
++base_entry_b0=`ls $xattrop_dir0`
++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
++EXPECT "^1$" get_pending_heal_count $V0
++# Launch heal
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++# All the files must be present on all the bricks after conservative merge and
++# should have the gfid xattr and the .glusterfs hardlink.
++check_gfid_and_link_count dir/file1
++check_gfid_and_link_count dir/file2
++check_gfid_and_link_count dir/file3
++check_gfid_and_link_count dir/file4
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index 35b600f..3ce882e 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
+     afr_private_t *priv = NULL;
+     int source = -1;
+     int sources_count = 0;
++    int i = 0;
+     priv = this->private;
+@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
+     }
+     source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
++    /*If the selected source does not blame any other brick, then mark
++     * everything as sink to trigger conservative merge.
++     */
++    if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
++        for (i = 0; i < priv->child_count; i++) {
++            if (locked_on[i]) {
++                sources[i] = 0;
++                healed_sinks[i] = 1;
++            }
++        }
++        return -1;
++    }
+     return source;
+ }
diff --git a/SOURCES/0330-mount.glusterfs-change-the-error-message.patch b/SOURCES/0330-mount.glusterfs-change-the-error-message.patch
new file mode 100644
index 0000000..b64f0c6
--- /dev/null
+++ b/SOURCES/0330-mount.glusterfs-change-the-error-message.patch
@@ -0,0 +1,59 @@
+From 72168245761592a2cd0ebec05dd9bd9bc00745ca Mon Sep 17 00:00:00 2001
+From: Amar Tumballi <>
+Date: Wed, 13 Mar 2019 08:51:31 +0530
+Subject: [PATCH 330/335] mount.glusterfs: change the error message
+In scenarios where a mount fails before creating the log file, it doesn't
+make sense to give the message to 'check log file'. See below:
+ERROR: failed to create logfile "/var/log/glusterfs/mnt.log" (No space left on device)
+ERROR: failed to open logfile /var/log/glusterfs/mnt.log
+Mount failed. Please check the log file for more details.
+>upstream patch:
+>Fixes: bz#1688068
+>Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f
+>Signed-off-by: Amar Tumballi <>
+BUG: 1685406
+Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f
+Signed-off-by: Sheetal Pamecha <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/mount/fuse/utils/ | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+diff --git a/xlators/mount/fuse/utils/ b/xlators/mount/fuse/utils/
+index 3f5d76d..cbde42d 100755
+--- a/xlators/mount/fuse/utils/
++++ b/xlators/mount/fuse/utils/
+@@ -361,7 +361,10 @@ start_glusterfs ()
+     cmd_line=$(echo "$cmd_line $mount_point");
+     $cmd_line;
+     if [ $? -ne 0 ]; then
+-        warn "Mount failed. Please check the log file for more details."
++        # If this is true, then glusterfs process returned error without
++        # getting daemonized. We have made sure the logs are posted to
++        # 'stderr', so no need to point them to logfile.
++        warn "Mounting glusterfs on $mount_point failed."
+         exit 1;
+     fi
+@@ -369,7 +372,9 @@ start_glusterfs ()
+     inode=$( ${getinode} $mount_point 2>/dev/null);
+     # this is required if the stat returns error
+     if [ $? -ne 0 ]; then
+-        warn "Mount failed. Please check the log file for more details."
++        # At this time, glusterfs got daemonized, and then later exited.
++        # These failures are only logged in log file.
++        warn "Mount failed. Check the log file ${log_file} for more details."
+         umount $mount_point > /dev/null 2>&1;
+         exit 1;
+     fi
diff --git a/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch b/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch
new file mode 100644
index 0000000..6eb15b0
--- /dev/null
+++ b/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch
@@ -0,0 +1,44 @@
+From 147cff762b307bf60519bae4cdefc62f655119a7 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <>
+Date: Wed, 30 Oct 2019 10:47:17 +0530
+Subject: [PATCH 331/335] features/locks: Do special handling for op-version <
+ 3.12.0
+Patch diverges from
+its upstream patch( in op-version.
+On upstream special-handling happens for version < 3.10.0 whereas for downstream
+special-handling happens for version < 3.12.0.
+    When the rebase happened for 3.5.0 from upstream, this downstream-specific change
+was missed as there was no special downstream-only patch tracking this difference.
+This leads to I/O errors on upgrade from 3.3.1->3.5.0
+Do special handling for op-version < 3.12.0 as in 3.4.x
+Change-Id: I72fec058bdfb3cd30d017d205c90aa61aec86c5d
+BUG: 1766640
+Signed-off-by: Pranith Kumar K <>
+Reviewed-by: Xavi Hernandez Juan <>
+ xlators/features/locks/src/posix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 9db5ac6..4592240 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -57,7 +57,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
+     do {                                                                       \
+         pl_local_t *__local = NULL;                                            \
+         if (frame->root->client &&                                             \
+-            (frame->root->client->opversion < GD_OP_VERSION_3_10_0)) {         \
++            (frame->root->client->opversion < GD_OP_VERSION_3_12_0)) {         \
+             __local = frame->local;                                            \
+             PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params);     \
+         } else {                                                               \
diff --git a/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch b/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch
new file mode 100644
index 0000000..c9b2b56
--- /dev/null
+++ b/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch
@@ -0,0 +1,57 @@
+From 808f311bd4f38f06b8afc49fc8d2c65fc4797431 Mon Sep 17 00:00:00 2001
+From: kshithijiyer <>
+Date: Fri, 28 Jun 2019 15:32:31 +0530
+Subject: [PATCH 332/335] Removing one top command from gluster v help
+The current help shows 2 different top commands
+instead of one single top command which can be
+easily observed when "# gluster v help" command
+is issued. Removing one "volume top <VOLNAME>"
+and clubbing them into a single command.
+Current help:
+volume top <VOLNAME> {open|read|write|opendir|readdir|clear}
+[nfs|brick <brick>] [list-cnt <value>] |
+volume top <VOLNAME> {read-perf|write-perf}
+[bs <size> count <count>] [brick <brick>]
+[list-cnt <value>] - volume top operations
+Expected help:
+volume top <VOLNAME> {open|read|write|opendir|readdir|clear}
+[nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf}
+[bs <size> count <count>] [brick <brick>] [list-cnt <value>]
+- volume top operations
+> upstream patch:
+> fixes: bz#1725034
+> Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d
+> Signed-off-by: kshithijiyer <>
+BUG: 1726058
+Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d
+Signed-off-by: Sanju Rakonde <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ cli/src/cli-cmd-volume.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index 66beb1b..754d333 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -3427,8 +3427,8 @@ struct cli_cmd volume_cmds[] = {
+      cli_cmd_volume_profile_cbk, "volume profile operations"},
+     {"volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick "
+-     "<brick>] [list-cnt <value>] |\n"
+-     "volume top <VOLNAME> {read-perf|write-perf} [bs <size> count <count>] "
++     "<brick>] [list-cnt <value>] | "
++     "{read-perf|write-perf} [bs <size> count <count>] "
+      "[brick <brick>] [list-cnt <value>]",
+      cli_cmd_volume_top_cbk, "volume top operations"},
diff --git a/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch b/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch
new file mode 100644
index 0000000..b1d94b4
--- /dev/null
+++ b/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch
@@ -0,0 +1,195 @@
+From f199094cb61341a47c98a8ed91b293446182b5a9 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Thu, 3 Oct 2019 14:06:52 +0530
+Subject: [PATCH 333/335] rpc: Synchronize slot allocation code
+Problem: Current slot allocation/deallocation code path is not
+         synchronized. There are scenarios where, due to a race condition
+         in the slot allocation/deallocation code path, the brick crashes.
+Solution: Synchronize slot allocation/deallocation code path to
+          avoid the issue
+> Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25
+> Fixes: bz#1763036
+> Signed-off-by: Mohit Agrawal <>
+> (Reviewed on upstream link
+> (Cherry pick from commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2)
+Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25
+BUG: 1741193
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ libglusterfs/src/event-epoll.c | 74 +++++++++++++++++++++++-------------------
+ 1 file changed, 41 insertions(+), 33 deletions(-)
+diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
+index 0cec47e..65f5efd 100644
+--- a/libglusterfs/src/event-epoll.c
++++ b/libglusterfs/src/event-epoll.c
+@@ -69,15 +69,27 @@ __event_newtable(struct event_pool *event_pool, int table_idx)
+ }
+ static int
++event_slot_ref(struct event_slot_epoll *slot)
++    if (!slot)
++        return -1;
++    return GF_ATOMIC_INC(slot->ref);
++static int
+ __event_slot_alloc(struct event_pool *event_pool, int fd,
+-                   char notify_poller_death)
++                   char notify_poller_death, struct event_slot_epoll **slot)
+ {
+     int i = 0;
++    int j = 0;
+     int table_idx = -1;
+     int gen = -1;
+     struct event_slot_epoll *table = NULL;
+-    for (i = 0; i < EVENT_EPOLL_TABLES; i++) {
++    while (i < EVENT_EPOLL_TABLES) {
+         switch (event_pool->slots_used[i]) {
+             case EVENT_EPOLL_SLOTS:
+                 continue;
+@@ -98,6 +110,7 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
+         if (table)
+             /* break out of the loop */
+             break;
++        i++;
+     }
+     if (!table)
+@@ -105,20 +118,20 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
+     table_idx = i;
+-    for (i = 0; i < EVENT_EPOLL_SLOTS; i++) {
+-        if (table[i].fd == -1) {
++    for (j = 0; j < EVENT_EPOLL_SLOTS; j++) {
++        if (table[j].fd == -1) {
+             /* wipe everything except bump the generation */
+-            gen = table[i].gen;
+-            memset(&table[i], 0, sizeof(table[i]));
+-            table[i].gen = gen + 1;
++            gen = table[j].gen;
++            memset(&table[j], 0, sizeof(table[j]));
++            table[j].gen = gen + 1;
+-            LOCK_INIT(&table[i].lock);
+-            INIT_LIST_HEAD(&table[i].poller_death);
++            LOCK_INIT(&table[j].lock);
++            INIT_LIST_HEAD(&table[j].poller_death);
+-            table[i].fd = fd;
++            table[j].fd = fd;
+             if (notify_poller_death) {
+-                table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i;
+-                list_add_tail(&table[i].poller_death,
++                table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j;
++                list_add_tail(&table[j].poller_death,
+                               &event_pool->poller_death);
+             }
+@@ -128,18 +141,26 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
+         }
+     }
+-    return table_idx * EVENT_EPOLL_SLOTS + i;
++    if (j == EVENT_EPOLL_SLOTS) {
++        table = NULL;
++        i++;
++        goto retry;
++    } else {
++        (*slot) = &table[j];
++        event_slot_ref(*slot);
++        return table_idx * EVENT_EPOLL_SLOTS + j;
++    }
+ }
+ static int
+ event_slot_alloc(struct event_pool *event_pool, int fd,
+-                 char notify_poller_death)
++                 char notify_poller_death, struct event_slot_epoll **slot)
+ {
+     int idx = -1;
+     pthread_mutex_lock(&event_pool->mutex);
+     {
+-        idx = __event_slot_alloc(event_pool, fd, notify_poller_death);
++        idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot);
+     }
+     pthread_mutex_unlock(&event_pool->mutex);
+@@ -153,6 +174,7 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx)
+     int offset = 0;
+     struct event_slot_epoll *table = NULL;
+     struct event_slot_epoll *slot = NULL;
++    int fd = -1;
+     table_idx = idx / EVENT_EPOLL_SLOTS;
+     offset = idx % EVENT_EPOLL_SLOTS;
+@@ -164,11 +186,13 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx)
+     slot = &table[offset];
+     slot->gen++;
++    fd = slot->fd;
+     slot->fd = -1;
+     slot->handled_error = 0;
+     slot->in_handler = 0;
+     list_del_init(&slot->poller_death);
+-    event_pool->slots_used[table_idx]--;
++    if (fd != -1)
++        event_pool->slots_used[table_idx]--;
+     return;
+ }
+@@ -185,15 +209,6 @@ event_slot_dealloc(struct event_pool *event_pool, int idx)
+     return;
+ }
+-static int
+-event_slot_ref(struct event_slot_epoll *slot)
+-    if (!slot)
+-        return -1;
+-    return GF_ATOMIC_INC(slot->ref);
+ static struct event_slot_epoll *
+ event_slot_get(struct event_pool *event_pool, int idx)
+ {
+@@ -379,20 +394,13 @@ event_register_epoll(struct event_pool *event_pool, int fd,
+     if (destroy == 1)
+         goto out;
+-    idx = event_slot_alloc(event_pool, fd, notify_poller_death);
++    idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot);
+     if (idx == -1) {
+         gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
+                "could not find slot for fd=%d", fd);
+         return -1;
+     }
+-    slot = event_slot_get(event_pool, idx);
+-    if (!slot) {
+-        gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
+-               "could not find slot for fd=%d idx=%d", fd, idx);
+-        return -1;
+-    }
+     assert(slot->fd == fd);
+     LOCK(&slot->lock);
diff --git a/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch b/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch
new file mode 100644
index 0000000..48f927f
--- /dev/null
+++ b/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch
@@ -0,0 +1,54 @@
+From 17940583c4d991a568582581f68dcbf08463ccaf Mon Sep 17 00:00:00 2001
+From: Susant Palai <>
+Date: Tue, 16 Jul 2019 10:31:46 +0530
+Subject: [PATCH 334/335] dht: log getxattr failure for node-uuid at "DEBUG"
+There are two ways to fetch node-uuid information from dht.
+1 - #define GF_XATTR_LIST_NODE_UUIDS_KEY "trusted.glusterfs.list-node-uuids"
+This key is used by AFR.
+2 - #define GF_REBAL_FIND_LOCAL_SUBVOL "glusterfs.find-local-subvol"
+This key is used for non-afr volume type.
+We do two getxattr operations. First on the #1 key followed by on #2 if
+getxattr on #1 key fails.
+Since the parent function "dht_init_local_subvols_and_nodeuuids" logs failure,
+moving the log-level to DEBUG in dht_find_local_subvol_cbk.
+>fixes: bz#1730175
+>Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14
+>Signed-off-by: Susant Palai <>
+Upstream patch:
+BUG: 1727755
+Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14
+Signed-off-by: Sunil Kumar Acharya <>
+Tested-by: RHGS Build Bot <>
+ xlators/cluster/dht/src/dht-common.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 37952ba..d0b5287 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -4253,8 +4253,11 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+             local->op_ret = -1;
+             local->op_errno = op_errno;
+             UNLOCK(&frame->lock);
+-            gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
+-                   "getxattr err for dir");
++            if (op_errno == ENODATA)
++                gf_msg_debug(this->name, 0, "failed to get node-uuid");
++            else
++                gf_msg(this->name, GF_LOG_ERROR, op_errno,
++                       DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid");
+             goto post_unlock;
+         }
diff --git a/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch b/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch
new file mode 100644
index 0000000..c3341df
--- /dev/null
+++ b/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch
@@ -0,0 +1,15991 @@
+From 39523fd6c1b4789b12c8db81f4e08a3eb0c6a65c Mon Sep 17 00:00:00 2001
+From: Sunil Kumar Acharya <>
+Date: Thu, 17 Oct 2019 13:03:56 +0530
+Subject: [PATCH 335/335] tests: RHEL8 test failure fixes for RHGS
+- tests/bugs/shard/bug-1272986.t
+- tests/basic/posix/shared-statfs.t
+- tests/basic/fops-sanity.t
+- tests/bugs/transport/bug-873367.t
+- tests/features/ssl-authz.t
+- tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
+- remove gnfs related tests
+- tests/bugs/shard/unlinks-and-renames.t
+- tests/bugs/rpc/bug-954057.t
+- tests/bugs/glusterfs-server/bug-887145.t
+- tests/features/ssl-ciphers.t
+- tests/bugs/fuse/bug-985074.t
+BUG: 1762180
+Change-Id: I97b344a632b49ca9ca332a5a463756b160aee5bd
+Signed-off-by: Sunil Kumar Acharya <>
+Tested-by: RHGS Build Bot <>
+ tests/basic/fops-sanity.c                         |  1862 ++--
+ tests/basic/posix/shared-statfs.t                 |    11 +-
+ tests/bugs/cli/bug-1320388.t                      |     2 +-
+ tests/bugs/fuse/bug-985074.t                      |     4 +-
+ tests/bugs/glusterd/quorum-value-check.t          |    35 -
+ tests/bugs/glusterfs-server/bug-887145.t          |    14 +-
+ tests/bugs/nfs/bug-1053579.t                      |   114 -
+ tests/bugs/nfs/bug-1116503.t                      |    47 -
+ tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t |    24 -
+ tests/bugs/nfs/bug-1157223-symlink-mounting.t     |   126 -
+ tests/bugs/nfs/bug-1161092-nfs-acls.t             |    39 -
+ tests/bugs/nfs/bug-1166862.t                      |    69 -
+ tests/bugs/nfs/bug-1210338.c                      |    31 -
+ tests/bugs/nfs/bug-1210338.t                      |    30 -
+ tests/bugs/nfs/bug-1302948.t                      |    13 -
+ tests/bugs/nfs/bug-847622.t                       |    39 -
+ tests/bugs/nfs/bug-877885.t                       |    39 -
+ tests/bugs/nfs/bug-904065.t                       |   100 -
+ tests/bugs/nfs/bug-915280.t                       |    54 -
+ tests/bugs/nfs/bug-970070.t                       |    13 -
+ tests/bugs/nfs/bug-974972.t                       |    41 -
+ tests/bugs/nfs/showmount-many-clients.t           |    41 -
+ tests/bugs/nfs/                  |    33 -
+ tests/bugs/nfs/socket-as-fifo.t                   |    25 -
+ tests/bugs/nfs/subdir-trailing-slash.t            |    32 -
+ tests/bugs/nfs/zero-atime.t                       |    33 -
+ tests/bugs/rpc/bug-954057.t                       |    10 +-
+ tests/bugs/shard/bug-1272986.t                    |     6 +-
+ tests/bugs/transport/bug-873367.t                 |     2 +-
+ tests/features/ssl-authz.t                        |     2 +-
+ tests/features/ssl-ciphers.t                      |    61 +-
+ tests/ssl.rc                                      |     2 +-
+ xlators/features/shard/src/shard.c                | 11754 ++++++++++----------
+ 33 files changed, 6638 insertions(+), 8070 deletions(-)
+ delete mode 100755 tests/bugs/glusterd/quorum-value-check.t
+ delete mode 100755 tests/bugs/nfs/bug-1053579.t
+ delete mode 100644 tests/bugs/nfs/bug-1116503.t
+ delete mode 100644 tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
+ delete mode 100644 tests/bugs/nfs/bug-1157223-symlink-mounting.t
+ delete mode 100644 tests/bugs/nfs/bug-1161092-nfs-acls.t
+ delete mode 100755 tests/bugs/nfs/bug-1166862.t
+ delete mode 100644 tests/bugs/nfs/bug-1210338.c
+ delete mode 100644 tests/bugs/nfs/bug-1210338.t
+ delete mode 100755 tests/bugs/nfs/bug-1302948.t
+ delete mode 100755 tests/bugs/nfs/bug-847622.t
+ delete mode 100755 tests/bugs/nfs/bug-877885.t
+ delete mode 100755 tests/bugs/nfs/bug-904065.t
+ delete mode 100755 tests/bugs/nfs/bug-915280.t
+ delete mode 100755 tests/bugs/nfs/bug-970070.t
+ delete mode 100755 tests/bugs/nfs/bug-974972.t
+ delete mode 100644 tests/bugs/nfs/showmount-many-clients.t
+ delete mode 100755 tests/bugs/nfs/
+ delete mode 100644 tests/bugs/nfs/socket-as-fifo.t
+ delete mode 100644 tests/bugs/nfs/subdir-trailing-slash.t
+ delete mode 100755 tests/bugs/nfs/zero-atime.t
+diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c
+index aff72d8..171d003 100644
+--- a/tests/basic/fops-sanity.c
++++ b/tests/basic/fops-sanity.c
+@@ -17,15 +17,16 @@
+ /* Filesystem basic sanity check, tests all (almost) fops. */
+-#include <stdio.h>
++#include <dirent.h>
++#include <errno.h>
+ #include <fcntl.h>
+-#include <unistd.h>
+-#include <sys/types.h>
++#include <stdio.h>
++#include <string.h>
+ #include <sys/stat.h>
++#include <sys/sysmacros.h>
++#include <sys/types.h>
+ #include <sys/xattr.h>
+-#include <errno.h>
+-#include <string.h>
+-#include <dirent.h>
++#include <unistd.h>
+ #ifndef linux
+ #include <sys/socket.h>
+@@ -34,904 +35,880 @@
+ #endif
+ /* for fd based fops after unlink */
+-fd_based_fops_1(char *filename);
++int fd_based_fops_1(char *filename);
+ /* for fd based fops before unlink */
+-fd_based_fops_2(char *filename);
++int fd_based_fops_2(char *filename);
+ /* fops based on fd after dup */
+-dup_fd_based_fops(char *filename);
++int dup_fd_based_fops(char *filename);
+ /* for fops based on path */
+-path_based_fops(char *filename);
++int path_based_fops(char *filename);
+ /* for fops which operate on directory */
+-dir_based_fops(char *filename);
++int dir_based_fops(char *filename);
+ /* for fops which operate in link files (symlinks) */
+-link_based_fops(char *filename);
++int link_based_fops(char *filename);
+ /* to test open syscall with open modes available. */
+-test_open_modes(char *filename);
++int test_open_modes(char *filename);
+ /* generic function which does open write and read. */
+-generic_open_read_write(char *filename, int flag, mode_t mode);
++int generic_open_read_write(char *filename, int flag, mode_t mode);
+ #define OPEN_MODE 0666
+-main(int argc, char *argv[])
+-    int ret = -1;
+-    int result = 0;
+-    char filename[255] = {
+-        0,
+-    };
+-    if (argc > 1)
+-        strcpy(filename, argv[1]);
+-    else
+-        strcpy(filename, "temp-xattr-test-file");
+-    ret = fd_based_fops_1(strcat(filename, "_1"));
+-    if (ret < 0) {
+-        fprintf(stderr, "fd based file operation 1 failed\n");
+-        result |= ret;
+-    } else {
+-        fprintf(stdout, "fd based file operation 1 passed\n");
+-    }
+-    ret = fd_based_fops_2(strcat(filename, "_2"));
+-    if (ret < 0) {
+-        result |= ret;
+-        fprintf(stderr, "fd based file operation 2 failed\n");
+-    } else {
+-        fprintf(stdout, "fd based file operation 2 passed\n");
+-    }
+-    ret = dup_fd_based_fops(strcat(filename, "_3"));
+-    if (ret < 0) {
+-        result |= ret;
+-        fprintf(stderr, "dup fd based file operation failed\n");
+-    } else {
+-        fprintf(stdout, "dup fd based file operation passed\n");
+-    }
+-    ret = path_based_fops(strcat(filename, "_4"));
+-    if (ret < 0) {
+-        result |= ret;
+-        fprintf(stderr, "path based file operation failed\n");
+-    } else {
+-        fprintf(stdout, "path based file operation passed\n");
+-    }
+-    ret = dir_based_fops(strcat(filename, "_5"));
+-    if (ret < 0) {
+-        result |= ret;
+-        fprintf(stderr, "directory based file operation failed\n");
+-    } else {
+-        fprintf(stdout, "directory based file operation passed\n");
+-    }
+-    ret = link_based_fops(strcat(filename, "_5"));
+-    if (ret < 0) {
+-        result |= ret;
+-        fprintf(stderr, "link based file operation failed\n");
+-    } else {
+-        fprintf(stdout, "link based file operation passed\n");
+-    }
+-    ret = test_open_modes(strcat(filename, "_5"));
+-    if (ret < 0) {
+-        result |= ret;
+-        fprintf(stderr, "testing modes of `open' call failed\n");
+-    } else {
+-        fprintf(stdout, "testing modes of `open' call passed\n");
+-    }
+-    return result;
++int main(int argc, char *argv[]) {
++  int ret = -1;
++  int result = 0;
++  char filename[255] = {
++      0,
++  };
++  if (argc > 1)
++    strcpy(filename, argv[1]);
++  else
++    strcpy(filename, "temp-xattr-test-file");
++  ret = fd_based_fops_1(strcat(filename, "_1"));
++  if (ret < 0) {
++    fprintf(stderr, "fd based file operation 1 failed\n");
++    result |= ret;
++  } else {
++    fprintf(stdout, "fd based file operation 1 passed\n");
++  }
++  ret = fd_based_fops_2(strcat(filename, "_2"));
++  if (ret < 0) {
++    result |= ret;
++    fprintf(stderr, "fd based file operation 2 failed\n");
++  } else {
++    fprintf(stdout, "fd based file operation 2 passed\n");
++  }
++  ret = dup_fd_based_fops(strcat(filename, "_3"));
++  if (ret < 0) {
++    result |= ret;
++    fprintf(stderr, "dup fd based file operation failed\n");
++  } else {
++    fprintf(stdout, "dup fd based file operation passed\n");
++  }
++  ret = path_based_fops(strcat(filename, "_4"));
++  if (ret < 0) {
++    result |= ret;
++    fprintf(stderr, "path based file operation failed\n");
++  } else {
++    fprintf(stdout, "path based file operation passed\n");
++  }
++  ret = dir_based_fops(strcat(filename, "_5"));
++  if (ret < 0) {
++    result |= ret;
++    fprintf(stderr, "directory based file operation failed\n");
++  } else {
++    fprintf(stdout, "directory based file operation passed\n");
++  }
++  ret = link_based_fops(strcat(filename, "_5"));
++  if (ret < 0) {
++    result |= ret;
++    fprintf(stderr, "link based file operation failed\n");
++  } else {
++    fprintf(stdout, "link based file operation passed\n");
++  }
++  ret = test_open_modes(strcat(filename, "_5"));
++  if (ret < 0) {
++    result |= ret;
++    fprintf(stderr, "testing modes of `open' call failed\n");
++  } else {
++    fprintf(stdout, "testing modes of `open' call passed\n");
++  }
++  return result;
+ }
+ /* Execute all possible fops on a fd which is unlinked */
+-fd_based_fops_1(char *filename)
+-    int fd = 0;
+-    int ret = -1;
+-    int result = 0;
+-    struct stat stbuf = {
+-        0,
+-    };
+-    char wstr[50] = {
+-        0,
+-    };
+-    char rstr[50] = {
+-        0,
+-    };
+-    fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+-    if (fd < 0) {
+-        fprintf(stderr, "open failed : %s\n", strerror(errno));
+-        return ret;
+-    }
+-    ret = unlink(filename);
+-    if (ret < 0) {
+-        fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    strcpy(wstr, "This is my string\n");
+-    ret = write(fd, wstr, strlen(wstr));
+-    if (ret <= 0) {
+-        fprintf(stderr, "write failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = lseek(fd, 0, SEEK_SET);
+-    if (ret < 0) {
+-        fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = read(fd, rstr, strlen(wstr));
+-    if (ret <= 0) {
+-        fprintf(stderr, "read failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = memcmp(rstr, wstr, strlen(wstr));
+-    if (ret != 0) {
+-        fprintf(stderr, "read returning junk\n");
+-        result |= ret;
+-    }
+-    ret = ftruncate(fd, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fstat(fd, &stbuf);
+-    if (ret < 0) {
+-        fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fsync(fd);
+-    if (ret < 0) {
+-        fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fdatasync(fd);
+-    if (ret < 0) {
+-        fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    /*
+-     *      These metadata operations fail at the moment because kernel doesn't
+-     *      pass the client fd in the operation.
+-     *      The following bug tracks this change.
+-     *
+-     *      ret = fchmod (fd, 0640);
+-     *      if (ret < 0) {
+-     *              fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+-     *              result |= ret;
+-     *      }
+-     *      ret = fchown (fd, 10001, 10001);
+-     *      if (ret < 0) {
+-     *              fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+-     *              result |= ret;
+-     *      }
+-     *      ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
+-     *      if (ret < 0) {
+-     *              fprintf (stderr, "fsetxattr failed : %s\n", strerror
+-     (errno));
+-     *              result |= ret;
+-     *      }
+-     *      ret = flistxattr (fd, NULL, 0);
+-     *      if (ret <= 0) {
+-     *              fprintf (stderr, "flistxattr failed : %s\n", strerror
+-     (errno));
+-     *              result |= ret;
+-     *      }
+-     *      ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
+-     *      if (ret <= 0) {
+-     *              fprintf (stderr, "fgetxattr failed : %s\n", strerror
+-     (errno));
+-     *              result |= ret;
+-     *      }
+-     *      ret = fremovexattr (fd, "trusted.xattr-test");
+-     *      if (ret < 0) {
+-     *              fprintf (stderr, "fremovexattr failed : %s\n", strerror
+-     (errno));
+-     *              result |= ret;
+-     *      }
+-     */
+-    if (fd)
+-        close(fd);
+-    return result;
++int fd_based_fops_1(char *filename) {
++  int fd = 0;
++  int ret = -1;
++  int result = 0;
++  struct stat stbuf = {
++      0,
++  };
++  char wstr[50] = {
++      0,
++  };
++  char rstr[50] = {
++      0,
++  };
++  fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
++  if (fd < 0) {
++    fprintf(stderr, "open failed : %s\n", strerror(errno));
++    return ret;
++  }
++  ret = unlink(filename);
++  if (ret < 0) {
++    fprintf(stderr, "unlink failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  strcpy(wstr, "This is my string\n");
++  ret = write(fd, wstr, strlen(wstr));
++  if (ret <= 0) {
++    fprintf(stderr, "write failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = lseek(fd, 0, SEEK_SET);
++  if (ret < 0) {
++    fprintf(stderr, "lseek failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = read(fd, rstr, strlen(wstr));
++  if (ret <= 0) {
++    fprintf(stderr, "read failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = memcmp(rstr, wstr, strlen(wstr));
++  if (ret != 0) {
++    fprintf(stderr, "read returning junk\n");
++    result |= ret;
++  }
++  ret = ftruncate(fd, 0);
++  if (ret < 0) {
++    fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fstat(fd, &stbuf);
++  if (ret < 0) {
++    fprintf(stderr, "fstat failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fsync(fd);
++  if (ret < 0) {
++    fprintf(stderr, "fsync failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fdatasync(fd);
++  if (ret < 0) {
++    fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  /*
++   *      These metadata operations fail at the moment because kernel doesn't
++   *      pass the client fd in the operation.
++   *      The following bug tracks this change.
++   *
++   *      ret = fchmod (fd, 0640);
++   *      if (ret < 0) {
++   *              fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
++   *              result |= ret;
++   *      }
++   *      ret = fchown (fd, 10001, 10001);
++   *      if (ret < 0) {
++   *              fprintf (stderr, "fchown failed : %s\n", strerror (errno));
++   *              result |= ret;
++   *      }
++   *      ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
++   *      if (ret < 0) {
++   *              fprintf (stderr, "fsetxattr failed : %s\n", strerror
++   (errno));
++   *              result |= ret;
++   *      }
++   *      ret = flistxattr (fd, NULL, 0);
++   *      if (ret <= 0) {
++   *              fprintf (stderr, "flistxattr failed : %s\n", strerror
++   (errno));
++   *              result |= ret;
++   *      }
++   *      ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
++   *      if (ret <= 0) {
++   *              fprintf (stderr, "fgetxattr failed : %s\n", strerror
++   (errno));
++   *              result |= ret;
++   *      }
++   *      ret = fremovexattr (fd, "trusted.xattr-test");
++   *      if (ret < 0) {
++   *              fprintf (stderr, "fremovexattr failed : %s\n", strerror
++   (errno));
++   *              result |= ret;
++   *      }
++   */
++  if (fd)
++    close(fd);
++  return result;
+ }
+-fd_based_fops_2(char *filename)
+-    int fd = 0;
+-    int ret = -1;
+-    int result = 0;
+-    struct stat stbuf = {
+-        0,
+-    };
+-    char wstr[50] = {
+-        0,
+-    };
+-    char rstr[50] = {
+-        0,
+-    };
+-    fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+-    if (fd < 0) {
+-        fprintf(stderr, "open failed : %s\n", strerror(errno));
+-        return ret;
+-    }
+-    ret = ftruncate(fd, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    strcpy(wstr, "This is my second string\n");
+-    ret = write(fd, wstr, strlen(wstr));
+-    if (ret < 0) {
+-        fprintf(stderr, "write failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    lseek(fd, 0, SEEK_SET);
+-    if (ret < 0) {
+-        fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = read(fd, rstr, strlen(wstr));
+-    if (ret <= 0) {
+-        fprintf(stderr, "read failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = memcmp(rstr, wstr, strlen(wstr));
+-    if (ret != 0) {
+-        fprintf(stderr, "read returning junk\n");
+-        result |= ret;
+-    }
+-    ret = fstat(fd, &stbuf);
+-    if (ret < 0) {
+-        fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fchmod(fd, 0640);
+-    if (ret < 0) {
+-        fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fchown(fd, 10001, 10001);
+-    if (ret < 0) {
+-        fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fsync(fd);
+-    if (ret < 0) {
+-        fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fdatasync(fd);
+-    if (ret < 0) {
+-        fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = flistxattr(fd, NULL, 0);
+-    if (ret <= 0) {
+-        fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
+-    if (ret <= 0) {
+-        fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fremovexattr(fd, "trusted.xattr-test");
+-    if (ret < 0) {
+-        fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    if (fd)
+-        close(fd);
+-    unlink(filename);
++int fd_based_fops_2(char *filename) {
++  int fd = 0;
++  int ret = -1;
++  int result = 0;
++  struct stat stbuf = {
++      0,
++  };
++  char wstr[50] = {
++      0,
++  };
++  char rstr[50] = {
++      0,
++  };
++  fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
++  if (fd < 0) {
++    fprintf(stderr, "open failed : %s\n", strerror(errno));
++    return ret;
++  }
++  ret = ftruncate(fd, 0);
++  if (ret < 0) {
++    fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  strcpy(wstr, "This is my second string\n");
++  ret = write(fd, wstr, strlen(wstr));
++  if (ret < 0) {
++    fprintf(stderr, "write failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  lseek(fd, 0, SEEK_SET);
++  if (ret < 0) {
++    fprintf(stderr, "lseek failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = read(fd, rstr, strlen(wstr));
++  if (ret <= 0) {
++    fprintf(stderr, "read failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = memcmp(rstr, wstr, strlen(wstr));
++  if (ret != 0) {
++    fprintf(stderr, "read returning junk\n");
++    result |= ret;
++  }
++  ret = fstat(fd, &stbuf);
++  if (ret < 0) {
++    fprintf(stderr, "fstat failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fchmod(fd, 0640);
++  if (ret < 0) {
++    fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fchown(fd, 10001, 10001);
++  if (ret < 0) {
++    fprintf(stderr, "fchown failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fsync(fd);
++  if (ret < 0) {
++    fprintf(stderr, "fsync failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
++  if (ret < 0) {
++    fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fdatasync(fd);
++  if (ret < 0) {
++    fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = flistxattr(fd, NULL, 0);
++  if (ret <= 0) {
++    fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
++  if (ret <= 0) {
++    fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fremovexattr(fd, "trusted.xattr-test");
++  if (ret < 0) {
++    fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  if (fd)
++    close(fd);
++  unlink(filename);
+-    return result;
++  return result;
+ }
+-path_based_fops(char *filename)
+-    int ret = -1;
+-    int fd = 0;
+-    int result = 0;
+-    struct stat stbuf = {
+-        0,
+-    };
+-    char newfilename[255] = {
+-        0,
+-    };
+-    char *hardlink = "linkfile-hard.txt";
+-    char *symlnk = "linkfile-soft.txt";
+-    char buf[1024] = {
+-        0,
+-    };
+-    fd = creat(filename, 0644);
+-    if (fd < 0) {
+-        fprintf(stderr, "creat failed: %s\n", strerror(errno));
+-        return ret;
+-    }
+-    ret = truncate(filename, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "truncate failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = stat(filename, &stbuf);
+-    if (ret < 0) {
+-        fprintf(stderr, "stat failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = chmod(filename, 0640);
+-    if (ret < 0) {
+-        fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = chown(filename, 10001, 10001);
+-    if (ret < 0) {
+-        fprintf(stderr, "chown failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = listxattr(filename, NULL, 0);
+-    if (ret <= 0) {
+-        ret = -1;
+-        fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = getxattr(filename, "trusted.xattr-test", NULL, 0);
+-    if (ret <= 0) {
+-        fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = removexattr(filename, "trusted.xattr-test");
+-    if (ret < 0) {
+-        fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = access(filename, R_OK | W_OK);
+-    if (ret < 0) {
+-        fprintf(stderr, "access failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = link(filename, hardlink);
+-    if (ret < 0) {
+-        fprintf(stderr, "link failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    unlink(hardlink);
+-    ret = symlink(filename, symlnk);
+-    if (ret < 0) {
+-        fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = readlink(symlnk, buf, sizeof(buf));
+-    if (ret < 0) {
+-        fprintf(stderr, "readlink failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    unlink(symlnk);
+-    /* Create a character special file */
+-    ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3));
+-    if (ret < 0) {
+-        fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    unlink("cspecial");
+-    ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5));
+-    if (ret < 0) {
+-        fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    unlink("bspecial");
++int path_based_fops(char *filename) {
++  int ret = -1;
++  int fd = 0;
++  int result = 0;
++  struct stat stbuf = {
++      0,
++  };
++  char newfilename[255] = {
++      0,
++  };
++  char *hardlink = "linkfile-hard.txt";
++  char *symlnk = "linkfile-soft.txt";
++  char buf[1024] = {
++      0,
++  };
++  fd = creat(filename, 0644);
++  if (fd < 0) {
++    fprintf(stderr, "creat failed: %s\n", strerror(errno));
++    return ret;
++  }
++  ret = truncate(filename, 0);
++  if (ret < 0) {
++    fprintf(stderr, "truncate failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = stat(filename, &stbuf);
++  if (ret < 0) {
++    fprintf(stderr, "stat failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = chmod(filename, 0640);
++  if (ret < 0) {
++    fprintf(stderr, "chmod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = chown(filename, 10001, 10001);
++  if (ret < 0) {
++    fprintf(stderr, "chown failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
++  if (ret < 0) {
++    fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = listxattr(filename, NULL, 0);
++  if (ret <= 0) {
++    ret = -1;
++    fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = getxattr(filename, "trusted.xattr-test", NULL, 0);
++  if (ret <= 0) {
++    fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = removexattr(filename, "trusted.xattr-test");
++  if (ret < 0) {
++    fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = access(filename, R_OK | W_OK);
++  if (ret < 0) {
++    fprintf(stderr, "access failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = link(filename, hardlink);
++  if (ret < 0) {
++    fprintf(stderr, "link failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink(hardlink);
++  ret = symlink(filename, symlnk);
++  if (ret < 0) {
++    fprintf(stderr, "symlink failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = readlink(symlnk, buf, sizeof(buf));
++  if (ret < 0) {
++    fprintf(stderr, "readlink failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink(symlnk);
++  /* Create a character special file */
++  ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3));
++  if (ret < 0) {
++    fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink("cspecial");
++  ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5));
++  if (ret < 0) {
++    fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink("bspecial");
+ #ifdef linux
+-    ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0);
++  ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0);
+ #else
+-    ret = mkfifo("fifo", 0);
++  ret = mkfifo("fifo", 0);
+ #endif
+-    if (ret < 0) {
+-        fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    unlink("fifo");
++  if (ret < 0) {
++    fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink("fifo");
+ #ifdef linux
+-    ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "sock mknod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
++  ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0);
++  if (ret < 0) {
++    fprintf(stderr, "sock mknod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
+ #else
+-    {
+-        int s;
+-        const char *pathname = "sock";
+-        struct sockaddr_un addr;
+-        s = socket(PF_LOCAL, SOCK_STREAM, 0);
+-        memset(&addr, 0, sizeof(addr));
+-        strncpy(addr.sun_path, pathname, sizeof(addr.sun_path));
+-        ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr));
+-        if (ret < 0) {
+-            fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+-            result |= ret;
+-        }
+-        close(s);
+-    }
+-    unlink("sock");
++  {
++    int s;
++    const char *pathname = "sock";
++    struct sockaddr_un addr;
+-    strcpy(newfilename, filename);
+-    strcat(newfilename, "_new");
+-    ret = rename(filename, newfilename);
++    s = socket(PF_LOCAL, SOCK_STREAM, 0);
++    memset(&addr, 0, sizeof(addr));
++    strncpy(addr.sun_path, pathname, sizeof(addr.sun_path));
++    ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr));
+     if (ret < 0) {
+-        fprintf(stderr, "rename failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    unlink(newfilename);
+-    if (fd)
+-        close(fd);
+-    unlink(filename);
+-    return result;
+-dup_fd_based_fops(char *filename)
+-    int fd = 0;
+-    int result = 0;
+-    int newfd = 0;
+-    int ret = -1;
+-    struct stat stbuf = {
+-        0,
+-    };
+-    char wstr[50] = {
+-        0,
+-    };
+-    char rstr[50] = {
+-        0,
+-    };
+-    fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+-    if (fd < 0) {
+-        fprintf(stderr, "open failed : %s\n", strerror(errno));
+-        return ret;
+-    }
+-    newfd = dup(fd);
+-    if (newfd < 0) {
+-        fprintf(stderr, "dup failed: %s\n", strerror(errno));
+-        result |= ret;
++      fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
++      result |= ret;
+     }
++    close(s);
++  }
++  unlink("sock");
++  strcpy(newfilename, filename);
++  strcat(newfilename, "_new");
++  ret = rename(filename, newfilename);
++  if (ret < 0) {
++    fprintf(stderr, "rename failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink(newfilename);
++  if (fd)
+     close(fd);
+-    strcpy(wstr, "This is my string\n");
+-    ret = write(newfd, wstr, strlen(wstr));
+-    if (ret <= 0) {
+-        fprintf(stderr, "write failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = lseek(newfd, 0, SEEK_SET);
+-    if (ret < 0) {
+-        fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = read(newfd, rstr, strlen(wstr));
+-    if (ret <= 0) {
+-        fprintf(stderr, "read failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = memcmp(rstr, wstr, strlen(wstr));
+-    if (ret != 0) {
+-        fprintf(stderr, "read returning junk\n");
+-        result |= ret;
+-    }
+-    ret = ftruncate(newfd, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fstat(newfd, &stbuf);
+-    if (ret < 0) {
+-        fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fchmod(newfd, 0640);
+-    if (ret < 0) {
+-        fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fchown(newfd, 10001, 10001);
+-    if (ret < 0) {
+-        fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fsync(newfd);
+-    if (ret < 0) {
+-        fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fdatasync(newfd);
+-    if (ret < 0) {
+-        fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = flistxattr(newfd, NULL, 0);
+-    if (ret <= 0) {
+-        fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0);
+-    if (ret <= 0) {
+-        fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = fremovexattr(newfd, "trusted.xattr-test");
+-    if (ret < 0) {
+-        fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    if (newfd)
+-        close(newfd);
+-    ret = unlink(filename);
+-    if (ret < 0) {
+-        fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    return result;
++  unlink(filename);
++  return result;
+ }
+-dir_based_fops(char *dirname)
+-    int ret = -1;
+-    int result = 0;
+-    DIR *dp = NULL;
+-    char buff[255] = {
+-        0,
+-    };
+-    struct dirent *dbuff = {
+-        0,
+-    };
+-    struct stat stbuff = {
+-        0,
+-    };
+-    char newdname[255] = {
+-        0,
+-    };
+-    char *cwd = NULL;
+-    ret = mkdir(dirname, 0755);
+-    if (ret < 0) {
+-        fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
+-        return ret;
+-    }
+-    dp = opendir(dirname);
+-    if (dp == NULL) {
+-        fprintf(stderr, "opendir failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    dbuff = readdir(dp);
+-    if (NULL == dbuff) {
+-        fprintf(stderr, "readdir failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = closedir(dp);
+-    if (ret < 0) {
+-        fprintf(stderr, "closedir failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = stat(dirname, &stbuff);
+-    if (ret < 0) {
+-        fprintf(stderr, "stat failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = chmod(dirname, 0744);
+-    if (ret < 0) {
+-        fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = chown(dirname, 10001, 10001);
+-    if (ret < 0) {
+-        fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = listxattr(dirname, NULL, 0);
+-    if (ret <= 0) {
+-        ret = -1;
+-        fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = getxattr(dirname, "trusted.xattr-test", NULL, 0);
+-    if (ret <= 0) {
+-        ret = -1;
+-        fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = removexattr(dirname, "trusted.xattr-test");
+-    if (ret < 0) {
+-        fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    strcpy(newdname, dirname);
+-    strcat(newdname, "/../");
+-    ret = chdir(newdname);
+-    if (ret < 0) {
+-        fprintf(stderr, "chdir failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    cwd = getcwd(buff, 255);
+-    if (NULL == cwd) {
+-        fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    strcpy(newdname, dirname);
+-    strcat(newdname, "new");
+-    ret = rename(dirname, newdname);
+-    if (ret < 0) {
+-        fprintf(stderr, "rename failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = rmdir(newdname);
+-    if (ret < 0) {
+-        fprintf(stderr, "rmdir failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    rmdir(dirname);
+-    return result;
++int dup_fd_based_fops(char *filename) {
++  int fd = 0;
++  int result = 0;
++  int newfd = 0;
++  int ret = -1;
++  struct stat stbuf = {
++      0,
++  };
++  char wstr[50] = {
++      0,
++  };
++  char rstr[50] = {
++      0,
++  };
++  fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
++  if (fd < 0) {
++    fprintf(stderr, "open failed : %s\n", strerror(errno));
++    return ret;
++  }
++  newfd = dup(fd);
++  if (newfd < 0) {
++    fprintf(stderr, "dup failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  close(fd);
++  strcpy(wstr, "This is my string\n");
++  ret = write(newfd, wstr, strlen(wstr));
++  if (ret <= 0) {
++    fprintf(stderr, "write failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = lseek(newfd, 0, SEEK_SET);
++  if (ret < 0) {
++    fprintf(stderr, "lseek failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = read(newfd, rstr, strlen(wstr));
++  if (ret <= 0) {
++    fprintf(stderr, "read failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = memcmp(rstr, wstr, strlen(wstr));
++  if (ret != 0) {
++    fprintf(stderr, "read returning junk\n");
++    result |= ret;
++  }
++  ret = ftruncate(newfd, 0);
++  if (ret < 0) {
++    fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fstat(newfd, &stbuf);
++  if (ret < 0) {
++    fprintf(stderr, "fstat failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fchmod(newfd, 0640);
++  if (ret < 0) {
++    fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fchown(newfd, 10001, 10001);
++  if (ret < 0) {
++    fprintf(stderr, "fchown failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fsync(newfd);
++  if (ret < 0) {
++    fprintf(stderr, "fsync failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0);
++  if (ret < 0) {
++    fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fdatasync(newfd);
++  if (ret < 0) {
++    fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = flistxattr(newfd, NULL, 0);
++  if (ret <= 0) {
++    fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0);
++  if (ret <= 0) {
++    fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = fremovexattr(newfd, "trusted.xattr-test");
++  if (ret < 0) {
++    fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  if (newfd)
++    close(newfd);
++  ret = unlink(filename);
++  if (ret < 0) {
++    fprintf(stderr, "unlink failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++  return result;
+ }
+-link_based_fops(char *filename)
+-    int ret = -1;
+-    int result = 0;
+-    int fd = 0;
+-    char newname[255] = {
+-        0,
+-    };
+-    char linkname[255] = {
+-        0,
+-    };
+-    struct stat lstbuf = {
+-        0,
+-    };
+-    fd = creat(filename, 0644);
+-    if (fd < 0) {
+-        fd = 0;
+-        fprintf(stderr, "creat failed: %s\n", strerror(errno));
+-        return ret;
+-    }
+-    strcpy(newname, filename);
+-    strcat(newname, "_hlink");
+-    ret = link(filename, newname);
+-    if (ret < 0) {
+-        fprintf(stderr, "link failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = unlink(filename);
+-    if (ret < 0) {
+-        fprintf(stderr, "unlink failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    strcpy(linkname, filename);
+-    strcat(linkname, "_slink");
+-    ret = symlink(newname, linkname);
+-    if (ret < 0) {
+-        fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = lstat(linkname, &lstbuf);
+-    if (ret < 0) {
+-        fprintf(stderr, "lstbuf failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = lchown(linkname, 10001, 10001);
+-    if (ret < 0) {
+-        fprintf(stderr, "lchown failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = llistxattr(linkname, NULL, 0);
+-    if (ret < 0) {
+-        ret = -1;
+-        fprintf(stderr, "llistxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0);
+-    if (ret < 0) {
+-        ret = -1;
+-        fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    ret = lremovexattr(linkname, "trusted.lxattr-test");
+-    if (ret < 0) {
+-        fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    if (fd)
+-        close(fd);
+-    unlink(linkname);
+-    unlink(newname);
+-    return result;
++int dir_based_fops(char *dirname) {
++  int ret = -1;
++  int result = 0;
++  DIR *dp = NULL;
++  char buff[255] = {
++      0,
++  };
++  struct dirent *dbuff = {
++      0,
++  };
++  struct stat stbuff = {
++      0,
++  };
++  char newdname[255] = {
++      0,
++  };
++  char *cwd = NULL;
++  ret = mkdir(dirname, 0755);
++  if (ret < 0) {
++    fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
++    return ret;
++  }
++  dp = opendir(dirname);
++  if (dp == NULL) {
++    fprintf(stderr, "opendir failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  dbuff = readdir(dp);
++  if (NULL == dbuff) {
++    fprintf(stderr, "readdir failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = closedir(dp);
++  if (ret < 0) {
++    fprintf(stderr, "closedir failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = stat(dirname, &stbuff);
++  if (ret < 0) {
++    fprintf(stderr, "stat failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = chmod(dirname, 0744);
++  if (ret < 0) {
++    fprintf(stderr, "chmod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = chown(dirname, 10001, 10001);
++  if (ret < 0) {
++    fprintf(stderr, "chmod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0);
++  if (ret < 0) {
++    fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = listxattr(dirname, NULL, 0);
++  if (ret <= 0) {
++    ret = -1;
++    fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = getxattr(dirname, "trusted.xattr-test", NULL, 0);
++  if (ret <= 0) {
++    ret = -1;
++    fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = removexattr(dirname, "trusted.xattr-test");
++  if (ret < 0) {
++    fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  strcpy(newdname, dirname);
++  strcat(newdname, "/../");
++  ret = chdir(newdname);
++  if (ret < 0) {
++    fprintf(stderr, "chdir failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  cwd = getcwd(buff, 255);
++  if (NULL == cwd) {
++    fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  strcpy(newdname, dirname);
++  strcat(newdname, "new");
++  ret = rename(dirname, newdname);
++  if (ret < 0) {
++    fprintf(stderr, "rename failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = rmdir(newdname);
++  if (ret < 0) {
++    fprintf(stderr, "rmdir failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  rmdir(dirname);
++  return result;
+ }
+-test_open_modes(char *filename)
+-    int ret = -1;
+-    int result = 0;
+-    ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE);
+-    if (ret != 0) {
+-        fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n");
+-        result |= ret;
+-    }
+-    ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE);
+-    if (ret != 0) {
+-        fprintf(stderr, "flag O_CREAT|O_RDWR failed\n");
+-        result |= ret;
+-    }
+-    ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE);
+-    if (ret != 0) {
+-        fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n");
+-        result |= ret;
+-    }
+-    ret = creat(filename, 0644);
+-    close(ret);
+-    ret = generic_open_read_write(filename, O_WRONLY, 0);
+-    if (ret != 0) {
+-        fprintf(stderr, "flag O_WRONLY failed\n");
+-        result |= ret;
+-    }
+-    ret = creat(filename, 0644);
+-    close(ret);
+-    ret = generic_open_read_write(filename, O_RDWR, 0);
+-    if (0 != ret) {
+-        fprintf(stderr, "flag O_RDWR failed\n");
+-        result |= ret;
+-    }
+-    ret = creat(filename, 0644);
+-    close(ret);
+-    ret = generic_open_read_write(filename, O_RDONLY, 0);
+-    if (0 != ret) {
+-        fprintf(stderr, "flag O_RDONLY failed\n");
+-        result |= ret;
+-    }
++int link_based_fops(char *filename) {
++  int ret = -1;
++  int result = 0;
++  int fd = 0;
++  char newname[255] = {
++      0,
++  };
++  char linkname[255] = {
++      0,
++  };
++  struct stat lstbuf = {
++      0,
++  };
++  fd = creat(filename, 0644);
++  if (fd < 0) {
++    fd = 0;
++    fprintf(stderr, "creat failed: %s\n", strerror(errno));
++    return ret;
++  }
++  strcpy(newname, filename);
++  strcat(newname, "_hlink");
++  ret = link(filename, newname);
++  if (ret < 0) {
++    fprintf(stderr, "link failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = unlink(filename);
++  if (ret < 0) {
++    fprintf(stderr, "unlink failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  strcpy(linkname, filename);
++  strcat(linkname, "_slink");
++  ret = symlink(newname, linkname);
++  if (ret < 0) {
++    fprintf(stderr, "symlink failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = lstat(linkname, &lstbuf);
++  if (ret < 0) {
++    fprintf(stderr, "lstbuf failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = lchown(linkname, 10001, 10001);
++  if (ret < 0) {
++    fprintf(stderr, "lchown failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0);
++  if (ret < 0) {
++    fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = llistxattr(linkname, NULL, 0);
++  if (ret < 0) {
++    ret = -1;
++    fprintf(stderr, "llistxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0);
++  if (ret < 0) {
++    ret = -1;
++    fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  ret = lremovexattr(linkname, "trusted.lxattr-test");
++  if (ret < 0) {
++    fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  if (fd)
++    close(fd);
++  unlink(linkname);
++  unlink(newname);
++  return result;
+-    ret = creat(filename, 0644);
+-    close(ret);
+-    ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0);
+-    if (0 != ret) {
+-        fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n");
+-        result |= ret;
+-    }
++int test_open_modes(char *filename) {
++  int ret = -1;
++  int result = 0;
++  ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE);
++  if (ret != 0) {
++    fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n");
++    result |= ret;
++  }
++  ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE);
++  if (ret != 0) {
++    fprintf(stderr, "flag O_CREAT|O_RDWR failed\n");
++    result |= ret;
++  }
++  ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE);
++  if (ret != 0) {
++    fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n");
++    result |= ret;
++  }
++  ret = creat(filename, 0644);
++  close(ret);
++  ret = generic_open_read_write(filename, O_WRONLY, 0);
++  if (ret != 0) {
++    fprintf(stderr, "flag O_WRONLY failed\n");
++    result |= ret;
++  }
++  ret = creat(filename, 0644);
++  close(ret);
++  ret = generic_open_read_write(filename, O_RDWR, 0);
++  if (0 != ret) {
++    fprintf(stderr, "flag O_RDWR failed\n");
++    result |= ret;
++  }
++  ret = creat(filename, 0644);
++  close(ret);
++  ret = generic_open_read_write(filename, O_RDONLY, 0);
++  if (0 != ret) {
++    fprintf(stderr, "flag O_RDONLY failed\n");
++    result |= ret;
++  }
++  ret = creat(filename, 0644);
++  close(ret);
++  ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0);
++  if (0 != ret) {
++    fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n");
++    result |= ret;
++  }
+ #if 0 /* undefined behaviour, unable to reliably test */
+         ret = creat (filename, 0644);
+@@ -943,90 +920,87 @@ test_open_modes(char *filename)
+         }
+ #endif
+-    ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC,
+-                                  OPEN_MODE);
+-    if (0 != ret) {
+-        fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
+-        result |= ret;
+-    }
+-    ret = creat(filename, 0644);
+-    close(ret);
+-    ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE);
+-    if (0 != ret) {
+-        fprintf(stderr, "flag O_CREAT|O_EXCL failed\n");
+-        result |= ret;
+-    }
+-    return result;
++  ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, OPEN_MODE);
++  if (0 != ret) {
++    fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
++    result |= ret;
++  }
++  ret = creat(filename, 0644);
++  close(ret);
++  ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE);
++  if (0 != ret) {
++    fprintf(stderr, "flag O_CREAT|O_EXCL failed\n");
++    result |= ret;
++  }
++  return result;
+ }
+-generic_open_read_write(char *filename, int flag, mode_t mode)
+-    int fd = 0;
+-    int ret = -1;
+-    char wstring[50] = {
+-        0,
+-    };
+-    char rstring[50] = {
+-        0,
+-    };
+-    fd = open(filename, flag, mode);
+-    if (fd < 0) {
+-        if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) {
+-            unlink(filename);
+-            return 0;
+-        } else {
+-            fprintf(stderr, "open failed: %s\n", strerror(errno));
+-            return -1;
+-        }
+-    }
+-    strcpy(wstring, "My string to write\n");
+-    ret = write(fd, wstring, strlen(wstring));
+-    if (ret <= 0) {
+-        if (errno != EBADF) {
+-            fprintf(stderr, "write failed: %s\n", strerror(errno));
+-            close(fd);
+-            unlink(filename);
+-            return ret;
+-        }
+-    }
+-    ret = lseek(fd, 0, SEEK_SET);
+-    if (ret < 0) {
+-        close(fd);
+-        unlink(filename);
+-        return ret;
++int generic_open_read_write(char *filename, int flag, mode_t mode) {
++  int fd = 0;
++  int ret = -1;
++  char wstring[50] = {
++      0,
++  };
++  char rstring[50] = {
++      0,
++  };
++  fd = open(filename, flag, mode);
++  if (fd < 0) {
++    if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) {
++      unlink(filename);
++      return 0;
++    } else {
++      fprintf(stderr, "open failed: %s\n", strerror(errno));
++      return -1;
+     }
++  }
+-    ret = read(fd, rstring, strlen(wstring));
+-    if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY &&
+-        flag != (O_TRUNC | O_WRONLY)) {
+-        close(fd);
+-        unlink(filename);
+-        return ret;
++  strcpy(wstring, "My string to write\n");
++  ret = write(fd, wstring, strlen(wstring));
++  if (ret <= 0) {
++    if (errno != EBADF) {
++      fprintf(stderr, "write failed: %s\n", strerror(errno));
++      close(fd);
++      unlink(filename);
++      return ret;
+     }
++  }
+-    /* Compare the rstring with wstring. But we do not want to return
+-     * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
+-     * O_TRUNC|O_RDONLY. Because in that case we are not writing
+-     * anything to the file.*/
+-    ret = memcmp(wstring, rstring, strlen(wstring));
+-    if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY &&
+-        flag != (O_CREAT | O_WRONLY) &&
+-        !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY ||
+-          flag == (O_TRUNC | O_RDONLY))) {
+-        fprintf(stderr, "read is returning junk\n");
+-        close(fd);
+-        unlink(filename);
+-        return ret;
+-    }
++  ret = lseek(fd, 0, SEEK_SET);
++  if (ret < 0) {
++    close(fd);
++    unlink(filename);
++    return ret;
++  }
++  ret = read(fd, rstring, strlen(wstring));
++  if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY &&
++      flag != (O_TRUNC | O_WRONLY)) {
++    close(fd);
++    unlink(filename);
++    return ret;
++  }
++  /* Compare the rstring with wstring. But we do not want to return
++   * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
++   * O_TRUNC|O_RDONLY. Because in that case we are not writing
++   * anything to the file.*/
++  ret = memcmp(wstring, rstring, strlen(wstring));
++  if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY &&
++      flag != (O_CREAT | O_WRONLY) &&
++      !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY ||
++        flag == (O_TRUNC | O_RDONLY))) {
++    fprintf(stderr, "read is returning junk\n");
+     close(fd);
+     unlink(filename);
+-    return 0;
++    return ret;
++  }
++  close(fd);
++  unlink(filename);
++  return 0;
+ }
+diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t
+index 3343956..0e4a1bb 100644
+--- a/tests/basic/posix/shared-statfs.t
++++ b/tests/basic/posix/shared-statfs.t
+@@ -20,15 +20,18 @@ TEST mkdir -p $B0/${V0}1 $B0/${V0}2
++total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}')
++#Account for rounding error
+ # Create a subdir in mountpoint and use that for volume.
+ TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1;
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
+ TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
+ # Keeping the size less than 200M mainly because XFS will use
+ # some storage in brick to keep its own metadata.
+-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
+ TEST force_umount $M0
+@@ -41,8 +44,8 @@ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
+ TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
+-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
+ TEST force_umount $M0
+ TEST $CLI volume stop $V0
+diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
+index 8e5d77b..e719fc5 100755
+--- a/tests/bugs/cli/bug-1320388.t
++++ b/tests/bugs/cli/bug-1320388.t
+@@ -21,7 +21,7 @@ cleanup;
+ rm -f $SSL_BASE/glusterfs.*
+ touch "$GLUSTERD_WORKDIR"/secure-access
+-TEST openssl genrsa -out $SSL_KEY 3072
++TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t
+index d10fd9f..26d196e 100644
+--- a/tests/bugs/fuse/bug-985074.t
++++ b/tests/bugs/fuse/bug-985074.t
+@@ -30,7 +30,7 @@ TEST glusterd
+ TEST $CLI volume create $V0 $H0:$B0/$V0
+ TEST $CLI volume start $V0
+-TEST $CLI volume set $V0 md-cache-timeout 3
++TEST $CLI volume set $V0 performance.stat-prefetch off
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0
+@@ -40,8 +40,6 @@ TEST ln $M0/file $M0/
+ TEST ls -ali $M0 $M1
+ TEST rm -f $M1/
+ TEST ls -ali $M0 $M1
+-# expire the md-cache timeout
+-sleep 3
+ TEST mv $M0/file $M0/
+ TEST stat $M0/
+ TEST ! stat $M0/file
+diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/bugs/glusterd/quorum-value-check.t
+deleted file mode 100755
+index aaf6362..0000000
+--- a/tests/bugs/glusterd/quorum-value-check.t
++++ /dev/null
+@@ -1,35 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-function check_quorum_nfs() {
+-    local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')"
+-    local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')"
+-    if [ $qnfs = $qinfo ]; then
+-        echo "Y"
+-    else
+-        echo "N"
+-    fi
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+-TEST $CLI volume set $V0 nfs.disable off
+-TEST $CLI volume set $V0 performance.write-behind off
+-TEST $CLI volume set $V0 cluster.self-heal-daemon off
+-TEST $CLI volume set $V0 cluster.quorum-type fixed
+-TEST $CLI volume start $V0
+-TEST $CLI volume set $V0 cluster.quorum-count 1
+-TEST $CLI volume set $V0 cluster.quorum-count 2
+-TEST $CLI volume set $V0 cluster.quorum-count 3
+diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t
+index 82f7cca..f65b1bd 100755
+--- a/tests/bugs/glusterfs-server/bug-887145.t
++++ b/tests/bugs/glusterfs-server/bug-887145.t
+@@ -29,7 +29,15 @@ chmod 600 $M0/file;
+ TEST mount_nfs $H0:/$V0 $N0 nolock;
+-chown -R nfsnobody:nfsnobody $M0/dir;
++grep nfsnobody /etc/passwd > /dev/nul
++if [ $? -eq 1 ]; then
++chown -R $usr:$grp $M0/dir;
+ chown -R tmp_user:tmp_user $M0/other;
+ TEST $CLI volume set $V0 server.root-squash on;
+@@ -38,7 +46,7 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+ # create files and directories in the root of the glusterfs and nfs mount
+ # which is owned by root and hence the right behavior is getting EACCESS
+-# as the fops are executed as nfsnobody.
++# as the fops are executed as nfsnobody/nobody.
+ touch $M0/foo 2>/dev/null;
+ TEST [ $? -ne 0 ]
+ touch $N0/foo 2>/dev/null;
+@@ -61,7 +69,7 @@ cat $N0/passwd 1>/dev/null;
+ TEST [ $? -eq 0 ]
+ # create files and directories should succeed as the fops are being executed
+-# inside the directory owned by nfsnobody
++# inside the directory owned by nfsnobody/nobody
+ TEST touch $M0/dir/file;
+ TEST touch $N0/dir/foo;
+ TEST mkdir $M0/dir/new;
+diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t
+deleted file mode 100755
+index 2f53172..0000000
+--- a/tests/bugs/nfs/bug-1053579.t
++++ /dev/null
+@@ -1,114 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-# prepare the users and groups
+-# OS-specific overrides
+-case $OSTYPE in
+-        # only NGROUPS_MAX=16 secondary groups are supported
+-        LAST_GID=1053593
+-        ;;
+-        # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups
+-        ;;
+-        # NGROUPS_MAX=65536, we can afford 200 groups
+-        ;;
+-        ;;
+-# create a user that belongs to many groups
+-for GID in $(seq  -f '%6.0f' ${NEW_GID} ${LAST_GID})
+-        groupadd -o -g ${GID} ${NEW_USER}-${GID}
+-        NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}"
+-TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+-# preparation done, start the tests
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/${V0}1
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume set $V0 nfs.server-aux-gids on
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+-# mount the volume
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+-# the actual test, this used to crash
+-su -m ${NEW_USER} -c "stat $N0/. > /dev/null"
+-TEST [ $? -eq 0 ]
+-# create a file that only a user in a high-group can access
+-echo 'Hello World!' > $N0/README
+-chgrp ${LAST_GID} $N0/README
+-chmod 0640 $N0/README
+-#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null"
+-su -m ${NEW_USER} -c "cat $N0/README"
+-case $OSTYPE in
+-Linux)  # Linux NFS fails with big GID
+-        if [ $ret -ne 0 ] ; then
+-            res="Y"
+-        else
+-            res="N"
+-        fi
+-        ;;
+-*)      # Other systems should cope better
+-        if [ $ret -eq 0 ] ; then
+-            res="Y"
+-        else
+-            res="N"
+-        fi
+-        ;;
+-TEST [ "x$res" = "xY" ]
+-# This passes only on, not reproducible on other machines?!
+-#su -m ${NEW_USER}  -c "cat $M0/README 2>&1 > /dev/null"
+-#TEST [ $? -ne 0 ]
+-# enable server.manage-gids and things should work
+-TEST $CLI volume set $V0 server.manage-gids on
+-su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null"
+-TEST [ $? -eq 0 ]
+-su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null"
+-TEST [ $? -eq 0 ]
+-# cleanup
+-userdel --force ${NEW_USER}
+-for GID in $(seq  -f '%6.0f' ${NEW_GID} ${LAST_GID})
+-        groupdel ${NEW_USER}-${GID}
+-rm -f $N0/README
+-TEST $CLI volume stop $V0
+-TEST $CLI volume delete $V0
+diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/bugs/nfs/bug-1116503.t
+deleted file mode 100644
+index dd3998d..0000000
+--- a/tests/bugs/nfs/bug-1116503.t
++++ /dev/null
+@@ -1,47 +0,0 @@
+-# Verify that mounting NFS over UDP (MOUNT service only) works.
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume set $V0 nfs.mount-udp on
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp;
+-TEST mkdir -p $N0/foo/bar
+-TEST ls $N0/foo
+-TEST ls $N0/foo/bar
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp;
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+-TEST $CLI volume set $V0 nfs.addr-namelookup on
+-TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+-TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
+deleted file mode 100644
+index c360db4..0000000
+--- a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
++++ /dev/null
+@@ -1,24 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume set $V0 off
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-TEST mkdir -p $N0/foo
+-TEST mount_nfs $H0:/$V0/foo $N0 nolock
+diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t
+deleted file mode 100644
+index dea609e..0000000
+--- a/tests/bugs/nfs/bug-1157223-symlink-mounting.t
++++ /dev/null
+@@ -1,126 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-## Start and create a volume
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume info;
+-TEST $CLI volume create $V0  $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0;
+-## Wait for volume to register with rpc.mountd
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-## Mount NFS
+-TEST mount_nfs $H0:/$V0 $N0 nolock;
+-mkdir $N0/dir1;
+-mkdir $N0/dir2;
+-pushd $N0/ ;
+-##link created using relative path
+-ln -s dir1 symlink1;
+-##relative path contains ".."
+-ln -s ../dir1 dir2/symlink2;
+-##link created using absolute path
+-ln -s $N0/dir1 symlink3;
+-##link pointing to another symlinks
+-ln -s symlink1 symlink4
+-ln -s symlink3 symlink5
+-##dead links
+-ln -s does/not/exist symlink6
+-##link which contains ".." points out of glusterfs
+-ln -s ../../ symlink7
+-##links pointing to unauthorized area
+-ln -s .glusterfs symlink8
+-popd ;
+-##Umount the volume
+-## Mount and umount NFS via directory
+-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock;
+-## Mount and umount NFS via symlink1
+-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock;
+-## Mount and umount NFS via symlink2
+-TEST  mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock;
+-## Mount NFS via symlink3 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock;
+-## Mount and umount NFS via symlink4
+-TEST  mount_nfs $H0:/$V0/symlink4 $N0 nolock;
+-## Mount NFS via symlink5 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock;
+-## Mount NFS via symlink6 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock;
+-## Mount NFS via symlink7 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink7 $N0 nolock;
+-## Mount NFS via symlink8 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock;
+-##Similar check for udp mount
+-$CLI volume stop $V0
+-TEST $CLI volume set $V0 nfs.mount-udp on
+-$CLI volume start $V0
+-## Wait for volume to register with rpc.mountd
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-## Mount and umount NFS via directory
+-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp;
+-## Mount and umount NFS via symlink1
+-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp;
+-## Mount and umount NFS via symlink2
+-TEST  mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp;
+-## Mount NFS via symlink3 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp;
+-## Mount and umount NFS via symlink4
+-TEST  mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp;
+-## Mount NFS via symlink5 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp;
+-## Mount NFS via symlink6 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp;
+-##symlink7 is not check here, because in udp mount ../../ resolves into root '/'
+-## Mount NFS via symlink8 should fail
+-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp;
+-rm -rf $H0:$B0/
+diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t
+deleted file mode 100644
+index 45a22e7..0000000
+--- a/tests/bugs/nfs/bug-1161092-nfs-acls.t
++++ /dev/null
+@@ -1,39 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume info
+-TEST $CLI volume create $V0 $H0:$B0/brick1;
+-EXPECT 'Created' volinfo_field $V0 'Status';
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status';
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-TEST mount_nfs $H0:/$V0 $N0
+-TEST touch $N0/file1
+-TEST chmod 700 $N0/file1
+-TEST getfacl $N0/file1
+-TEST $CLI volume set $V0 root-squash on
+-TEST getfacl $N0/file1
+-TEST umount_nfs $H0:/$V0 $N0
+-TEST mount_nfs $H0:/$V0 $N0
+-TEST getfacl $N0/file1
+-## Before killing daemon to avoid deadlocks
+-umount_nfs $N0
+diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t
+deleted file mode 100755
+index c4f51a2..0000000
+--- a/tests/bugs/nfs/bug-1166862.t
++++ /dev/null
+@@ -1,69 +0,0 @@
+-# When nfs.mount-rmtab is disabled, it should not get updated.
+-# Based on: bug-904065.t
+-# count the lines of a file, return 0 if the file does not exist
+-function count_lines()
+-        if [ -n "$1" ]
+-        then
+-                $@ 2>/dev/null | wc -l
+-        else
+-                echo 0
+-        fi
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/brick1
+-EXPECT 'Created' volinfo_field $V0 'Status'
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status'
+-# glusterfs/nfs needs some time to start up in the background
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-# disable the rmtab by settting it to the magic "/-" value
+-TEST $CLI volume set $V0 nfs.mount-rmtab /-
+-# before mounting the rmtab should be empty
+-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-# showmount should list one client
+-EXPECT '1' count_lines showmount --no-headers $H0
+-# unmount
+-# after resetting the option, the rmtab should get updated again
+-TEST $CLI volume reset $V0 nfs.mount-rmtab
+-# before mounting the rmtab should be empty
+-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+-# removing a mount
+-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c
+deleted file mode 100644
+index d409924..0000000
+--- a/tests/bugs/nfs/bug-1210338.c
++++ /dev/null
+@@ -1,31 +0,0 @@
+-#include <stdio.h>
+-#include <stdlib.h>
+-#include <unistd.h>
+-#include <string.h>
+-#include <errno.h>
+-#include <sys/types.h>
+-#include <fcntl.h>
+-#include <sys/stat.h>
+-main(int argc, char *argv[])
+-    int ret = -1;
+-    int fd = -1;
+-    fd = open(argv[1], O_CREAT | O_EXCL, 0644);
+-    if (fd == -1) {
+-        fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1],
+-                strerror(errno));
+-        goto out;
+-    }
+-    ret = 0;
+-    if (fd > 0)
+-        close(fd);
+-    return ret;
+diff --git a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t
+deleted file mode 100644
+index b5c9245..0000000
+--- a/tests/bugs/nfs/bug-1210338.t
++++ /dev/null
+@@ -1,30 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-NFS_SOURCE=$(dirname $0)/bug-1210338.c
+-NFS_EXEC=$(dirname $0)/excl_create
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-build_tester $NFS_SOURCE -o $NFS_EXEC
+-TEST [ -e $NFS_EXEC ]
+-TEST $NFS_EXEC $N0/my_file
+-rm -f $NFS_EXEC;
+diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t
+deleted file mode 100755
+index a2fb0e6..0000000
+--- a/tests/bugs/nfs/bug-1302948.t
++++ /dev/null
+@@ -1,13 +0,0 @@
+-# TEST the nfs.rdirplus option
+-. $(dirname $0)/../../include.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume start $V0
+-TEST $CLI volume set $V0 nfs.rdirplus off
+-TEST $CLI volume set $V0 nfs.rdirplus on
+diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t
+deleted file mode 100755
+index 5ccee72..0000000
+--- a/tests/bugs/nfs/bug-847622.t
++++ /dev/null
+@@ -1,39 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-case $OSTYPE in
+-        echo "Skip test on ACL which are not available on NetBSD" >&2
+-        SKIP_TESTS
+-        exit 0
+-        ;;
+-        ;;
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/brick0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-cd $N0
+-# simple getfacl setfacl commands
+-TEST touch testfile
+-TEST setfacl -m u:14:r testfile
+-TEST getfacl testfile
+diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t
+deleted file mode 100755
+index dca315a..0000000
+--- a/tests/bugs/nfs/bug-877885.t
++++ /dev/null
+@@ -1,39 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-## Mount FUSE with caching disabled
+-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+-TEST touch $M0/file
+-TEST mkdir $M0/dir
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-cd $N0
+-rm -rf * &
+-TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock;
+-kill %1;
+diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t
+deleted file mode 100755
+index 0eba86e..0000000
+--- a/tests/bugs/nfs/bug-904065.t
++++ /dev/null
+@@ -1,100 +0,0 @@
+-# This test does not use 'showmount' from the nfs-utils package, it would
+-# require setting up a portmapper (either rpcbind or portmap, depending on the
+-# Linux distribution used for testing). The persistancy of the rmtab should not
+-# affect the current showmount outputs, so existing regression tests should be
+-# sufficient.
+-# count the lines of a file, return 0 if the file does not exist
+-function count_lines()
+-        if [ -e "$1" ]
+-        then
+-                wc -l < $1
+-        else
+-                echo 0
+-        fi
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/brick1
+-EXPECT 'Created' volinfo_field $V0 'Status'
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status'
+-# glusterfs/nfs needs some time to start up in the background
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-# before mounting the rmtab should be empty
+-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-# the output would looks similar to:
+-#   hostname-0=
+-#   mountpoint-0=/ufo
+-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-# duplicate mounts should not be recorded (client could have crashed)
+-TEST mount_nfs $H0:/$V0 $N1 nolock
+-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-# removing a mount should (even if there are two) should remove the entry
+-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-# unmounting the other mount should work flawlessly
+-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0
+-# we'll create a fake rmtab here, similar to how an other storage server would do
+-# using an invalid IP address to prevent (unlikely) collisions on the test-machine
+-cat << EOF > $M0/rmtab
+-EXPECT '2' count_lines $M0/rmtab
+-# reconfigure merges the rmtab with the one on the volume
+-TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab
+-# glusterfs/nfs needs some time to restart
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-# Apparently "is_nfs_export_available" might return even if the export is
+-# not, in fact, available.  (eyeroll)  Give it a bit of extra time.
+-# TBD: fix the broken shell function instead of working around it here
+-sleep 5
+-# a new mount should be added to the rmtab, not overwrite exiting ones
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab
+-EXPECT '2' count_lines $M0/rmtab
+-# TODO: nfs/reconfigure() is never called and is therefor disabled. When the
+-# NFS-server supports reloading and does not get restarted anymore, we should
+-# add a test that includes the merging of entries in the old rmtab with the new
+-# rmtab.
+diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t
+deleted file mode 100755
+index bd27915..0000000
+--- a/tests/bugs/nfs/bug-915280.t
++++ /dev/null
+@@ -1,54 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-TEST glusterd
+-TEST pidof glusterd
+-function volinfo_field()
+-    local vol=$1;
+-    local field=$2;
+-    $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+-TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+-EXPECT 'Created' volinfo_field $V0 'Status';
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status';
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1
+-TEST touch $N0/testfile
+-TEST $CLI volume set $V0 debug.error-gen client
+-TEST $CLI volume set $V0 debug.error-fops stat
+-TEST $CLI volume set $V0 debug.error-failure 100
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-getfacl $N0/testfile 2>/dev/null
+-if [ ! $nfs_pid ]
+-  nfs_pid=0;
+-TEST [ $nfs_pid -eq $pid_file ]
+diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t
+deleted file mode 100755
+index 61be484..0000000
+--- a/tests/bugs/nfs/bug-970070.t
++++ /dev/null
+@@ -1,13 +0,0 @@
+-# TEST the nfs.acl option
+-. $(dirname $0)/../../include.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume start $V0
+-TEST $CLI volume set $V0 nfs.acl off
+-TEST $CLI volume set $V0 nfs.acl on
+diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t
+deleted file mode 100755
+index 975c46f..0000000
+--- a/tests/bugs/nfs/bug-974972.t
++++ /dev/null
+@@ -1,41 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-#This script checks that nfs mount does not fail lookup on files with split-brain
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+-TEST $CLI volume set $V0 self-heal-daemon off
+-TEST $CLI volume set $V0 cluster.eager-lock off
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0
+-TEST touch $N0/1
+-TEST kill_brick ${V0} ${H0} ${B0}/${V0}1
+-echo abc > $N0/1
+-TEST $CLI volume start $V0 force
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1
+-TEST kill_brick ${V0} ${H0} ${B0}/${V0}0
+-echo def > $N0/1
+-TEST $CLI volume start $V0 force
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1
+-#Lookup should not fail
+-TEST ls $N0/1
+-TEST ! cat $N0/1
+diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t
+deleted file mode 100644
+index f1b6859..0000000
+--- a/tests/bugs/nfs/showmount-many-clients.t
++++ /dev/null
+@@ -1,41 +0,0 @@
+-# The nfs.rpc-auth-allow volume option is used to generate the list of clients
+-# that are displayed as able to mount the export. The "group" in the export
+-# should be a list of all clients, identified by "name". In previous versions,
+-# the "name" was the copied string from nfs.rpc-auth-allow. This is not
+-# correct, as the volume option should be parsed and split into different
+-# groups.
+-# When the single string is passed, this testcase fails when the
+-# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting
+-# the groups into their own structures, this testcase passes.
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../nfs.rc
+-. $(dirname $0)/../../volume.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/brick1
+-EXPECT 'Created' volinfo_field $V0 'Status'
+-TEST $CLI volume set $V0 nfs.disable false
+-CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,)
+-TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS}
+-TEST $CLI volume set $V0 nfs.rpc-auth-reject all
+-TEST $CLI volume start $V0;
+-EXPECT 'Started' volinfo_field $V0 'Status'
+-# glusterfs/nfs needs some time to start up in the background
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+-# showmount should not timeout (no reply is sent on error)
+-TEST showmount -e $H0
+diff --git a/tests/bugs/nfs/ b/tests/bugs/nfs/
+deleted file mode 100755
+index eb507e1..0000000
+--- a/tests/bugs/nfs/
++++ /dev/null
+@@ -1,33 +0,0 @@
+-# Create a unix domain socket and test if it is a socket (and not a fifo/pipe).
+-# Author: Niels de Vos <>
+-from __future__ import print_function
+-import os
+-import stat
+-import sys
+-import socket
+-ret = 1
+-if len(sys.argv) != 2:
+-        print('Usage: %s <socket>' % (sys.argv[0]))
+-        sys.exit(ret)
+-path = sys.argv[1]
+-sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+-stbuf = os.stat(path)
+-mode = stbuf.st_mode
+-if stat.S_ISSOCK(mode):
+-        ret = 0
+diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t
+deleted file mode 100644
+index d9b9e95..0000000
+--- a/tests/bugs/nfs/socket-as-fifo.t
++++ /dev/null
+@@ -1,25 +0,0 @@
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-# this is the actual test
+-TEST $PYTHON $(dirname $0)/ $N0/not-a-fifo.socket
+-TEST umount_nfs $N0
+diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t
+deleted file mode 100644
+index 6a11487..0000000
+--- a/tests/bugs/nfs/subdir-trailing-slash.t
++++ /dev/null
+@@ -1,32 +0,0 @@
+-# Verify that mounting a subdir over NFS works, even with a trailing /
+-# For example:
+-#    mount -t nfs
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-TEST mkdir -p $N0/subdir
+-TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock
+diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t
+deleted file mode 100755
+index 2a94009..0000000
+--- a/tests/bugs/nfs/zero-atime.t
++++ /dev/null
+@@ -1,33 +0,0 @@
+-# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If
+-# not set, these values are 0 and cause a atime/mtime set to the Epoch.
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-. $(dirname $0)/../../nfs.rc
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-# create a file for testing
+-TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+-# timezone in UTC results in atime=0 if not set correctly
+-TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat
+-TEST [ "$(stat --format=%X $M0/small)" != "0" ]
+-TEST rm $M0/small
+diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t
+index 65af274..9ad0ab2 100755
+--- a/tests/bugs/rpc/bug-954057.t
++++ b/tests/bugs/rpc/bug-954057.t
+@@ -25,7 +25,15 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ TEST mkdir $M0/dir
+ TEST mkdir $M0/nobody
+-TEST chown nfsnobody:nfsnobody $M0/nobody
++grep nfsnobody /etc/passwd > /dev/nul
++if [ $? -eq 1 ]; then
++TEST chown $usr:$grp $M0/nobody
+ TEST `echo "file" >> $M0/file`
+ TEST cp $M0/file $M0/new
+ TEST chmod 700 $M0/new
+diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t
+index 7628870..66e896a 100644
+--- a/tests/bugs/shard/bug-1272986.t
++++ b/tests/bugs/shard/bug-1272986.t
+@@ -16,16 +16,16 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
+ # Write some data into a file, such that its size crosses the shard block size.
+-TEST dd if=/dev/zero of=$M1/file bs=1M count=5 conv=notrunc
++TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct
+ md5sum1_reader=$(md5sum $M0/file | awk '{print $1}')
+ EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'`
+ # Append some more data into the file.
+-TEST `echo "abcdefg" >> $M1/file`
++TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct
+-md5sum2_reader=$(md5sum $M0/file | awk '{print $1}')
++md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}')
+ # Test to see if the reader refreshes its cache correctly as part of the reads
+ # triggered through md5sum. If it does, then the md5sum on the reader and writer
+diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t
+index d4c0702..8070bc1 100755
+--- a/tests/bugs/transport/bug-873367.t
++++ b/tests/bugs/transport/bug-873367.t
+@@ -13,7 +13,7 @@ rm -f $SSL_BASE/glusterfs.*
+ mkdir -p $B0/1
+ mkdir -p $M0
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
+index 3cb45b5..cae010c 100755
+--- a/tests/features/ssl-authz.t
++++ b/tests/features/ssl-authz.t
+@@ -41,7 +41,7 @@ function valid_ciphers {
+ 		-e '/:$/s///'
+ }
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t
+index 7e1e199..e4bcdf5 100644
+--- a/tests/features/ssl-ciphers.t
++++ b/tests/features/ssl-ciphers.t
+@@ -33,18 +33,26 @@ wait_mount() {
+ openssl_connect() {
+ 	ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA"
+ 	ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR"
+-	#echo openssl s_client $ssl_opt $@ > /dev/tty
+-	#read -p "Continue? " nothing
+-	CIPHER=`echo "" |
+-                openssl s_client $ssl_opt $@ 2>/dev/null |
+-		awk '/^    Cipher/{print $3}'`
+-	if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" ] ; then
++        cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null"
++        CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
++	if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then
+ 		echo "N"
+ 	else
+ 		echo "Y"
+ 	fi
+ }
++#Validate the cipher to pass EXPECT test case before call openssl_connect
++check_cipher() {
++       cmd="echo "" | openssl s_client $@ 2> /dev/null"
++       cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
++       if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then
++                echo "N"
++        else
++                echo "Y"
++       fi
+ cleanup;
+ mkdir -p $B0
+ mkdir -p $M0
+@@ -65,7 +73,7 @@ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume info;
+-TEST openssl genrsa -out $SSL_KEY 1024 2>/dev/null
++TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null
+ TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \
+                   -subj /CN=CA -out $SSL_CA
+ TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \
+@@ -106,28 +114,36 @@ EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT
+ EXPECT "N" openssl_connect -tls1 -connect $H0:$BRICK_PORT
+ # Test a HIGH CBC cipher
+-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+ # Test EECDH
+-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+ # test MD5 fails
+-EXPECT "N" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
+ # test RC4 fails
+-EXPECT "N" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
+ # test eNULL fails
+-EXPECT "N" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT
+ # test SHA2
+-EXPECT "Y" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT
+ # test GCM
+-EXPECT "Y" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT
+ # Test DH fails without DH params
+-EXPECT "N" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
+ # Test DH with DH params
+ TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem
+@@ -145,8 +161,10 @@ TEST $CLI volume stop $V0
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+ BRICK_PORT=`brick_port $V0`
+-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+-EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
+ # Test the ec-curve option
+ TEST $CLI volume set $V0 ssl.cipher-list EECDH:EDH:!TLSv1
+@@ -155,8 +173,10 @@ TEST $CLI volume stop $V0
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+ BRICK_PORT=`brick_port $V0`
+-EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+ TEST $CLI volume set $V0 invalid
+ EXPECT invalid volume_option $V0
+@@ -164,7 +184,8 @@ TEST $CLI volume stop $V0
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+ BRICK_PORT=`brick_port $V0`
+-EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+ TEST $CLI volume set $V0 secp521r1
+ EXPECT secp521r1 volume_option $V0
+diff --git a/tests/ssl.rc b/tests/ssl.rc
+index 127f83f..b1ccc4c 100644
+--- a/tests/ssl.rc
++++ b/tests/ssl.rc
+@@ -20,7 +20,7 @@ SSL_CA=$SSL_BASE/
+ # Create self-signed certificates
+ function create_self_signed_certs (){
+-        openssl genrsa -out $SSL_KEY 1024
++        openssl genrsa -out $SSL_KEY 2048
+         openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+         ln $SSL_CERT $SSL_CA
+         return $?
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index b248767..b224abd 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -10,6883 +10,6417 @@
+ #include <unistd.h>
+-#include "shard.h"
+ #include "shard-mem-types.h"
++#include "shard.h"
+ #include <glusterfs/byte-order.h>
+ #include <glusterfs/defaults.h>
+ #include <glusterfs/statedump.h>
+-static gf_boolean_t
+-__is_shard_dir(uuid_t gfid)
+-    shard_priv_t *priv = THIS->private;
++static gf_boolean_t __is_shard_dir(uuid_t gfid) {
++  shard_priv_t *priv = THIS->private;
+-    if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0)
+-        return _gf_true;
++  if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0)
++    return _gf_true;
+-    return _gf_false;
++  return _gf_false;
+ }
+-static gf_boolean_t
+-__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc)
+-    if (frame->root->pid == GF_CLIENT_PID_GSYNCD &&
+-        (__is_shard_dir(loc->pargfid) ||
+-         (loc->parent && __is_shard_dir(loc->parent->gfid))))
+-        return _gf_true;
++static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) {
++  if (frame->root->pid == GF_CLIENT_PID_GSYNCD &&
++      (__is_shard_dir(loc->pargfid) ||
++       (loc->parent && __is_shard_dir(loc->parent->gfid))))
++    return _gf_true;
+-    return _gf_false;
++  return _gf_false;
+ }
+-shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len)
+-    char gfid_str[GF_UUID_BUF_SIZE] = {
+-        0,
+-    };
++void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) {
++  char gfid_str[GF_UUID_BUF_SIZE] = {
++      0,
++  };
+-    gf_uuid_unparse(gfid, gfid_str);
+-    snprintf(buf, len, "%s.%d", gfid_str, block_num);
++  gf_uuid_unparse(gfid, gfid_str);
++  snprintf(buf, len, "%s.%d", gfid_str, block_num);
+ }
+-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len)
+-    char gfid_str[GF_UUID_BUF_SIZE] = {
+-        0,
+-    };
++void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath,
++                              size_t len) {
++  char gfid_str[GF_UUID_BUF_SIZE] = {
++      0,
++  };
+-    gf_uuid_unparse(gfid, gfid_str);
+-    snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
++  gf_uuid_unparse(gfid, gfid_str);
++  snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
+ }
+-__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+-    int ret = -1;
+-    uint64_t ctx_uint = 0;
+-    shard_inode_ctx_t *ctx_p = NULL;
++int __shard_inode_ctx_get(inode_t *inode, xlator_t *this,
++                          shard_inode_ctx_t **ctx) {
++  int ret = -1;
++  uint64_t ctx_uint = 0;
++  shard_inode_ctx_t *ctx_p = NULL;
+-    ret = __inode_ctx_get(inode, this, &ctx_uint);
+-    if (ret == 0) {
+-        *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-        return ret;
+-    }
++  ret = __inode_ctx_get(inode, this, &ctx_uint);
++  if (ret == 0) {
++    *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++    return ret;
++  }
+-    ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t);
+-    if (!ctx_p)
+-        return ret;
++  ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t);
++  if (!ctx_p)
++    return ret;
+-    INIT_LIST_HEAD(&ctx_p->ilist);
+-    INIT_LIST_HEAD(&ctx_p->to_fsync_list);
++  INIT_LIST_HEAD(&ctx_p->ilist);
++  INIT_LIST_HEAD(&ctx_p->to_fsync_list);
+-    ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p);
+-    if (ret < 0) {
+-        GF_FREE(ctx_p);
+-        return ret;
+-    }
++  ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p);
++  if (ret < 0) {
++    GF_FREE(ctx_p);
++    return ret;
++  }
+-    *ctx = ctx_p;
++  *ctx = ctx_p;
+-    return ret;
++  return ret;
+ }
+-shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+-    int ret = 0;
++int shard_inode_ctx_get(inode_t *inode, xlator_t *this,
++                        shard_inode_ctx_t **ctx) {
++  int ret = 0;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_get(inode, this, ctx);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { ret = __shard_inode_ctx_get(inode, this, ctx); }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
+-                      uint64_t block_size, int32_t valid)
+-    int ret = -1;
+-    shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
++                          uint64_t block_size, int32_t valid) {
++  int ret = -1;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __shard_inode_ctx_get(inode, this, &ctx);
+-    if (ret)
+-        return ret;
++  ret = __shard_inode_ctx_get(inode, this, &ctx);
++  if (ret)
++    return ret;
+-    if (valid & SHARD_MASK_BLOCK_SIZE)
+-        ctx->block_size = block_size;
++  if (valid & SHARD_MASK_BLOCK_SIZE)
++    ctx->block_size = block_size;
+-    if (valid & SHARD_MASK_PROT)
+-        ctx->stat.ia_prot = stbuf->ia_prot;
++  if (valid & SHARD_MASK_PROT)
++    ctx->stat.ia_prot = stbuf->ia_prot;
+-    if (valid & SHARD_MASK_NLINK)
+-        ctx->stat.ia_nlink = stbuf->ia_nlink;
++  if (valid & SHARD_MASK_NLINK)
++    ctx->stat.ia_nlink = stbuf->ia_nlink;
+-    if (valid & SHARD_MASK_UID)
+-        ctx->stat.ia_uid = stbuf->ia_uid;
++  if (valid & SHARD_MASK_UID)
++    ctx->stat.ia_uid = stbuf->ia_uid;
+-    if (valid & SHARD_MASK_GID)
+-        ctx->stat.ia_gid = stbuf->ia_gid;
++  if (valid & SHARD_MASK_GID)
++    ctx->stat.ia_gid = stbuf->ia_gid;
+-    if (valid & SHARD_MASK_SIZE)
+-        ctx->stat.ia_size = stbuf->ia_size;
++  if (valid & SHARD_MASK_SIZE)
++    ctx->stat.ia_size = stbuf->ia_size;
+-    if (valid & SHARD_MASK_BLOCKS)
+-        ctx->stat.ia_blocks = stbuf->ia_blocks;
++  if (valid & SHARD_MASK_BLOCKS)
++    ctx->stat.ia_blocks = stbuf->ia_blocks;
+-    if (valid & SHARD_MASK_TIMES) {
+-        SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
+-                          stbuf->ia_mtime, stbuf->ia_mtime_nsec);
+-        SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
+-                          stbuf->ia_ctime, stbuf->ia_ctime_nsec);
+-        SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
+-                          stbuf->ia_atime, stbuf->ia_atime_nsec);
+-    }
++  if (valid & SHARD_MASK_TIMES) {
++    SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
++                      stbuf->ia_mtime, stbuf->ia_mtime_nsec);
++    SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
++                      stbuf->ia_ctime, stbuf->ia_ctime_nsec);
++    SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
++                      stbuf->ia_atime, stbuf->ia_atime_nsec);
++  }
+-    if (valid & SHARD_MASK_OTHERS) {
+-        ctx->stat.ia_ino = stbuf->ia_ino;
+-        gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid);
+-        ctx->stat.ia_dev = stbuf->ia_dev;
+-        ctx->stat.ia_type = stbuf->ia_type;
+-        ctx->stat.ia_rdev = stbuf->ia_rdev;
+-        ctx->stat.ia_blksize = stbuf->ia_blksize;
+-    }
++  if (valid & SHARD_MASK_OTHERS) {
++    ctx->stat.ia_ino = stbuf->ia_ino;
++    gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid);
++    ctx->stat.ia_dev = stbuf->ia_dev;
++    ctx->stat.ia_type = stbuf->ia_type;
++    ctx->stat.ia_rdev = stbuf->ia_rdev;
++    ctx->stat.ia_blksize = stbuf->ia_blksize;
++  }
+-    if (valid & SHARD_MASK_REFRESH_RESET)
+-        ctx->refresh = _gf_false;
++    ctx->refresh = _gf_false;
+-    return 0;
++  return 0;
+ }
+-shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
+-                    uint64_t block_size, int32_t valid)
+-    int ret = -1;
++int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
++                        uint64_t block_size, int32_t valid) {
++  int ret = -1;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this)
+-    int ret = -1;
+-    shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) {
++  int ret = -1;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __shard_inode_ctx_get(inode, this, &ctx);
+-    if (ret)
+-        return ret;
++  ret = __shard_inode_ctx_get(inode, this, &ctx);
++  if (ret)
++    return ret;
+-    ctx->refresh = _gf_true;
++  ctx->refresh = _gf_true;
+-    return 0;
++  return 0;
+ }
+-shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this)
+-    int ret = -1;
++int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) {
++  int ret = -1;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_set_refresh_flag(inode, this);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { ret = __shard_inode_ctx_set_refresh_flag(inode, this); }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this)
+-    int ret = -1;
+-    shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) {
++  int ret = -1;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __shard_inode_ctx_get(inode, this, &ctx);
+-    if (ret)
+-        return ret;
++  ret = __shard_inode_ctx_get(inode, this, &ctx);
++  if (ret)
++    return ret;
+-    ctx->refreshed = _gf_true;
+-    return 0;
++  ctx->refreshed = _gf_true;
++  return 0;
+ }
+-shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this)
+-    int ret = -1;
++int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) {
++  int ret = -1;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_mark_dir_refreshed(inode, this);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
+-                                    inode_t *shard_inode)
+-    int ret = -1;
+-    shard_inode_ctx_t *base_ictx = NULL;
+-    shard_inode_ctx_t *shard_ictx = NULL;
+-    ret = __shard_inode_ctx_get(base_inode, this, &base_ictx);
+-    if (ret)
+-        return ret;
++int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
++                                        inode_t *shard_inode) {
++  int ret = -1;
++  shard_inode_ctx_t *base_ictx = NULL;
++  shard_inode_ctx_t *shard_ictx = NULL;
+-    ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx);
+-    if (ret)
+-        return ret;
++  ret = __shard_inode_ctx_get(base_inode, this, &base_ictx);
++  if (ret)
++    return ret;
+-    if (shard_ictx->fsync_needed) {
+-        shard_ictx->fsync_needed++;
+-        return 1;
+-    }
++  ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx);
++  if (ret)
++    return ret;
+-    list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list);
+-    shard_ictx->inode = shard_inode;
++  if (shard_ictx->fsync_needed) {
+     shard_ictx->fsync_needed++;
+-    base_ictx->fsync_count++;
+-    shard_ictx->base_inode = base_inode;
++    return 1;
++  }
+-    return 0;
++  list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list);
++  shard_ictx->inode = shard_inode;
++  shard_ictx->fsync_needed++;
++  base_ictx->fsync_count++;
++  shard_ictx->base_inode = base_inode;
++  return 0;
+ }
+-shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
+-                                  inode_t *shard_inode)
+-    int ret = -1;
++int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
++                                      inode_t *shard_inode) {
++  int ret = -1;
+-    /* This ref acts as a refkeepr on the base inode. We
+-     * need to keep this inode alive as it holds the head
+-     * of the to_fsync_list.
+-     */
+-    inode_ref(base_inode);
+-    inode_ref(shard_inode);
++  /* This ref acts as a refkeepr on the base inode. We
++   * need to keep this inode alive as it holds the head
++   * of the to_fsync_list.
++   */
++  inode_ref(base_inode);
++  inode_ref(shard_inode);
+-    LOCK(&base_inode->lock);
+-    LOCK(&shard_inode->lock);
+-    {
+-        ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this,
+-                                                  shard_inode);
+-    }
+-    UNLOCK(&shard_inode->lock);
+-    UNLOCK(&base_inode->lock);
++  LOCK(&base_inode->lock);
++  LOCK(&shard_inode->lock);
++  { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); }
++  UNLOCK(&shard_inode->lock);
++  UNLOCK(&base_inode->lock);
+-    /* Unref the base inode corresponding to the ref above, if the shard is
+-     * found to be already part of the fsync list.
+-     */
+-    if (ret != 0) {
+-        inode_unref(base_inode);
+-        inode_unref(shard_inode);
+-    }
+-    return ret;
++  /* Unref the base inode corresponding to the ref above, if the shard is
++   * found to be already part of the fsync list.
++   */
++  if (ret != 0) {
++    inode_unref(base_inode);
++    inode_unref(shard_inode);
++  }
++  return ret;
+ }
+-__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this)
+-    int ret = -1;
+-    shard_inode_ctx_t *ctx = NULL;
++gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) {
++  int ret = -1;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __shard_inode_ctx_get(inode, this, &ctx);
+-    /* If inode ctx get fails, better to err on the side of caution and
+-     * try again? Unless the failure is due to mem-allocation.
+-     */
+-    if (ret)
+-        return _gf_true;
++  ret = __shard_inode_ctx_get(inode, this, &ctx);
++  /* If inode ctx get fails, better to err on the side of caution and
++   * try again? Unless the failure is due to mem-allocation.
++   */
++  if (ret)
++    return _gf_true;
+-    return !ctx->refreshed;
++  return !ctx->refreshed;
+ }
+-shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this)
+-    gf_boolean_t flag = _gf_false;
++gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) {
++  gf_boolean_t flag = _gf_false;
+-    LOCK(&inode->lock);
+-    {
+-        flag = __shard_inode_ctx_needs_lookup(inode, this);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { flag = __shard_inode_ctx_needs_lookup(inode, this); }
++  UNLOCK(&inode->lock);
+-    return flag;
++  return flag;
+ }
+-__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf)
+-    int ret = -1;
+-    shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this,
++                                 struct iatt *stbuf) {
++  int ret = -1;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __shard_inode_ctx_get(inode, this, &ctx);
+-    if (ret)
+-        return ret;
++  ret = __shard_inode_ctx_get(inode, this, &ctx);
++  if (ret)
++    return ret;
+-    if ((stbuf->ia_size != ctx->stat.ia_size) ||
+-        (stbuf->ia_blocks != ctx->stat.ia_blocks))
+-        ctx->refresh = _gf_true;
++  if ((stbuf->ia_size != ctx->stat.ia_size) ||
++      (stbuf->ia_blocks != ctx->stat.ia_blocks))
++    ctx->refresh = _gf_true;
+-    return 0;
++  return 0;
+ }
+-shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf)
+-    int ret = -1;
++int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this,
++                               struct iatt *stbuf) {
++  int ret = -1;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_invalidate(inode, this, stbuf);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
+-                                 uint64_t *block_size)
+-    int ret = -1;
+-    uint64_t ctx_uint = 0;
+-    shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
++                                     uint64_t *block_size) {
++  int ret = -1;
++  uint64_t ctx_uint = 0;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __inode_ctx_get(inode, this, &ctx_uint);
+-    if (ret < 0)
+-        return ret;
++  ret = __inode_ctx_get(inode, this, &ctx_uint);
++  if (ret < 0)
++    return ret;
+-    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-    *block_size = ctx->block_size;
++  *block_size = ctx->block_size;
+-    return 0;
++  return 0;
+ }
+-shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
+-                               uint64_t *block_size)
+-    int ret = -1;
++int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
++                                   uint64_t *block_size) {
++  int ret = -1;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_get_block_size(inode, this, block_size);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
+-                                  int *fsync_count)
+-    int ret = -1;
+-    uint64_t ctx_uint = 0;
+-    shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
++                                      int *fsync_count) {
++  int ret = -1;
++  uint64_t ctx_uint = 0;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __inode_ctx_get(inode, this, &ctx_uint);
+-    if (ret < 0)
+-        return ret;
++  ret = __inode_ctx_get(inode, this, &ctx_uint);
++  if (ret < 0)
++    return ret;
+-    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-    *fsync_count = ctx->fsync_needed;
++  *fsync_count = ctx->fsync_needed;
+-    return 0;
++  return 0;
+ }
+-shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
+-                                int *fsync_count)
+-    int ret = -1;
++int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
++                                    int *fsync_count) {
++  int ret = -1;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+-                          shard_inode_ctx_t *ctx_out)
+-    int ret = -1;
+-    uint64_t ctx_uint = 0;
+-    shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
++                              shard_inode_ctx_t *ctx_out) {
++  int ret = -1;
++  uint64_t ctx_uint = 0;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __inode_ctx_get(inode, this, &ctx_uint);
+-    if (ret < 0)
+-        return ret;
++  ret = __inode_ctx_get(inode, this, &ctx_uint);
++  if (ret < 0)
++    return ret;
+-    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-    memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t));
+-    return 0;
++  memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t));
++  return 0;
+ }
+-shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+-                        shard_inode_ctx_t *ctx_out)
+-    int ret = -1;
++int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
++                            shard_inode_ctx_t *ctx_out) {
++  int ret = -1;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_get_all(inode, this, ctx_out);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
+-                                       struct iatt *buf,
+-                                       gf_boolean_t *need_refresh)
+-    int ret = -1;
+-    uint64_t ctx_uint = 0;
+-    shard_inode_ctx_t *ctx = NULL;
++int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
++                                           struct iatt *buf,
++                                           gf_boolean_t *need_refresh) {
++  int ret = -1;
++  uint64_t ctx_uint = 0;
++  shard_inode_ctx_t *ctx = NULL;
+-    ret = __inode_ctx_get(inode, this, &ctx_uint);
+-    if (ret < 0)
+-        return ret;
++  ret = __inode_ctx_get(inode, this, &ctx_uint);
++  if (ret < 0)
++    return ret;
+-    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-    if (ctx->refresh == _gf_false)
+-        *buf = ctx->stat;
+-    else
+-        *need_refresh = _gf_true;
++  if (ctx->refresh == _gf_false)
++    *buf = ctx->stat;
++  else
++    *need_refresh = _gf_true;
+-    return 0;
++  return 0;
+ }
+-shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
+-                                     struct iatt *buf,
+-                                     gf_boolean_t *need_refresh)
+-    int ret = -1;
++int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
++                                         struct iatt *buf,
++                                         gf_boolean_t *need_refresh) {
++  int ret = -1;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf,
+-                                                     need_refresh);
+-    }
+-    UNLOCK(&inode->lock);
++  LOCK(&inode->lock);
++  {
++    ret =
++        __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh);
++  }
++  UNLOCK(&inode->lock);
+-    return ret;
++  return ret;
+ }
+-shard_local_wipe(shard_local_t *local)
+-    int i = 0;
+-    int count = 0;
+-    count = local->num_blocks;
+-    syncbarrier_destroy(&local->barrier);
+-    loc_wipe(&local->loc);
+-    loc_wipe(&local->dot_shard_loc);
+-    loc_wipe(&local->dot_shard_rm_loc);
+-    loc_wipe(&local->loc2);
+-    loc_wipe(&local->tmp_loc);
+-    loc_wipe(&local->int_inodelk.loc);
+-    loc_wipe(&local->int_entrylk.loc);
+-    loc_wipe(&local->newloc);
+-    if (local->int_entrylk.basename)
+-        GF_FREE(local->int_entrylk.basename);
+-    if (local->fd)
+-        fd_unref(local->fd);
+-    if (local->xattr_req)
+-        dict_unref(local->xattr_req);
+-    if (local->xattr_rsp)
+-        dict_unref(local->xattr_rsp);
+-    for (i = 0; i < count; i++) {
+-        if (!local->inode_list)
+-            break;
+-        if (local->inode_list[i])
+-            inode_unref(local->inode_list[i]);
+-    }
+-    GF_FREE(local->inode_list);
+-    GF_FREE(local->vector);
+-    if (local->iobref)
+-        iobref_unref(local->iobref);
+-    if (local->list_inited)
+-        gf_dirent_free(&local->entries_head);
+-    if (local->inodelk_frame)
+-        SHARD_STACK_DESTROY(local->inodelk_frame);
+-    if (local->entrylk_frame)
+-        SHARD_STACK_DESTROY(local->entrylk_frame);
+-shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict)
+-    int ret = -1;
+-    void *size_attr = NULL;
+-    uint64_t size_array[4];
+-    ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+-    if (ret) {
+-        gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
+-                         SHARD_MSG_INTERNAL_XATTR_MISSING,
+-                         "Failed to "
+-                         "get " GF_XATTR_SHARD_FILE_SIZE " for %s",
+-                         uuid_utoa(stbuf->ia_gfid));
+-        return ret;
+-    }
++void shard_local_wipe(shard_local_t *local) {
++  int i = 0;
++  int count = 0;
+-    memcpy(size_array, size_attr, sizeof(size_array));
++  count = local->num_blocks;
+-    stbuf->ia_size = ntoh64(size_array[0]);
+-    stbuf->ia_blocks = ntoh64(size_array[2]);
++  syncbarrier_destroy(&local->barrier);
++  loc_wipe(&local->loc);
++  loc_wipe(&local->dot_shard_loc);
++  loc_wipe(&local->dot_shard_rm_loc);
++  loc_wipe(&local->loc2);
++  loc_wipe(&local->tmp_loc);
++  loc_wipe(&local->int_inodelk.loc);
++  loc_wipe(&local->int_entrylk.loc);
++  loc_wipe(&local->newloc);
+-    return 0;
++  if (local->int_entrylk.basename)
++    GF_FREE(local->int_entrylk.basename);
++  if (local->fd)
++    fd_unref(local->fd);
+-shard_call_count_return(call_frame_t *frame)
+-    int call_count = 0;
+-    shard_local_t *local = NULL;
++  if (local->xattr_req)
++    dict_unref(local->xattr_req);
++  if (local->xattr_rsp)
++    dict_unref(local->xattr_rsp);
+-    local = frame->local;
++  for (i = 0; i < count; i++) {
++    if (!local->inode_list)
++      break;
++    if (local->inode_list[i])
++      inode_unref(local->inode_list[i]);
++  }
++  GF_FREE(local->inode_list);
++  GF_FREE(local->vector);
++  if (local->iobref)
++    iobref_unref(local->iobref);
++  if (local->list_inited)
++    gf_dirent_free(&local->entries_head);
++  if (local->inodelk_frame)
++    SHARD_STACK_DESTROY(local->inodelk_frame);
++  if (local->entrylk_frame)
++    SHARD_STACK_DESTROY(local->entrylk_frame);
++int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) {
++  int ret = -1;
++  void *size_attr = NULL;
++  uint64_t size_array[4];
++  ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
++  if (ret) {
++    gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
++                     SHARD_MSG_INTERNAL_XATTR_MISSING,
++                     "Failed to "
++                     "get " GF_XATTR_SHARD_FILE_SIZE " for %s",
++                     uuid_utoa(stbuf->ia_gfid));
++    return ret;
++  }
++  memcpy(size_array, size_attr, sizeof(size_array));
++  stbuf->ia_size = ntoh64(size_array[0]);
++  stbuf->ia_blocks = ntoh64(size_array[2]);
++  return 0;
++int shard_call_count_return(call_frame_t *frame) {
++  int call_count = 0;
++  shard_local_t *local = NULL;
++  local = frame->local;
++  LOCK(&frame->lock);
++  { call_count = --local->call_count; }
++  UNLOCK(&frame->lock);
++  return call_count;
++static char *shard_internal_dir_string(shard_internal_dir_type_t type) {
++  char *str = NULL;
++  switch (type) {
++    str = GF_SHARD_DIR;
++    break;
++    break;
++  default:
++    break;
++  }
++  return str;
++static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local,
++                                       shard_internal_dir_type_t type) {
++  int ret = -1;
++  char *bname = NULL;
++  inode_t *parent = NULL;
++  loc_t *internal_dir_loc = NULL;
++  shard_priv_t *priv = NULL;
++  priv = this->private;
++  if (!local)
++    return -1;
++  switch (type) {
++    internal_dir_loc = &local->dot_shard_loc;
++    bname = GF_SHARD_DIR;
++    parent = inode_ref(this->itable->root);
++    break;
++    internal_dir_loc = &local->dot_shard_rm_loc;
++    bname = GF_SHARD_REMOVE_ME_DIR;
++    parent = inode_ref(priv->dot_shard_inode);
++    break;
++  default:
++    break;
++  }
++  internal_dir_loc->inode = inode_new(this->itable);
++  internal_dir_loc->parent = parent;
++  ret = inode_path(internal_dir_loc->parent, bname,
++                   (char **)&internal_dir_loc->path);
++  if (ret < 0 || !(internal_dir_loc->inode)) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++           "Inode path failed on %s", bname);
++    goto out;
++  }
++  internal_dir_loc->name = strrchr(internal_dir_loc->path, '/');
++  if (internal_dir_loc->name)
++    internal_dir_loc->name++;
++  ret = 0;
++  return ret;
++inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
++                                          inode_t *base_inode, int block_num,
++                                          uuid_t gfid) {
++  char block_bname[256] = {
++      0,
++  };
++  inode_t *lru_inode = NULL;
++  shard_priv_t *priv = NULL;
++  shard_inode_ctx_t *ctx = NULL;
++  shard_inode_ctx_t *lru_inode_ctx = NULL;
++  shard_inode_ctx_t *lru_base_inode_ctx = NULL;
++  inode_t *fsync_inode = NULL;
++  inode_t *lru_base_inode = NULL;
++  gf_boolean_t do_fsync = _gf_false;
++  priv = this->private;
++  shard_inode_ctx_get(linked_inode, this, &ctx);
++  if (list_empty(&ctx->ilist)) {
++    if (priv->inode_count + 1 <= priv->lru_limit) {
++      /* If this inode was linked here for the first time (indicated
++       * by empty list), and if there is still space in the priv list,
++       * add this ctx to the tail of the list.
++       */
++      /* For as long as an inode is in lru list, we try to
++       * keep it alive by holding a ref on it.
++       */
++      inode_ref(linked_inode);
++      if (base_inode)
++        gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
++      else
++        gf_uuid_copy(ctx->base_gfid, gfid);
++      ctx->block_num = block_num;
++      list_add_tail(&ctx->ilist, &priv->ilist_head);
++      priv->inode_count++;
++      ctx->base_inode = inode_ref(base_inode);
++    } else {
++      /*If on the other hand there is no available slot for this inode
++       * in the list, delete the lru inode from the head of the list,
++       * unlink it. And in its place add this new inode into the list.
++       */
++      lru_inode_ctx =
++          list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist);
++      GF_ASSERT(lru_inode_ctx->block_num > 0);
++      lru_base_inode = lru_inode_ctx->base_inode;
++      list_del_init(&lru_inode_ctx->ilist);
++      lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid);
++      /* If the lru inode was part of the pending-fsync list,
++       * the base inode needs to be unref'd, the lru inode
++       * deleted from fsync list and fsync'd in a new frame,
++       * and then unlinked in memory and forgotten.
++       */
++      if (!lru_base_inode)
++        goto after_fsync_check;
++      LOCK(&lru_base_inode->lock);
++      LOCK(&lru_inode->lock);
++      {
++        if (!list_empty(&lru_inode_ctx->to_fsync_list)) {
++          list_del_init(&lru_inode_ctx->to_fsync_list);
++          lru_inode_ctx->fsync_needed = 0;
++          do_fsync = _gf_true;
++          __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx);
++          lru_base_inode_ctx->fsync_count--;
++        }
++      }
++      UNLOCK(&lru_inode->lock);
++      UNLOCK(&lru_base_inode->lock);
++    after_fsync_check:
++      if (!do_fsync) {
++        shard_make_block_bname(lru_inode_ctx->block_num,
++                               lru_inode_ctx->base_gfid, block_bname,
++                               sizeof(block_bname));
++        /* The following unref corresponds to the ref held at
++         * the time the shard was added to the lru list.
++         */
++        inode_unref(lru_inode);
++        inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
++        inode_forget(lru_inode, 0);
++      } else {
++        /* The following unref corresponds to the ref
++         * held when the shard was added to fsync list.
++         */
++        inode_unref(lru_inode);
++        fsync_inode = lru_inode;
++        if (lru_base_inode)
++          inode_unref(lru_base_inode);
++      }
++      /* The following unref corresponds to the ref
++       * held by inode_find() above.
++       */
++      inode_unref(lru_inode);
++      /* The following unref corresponds to the ref held on the base shard
++       * at the time of adding shard inode to lru list
++       */
++      if (lru_base_inode)
++        inode_unref(lru_base_inode);
++      /* For as long as an inode is in lru list, we try to
++       * keep it alive by holding a ref on it.
++       */
++      inode_ref(linked_inode);
++      if (base_inode)
++        gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
++      else
++        gf_uuid_copy(ctx->base_gfid, gfid);
++      ctx->block_num = block_num;
++      ctx->base_inode = inode_ref(base_inode);
++      list_add_tail(&ctx->ilist, &priv->ilist_head);
++    }
++  } else {
++    /* If this is not the first time this inode is being operated on, move
++     * it to the most recently used end of the list.
++     */
++    list_move_tail(&ctx->ilist, &priv->ilist_head);
++  }
++  return fsync_inode;
++int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame,
++                                int32_t op_ret, int32_t op_errno) {
++  switch (fop) {
++  case GF_FOP_LOOKUP:
++    SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL);
++    break;
++  case GF_FOP_STAT:
++    SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL);
++    break;
++  case GF_FOP_FSTAT:
++    SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL);
++    break;
++    SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++    SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++  case GF_FOP_MKNOD:
++    SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
++                       NULL);
++    break;
++  case GF_FOP_LINK:
++    SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
++                       NULL);
++    break;
++  case GF_FOP_CREATE:
++    SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
++                       NULL, NULL);
++    break;
++  case GF_FOP_UNLINK:
++    SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++  case GF_FOP_RENAME:
++    SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
++                       NULL, NULL);
++    break;
++  case GF_FOP_WRITE:
++    SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++    SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++    SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++  case GF_FOP_DISCARD:
++    SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++  case GF_FOP_READ:
++    SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL,
++                       NULL);
++    break;
++  case GF_FOP_FSYNC:
++    SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++    SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL);
++    break;
++    SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL);
++    break;
++    SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
++    break;
++    SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL);
++    break;
++    SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
++    break;
++    SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
++    break;
++  case GF_FOP_SETATTR:
++    SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++    SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL);
++    break;
++  case GF_FOP_SEEK:
++    SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL);
++    break;
++  default:
++           "Invalid fop id = %d", fop);
++    break;
++  }
++  return 0;
++int shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
++                                            call_frame_t *frame,
++                                            int32_t op_ret) {
++  shard_local_t *local = NULL;
++  local = frame->local;
++  switch (fop) {
++  case GF_FOP_WRITE:
++    SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf,
++                       &local->postbuf, local->xattr_rsp);
++    break;
++    SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf,
++                       &local->postbuf, local->xattr_rsp);
++    break;
++    SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf,
++                       &local->postbuf, local->xattr_rsp);
++    break;
++  case GF_FOP_DISCARD:
++    SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf,
++                       &local->postbuf, local->xattr_rsp);
++    break;
++  default:
++           "Invalid fop id = %d", fop);
++    break;
++  }
++  return 0;
++int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie,
++                                  xlator_t *this, int32_t op_ret,
++                                  int32_t op_errno, struct iatt *prebuf,
++                                  struct iatt *postbuf, dict_t *xdata) {
++  char block_bname[256] = {
++      0,
++  };
++  fd_t *anon_fd = cookie;
++  inode_t *shard_inode = NULL;
++  shard_inode_ctx_t *ctx = NULL;
++  shard_priv_t *priv = NULL;
++  priv = this->private;
++  if (anon_fd == NULL || op_ret < 0) {
++    gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED,
++           "fsync failed on shard");
++    goto out;
++  }
++  shard_inode = anon_fd->inode;
++  LOCK(&priv->lock);
++  LOCK(&shard_inode->lock);
++  {
++    __shard_inode_ctx_get(shard_inode, this, &ctx);
++    if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
++      shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname,
++                             sizeof(block_bname));
++      inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
++      /* The following unref corresponds to the ref held by
++       * inode_link() at the time the shard was created or
++       * looked up
++       */
++      inode_unref(shard_inode);
++      inode_forget(shard_inode, 0);
++    }
++  }
++  UNLOCK(&shard_inode->lock);
++  UNLOCK(&priv->lock);
+-    LOCK(&frame->lock);
+-    {
+-        call_count = --local->call_count;
++  if (anon_fd)
++    fd_unref(anon_fd);
++  STACK_DESTROY(frame->root);
++  return 0;
++int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) {
++  fd_t *anon_fd = NULL;
++  call_frame_t *fsync_frame = NULL;
++  fsync_frame = create_frame(this, this->ctx->pool);
++  if (!fsync_frame) {
++           "Failed to create new frame "
++           "to fsync shard");
++    return -1;
++  }
++  anon_fd = fd_anonymous(inode);
++  if (!anon_fd) {
++           "Failed to create anon fd to"
++           " fsync shard");
++    STACK_DESTROY(fsync_frame->root);
++    return -1;
++  }
++  STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd,
++                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd,
++                    1, NULL);
++  return 0;
++int shard_common_resolve_shards(
++    call_frame_t *frame, xlator_t *this,
++    shard_post_resolve_fop_handler_t post_res_handler) {
++  int i = -1;
++  uint32_t shard_idx_iter = 0;
++  char path[PATH_MAX] = {
++      0,
++  };
++  uuid_t gfid = {
++      0,
++  };
++  inode_t *inode = NULL;
++  inode_t *res_inode = NULL;
++  inode_t *fsync_inode = NULL;
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
++  priv = this->private;
++  local = frame->local;
++  local->call_count = 0;
++  shard_idx_iter = local->first_block;
++  res_inode = local->resolver_base_inode;
++  if (res_inode)
++    gf_uuid_copy(gfid, res_inode->gfid);
++  else
++    gf_uuid_copy(gfid, local->base_gfid);
++  if ((local->op_ret < 0) || (local->resolve_not))
++    goto out;
++  while (shard_idx_iter <= local->last_block) {
++    i++;
++    if (shard_idx_iter == 0) {
++      local->inode_list[i] = inode_ref(res_inode);
++      shard_idx_iter++;
++      continue;
++    }
++    shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
++    inode = NULL;
++    inode = inode_resolve(this->itable, path);
++    if (inode) {
++      gf_msg_debug(this->name, 0, "Shard %d already "
++                                  "present. gfid=%s. Saving inode for future.",
++                   shard_idx_iter, uuid_utoa(inode->gfid));
++      local->inode_list[i] = inode;
++      /* Let the ref on the inodes that are already present
++       * in inode table still be held so that they don't get
++       * forgotten by the time the fop reaches the actual
++       * write stage.
++       */
++      LOCK(&priv->lock);
++      {
++        fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode,
++                                                       shard_idx_iter, gfid);
++      }
++      UNLOCK(&priv->lock);
++      shard_idx_iter++;
++      if (fsync_inode)
++        shard_initiate_evicted_inode_fsync(this, fsync_inode);
++      continue;
++    } else {
++      local->call_count++;
++      shard_idx_iter++;
+     }
+-    UNLOCK(&frame->lock);
++  }
++  post_res_handler(frame, this);
++  return 0;
++int shard_update_file_size_cbk(call_frame_t *frame, void *cookie,
++                               xlator_t *this, int32_t op_ret, int32_t op_errno,
++                               dict_t *dict, dict_t *xdata) {
++  inode_t *inode = NULL;
++  shard_local_t *local = NULL;
++  local = frame->local;
++  if ((local->fd) && (local->fd->inode))
++    inode = local->fd->inode;
++  else if (local->loc.inode)
++    inode = local->loc.inode;
++  if (op_ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, op_errno,
++           SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size"
++                                              " xattr failed on %s",
++           uuid_utoa(inode->gfid));
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto err;
++  }
+-    return call_count;
++  if (shard_modify_size_and_block_count(&local->postbuf, dict)) {
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto err;
++  }
++  local->post_update_size_handler(frame, this);
++  return 0;
+ }
+-static char *
+-shard_internal_dir_string(shard_internal_dir_type_t type)
+-    char *str = NULL;
++int shard_set_size_attrs(int64_t size, int64_t block_count,
++                         int64_t **size_attr_p) {
++  int ret = -1;
++  int64_t *size_attr = NULL;
+-    switch (type) {
+-            str = GF_SHARD_DIR;
+-            break;
+-            str = GF_SHARD_REMOVE_ME_DIR;
+-            break;
+-        default:
+-            break;
+-    }
+-    return str;
+-static int
+-shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local,
+-                            shard_internal_dir_type_t type)
+-    int ret = -1;
+-    char *bname = NULL;
+-    inode_t *parent = NULL;
+-    loc_t *internal_dir_loc = NULL;
+-    shard_priv_t *priv = NULL;
+-    priv = this->private;
+-    if (!local)
+-        return -1;
+-    switch (type) {
+-            internal_dir_loc = &local->dot_shard_loc;
+-            bname = GF_SHARD_DIR;
+-            parent = inode_ref(this->itable->root);
+-            break;
+-            internal_dir_loc = &local->dot_shard_rm_loc;
+-            bname = GF_SHARD_REMOVE_ME_DIR;
+-            parent = inode_ref(priv->dot_shard_inode);
+-            break;
+-        default:
+-            break;
+-    }
++  if (!size_attr_p)
++    goto out;
+-    internal_dir_loc->inode = inode_new(this->itable);
+-    internal_dir_loc->parent = parent;
+-    ret = inode_path(internal_dir_loc->parent, bname,
+-                     (char **)&internal_dir_loc->path);
+-    if (ret < 0 || !(internal_dir_loc->inode)) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-               "Inode path failed on %s", bname);
+-        goto out;
+-    }
++  size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t);
++  if (!size_attr)
++    goto out;
+-    internal_dir_loc->name = strrchr(internal_dir_loc->path, '/');
+-    if (internal_dir_loc->name)
+-        internal_dir_loc->name++;
++  size_attr[0] = hton64(size);
++  /* As sharding evolves, it _may_ be necessary to embed more pieces of
++   * information within the same xattr. So allocating slots for them in
++   * advance. For now, only bytes 0-63 and 128-191 which would make up the
++   * current size and block count respectively of the file are valid.
++   */
++  size_attr[2] = hton64(block_count);
+-    ret = 0;
++  *size_attr_p = size_attr;
++  ret = 0;
+ out:
+-    return ret;
++  return ret;
+ }
+-inode_t *
+-__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
+-                                 inode_t *base_inode, int block_num,
+-                                 uuid_t gfid)
+-    char block_bname[256] = {
+-        0,
+-    };
+-    inode_t *lru_inode = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_inode_ctx_t *ctx = NULL;
+-    shard_inode_ctx_t *lru_inode_ctx = NULL;
+-    shard_inode_ctx_t *lru_base_inode_ctx = NULL;
+-    inode_t *fsync_inode = NULL;
+-    inode_t *lru_base_inode = NULL;
+-    gf_boolean_t do_fsync = _gf_false;
+-    priv = this->private;
+-    shard_inode_ctx_get(linked_inode, this, &ctx);
+-    if (list_empty(&ctx->ilist)) {
+-        if (priv->inode_count + 1 <= priv->lru_limit) {
+-            /* If this inode was linked here for the first time (indicated
+-             * by empty list), and if there is still space in the priv list,
+-             * add this ctx to the tail of the list.
+-             */
+-            /* For as long as an inode is in lru list, we try to
+-             * keep it alive by holding a ref on it.
+-             */
+-            inode_ref(linked_inode);
+-            if (base_inode)
+-                gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+-            else
+-                gf_uuid_copy(ctx->base_gfid, gfid);
+-            ctx->block_num = block_num;
+-            list_add_tail(&ctx->ilist, &priv->ilist_head);
+-            priv->inode_count++;
+-            ctx->base_inode = inode_ref(base_inode);
+-        } else {
+-            /*If on the other hand there is no available slot for this inode
+-             * in the list, delete the lru inode from the head of the list,
+-             * unlink it. And in its place add this new inode into the list.
+-             */
+-            lru_inode_ctx = list_first_entry(&priv->ilist_head,
+-                                             shard_inode_ctx_t, ilist);
+-            GF_ASSERT(lru_inode_ctx->block_num > 0);
+-            lru_base_inode = lru_inode_ctx->base_inode;
+-            list_del_init(&lru_inode_ctx->ilist);
+-            lru_inode = inode_find(linked_inode->table,
+-                                   lru_inode_ctx->stat.ia_gfid);
+-            /* If the lru inode was part of the pending-fsync list,
+-             * the base inode needs to be unref'd, the lru inode
+-             * deleted from fsync list and fsync'd in a new frame,
+-             * and then unlinked in memory and forgotten.
+-             */
+-            if (!lru_base_inode)
+-                goto after_fsync_check;
+-            LOCK(&lru_base_inode->lock);
+-            LOCK(&lru_inode->lock);
+-            {
+-                if (!list_empty(&lru_inode_ctx->to_fsync_list)) {
+-                    list_del_init(&lru_inode_ctx->to_fsync_list);
+-                    lru_inode_ctx->fsync_needed = 0;
+-                    do_fsync = _gf_true;
+-                    __shard_inode_ctx_get(lru_base_inode, this,
+-                                          &lru_base_inode_ctx);
+-                    lru_base_inode_ctx->fsync_count--;
+-                }
+-            }
+-            UNLOCK(&lru_inode->lock);
+-            UNLOCK(&lru_base_inode->lock);
+-        after_fsync_check:
+-            if (!do_fsync) {
+-                shard_make_block_bname(lru_inode_ctx->block_num,
+-                                       lru_inode_ctx->base_gfid, block_bname,
+-                                       sizeof(block_bname));
+-                /* The following unref corresponds to the ref held at
+-                 * the time the shard was added to the lru list.
+-                 */
+-                inode_unref(lru_inode);
+-                inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
+-                inode_forget(lru_inode, 0);
+-            } else {
+-                /* The following unref corresponds to the ref
+-                 * held when the shard was added to fsync list.
+-                 */
+-                inode_unref(lru_inode);
+-                fsync_inode = lru_inode;
+-                if (lru_base_inode)
+-                    inode_unref(lru_base_inode);
+-            }
+-            /* The following unref corresponds to the ref
+-             * held by inode_find() above.
+-             */
+-            inode_unref(lru_inode);
+-            /* The following unref corresponds to the ref held on the base shard
+-             * at the time of adding shard inode to lru list
+-             */
+-            if (lru_base_inode)
+-                inode_unref(lru_base_inode);
+-            /* For as long as an inode is in lru list, we try to
+-             * keep it alive by holding a ref on it.
+-             */
+-            inode_ref(linked_inode);
+-            if (base_inode)
+-                gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+-            else
+-                gf_uuid_copy(ctx->base_gfid, gfid);
+-            ctx->block_num = block_num;
+-            ctx->base_inode = inode_ref(base_inode);
+-            list_add_tail(&ctx->ilist, &priv->ilist_head);
+-        }
+-    } else {
+-        /* If this is not the first time this inode is being operated on, move
+-         * it to the most recently used end of the list.
+-         */
+-        list_move_tail(&ctx->ilist, &priv->ilist_head);
+-    }
+-    return fsync_inode;
++int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                           loc_t *loc,
++                           shard_post_update_size_fop_handler_t handler) {
++  int ret = -1;
++  int64_t *size_attr = NULL;
++  int64_t delta_blocks = 0;
++  inode_t *inode = NULL;
++  shard_local_t *local = NULL;
++  dict_t *xattr_req = NULL;
+-shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame,
+-                            int32_t op_ret, int32_t op_errno)
+-    switch (fop) {
+-        case GF_FOP_LOOKUP:
+-            SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL, NULL);
+-            break;
+-        case GF_FOP_STAT:
+-            SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL);
+-            break;
+-        case GF_FOP_FSTAT:
+-            SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL);
+-            break;
+-        case GF_FOP_TRUNCATE:
+-            SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_FTRUNCATE:
+-            SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_MKNOD:
+-            SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL,
+-                               NULL, NULL);
+-            break;
+-        case GF_FOP_LINK:
+-            SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL,
+-                               NULL, NULL);
+-            break;
+-        case GF_FOP_CREATE:
+-            SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL, NULL, NULL, NULL);
+-            break;
+-        case GF_FOP_UNLINK:
+-            SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_RENAME:
+-            SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL, NULL, NULL, NULL);
+-            break;
+-        case GF_FOP_WRITE:
+-            SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_FALLOCATE:
+-            SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_ZEROFILL:
+-            SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_DISCARD:
+-            SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_READ:
+-            SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL,
+-                               NULL, NULL);
+-            break;
+-        case GF_FOP_FSYNC:
+-            SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_REMOVEXATTR:
+-            SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL);
+-            break;
+-        case GF_FOP_FREMOVEXATTR:
+-            SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL);
+-            break;
+-        case GF_FOP_FGETXATTR:
+-            SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+-            break;
+-        case GF_FOP_GETXATTR:
+-            SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL);
+-            break;
+-        case GF_FOP_FSETXATTR:
+-            SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
+-            break;
+-        case GF_FOP_SETXATTR:
+-            SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
+-            break;
+-        case GF_FOP_SETATTR:
+-            SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_FSETATTR:
+-            SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL,
+-                               NULL);
+-            break;
+-        case GF_FOP_SEEK:
+-            SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL);
+-            break;
+-        default:
+-            gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-                   "Invalid fop id = %d", fop);
+-            break;
+-    }
+-    return 0;
++  local = frame->local;
++  local->post_update_size_handler = handler;
+-shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
+-                                        call_frame_t *frame, int32_t op_ret)
+-    shard_local_t *local = NULL;
++  xattr_req = dict_new();
++  if (!xattr_req) {
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto out;
++  }
++  if (fd)
++    inode = fd->inode;
++  else
++    inode = loc->inode;
++  /* If both size and block count have not changed, then skip the xattrop.
++   */
++  delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
++  if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
++    goto out;
++  }
++  ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks,
++                             &size_attr);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
++           "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto out;
++  }
++  ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to set key %s into dict. gfid=%s", GF_XATTR_SHARD_FILE_SIZE,
++           uuid_utoa(inode->gfid));
++    GF_FREE(size_attr);
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto out;
++  }
+-    local = frame->local;
++  if (fd)
++    STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64,
++               xattr_req, NULL);
++  else
++    STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64,
++               xattr_req, NULL);
+-    switch (fop) {
+-        case GF_FOP_WRITE:
+-            SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf,
+-                               &local->postbuf, local->xattr_rsp);
+-            break;
+-        case GF_FOP_FALLOCATE:
+-            SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf,
+-                               &local->postbuf, local->xattr_rsp);
+-            break;
+-        case GF_FOP_ZEROFILL:
+-            SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf,
+-                               &local->postbuf, local->xattr_rsp);
+-            break;
+-        case GF_FOP_DISCARD:
+-            SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf,
+-                               &local->postbuf, local->xattr_rsp);
+-            break;
+-        default:
+-            gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-                   "Invalid fop id = %d", fop);
+-            break;
+-    }
+-    return 0;
++  dict_unref(xattr_req);
++  return 0;
+-shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                              int32_t op_ret, int32_t op_errno,
+-                              struct iatt *prebuf, struct iatt *postbuf,
+-                              dict_t *xdata)
+-    char block_bname[256] = {
+-        0,
+-    };
+-    fd_t *anon_fd = cookie;
+-    inode_t *shard_inode = NULL;
+-    shard_inode_ctx_t *ctx = NULL;
+-    shard_priv_t *priv = NULL;
++  if (xattr_req)
++    dict_unref(xattr_req);
++  handler(frame, this);
++  return 0;
++static inode_t *shard_link_internal_dir_inode(shard_local_t *local,
++                                              inode_t *inode, struct iatt *buf,
++                                              shard_internal_dir_type_t type) {
++  inode_t *linked_inode = NULL;
++  shard_priv_t *priv = NULL;
++  char *bname = NULL;
++  inode_t **priv_inode = NULL;
++  inode_t *parent = NULL;
++  priv = THIS->private;
++  switch (type) {
++    bname = GF_SHARD_DIR;
++    priv_inode = &priv->dot_shard_inode;
++    parent = inode->table->root;
++    break;
++    bname = GF_SHARD_REMOVE_ME_DIR;
++    priv_inode = &priv->dot_shard_rm_inode;
++    parent = priv->dot_shard_inode;
++    break;
++  default:
++    break;
++  }
++  linked_inode = inode_link(inode, parent, bname, buf);
++  inode_lookup(linked_inode);
++  *priv_inode = linked_inode;
++  return linked_inode;
++int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
++                                   xlator_t *this, int32_t op_ret,
++                                   int32_t op_errno, inode_t *inode,
++                                   struct iatt *buf, dict_t *xdata,
++                                   struct iatt *postparent) {
++  shard_local_t *local = NULL;
++  inode_t *linked_inode = NULL;
++  shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++  local = frame->local;
++  if (op_ret) {
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto out;
++  }
++  /* To-Do: Fix refcount increment per call to
++   * shard_link_internal_dir_inode().
++   */
++  linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++  shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
++  shard_common_resolve_shards(frame, this, local->post_res_handler);
++  return 0;
++int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this,
++                               shard_internal_dir_type_t type) {
++  loc_t loc = {
++      0,
++  };
++  inode_t *inode = NULL;
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
++  uuid_t gfid = {
++      0,
++  };
++  local = frame->local;
++  priv = this->private;
++  switch (type) {
++    gf_uuid_copy(gfid, priv->dot_shard_gfid);
++    break;
++    gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
++    break;
++  default:
++    break;
++  }
++  inode = inode_find(this->itable, gfid);
++  if (!shard_inode_ctx_needs_lookup(inode, this)) {
++    local->op_ret = 0;
++    goto out;
++  }
+-    priv = this->private;
++  /* Plain assignment because the ref is already taken above through
++   * call to inode_find()
++   */
++  loc.inode = inode;
++  gf_uuid_copy(loc.gfid, gfid);
+-    if (anon_fd == NULL || op_ret < 0) {
+-        gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED,
+-               "fsync failed on shard");
+-        goto out;
+-    }
+-    shard_inode = anon_fd->inode;
++  STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type,
++                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc,
++                    NULL);
++  loc_wipe(&loc);
+-    LOCK(&priv->lock);
+-    LOCK(&shard_inode->lock);
+-    {
+-        __shard_inode_ctx_get(shard_inode, this, &ctx);
+-        if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
+-            shard_make_block_bname(ctx->block_num, shard_inode->gfid,
+-                                   block_bname, sizeof(block_bname));
+-            inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
+-            /* The following unref corresponds to the ref held by
+-             * inode_link() at the time the shard was created or
+-             * looked up
+-             */
+-            inode_unref(shard_inode);
+-            inode_forget(shard_inode, 0);
+-        }
+-    }
+-    UNLOCK(&shard_inode->lock);
+-    UNLOCK(&priv->lock);
++  return 0;
+ out:
+-    if (anon_fd)
+-        fd_unref(anon_fd);
+-    STACK_DESTROY(frame->root);
+-    return 0;
++  shard_common_resolve_shards(frame, this, local->post_res_handler);
++  return 0;
+ }
+-shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
+-    fd_t *anon_fd = NULL;
+-    call_frame_t *fsync_frame = NULL;
++int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie,
++                                  xlator_t *this, int32_t op_ret,
++                                  int32_t op_errno, inode_t *inode,
++                                  struct iatt *buf, dict_t *xdata,
++                                  struct iatt *postparent) {
++  inode_t *link_inode = NULL;
++  shard_local_t *local = NULL;
++  shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+-    fsync_frame = create_frame(this, this->ctx->pool);
+-    if (!fsync_frame) {
+-               "Failed to create new frame "
+-               "to fsync shard");
+-        return -1;
+-    }
++  local = frame->local;
+-    anon_fd = fd_anonymous(inode);
+-    if (!anon_fd) {
+-               "Failed to create anon fd to"
+-               " fsync shard");
+-        STACK_DESTROY(fsync_frame->root);
+-        return -1;
+-    }
++  if (op_ret) {
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto unwind;
++  }
++  if (!IA_ISDIR(buf->ia_type)) {
++    gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR,
++           "%s already exists and "
++           "is not a directory. Please remove it from all bricks "
++           "and try again",
++           shard_internal_dir_string(type));
++    local->op_ret = -1;
++    local->op_errno = EIO;
++    goto unwind;
++  }
++  link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++  if (link_inode != inode) {
++    shard_refresh_internal_dir(frame, this, type);
++  } else {
++    shard_inode_ctx_mark_dir_refreshed(link_inode, this);
++    shard_common_resolve_shards(frame, this, local->post_res_handler);
++  }
++  return 0;
+-    STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+-                      anon_fd, 1, NULL);
+-    return 0;
++  local->post_res_handler(frame, this);
++  return 0;
++int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this,
++                              shard_post_resolve_fop_handler_t post_res_handler,
++                              shard_internal_dir_type_t type) {
++  int ret = -1;
++  dict_t *xattr_req = NULL;
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
++  uuid_t *gfid = NULL;
++  loc_t *loc = NULL;
++  gf_boolean_t free_gfid = _gf_true;
++  local = frame->local;
++  priv = this->private;
++  local->post_res_handler = post_res_handler;
++  gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
++  if (!gfid)
++    goto err;
++  xattr_req = dict_new();
++  if (!xattr_req) {
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto err;
++  }
++  switch (type) {
++    gf_uuid_copy(*gfid, priv->dot_shard_gfid);
++    loc = &local->dot_shard_loc;
++    break;
++    gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
++    loc = &local->dot_shard_rm_loc;
++    break;
++  default:
++    bzero(*gfid, sizeof(uuid_t));
++    break;
++  }
++  ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to set gfid of %s into dict",
++           shard_internal_dir_string(type));
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto err;
++  } else {
++    free_gfid = _gf_false;
++  }
+-shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
+-                            shard_post_resolve_fop_handler_t post_res_handler)
+-    int i = -1;
+-    uint32_t shard_idx_iter = 0;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
+-    uuid_t gfid = {
+-        0,
+-    };
+-    inode_t *inode = NULL;
+-    inode_t *res_inode = NULL;
+-    inode_t *fsync_inode = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    priv = this->private;
+-    local = frame->local;
+-    local->call_count = 0;
+-    shard_idx_iter = local->first_block;
+-    res_inode = local->resolver_base_inode;
+-    if (res_inode)
+-        gf_uuid_copy(gfid, res_inode->gfid);
+-    else
+-        gf_uuid_copy(gfid, local->base_gfid);
++  STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type,
++                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc,
++                    xattr_req);
+-    if ((local->op_ret < 0) || (local->resolve_not))
+-        goto out;
++  dict_unref(xattr_req);
++  return 0;
+-    while (shard_idx_iter <= local->last_block) {
+-        i++;
+-        if (shard_idx_iter == 0) {
+-            local->inode_list[i] = inode_ref(res_inode);
+-            shard_idx_iter++;
+-            continue;
+-        }
++  if (xattr_req)
++    dict_unref(xattr_req);
++  if (free_gfid)
++    GF_FREE(gfid);
++  post_res_handler(frame, this);
++  return 0;
++static void shard_inode_ctx_update(inode_t *inode, xlator_t *this,
++                                   dict_t *xdata, struct iatt *buf) {
++  int ret = 0;
++  uint64_t size = 0;
++  void *bsize = NULL;
++  if (shard_inode_ctx_get_block_size(inode, this, &size)) {
++    /* Fresh lookup */
++    ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
++    if (!ret)
++      size = ntoh64(*((uint64_t *)bsize));
++    /* If the file is sharded, set its block size, otherwise just
++     * set 0.
++     */
+-        shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+-        inode = NULL;
+-        inode = inode_resolve(this->itable, path);
+-        if (inode) {
+-            gf_msg_debug(this->name, 0,
+-                         "Shard %d already "
+-                         "present. gfid=%s. Saving inode for future.",
+-                         shard_idx_iter, uuid_utoa(inode->gfid));
+-            local->inode_list[i] = inode;
+-            /* Let the ref on the inodes that are already present
+-             * in inode table still be held so that they don't get
+-             * forgotten by the time the fop reaches the actual
+-             * write stage.
+-             */
+-            LOCK(&priv->lock);
+-            {
+-                fsync_inode = __shard_update_shards_inode_list(
+-                    inode, this, res_inode, shard_idx_iter, gfid);
+-            }
+-            UNLOCK(&priv->lock);
+-            shard_idx_iter++;
+-            if (fsync_inode)
+-                shard_initiate_evicted_inode_fsync(this, fsync_inode);
+-            continue;
+-        } else {
+-            local->call_count++;
+-            shard_idx_iter++;
+-        }
+-    }
+-    post_res_handler(frame, this);
+-    return 0;
++    shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE);
++  }
++  /* If the file is sharded, also set the remaining attributes,
++   * except for ia_size and ia_blocks.
++   */
++  if (size) {
++    shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
++    (void)shard_inode_ctx_invalidate(inode, this, buf);
++  }
++int shard_delete_shards(void *opaque);
++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data);
++int shard_start_background_deletion(xlator_t *this) {
++  int ret = 0;
++  gf_boolean_t i_cleanup = _gf_true;
++  shard_priv_t *priv = NULL;
++  call_frame_t *cleanup_frame = NULL;
++  priv = this->private;
++  LOCK(&priv->lock);
++  {
++    switch (priv->bg_del_state) {
++      i_cleanup = _gf_true;
++      priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
++      break;
++      i_cleanup = _gf_false;
++      break;
++      priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
++      i_cleanup = _gf_false;
++      break;
++    default:
++      break;
++    }
++  }
++  UNLOCK(&priv->lock);
++  if (!i_cleanup)
++    return 0;
++  cleanup_frame = create_frame(this, this->ctx->pool);
++  if (!cleanup_frame) {
++           "Failed to create "
++           "new frame to delete shards");
++    ret = -ENOMEM;
++    goto err;
++  }
++  set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
++  ret = synctask_new(this->ctx->env, shard_delete_shards,
++                     shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
++  if (ret < 0) {
++    gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED,
++           "failed to create task to do background "
++           "cleanup of shards");
++    STACK_DESTROY(cleanup_frame->root);
++    goto err;
++  }
++  return 0;
++  LOCK(&priv->lock);
++  { priv->bg_del_state = SHARD_BG_DELETION_NONE; }
++  UNLOCK(&priv->lock);
++  return ret;
+ }
+-shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                           int32_t op_ret, int32_t op_errno, dict_t *dict,
+-                           dict_t *xdata)
+-    inode_t *inode = NULL;
+-    shard_local_t *local = NULL;
++int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                     int32_t op_ret, int32_t op_errno, inode_t *inode,
++                     struct iatt *buf, dict_t *xdata, struct iatt *postparent) {
++  int ret = -1;
++  shard_priv_t *priv = NULL;
++  gf_boolean_t i_start_cleanup = _gf_false;
+-    local = frame->local;
++  priv = this->private;
+-    if ((local->fd) && (local->fd->inode))
+-        inode = local->fd->inode;
+-    else if (local->loc.inode)
+-        inode = local->loc.inode;
++  if (op_ret < 0)
++    goto unwind;
+-    if (op_ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno,
+-               "Update to file size"
+-               " xattr failed on %s",
+-               uuid_utoa(inode->gfid));
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto err;
+-    }
++  if (IA_ISDIR(buf->ia_type))
++    goto unwind;
+-    if (shard_modify_size_and_block_count(&local->postbuf, dict)) {
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto err;
+-    }
+-    local->post_update_size_handler(frame, this);
+-    return 0;
++  /* Also, if the file is sharded, get the file size and block cnt xattr,
++   * and store them in the stbuf appropriately.
++   */
+-shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p)
+-    int ret = -1;
+-    int64_t *size_attr = NULL;
++  if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) &&
++      frame->root->pid != GF_CLIENT_PID_GSYNCD)
++    shard_modify_size_and_block_count(buf, xdata);
+-    if (!size_attr_p)
+-        goto out;
++  /* If this was a fresh lookup, there are two possibilities:
++   * 1) If the file is sharded (indicated by the presence of block size
++   *    xattr), store this block size, along with rdev and mode in its
++   *    inode ctx.
++   * 2) If the file is not sharded, store size along with rdev and mode
++   *    (which are anyway don't cares) in inode ctx. Since @ctx_tmp is
++   *    already initialised to all zeroes, nothing more needs to be done.
++   */
+-    size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t);
+-    if (!size_attr)
+-        goto out;
++  (void)shard_inode_ctx_update(inode, this, xdata, buf);
+-    size_attr[0] = hton64(size);
+-    /* As sharding evolves, it _may_ be necessary to embed more pieces of
+-     * information within the same xattr. So allocating slots for them in
+-     * advance. For now, only bytes 0-63 and 128-191 which would make up the
+-     * current size and block count respectively of the file are valid.
+-     */
+-    size_attr[2] = hton64(block_count);
++  LOCK(&priv->lock);
++  {
++    if (priv->first_lookup_done == _gf_false) {
++      priv->first_lookup_done = _gf_true;
++      i_start_cleanup = _gf_true;
++    }
++  }
++  UNLOCK(&priv->lock);
+-    *size_attr_p = size_attr;
++  if (!i_start_cleanup)
++    goto unwind;
+-    ret = 0;
+-    return ret;
++  ret = shard_start_background_deletion(this);
++  if (ret < 0) {
++    LOCK(&priv->lock);
++    { priv->first_lookup_done = _gf_false; }
++    UNLOCK(&priv->lock);
++  }
++  SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
++                     postparent);
++  return 0;
+ }
+-shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                       loc_t *loc, shard_post_update_size_fop_handler_t handler)
+-    int ret = -1;
+-    int64_t *size_attr = NULL;
+-    int64_t delta_blocks = 0;
+-    inode_t *inode = NULL;
+-    shard_local_t *local = NULL;
+-    dict_t *xattr_req = NULL;
++int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                 dict_t *xattr_req) {
++  int ret = -1;
++  int32_t op_errno = ENOMEM;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
+-    local = frame->local;
+-    local->post_update_size_handler = handler;
++  this->itable = loc->inode->table;
++  if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++    SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
++  }
+-    xattr_req = dict_new();
+-    if (!xattr_req) {
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto out;
+-    }
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
+-    if (fd)
+-        inode = fd->inode;
+-    else
+-        inode = loc->inode;
++  frame->local = local;
+-    /* If both size and block count have not changed, then skip the xattrop.
+-     */
+-    delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
+-    if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
+-        goto out;
+-    }
++  loc_copy(&local->loc, loc);
+-    ret = shard_set_size_attrs(local->delta_size + local->hole_size,
+-                               delta_blocks, &size_attr);
++  local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) {
++    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+     if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
+-               "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto out;
++      gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++             "Failed to set dict"
++             " value: key:%s for path %s",
++             GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
++      goto err;
+     }
++  }
+-    ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4);
++  if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
+     if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to set key %s into dict. gfid=%s",
+-               GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid));
+-        GF_FREE(size_attr);
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto out;
++      gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++             "Failed to set dict value: key:%s for path %s.",
++             GF_XATTR_SHARD_FILE_SIZE, loc->path);
++      goto err;
+     }
++  }
+-    if (fd)
+-        STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->fxattrop, fd,
+-                   GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+-    else
+-        STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->xattrop, loc,
+-                   GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+-    dict_unref(xattr_req);
+-    return 0;
+-    if (xattr_req)
+-        dict_unref(xattr_req);
+-    handler(frame, this);
+-    return 0;
+-static inode_t *
+-shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode,
+-                              struct iatt *buf, shard_internal_dir_type_t type)
+-    inode_t *linked_inode = NULL;
+-    shard_priv_t *priv = NULL;
+-    char *bname = NULL;
+-    inode_t **priv_inode = NULL;
+-    inode_t *parent = NULL;
+-    priv = THIS->private;
+-    switch (type) {
+-            bname = GF_SHARD_DIR;
+-            priv_inode = &priv->dot_shard_inode;
+-            parent = inode->table->root;
+-            break;
+-            bname = GF_SHARD_REMOVE_ME_DIR;
+-            priv_inode = &priv->dot_shard_rm_inode;
+-            parent = priv->dot_shard_inode;
+-            break;
+-        default:
+-            break;
+-    }
++  if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY)))
++    dict_del(xattr_req, GF_CONTENT_KEY);
+-    linked_inode = inode_link(inode, parent, bname, buf);
+-    inode_lookup(linked_inode);
+-    *priv_inode = linked_inode;
+-    return linked_inode;
++  STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req);
++  return 0;
++  shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno);
++  return 0;
+ }
+-shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
++int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie,
+                                xlator_t *this, int32_t op_ret, int32_t op_errno,
+                                inode_t *inode, struct iatt *buf, dict_t *xdata,
+-                               struct iatt *postparent)
+-    shard_local_t *local = NULL;
+-    inode_t *linked_inode = NULL;
+-    shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+-    local = frame->local;
+-    if (op_ret) {
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto out;
+-    }
++                               struct iatt *postparent) {
++  int ret = -1;
++  int32_t mask = SHARD_INODE_WRITE_MASK;
++  shard_local_t *local = NULL;
++  shard_inode_ctx_t ctx = {
++      0,
++  };
++  local = frame->local;
++  if (op_ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, op_errno,
++           SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file"
++                                              " failed : %s",
++           loc_gfid_utoa(&(local->loc)));
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto unwind;
++  }
++  local->prebuf = *buf;
++  if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++    local->op_ret = -1;
++    local->op_errno = EINVAL;
++    goto unwind;
++  }
++  if (shard_inode_ctx_get_all(inode, this, &ctx))
++    mask = SHARD_ALL_MASK;
++  ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0,
++                            (mask | SHARD_MASK_REFRESH_RESET));
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0,
++           "Failed to set inode"
++           " write params into inode ctx for %s",
++           uuid_utoa(buf->ia_gfid));
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto unwind;
++  }
++  local->handler(frame, this);
++  return 0;
++int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                           shard_post_fop_handler_t handler) {
++  int ret = -1;
++  shard_local_t *local = NULL;
++  dict_t *xattr_req = NULL;
++  gf_boolean_t need_refresh = _gf_false;
++  local = frame->local;
++  local->handler = handler;
++  ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
++                                             &need_refresh);
++  /* By this time, inode ctx should have been created either in create,
++   * mknod, readdirp or lookup. If not it is a bug!
++   */
++  if ((ret == 0) && (need_refresh == _gf_false)) {
++    gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s"
++                                "Serving prebuf off the inode ctx cache",
++                 uuid_utoa(loc->gfid));
++    goto out;
++  }
++  xattr_req = dict_new();
++  if (!xattr_req) {
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto out;
++  }
++  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
++  STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
++  dict_unref(xattr_req);
++  return 0;
+-    /* To-Do: Fix refcount increment per call to
+-     * shard_link_internal_dir_inode().
+-     */
+-    linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+-    shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
+ out:
+-    shard_common_resolve_shards(frame, this, local->post_res_handler);
+-    return 0;
++  if (xattr_req)
++    dict_unref(xattr_req);
++  handler(frame, this);
++  return 0;
+ }
+-shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this,
+-                           shard_internal_dir_type_t type)
+-    loc_t loc = {
+-        0,
+-    };
+-    inode_t *inode = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    uuid_t gfid = {
+-        0,
+-    };
++int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-    local = frame->local;
+-    priv = this->private;
+-    switch (type) {
+-            gf_uuid_copy(gfid, priv->dot_shard_gfid);
+-            break;
+-            gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
+-            break;
+-        default:
+-            break;
+-    }
++  local = frame->local;
+-    inode = inode_find(this->itable, gfid);
++  if (local->op_ret >= 0)
++    shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0,
++                        SHARD_LOOKUP_MASK);
+-    if (!shard_inode_ctx_needs_lookup(inode, this)) {
+-        local->op_ret = 0;
+-        goto out;
+-    }
++  SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
++                     &local->prebuf, local->xattr_rsp);
++  return 0;
+-    /* Plain assignment because the ref is already taken above through
+-     * call to inode_find()
+-     */
+-    loc.inode = inode;
+-    gf_uuid_copy(loc.gfid, gfid);
++int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-    STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc,
+-                      NULL);
+-    loc_wipe(&loc);
++  local = frame->local;
+-    return 0;
++  if (local->op_ret >= 0)
++    shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0,
++                        SHARD_LOOKUP_MASK);
+-    shard_common_resolve_shards(frame, this, local->post_res_handler);
+-    return 0;
++  SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
++                     &local->prebuf, local->xattr_rsp);
++  return 0;
+ }
+-shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                              int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                              struct iatt *buf, dict_t *xdata,
+-                              struct iatt *postparent)
+-    inode_t *link_inode = NULL;
+-    shard_local_t *local = NULL;
+-    shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                          int32_t op_ret, int32_t op_errno, struct iatt *buf,
++                          dict_t *xdata) {
++  inode_t *inode = NULL;
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  local = frame->local;
+-    if (op_ret) {
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto unwind;
+-    }
++  if (op_ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED,
++           "stat failed: %s", local->fd ? uuid_utoa(local->fd->inode->gfid)
++                                        : uuid_utoa((local->loc.inode)->gfid));
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto unwind;
++  }
+-    if (!IA_ISDIR(buf->ia_type)) {
+-        gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR,
+-               "%s already exists and "
+-               "is not a directory. Please remove it from all bricks "
+-               "and try again",
+-               shard_internal_dir_string(type));
+-        local->op_ret = -1;
+-        local->op_errno = EIO;
+-        goto unwind;
+-    }
++  local->prebuf = *buf;
++  if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++    local->op_ret = -1;
++    local->op_errno = EINVAL;
++    goto unwind;
++  }
++  local->xattr_rsp = dict_ref(xdata);
+-    link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+-    if (link_inode != inode) {
+-        shard_refresh_internal_dir(frame, this, type);
+-    } else {
+-        shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+-        shard_common_resolve_shards(frame, this, local->post_res_handler);
+-    }
+-    return 0;
++  if (local->loc.inode)
++    inode = local->loc.inode;
++  else
++    inode = local->fd->inode;
++  shard_inode_ctx_invalidate(inode, this, &local->prebuf);
+ unwind:
+-    local->post_res_handler(frame, this);
+-    return 0;
++  local->handler(frame, this);
++  return 0;
+ }
+-shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this,
+-                          shard_post_resolve_fop_handler_t post_res_handler,
+-                          shard_internal_dir_type_t type)
+-    int ret = -1;
+-    dict_t *xattr_req = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    uuid_t *gfid = NULL;
+-    loc_t *loc = NULL;
+-    gf_boolean_t free_gfid = _gf_true;
+-    local = frame->local;
+-    priv = this->private;
+-    local->post_res_handler = post_res_handler;
+-    gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+-    if (!gfid)
+-        goto err;
+-    xattr_req = dict_new();
+-    if (!xattr_req) {
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto err;
+-    }
+-    switch (type) {
+-            gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+-            loc = &local->dot_shard_loc;
+-            break;
+-            gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+-            loc = &local->dot_shard_rm_loc;
+-            break;
+-        default:
+-            bzero(*gfid, sizeof(uuid_t));
+-            break;
+-    }
++int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
+-    ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to set gfid of %s into dict",
+-               shard_internal_dir_string(type));
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto err;
+-    } else {
+-        free_gfid = _gf_false;
+-    }
++  if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
++    STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->stat, loc, xdata);
++    return 0;
++  }
+-    STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc,
+-                      xattr_req);
++  ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size from inode ctx of %s",
++           uuid_utoa(loc->inode->gfid));
++    goto err;
++  }
+-    dict_unref(xattr_req);
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->stat, loc, xdata);
+     return 0;
++  }
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
++  frame->local = local;
++  local->handler = shard_post_stat_handler;
++  loc_copy(&local->loc, loc);
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
++                                  local, err);
++  STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
++  return 0;
+ err:
+-    if (xattr_req)
+-        dict_unref(xattr_req);
+-    if (free_gfid)
+-        GF_FREE(gfid);
+-    post_res_handler(frame, this);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM);
++  return 0;
+ }
+-static void
+-shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata,
+-                       struct iatt *buf)
+-    int ret = 0;
+-    uint64_t size = 0;
+-    void *bsize = NULL;
+-    if (shard_inode_ctx_get_block_size(inode, this, &size)) {
+-        /* Fresh lookup */
+-        ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+-        if (!ret)
+-            size = ntoh64(*((uint64_t *)bsize));
+-        /* If the file is sharded, set its block size, otherwise just
+-         * set 0.
+-         */
++int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
+-        shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE);
+-    }
+-    /* If the file is sharded, also set the remaining attributes,
+-     * except for ia_size and ia_blocks.
+-     */
+-    if (size) {
+-        shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
+-        (void)shard_inode_ctx_invalidate(inode, this, buf);
+-    }
++  if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
++    STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fstat, fd, xdata);
++    return 0;
++  }
+-shard_delete_shards(void *opaque);
++  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size from inode ctx of %s",
++           uuid_utoa(fd->inode->gfid));
++    goto err;
++  }
+-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data);
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fstat, fd, xdata);
++    return 0;
++  }
+-shard_start_background_deletion(xlator_t *this)
+-    int ret = 0;
+-    gf_boolean_t i_cleanup = _gf_true;
+-    shard_priv_t *priv = NULL;
+-    call_frame_t *cleanup_frame = NULL;
++  if (!this->itable)
++    this->itable = fd->inode->table;
+-    priv = this->private;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
+-    LOCK(&priv->lock);
+-    {
+-        switch (priv->bg_del_state) {
+-            case SHARD_BG_DELETION_NONE:
+-                i_cleanup = _gf_true;
+-                priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+-                break;
+-                i_cleanup = _gf_false;
+-                break;
+-                priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+-                i_cleanup = _gf_false;
+-                break;
+-            default:
+-                break;
+-        }
+-    }
+-    UNLOCK(&priv->lock);
+-    if (!i_cleanup)
+-        return 0;
+-    cleanup_frame = create_frame(this, this->ctx->pool);
+-    if (!cleanup_frame) {
+-               "Failed to create "
+-               "new frame to delete shards");
+-        ret = -ENOMEM;
+-        goto err;
+-    }
++  frame->local = local;
+-    set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
++  local->handler = shard_post_fstat_handler;
++  local->fd = fd_ref(fd);
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
+-    ret = synctask_new(this->ctx->env, shard_delete_shards,
+-                       shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
+-    if (ret < 0) {
+-        gf_msg(this->name, GF_LOG_WARNING, errno,
+-               "failed to create task to do background "
+-               "cleanup of shards");
+-        STACK_DESTROY(cleanup_frame->root);
+-        goto err;
+-    }
+-    return 0;
++  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++                                  local, err);
++  STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
++  return 0;
+ err:
+-    LOCK(&priv->lock);
+-    {
+-        priv->bg_del_state = SHARD_BG_DELETION_NONE;
+-    }
+-    UNLOCK(&priv->lock);
+-    return ret;
++  shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                 struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+-    int ret = -1;
+-    shard_priv_t *priv = NULL;
+-    gf_boolean_t i_start_cleanup = _gf_false;
++int shard_post_update_size_truncate_handler(call_frame_t *frame,
++                                            xlator_t *this) {
++  shard_local_t *local = NULL;
+-    priv = this->private;
++  local = frame->local;
+-    if (op_ret < 0)
+-        goto unwind;
++  if (local->fop == GF_FOP_TRUNCATE)
++    SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
++                       &local->prebuf, &local->postbuf, NULL);
++  else
++    SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno,
++                       &local->prebuf, &local->postbuf, NULL);
++  return 0;
+-    if (IA_ISDIR(buf->ia_type))
+-        goto unwind;
++int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie,
++                                  xlator_t *this, int32_t op_ret,
++                                  int32_t op_errno, struct iatt *prebuf,
++                                  struct iatt *postbuf, dict_t *xdata) {
++  inode_t *inode = NULL;
++  int64_t delta_blocks = 0;
++  shard_local_t *local = NULL;
+-    /* Also, if the file is sharded, get the file size and block cnt xattr,
+-     * and store them in the stbuf appropriately.
+-     */
++  local = frame->local;
+-    if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) &&
+-        frame->root->pid != GF_CLIENT_PID_GSYNCD)
+-        shard_modify_size_and_block_count(buf, xdata);
+-    /* If this was a fresh lookup, there are two possibilities:
+-     * 1) If the file is sharded (indicated by the presence of block size
+-     *    xattr), store this block size, along with rdev and mode in its
+-     *    inode ctx.
+-     * 2) If the file is not sharded, store size along with rdev and mode
+-     *    (which are anyway don't cares) in inode ctx. Since @ctx_tmp is
+-     *    already initialised to all zeroes, nothing more needs to be done.
+-     */
++  SHARD_UNSET_ROOT_FS_ID(frame, local);
+-    (void)shard_inode_ctx_update(inode, this, xdata, buf);
++  inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode;
++  if (op_ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, op_errno,
++           SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last"
++                                                 " shard failed : %s",
++           uuid_utoa(inode->gfid));
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto err;
++  }
++  local->postbuf.ia_size = local->offset;
++  /* Let the delta be negative. We want xattrop to do subtraction */
++  local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
++  delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
++                               postbuf->ia_blocks - prebuf->ia_blocks);
++  GF_ASSERT(delta_blocks <= 0);
++  local->postbuf.ia_blocks += delta_blocks;
++  local->hole_size = 0;
++  shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
++  shard_update_file_size(frame, this, NULL, &local->loc,
++                         shard_post_update_size_truncate_handler);
++  return 0;
++  shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                              local->op_errno);
++  return 0;
++int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this,
++                              inode_t *inode) {
++  size_t last_shard_size_after = 0;
++  loc_t loc = {
++      0,
++  };
++  shard_local_t *local = NULL;
++  local = frame->local;
++  /* A NULL inode could be due to the fact that the last shard which
++   * needs to be truncated does not exist due to it lying in a hole
++   * region. So the only thing left to do in that case would be an
++   * update to file size xattr.
++   */
++  if (!inode) {
++    gf_msg_debug(this->name, 0,
++                 "Last shard to be truncated absent"
++                 " in backend: %s. Directly proceeding to update "
++                 "file size",
++                 uuid_utoa(inode->gfid));
++    shard_update_file_size(frame, this, NULL, &local->loc,
++                           shard_post_update_size_truncate_handler);
++    return 0;
++  }
+-    LOCK(&priv->lock);
+-    {
+-        if (priv->first_lookup_done == _gf_false) {
+-            priv->first_lookup_done = _gf_true;
+-            i_start_cleanup = _gf_true;
+-        }
+-    }
+-    UNLOCK(&priv->lock);
++  SHARD_SET_ROOT_FS_ID(frame, local);
+-    if (!i_start_cleanup)
+-        goto unwind;
++  loc.inode = inode_ref(inode);
++  gf_uuid_copy(loc.gfid, inode->gfid);
+-    ret = shard_start_background_deletion(this);
+-    if (ret < 0) {
+-        LOCK(&priv->lock);
+-        {
+-            priv->first_lookup_done = _gf_false;
+-        }
+-        UNLOCK(&priv->lock);
+-    }
++  last_shard_size_after = (local->offset % local->block_size);
+-    SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+-                       postparent);
+-    return 0;
++  STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after,
++             NULL);
++  loc_wipe(&loc);
++  return 0;
+ }
+-shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+-    int ret = -1;
+-    int32_t op_errno = ENOMEM;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
+-    this->itable = loc->inode->table;
+-    if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+-        SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
+-    }
++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num);
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
++int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                            int32_t op_ret, int32_t op_errno,
++                            struct iatt *preparent, struct iatt *postparent,
++                            dict_t *xdata) {
++  int ret = 0;
++  int call_count = 0;
++  int shard_block_num = (long)cookie;
++  uint64_t block_count = 0;
++  shard_local_t *local = NULL;
+-    frame->local = local;
++  local = frame->local;
+-    loc_copy(&local->loc, loc);
++  if (op_ret < 0) {
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto done;
++  }
++  ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
++  if (!ret) {
++    GF_ATOMIC_SUB(local->delta_blocks, block_count);
++  } else {
++    /* dict_get failed possibly due to a heterogeneous cluster? */
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to get key %s from dict during truncate of gfid %s",
++           uuid_utoa(local->resolver_base_inode->gfid));
++  }
++  shard_unlink_block_inode(local, shard_block_num);
++  call_count = shard_call_count_return(frame);
++  if (call_count == 0) {
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
++    shard_truncate_last_shard(frame, this, local->inode_list[0]);
++  }
++  return 0;
++int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) {
++  int i = 1;
++  int ret = -1;
++  int call_count = 0;
++  uint32_t cur_block = 0;
++  uint32_t last_block = 0;
++  char path[PATH_MAX] = {
++      0,
++  };
++  char *bname = NULL;
++  loc_t loc = {
++      0,
++  };
++  gf_boolean_t wind_failed = _gf_false;
++  shard_local_t *local = NULL;
++  shard_priv_t *priv = NULL;
++  dict_t *xdata_req = NULL;
++  local = frame->local;
++  priv = this->private;
++  cur_block = local->first_block + 1;
++  last_block = local->last_block;
++  /* Determine call count */
++  for (i = 1; i < local->num_blocks; i++) {
++    if (!local->inode_list[i])
++      continue;
++    call_count++;
++  }
++  if (!call_count) {
++    /* Call count = 0 implies that all of the shards that need to be
++     * unlinked do not exist. So shard xlator would now proceed to
++     * do the final truncate + size updates.
++     */
++    gf_msg_debug(this->name, 0, "Shards to be unlinked as part of "
++                                "truncate absent in backend: %s. Directly "
++                                "proceeding to update file size",
++                 uuid_utoa(inode->gfid));
++    local->postbuf.ia_size = local->offset;
++    local->postbuf.ia_blocks = local->prebuf.ia_blocks;
++    local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
++    GF_ATOMIC_INIT(local->delta_blocks, 0);
++    local->hole_size = 0;
++    shard_update_file_size(frame, this, local->fd, &local->loc,
++                           shard_post_update_size_truncate_handler);
++    return 0;
++  }
+-    local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
++  local->call_count = call_count;
++  i = 1;
++  xdata_req = dict_new();
++  if (!xdata_req) {
++    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++    return 0;
++  }
++  ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to set key %s into dict during truncate of %s",
++           uuid_utoa(local->resolver_base_inode->gfid));
++    dict_unref(xdata_req);
++    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++    return 0;
++  }
+-    if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) {
+-        ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+-        if (ret) {
+-            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-                   "Failed to set dict"
+-                   " value: key:%s for path %s",
+-                   GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
+-            goto err;
+-        }
++  SHARD_SET_ROOT_FS_ID(frame, local);
++  while (cur_block <= last_block) {
++    if (!local->inode_list[i]) {
++      cur_block++;
++      i++;
++      continue;
++    }
++    if (wind_failed) {
++      shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM,
++                              NULL, NULL, NULL);
++      goto next;
+     }
+-    if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+-        ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE,
+-                              8 * 4);
+-        if (ret) {
+-            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-                   "Failed to set dict value: key:%s for path %s.",
+-                   GF_XATTR_SHARD_FILE_SIZE, loc->path);
+-            goto err;
+-        }
++    shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
++    bname = strrchr(path, '/') + 1;
++    loc.parent = inode_ref(priv->dot_shard_inode);
++    ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++    if (ret < 0) {
++      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++             "Inode path failed"
++             " on %s. Base file gfid = %s",
++             bname, uuid_utoa(inode->gfid));
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++      loc_wipe(&loc);
++      wind_failed = _gf_true;
++      shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM,
++                              NULL, NULL, NULL);
++      goto next;
+     }
++ = strrchr(loc.path, '/');
++    if (
++    loc.inode = inode_ref(local->inode_list[i]);
+-    if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY)))
+-        dict_del(xattr_req, GF_CONTENT_KEY);
++    STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc,
++                      0, xdata_req);
++    loc_wipe(&loc);
++  next:
++    i++;
++    cur_block++;
++    if (!--call_count)
++      break;
++  }
++  dict_unref(xdata_req);
++  return 0;
+-    STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req);
+-    return 0;
+-    shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno);
++int shard_truncate_do(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
++  local = frame->local;
++  if (local->num_blocks == 1) {
++    /* This means that there are no shards to be unlinked.
++     * The fop boils down to truncating the last shard, updating
++     * the size and unwinding.
++     */
++    shard_truncate_last_shard(frame, this, local->inode_list[0]);
+     return 0;
++  } else {
++    shard_truncate_htol(frame, this, local->loc.inode);
++  }
++  return 0;
+ }
+-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                           int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                           struct iatt *buf, dict_t *xdata,
+-                           struct iatt *postparent)
+-    int ret = -1;
+-    int32_t mask = SHARD_INODE_WRITE_MASK;
+-    shard_local_t *local = NULL;
+-    shard_inode_ctx_t ctx = {
+-        0,
+-    };
+-    local = frame->local;
++int shard_post_lookup_shards_truncate_handler(call_frame_t *frame,
++                                              xlator_t *this) {
++  shard_local_t *local = NULL;
+-    if (op_ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno,
+-               "Lookup on base file"
+-               " failed : %s",
+-               loc_gfid_utoa(&(local->loc)));
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto unwind;
+-    }
++  local = frame->local;
+-    local->prebuf = *buf;
+-    if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+-        local->op_ret = -1;
+-        local->op_errno = EINVAL;
+-        goto unwind;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
++    return 0;
++  }
++  shard_truncate_do(frame, this);
++  return 0;
++void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
++                            struct iatt *buf) {
++  int list_index = 0;
++  char block_bname[256] = {
++      0,
++  };
++  uuid_t gfid = {
++      0,
++  };
++  inode_t *linked_inode = NULL;
++  xlator_t *this = NULL;
++  inode_t *fsync_inode = NULL;
++  shard_priv_t *priv = NULL;
++  inode_t *base_inode = NULL;
++  this = THIS;
++  priv = this->private;
++  if (local->loc.inode) {
++    gf_uuid_copy(gfid, local->loc.inode->gfid);
++    base_inode = local->loc.inode;
++  } else if (local->resolver_base_inode) {
++    gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++    base_inode = local->resolver_base_inode;
++  } else {
++    gf_uuid_copy(gfid, local->base_gfid);
++  }
++  shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
++  shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
++  linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf);
++  inode_lookup(linked_inode);
++  list_index = block_num - local->first_block;
++  local->inode_list[list_index] = linked_inode;
++  LOCK(&priv->lock);
++  {
++    fsync_inode = __shard_update_shards_inode_list(linked_inode, this,
++                                                   base_inode, block_num, gfid);
++  }
++  UNLOCK(&priv->lock);
++  if (fsync_inode)
++    shard_initiate_evicted_inode_fsync(this, fsync_inode);
++int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie,
++                                   xlator_t *this, int32_t op_ret,
++                                   int32_t op_errno, inode_t *inode,
++                                   struct iatt *buf, dict_t *xdata,
++                                   struct iatt *postparent) {
++  int call_count = 0;
++  int shard_block_num = (long)cookie;
++  uuid_t gfid = {
++      0,
++  };
++  shard_local_t *local = NULL;
++  local = frame->local;
++  if (local->resolver_base_inode)
++    gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++  else
++    gf_uuid_copy(gfid, local->base_gfid);
++  if (op_ret < 0) {
++    /* Ignore absence of shards in the backend in truncate fop. */
++    switch (local->fop) {
++    case GF_FOP_TRUNCATE:
++    case GF_FOP_FTRUNCATE:
++    case GF_FOP_RENAME:
++    case GF_FOP_UNLINK:
++      if (op_errno == ENOENT)
++        goto done;
++      break;
++    case GF_FOP_WRITE:
++    case GF_FOP_READ:
++    case GF_FOP_ZEROFILL:
++    case GF_FOP_DISCARD:
++    case GF_FOP_FALLOCATE:
++      if ((!local->first_lookup_done) && (op_errno == ENOENT)) {
++        LOCK(&frame->lock);
++        { local->create_count++; }
++        UNLOCK(&frame->lock);
++        goto done;
++      }
++      break;
++    default:
++      break;
+     }
+-    if (shard_inode_ctx_get_all(inode, this, &ctx))
+-        mask = SHARD_ALL_MASK;
++    /* else */
++    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED,
++           "Lookup on shard %d "
++           "failed. Base file gfid = %s",
++           shard_block_num, uuid_utoa(gfid));
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto done;
++  }
+-    ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0,
+-                              (mask | SHARD_MASK_REFRESH_RESET));
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0,
+-               "Failed to set inode"
+-               " write params into inode ctx for %s",
+-               uuid_utoa(buf->ia_gfid));
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto unwind;
+-    }
++  shard_link_block_inode(local, shard_block_num, inode, buf);
+-    local->handler(frame, this);
++  if (local->lookup_shards_barriered) {
++    syncbarrier_wake(&local->barrier);
+     return 0;
++  } else {
++    call_count = shard_call_count_return(frame);
++    if (call_count == 0) {
++      if (!local->first_lookup_done)
++        local->first_lookup_done = _gf_true;
++      local->pls_fop_handler(frame, this);
++    }
++  }
++  return 0;
+ }
+-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                       shard_post_fop_handler_t handler)
+-    int ret = -1;
+-    shard_local_t *local = NULL;
+-    dict_t *xattr_req = NULL;
+-    gf_boolean_t need_refresh = _gf_false;
++dict_t *shard_create_gfid_dict(dict_t *dict) {
++  int ret = 0;
++  dict_t *new = NULL;
++  unsigned char *gfid = NULL;
+-    local = frame->local;
+-    local->handler = handler;
++  new = dict_copy_with_ref(dict, NULL);
++  if (!new)
++    return NULL;
+-    ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
+-                                               &need_refresh);
+-    /* By this time, inode ctx should have been created either in create,
+-     * mknod, readdirp or lookup. If not it is a bug!
+-     */
+-    if ((ret == 0) && (need_refresh == _gf_false)) {
+-        gf_msg_debug(this->name, 0,
+-                     "Skipping lookup on base file: %s"
+-                     "Serving prebuf off the inode ctx cache",
+-                     uuid_utoa(loc->gfid));
+-        goto out;
++  gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
++  if (!gfid) {
++    ret = -1;
++    goto out;
++  }
++  gf_uuid_generate(gfid);
++  ret = dict_set_gfuuid(new, "gfid-req", gfid, false);
++  if (ret) {
++    dict_unref(new);
++    new = NULL;
++    GF_FREE(gfid);
++  }
++  return new;
++int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this,
++                               inode_t *inode,
++                               shard_post_lookup_shards_fop_handler_t handler) {
++  int i = 0;
++  int ret = 0;
++  int count = 0;
++  int call_count = 0;
++  int32_t shard_idx_iter = 0;
++  int last_block = 0;
++  char path[PATH_MAX] = {
++      0,
++  };
++  char *bname = NULL;
++  uuid_t gfid = {
++      0,
++  };
++  loc_t loc = {
++      0,
++  };
++  shard_local_t *local = NULL;
++  shard_priv_t *priv = NULL;
++  gf_boolean_t wind_failed = _gf_false;
++  dict_t *xattr_req = NULL;
++  priv = this->private;
++  local = frame->local;
++  count = call_count = local->call_count;
++  shard_idx_iter = local->first_block;
++  last_block = local->last_block;
++  local->pls_fop_handler = handler;
++  if (local->lookup_shards_barriered)
++    local->barrier.waitfor = local->call_count;
++  if (inode)
++    gf_uuid_copy(gfid, inode->gfid);
++  else
++    gf_uuid_copy(gfid, local->base_gfid);
++  while (shard_idx_iter <= last_block) {
++    if (local->inode_list[i]) {
++      i++;
++      shard_idx_iter++;
++      continue;
++    }
++    if (wind_failed) {
++      shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
++                                     -1, ENOMEM, NULL, NULL, NULL, NULL);
++      goto next;
++    }
++    shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
++    bname = strrchr(path, '/') + 1;
++    loc.inode = inode_new(this->itable);
++    loc.parent = inode_ref(priv->dot_shard_inode);
++    gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
++    ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++    if (ret < 0 || !(loc.inode)) {
++      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++             "Inode path failed"
++             " on %s, base file gfid = %s",
++             bname, uuid_utoa(gfid));
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++      loc_wipe(&loc);
++      wind_failed = _gf_true;
++      shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
++                                     -1, ENOMEM, NULL, NULL, NULL, NULL);
++      goto next;
+     }
+-    xattr_req = dict_new();
++ = strrchr(loc.path, '/');
++    if (
++    xattr_req = shard_create_gfid_dict(local->xattr_req);
+     if (!xattr_req) {
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto out;
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++      wind_failed = _gf_true;
++      loc_wipe(&loc);
++      shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
++                                     -1, ENOMEM, NULL, NULL, NULL, NULL);
++      goto next;
++    }
++    STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk,
++                      (void *)(long)shard_idx_iter, FIRST_CHILD(this),
++                      FIRST_CHILD(this)->fops->lookup, &loc, xattr_req);
++    loc_wipe(&loc);
++    dict_unref(xattr_req);
++  next:
++    shard_idx_iter++;
++    i++;
++    if (!--call_count)
++      break;
++  }
++  if (local->lookup_shards_barriered) {
++    syncbarrier_wait(&local->barrier, count);
++    local->pls_fop_handler(frame, this);
++  }
++  return 0;
++int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
++  local = frame->local;
++  if (local->op_ret < 0) {
++    if (local->op_errno == ENOENT) {
++      /* If lookup on /.shard fails with ENOENT, it means that
++       * the file was 0-byte in size but truncated sometime in
++       * the past to a higher size which is reflected in the
++       * size xattr, and now being truncated to a lower size.
++       * In this case, the only thing that needs to be done is
++       * to update the size xattr of the file and unwind.
++       */
++      local->first_block = local->last_block = 0;
++      local->num_blocks = 1;
++      local->call_count = 0;
++      local->op_ret = 0;
++      local->postbuf.ia_size = local->offset;
++      shard_update_file_size(frame, this, local->fd, &local->loc,
++                             shard_post_update_size_truncate_handler);
++      return 0;
++    } else {
++      shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                  local->op_errno);
++      return 0;
+     }
++  }
+-    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
++  if (!local->call_count)
++    shard_truncate_do(frame, this);
++  else
++    shard_common_lookup_shards(frame, this, local->loc.inode,
++                               shard_post_lookup_shards_truncate_handler);
++  return 0;
++int shard_truncate_begin(call_frame_t *frame, xlator_t *this) {
++  int ret = 0;
++  shard_local_t *local = NULL;
++  shard_priv_t *priv = NULL;
++  priv = this->private;
++  local = frame->local;
++  /* First participant block here is the lowest numbered block that would
++   * hold the last byte of the file post successful truncation.
++   * Last participant block is the block that contains the last byte in
++   * the current state of the file.
++   * If (first block == last_block):
++   *         then that means that the file only needs truncation of the
++   *         first (or last since both are same) block.
++   * Else
++   *         if (new_size % block_size == 0)
++   *                 then that means there is no truncate to be done with
++   *                 only shards from first_block + 1 through the last
++   *                 block needing to be unlinked.
++   *         else
++   *                 both truncate of the first block and unlink of the
++   *                 remaining shards until end of file is required.
++   */
++  local->first_block =
++      (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1,
++                                                  local->block_size);
++  local->last_block =
++      get_highest_block(0, local->prebuf.ia_size, local->block_size);
++  local->num_blocks = local->last_block - local->first_block + 1;
++  local->resolver_base_inode =
++      (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode;
++  if ((local->first_block == 0) && (local->num_blocks == 1)) {
++    if (local->fop == GF_FOP_TRUNCATE)
++      STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++                 FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset,
++                 local->xattr_req);
++    else
++      STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++                 FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset,
++                 local->xattr_req);
++    return 0;
++  }
+-    STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
++  local->inode_list =
++      GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
++  if (!local->inode_list)
++    goto err;
+-    dict_unref(xattr_req);
+-    return 0;
++  local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++  if (!local->dot_shard_loc.inode) {
++    ret =
++        shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
++    if (ret)
++      goto err;
++    shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler,
++                              SHARD_INTERNAL_DIR_DOT_SHARD);
++  } else {
++    local->post_res_handler = shard_post_resolve_truncate_handler;
++    shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++  }
++  return 0;
+-    if (xattr_req)
+-        dict_unref(xattr_req);
+-    handler(frame, this);
+-    return 0;
++  shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_post_fstat_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
++int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
++  struct iatt tmp_stbuf = {
++      0,
++  };
+-    local = frame->local;
+-    if (local->op_ret >= 0)
+-        shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0,
+-                            SHARD_LOOKUP_MASK);
++  local = frame->local;
+-    SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
+-                       &local->prebuf, local->xattr_rsp);
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
+     return 0;
++  }
++  local->postbuf = tmp_stbuf = local->prebuf;
++  if (local->prebuf.ia_size == local->offset) {
++    /* If the file size is same as requested size, unwind the call
++     * immediately.
++     */
++    if (local->fop == GF_FOP_TRUNCATE)
++      SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf,
++                         NULL);
++    else
++      SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf,
++                         &local->postbuf, NULL);
++  } else if (local->offset > local->prebuf.ia_size) {
++    /* If the truncate is from a lower to a higher size, set the
++     * new size xattr and unwind.
++     */
++    local->hole_size = local->offset - local->prebuf.ia_size;
++    local->delta_size = 0;
++    GF_ATOMIC_INIT(local->delta_blocks, 0);
++    local->postbuf.ia_size = local->offset;
++    tmp_stbuf.ia_size = local->offset;
++    shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
++                        SHARD_INODE_WRITE_MASK);
++    shard_update_file_size(frame, this, NULL, &local->loc,
++                           shard_post_update_size_truncate_handler);
++  } else {
++    /* ... else
++     * i.   unlink all shards that need to be unlinked.
++     * ii.  truncate the last of the shards.
++     * iii. update the new size using setxattr.
++     * and unwind the fop.
++     */
++    local->hole_size = 0;
++    local->delta_size = (local->offset - local->prebuf.ia_size);
++    GF_ATOMIC_INIT(local->delta_blocks, 0);
++    tmp_stbuf.ia_size = local->offset;
++    shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
++                        SHARD_INODE_WRITE_MASK);
++    shard_truncate_begin(frame, this);
++  }
++  return 0;
+ }
+-shard_post_stat_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
++/* TO-DO:
++ * Fix updates to size and block count with racing write(s) and truncate(s).
++ */
+-    local = frame->local;
++int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                   off_t offset, dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
+-    if (local->op_ret >= 0)
+-        shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0,
+-                            SHARD_LOOKUP_MASK);
++  ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size from inode ctx of %s",
++           uuid_utoa(loc->inode->gfid));
++    goto err;
++  }
+-    SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
+-                       &local->prebuf, local->xattr_rsp);
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+     return 0;
++  }
+-shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                      int32_t op_ret, int32_t op_errno, struct iatt *buf,
+-                      dict_t *xdata)
+-    inode_t *inode = NULL;
+-    shard_local_t *local = NULL;
++  if (!this->itable)
++    this->itable = loc->inode->table;
+-    local = frame->local;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
++  frame->local = local;
++  ret = syncbarrier_init(&local->barrier);
++  if (ret)
++    goto err;
++  loc_copy(&local->loc, loc);
++  local->offset = offset;
++  local->block_size = block_size;
++  local->fop = GF_FOP_TRUNCATE;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  local->resolver_base_inode = loc->inode;
++  GF_ATOMIC_INIT(local->delta_blocks, 0);
++  shard_lookup_base_file(frame, this, &local->loc,
++                         shard_post_lookup_truncate_handler);
++  return 0;
+-    if (op_ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED,
+-               "stat failed: %s",
+-               local->fd ? uuid_utoa(local->fd->inode->gfid)
+-                         : uuid_utoa((local->loc.inode)->gfid));
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto unwind;
+-    }
++  shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM);
++  return 0;
++int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++                    dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
++  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size from inode ctx of %s",
++           uuid_utoa(fd->inode->gfid));
++    goto err;
++  }
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
++    return 0;
++  }
++  if (!this->itable)
++    this->itable = fd->inode->table;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
++  frame->local = local;
++  ret = syncbarrier_init(&local->barrier);
++  if (ret)
++    goto err;
++  local->fd = fd_ref(fd);
++  local->offset = offset;
++  local->block_size = block_size;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  local->fop = GF_FOP_FTRUNCATE;
++  local->loc.inode = inode_ref(fd->inode);
++  gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++  local->resolver_base_inode = fd->inode;
++  GF_ATOMIC_INIT(local->delta_blocks, 0);
++  shard_lookup_base_file(frame, this, &local->loc,
++                         shard_post_lookup_truncate_handler);
++  return 0;
++  shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
++  return 0;
+-    local->prebuf = *buf;
+-    if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+-        local->op_ret = -1;
+-        local->op_errno = EINVAL;
+-        goto unwind;
+-    }
+-    local->xattr_rsp = dict_ref(xdata);
++int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                    int32_t op_ret, int32_t op_errno, inode_t *inode,
++                    struct iatt *buf, struct iatt *preparent,
++                    struct iatt *postparent, dict_t *xdata) {
++  int ret = -1;
++  shard_local_t *local = NULL;
+-    if (local->loc.inode)
+-        inode = local->loc.inode;
+-    else
+-        inode = local->fd->inode;
++  local = frame->local;
+-    shard_inode_ctx_invalidate(inode, this, &local->prebuf);
++  if (op_ret == -1)
++    goto unwind;
++  ret =
++      shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK);
++  if (ret)
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
++           "Failed to set inode "
++           "ctx for %s",
++           uuid_utoa(inode->gfid));
+ unwind:
+-    local->handler(frame, this);
+-    return 0;
++  SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
++                     postparent, xdata);
+-shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
++  return 0;
+-    if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+-        STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->stat, loc, xdata);
+-        return 0;
+-    }
++int shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
++                dev_t rdev, mode_t umask, dict_t *xdata) {
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
+-    ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size from inode ctx of %s",
+-               uuid_utoa(loc->inode->gfid));
+-        goto err;
+-    }
++  priv = this->private;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->stat, loc, xdata);
+-        return 0;
+-    }
++  frame->local = local;
++  local->block_size = priv->block_size;
++  if (!__is_gsyncd_on_shard_dir(frame, loc)) {
++    SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
++  }
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
++  STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
++  return 0;
++  shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM);
++  return 0;
+-    frame->local = local;
++int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                       int32_t op_ret, int32_t op_errno, inode_t *inode,
++                       struct iatt *buf, struct iatt *preparent,
++                       struct iatt *postparent, dict_t *xdata) {
++  shard_local_t *local = NULL;
+-    local->handler = shard_post_stat_handler;
+-    loc_copy(&local->loc, loc);
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
++  local = frame->local;
++  if (op_ret < 0)
++    goto err;
+-    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+-                                    local, err);
++  shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES);
++  buf->ia_size = local->prebuf.ia_size;
++  buf->ia_blocks = local->prebuf.ia_blocks;
+-    STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
+-    return 0;
++  SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
++                     postparent, xdata);
++  return 0;
+ err:
+-    shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno);
++  return 0;
+ }
+-shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
++int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-    if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+-        STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->fstat, fd, xdata);
+-        return 0;
+-    }
++  local = frame->local;
+-    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size from inode ctx of %s",
+-               uuid_utoa(fd->inode->gfid));
+-        goto err;
+-    }
++  if (local->op_ret < 0) {
++    SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL,
++                       NULL, NULL, NULL);
++    return 0;
++  }
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->fstat, fd, xdata);
+-        return 0;
+-    }
++  STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2,
++             local->xattr_req);
++  return 0;
+-    if (!this->itable)
+-        this->itable = fd->inode->table;
++int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
++                   loc_t *newloc, dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
++  ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size from inode ctx of %s",
++           uuid_utoa(oldloc->inode->gfid));
++    goto err;
++  }
+-    frame->local = local;
++  if (!block_size) {
++    STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
++                    oldloc, newloc, xdata);
++    return 0;
++  }
+-    local->handler = shard_post_fstat_handler;
+-    local->fd = fd_ref(fd);
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
++  if (!this->itable)
++    this->itable = oldloc->inode->table;
+-    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+-                                    local, err);
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
+-    STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
+-    return 0;
++  frame->local = local;
++  loc_copy(&local->loc, oldloc);
++  loc_copy(&local->loc2, newloc);
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  shard_lookup_base_file(frame, this, &local->loc,
++                         shard_post_lookup_link_handler);
++  return 0;
+ err:
+-    shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+-    local = frame->local;
++int shard_post_lookup_shards_unlink_handler(call_frame_t *frame,
++                                            xlator_t *this) {
++  shard_local_t *local = NULL;
+-    if (local->fop == GF_FOP_TRUNCATE)
+-        SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+-                           &local->prebuf, &local->postbuf, NULL);
+-    else
+-        SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno,
+-                           &local->prebuf, &local->postbuf, NULL);
++  local = frame->local;
++  if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
++    gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
++           "failed to delete shards of %s",
++           uuid_utoa(local->resolver_base_inode->gfid));
+     return 0;
++  }
++  local->op_ret = 0;
++  local->op_errno = 0;
+-shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                              int32_t op_ret, int32_t op_errno,
+-                              struct iatt *prebuf, struct iatt *postbuf,
+-                              dict_t *xdata)
+-    inode_t *inode = NULL;
+-    int64_t delta_blocks = 0;
+-    shard_local_t *local = NULL;
++  shard_unlink_shards_do(frame, this, local->resolver_base_inode);
++  return 0;
+-    local = frame->local;
++int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
++  local = frame->local;
++  local->lookup_shards_barriered = _gf_true;
+-    inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode
+-                                            : local->fd->inode;
+-    if (op_ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno,
+-               "truncate on last"
+-               " shard failed : %s",
+-               uuid_utoa(inode->gfid));
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto err;
+-    }
++  if (!local->call_count)
++    shard_unlink_shards_do(frame, this, local->resolver_base_inode);
++  else
++    shard_common_lookup_shards(frame, this, local->resolver_base_inode,
++                               shard_post_lookup_shards_unlink_handler);
++  return 0;
++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) {
++  char block_bname[256] = {
++      0,
++  };
++  uuid_t gfid = {
++      0,
++  };
++  inode_t *inode = NULL;
++  inode_t *base_inode = NULL;
++  xlator_t *this = NULL;
++  shard_priv_t *priv = NULL;
++  shard_inode_ctx_t *ctx = NULL;
++  shard_inode_ctx_t *base_ictx = NULL;
++  int unref_base_inode = 0;
++  int unref_shard_inode = 0;
++  this = THIS;
++  priv = this->private;
++  inode = local->inode_list[shard_block_num - local->first_block];
++  shard_inode_ctx_get(inode, this, &ctx);
++  base_inode = ctx->base_inode;
++  if (base_inode)
++    gf_uuid_copy(gfid, base_inode->gfid);
++  else
++    gf_uuid_copy(gfid, ctx->base_gfid);
++  shard_make_block_bname(shard_block_num, gfid, block_bname,
++                         sizeof(block_bname));
++  LOCK(&priv->lock);
++  if (base_inode)
++    LOCK(&base_inode->lock);
++  LOCK(&inode->lock);
++  {
++    __shard_inode_ctx_get(inode, this, &ctx);
++    if (!list_empty(&ctx->ilist)) {
++      list_del_init(&ctx->ilist);
++      priv->inode_count--;
++      unref_base_inode++;
++      unref_shard_inode++;
++      GF_ASSERT(priv->inode_count >= 0);
++    }
++    if (ctx->fsync_needed) {
++      unref_base_inode++;
++      unref_shard_inode++;
++      list_del_init(&ctx->to_fsync_list);
++      if (base_inode) {
++        __shard_inode_ctx_get(base_inode, this, &base_ictx);
++        base_ictx->fsync_count--;
++      }
++    }
++  }
++  UNLOCK(&inode->lock);
++  if (base_inode)
++    UNLOCK(&base_inode->lock);
+-    local->postbuf.ia_size = local->offset;
+-    /* Let the delta be negative. We want xattrop to do subtraction */
+-    local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+-    delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
+-                                 postbuf->ia_blocks - prebuf->ia_blocks);
+-    GF_ASSERT(delta_blocks <= 0);
+-    local->postbuf.ia_blocks += delta_blocks;
+-    local->hole_size = 0;
++  inode_unlink(inode, priv->dot_shard_inode, block_bname);
++  inode_ref_reduce_by_n(inode, unref_shard_inode);
++  inode_forget(inode, 0);
+-    shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
+-    shard_update_file_size(frame, this, NULL, &local->loc,
+-                           shard_post_update_size_truncate_handler);
+-    return 0;
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
++  if (base_inode && unref_base_inode)
++    inode_ref_reduce_by_n(base_inode, unref_base_inode);
++  UNLOCK(&priv->lock);
+ }
+-shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode)
+-    size_t last_shard_size_after = 0;
+-    loc_t loc = {
+-        0,
+-    };
+-    shard_local_t *local = NULL;
++int shard_rename_cbk(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  local = frame->local;
+-    /* A NULL inode could be due to the fact that the last shard which
+-     * needs to be truncated does not exist due to it lying in a hole
+-     * region. So the only thing left to do in that case would be an
+-     * update to file size xattr.
+-     */
+-    if (!inode) {
+-        gf_msg_debug(this->name, 0,
+-                     "Last shard to be truncated absent"
+-                     " in backend: %s. Directly proceeding to update "
+-                     "file size",
+-                     uuid_utoa(inode->gfid));
+-        shard_update_file_size(frame, this, NULL, &local->loc,
+-                               shard_post_update_size_truncate_handler);
+-        return 0;
+-    }
++  SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
++                     &local->prebuf, &local->preoldparent,
++                     &local->postoldparent, &local->prenewparent,
++                     &local->postnewparent, local->xattr_rsp);
++  return 0;
+-    SHARD_SET_ROOT_FS_ID(frame, local);
++int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = frame->local;
+-    loc.inode = inode_ref(inode);
+-    gf_uuid_copy(loc.gfid, inode->gfid);
++  SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
++                     &local->preoldparent, &local->postoldparent,
++                     local->xattr_rsp);
++  return 0;
+-    last_shard_size_after = (local->offset % local->block_size);
++int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie,
++                               xlator_t *this, int32_t op_ret, int32_t op_errno,
++                               struct iatt *preparent, struct iatt *postparent,
++                               dict_t *xdata) {
++  int shard_block_num = (long)cookie;
++  shard_local_t *local = NULL;
+-    STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after,
+-               NULL);
+-    loc_wipe(&loc);
+-    return 0;
++  local = frame->local;
+-shard_unlink_block_inode(shard_local_t *local, int shard_block_num);
++  if (op_ret < 0) {
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto done;
++  }
+-shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                        int32_t op_ret, int32_t op_errno,
+-                        struct iatt *preparent, struct iatt *postparent,
+-                        dict_t *xdata)
+-    int ret = 0;
+-    int call_count = 0;
+-    int shard_block_num = (long)cookie;
+-    uint64_t block_count = 0;
+-    shard_local_t *local = NULL;
++  shard_unlink_block_inode(local, shard_block_num);
++  syncbarrier_wake(&local->barrier);
++  return 0;
++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this,
++                           inode_t *inode) {
++  int i = 0;
++  int ret = -1;
++  int count = 0;
++  uint32_t cur_block = 0;
++  uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */
++  char *bname = NULL;
++  char path[PATH_MAX] = {
++      0,
++  };
++  uuid_t gfid = {
++      0,
++  };
++  loc_t loc = {
++      0,
++  };
++  gf_boolean_t wind_failed = _gf_false;
++  shard_local_t *local = NULL;
++  shard_priv_t *priv = NULL;
++  priv = this->private;
++  local = frame->local;
++  if (inode)
++    gf_uuid_copy(gfid, inode->gfid);
++  else
++    gf_uuid_copy(gfid, local->base_gfid);
++  for (i = 0; i < local->num_blocks; i++) {
++    if (!local->inode_list[i])
++      continue;
++    count++;
++  }
++  if (!count) {
++    /* callcount = 0 implies that all of the shards that need to be
++     * unlinked are non-existent (in other words the file is full of
++     * holes).
++     */
++    gf_msg_debug(this->name, 0, "All shards that need to be "
++                                "unlinked are non-existent: %s",
++                 uuid_utoa(gfid));
++    return 0;
++  }
+-    local = frame->local;
++  SHARD_SET_ROOT_FS_ID(frame, local);
++  local->barrier.waitfor = count;
++  cur_block = cur_block_idx + local->first_block;
+-    if (op_ret < 0) {
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto done;
+-    }
+-    ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
+-    if (!ret) {
+-        GF_ATOMIC_SUB(local->delta_blocks, block_count);
+-    } else {
+-        /* dict_get failed possibly due to a heterogeneous cluster? */
+-        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to get key %s from dict during truncate of gfid %s",
+-               GF_GET_FILE_BLOCK_COUNT,
+-               uuid_utoa(local->resolver_base_inode->gfid));
+-    }
+-    shard_unlink_block_inode(local, shard_block_num);
+-    call_count = shard_call_count_return(frame);
+-    if (call_count == 0) {
+-        SHARD_UNSET_ROOT_FS_ID(frame, local);
+-        shard_truncate_last_shard(frame, this, local->inode_list[0]);
+-    }
+-    return 0;
+-shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+-    int i = 1;
+-    int ret = -1;
+-    int call_count = 0;
+-    uint32_t cur_block = 0;
+-    uint32_t last_block = 0;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
+-    char *bname = NULL;
+-    loc_t loc = {
+-        0,
+-    };
+-    gf_boolean_t wind_failed = _gf_false;
+-    shard_local_t *local = NULL;
+-    shard_priv_t *priv = NULL;
+-    dict_t *xdata_req = NULL;
+-    local = frame->local;
+-    priv = this->private;
+-    cur_block = local->first_block + 1;
+-    last_block = local->last_block;
+-    /* Determine call count */
+-    for (i = 1; i < local->num_blocks; i++) {
+-        if (!local->inode_list[i])
+-            continue;
+-        call_count++;
+-    }
++  while (cur_block_idx < local->num_blocks) {
++    if (!local->inode_list[cur_block_idx])
++      goto next;
+-    if (!call_count) {
+-        /* Call count = 0 implies that all of the shards that need to be
+-         * unlinked do not exist. So shard xlator would now proceed to
+-         * do the final truncate + size updates.
+-         */
+-        gf_msg_debug(this->name, 0,
+-                     "Shards to be unlinked as part of "
+-                     "truncate absent in backend: %s. Directly "
+-                     "proceeding to update file size",
+-                     uuid_utoa(inode->gfid));
+-        local->postbuf.ia_size = local->offset;
+-        local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+-        local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+-        GF_ATOMIC_INIT(local->delta_blocks, 0);
+-        local->hole_size = 0;
+-        shard_update_file_size(frame, this, local->fd, &local->loc,
+-                               shard_post_update_size_truncate_handler);
+-        return 0;
++    if (wind_failed) {
++      shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
++                                 ENOMEM, NULL, NULL, NULL);
++      goto next;
+     }
+-    local->call_count = call_count;
+-    i = 1;
+-    xdata_req = dict_new();
+-    if (!xdata_req) {
+-        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-        return 0;
+-    }
+-    ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to set key %s into dict during truncate of %s",
+-               GF_GET_FILE_BLOCK_COUNT,
+-               uuid_utoa(local->resolver_base_inode->gfid));
+-        dict_unref(xdata_req);
+-        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-        return 0;
++    shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
++    bname = strrchr(path, '/') + 1;
++    loc.parent = inode_ref(priv->dot_shard_inode);
++    ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++    if (ret < 0) {
++      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++             "Inode path failed"
++             " on %s, base file gfid = %s",
++             bname, uuid_utoa(gfid));
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++      loc_wipe(&loc);
++      wind_failed = _gf_true;
++      shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
++                                 ENOMEM, NULL, NULL, NULL);
++      goto next;
+     }
+-    SHARD_SET_ROOT_FS_ID(frame, local);
+-    while (cur_block <= last_block) {
+-        if (!local->inode_list[i]) {
+-            cur_block++;
+-            i++;
+-            continue;
+-        }
+-        if (wind_failed) {
+-            shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1,
+-                                    ENOMEM, NULL, NULL, NULL);
+-            goto next;
+-        }
+-        shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
+-        bname = strrchr(path, '/') + 1;
+-        loc.parent = inode_ref(priv->dot_shard_inode);
+-        ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-        if (ret < 0) {
+-            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-                   "Inode path failed"
+-                   " on %s. Base file gfid = %s",
+-                   bname, uuid_utoa(inode->gfid));
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            loc_wipe(&loc);
+-            wind_failed = _gf_true;
+-            shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1,
+-                                    ENOMEM, NULL, NULL, NULL);
+-            goto next;
+-        }
+- = strrchr(loc.path, '/');
+-        if (
+-  ;
+-        loc.inode = inode_ref(local->inode_list[i]);
+-        STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
+-                          (void *)(long)cur_block, FIRST_CHILD(this),
+-                          FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
+-        loc_wipe(&loc);
+-    next:
+-        i++;
+-        cur_block++;
+-        if (!--call_count)
+-            break;
+-    }
+-    dict_unref(xdata_req);
+-    return 0;
++ = strrchr(loc.path, '/');
++    if (
++    loc.inode = inode_ref(local->inode_list[cur_block_idx]);
+-shard_truncate_do(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
++    STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk,
++                      (void *)(long)cur_block, FIRST_CHILD(this),
++                      FIRST_CHILD(this)->fops->unlink, &loc, local->xflag,
++                      local->xattr_req);
++    loc_wipe(&loc);
++  next:
++    cur_block++;
++    cur_block_idx++;
++  }
++  syncbarrier_wait(&local->barrier, count);
++  SHARD_UNSET_ROOT_FS_ID(frame, local);
++  return 0;
++int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this,
++                                    int now, int first_block,
++                                    gf_dirent_t *entry) {
++  int i = 0;
++  int ret = 0;
++  shard_local_t *local = NULL;
++  uuid_t gfid = {
++      0,
++  };
++  local = cleanup_frame->local;
++  local->inode_list = GF_CALLOC(now, sizeof(inode_t *), gf_shard_mt_inode_list);
++  if (!local->inode_list)
++    return -ENOMEM;
++  local->first_block = first_block;
++  local->last_block = first_block + now - 1;
++  local->num_blocks = now;
++  gf_uuid_parse(entry->d_name, gfid);
++  gf_uuid_copy(local->base_gfid, gfid);
++  local->resolver_base_inode = inode_find(this->itable, gfid);
++  local->call_count = 0;
++  ret = syncbarrier_init(&local->barrier);
++  if (ret) {
++    GF_FREE(local->inode_list);
++    local->inode_list = NULL;
++    inode_unref(local->resolver_base_inode);
++    local->resolver_base_inode = NULL;
++    return -errno;
++  }
++  shard_common_resolve_shards(cleanup_frame, this,
++                              shard_post_resolve_unlink_handler);
++  for (i = 0; i < local->num_blocks; i++) {
++    if (local->inode_list[i])
++      inode_unref(local->inode_list[i]);
++  }
++  GF_FREE(local->inode_list);
++  local->inode_list = NULL;
++  if (local->op_ret)
++    ret = -local->op_errno;
++  syncbarrier_destroy(&local->barrier);
++  inode_unref(local->resolver_base_inode);
++  local->resolver_base_inode = NULL;
++  STACK_RESET(cleanup_frame->root);
++  return ret;
++int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
++                                   gf_dirent_t *entry, inode_t *inode) {
++  int ret = 0;
++  int shard_count = 0;
++  int first_block = 0;
++  int now = 0;
++  uint64_t size = 0;
++  uint64_t block_size = 0;
++  uint64_t size_array[4] = {
++      0,
++  };
++  void *bsize = NULL;
++  void *size_attr = NULL;
++  dict_t *xattr_rsp = NULL;
++  loc_t loc = {
++      0,
++  };
++  shard_local_t *local = NULL;
++  shard_priv_t *priv = NULL;
++  priv = this->private;
++  local = cleanup_frame->local;
++  ret = dict_reset(local->xattr_req);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to reset dict");
++    ret = -ENOMEM;
++    goto err;
++  }
++  ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
++    ret = -ENOMEM;
++    goto err;
++  }
++  ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
++    ret = -ENOMEM;
++    goto err;
++  }
++  loc.inode = inode_ref(inode);
++  loc.parent = inode_ref(priv->dot_shard_rm_inode);
++  ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++  if (ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++           "Inode path  failed on %s", entry->d_name);
++    ret = -ENOMEM;
++    goto err;
++  }
++ = strrchr(loc.path, '/');
++  if (
++  ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req,
++                      &xattr_rsp);
++  if (ret)
++    goto err;
++  ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
++    goto err;
++  }
++  block_size = ntoh64(*((uint64_t *)bsize));
++  ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
++    goto err;
++  }
++  memcpy(size_array, size_attr, sizeof(size_array));
++  size = ntoh64(size_array[0]);
++  shard_count = (size / block_size) - 1;
++  if (shard_count < 0) {
++    gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond "
++                                "its shard-block-size. Nothing to delete. "
++                                "Returning",
++                 entry->d_name);
++    /* File size < shard-block-size, so nothing to delete */
++    ret = 0;
++    goto delete_marker;
++  }
++  if ((size % block_size) > 0)
++    shard_count++;
++  if (shard_count == 0) {
++    gf_msg_debug(this->name, 0, "Size of %s is exactly equal to "
++                                "its shard-block-size. Nothing to delete. "
++                                "Returning",
++                 entry->d_name);
++    ret = 0;
++    goto delete_marker;
++  }
++  gf_msg_debug(this->name, 0,
++               "base file = %s, "
++               "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", "
++               "shard_count=%d",
++               entry->d_name, block_size, size, shard_count);
++  /* Perform a gfid-based lookup to see if gfid corresponding to marker
++   * file's base name exists.
++   */
++  loc_wipe(&loc);
++  loc.inode = inode_new(this->itable);
++  if (!loc.inode) {
++    ret = -ENOMEM;
++    goto err;
++  }
++  gf_uuid_parse(entry->d_name, loc.gfid);
++  ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
++  if (!ret) {
++    gf_msg_debug(this->name, 0, "Base shard corresponding to gfid "
++                                "%s is present. Skipping shard deletion. "
++                                "Returning",
++                 entry->d_name);
++    ret = 0;
++    goto delete_marker;
++  }
+-    local = frame->local;
++  first_block = 1;
+-    if (local->num_blocks == 1) {
+-        /* This means that there are no shards to be unlinked.
+-         * The fop boils down to truncating the last shard, updating
+-         * the size and unwinding.
+-         */
+-        shard_truncate_last_shard(frame, this, local->inode_list[0]);
+-        return 0;
++  while (shard_count) {
++    if (shard_count < local->deletion_rate) {
++      now = shard_count;
++      shard_count = 0;
+     } else {
+-        shard_truncate_htol(frame, this, local->loc.inode);
+-    }
+-    return 0;
+-shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
++      now = local->deletion_rate;
++      shard_count -= local->deletion_rate;
+     }
+-    shard_truncate_do(frame, this);
+-    return 0;
++    gf_msg_debug(this->name, 0, "deleting %d shards starting from "
++                                "block %d of gfid %s",
++                 now, first_block, entry->d_name);
++    ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block,
++                                          entry);
++    if (ret)
++      goto err;
++    first_block += now;
++  }
+-shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
+-                       struct iatt *buf)
+-    int list_index = 0;
+-    char block_bname[256] = {
+-        0,
+-    };
+-    uuid_t gfid = {
+-        0,
+-    };
+-    inode_t *linked_inode = NULL;
+-    xlator_t *this = NULL;
+-    inode_t *fsync_inode = NULL;
+-    shard_priv_t *priv = NULL;
+-    inode_t *base_inode = NULL;
+-    this = THIS;
+-    priv = this->private;
+-    if (local->loc.inode) {
+-        gf_uuid_copy(gfid, local->loc.inode->gfid);
+-        base_inode = local->loc.inode;
+-    } else if (local->resolver_base_inode) {
+-        gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+-        base_inode = local->resolver_base_inode;
++  loc_wipe(&loc);
++  loc.inode = inode_ref(inode);
++  loc.parent = inode_ref(priv->dot_shard_rm_inode);
++  ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++  if (ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++           "Inode path  failed on %s", entry->d_name);
++    ret = -ENOMEM;
++    goto err;
++  }
++ = strrchr(loc.path, '/');
++  if (
++  ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL);
++  if (ret)
++           "Failed to delete %s "
++           "from /%s",
++           entry->d_name, GF_SHARD_REMOVE_ME_DIR);
++  if (xattr_rsp)
++    dict_unref(xattr_rsp);
++  loc_wipe(&loc);
++  return ret;
++int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
++                                 gf_dirent_t *entry, inode_t *inode) {
++  int ret = -1;
++  loc_t loc = {
++      0,
++  };
++  shard_priv_t *priv = NULL;
++  priv = this->private;
++  loc.inode = inode_ref(priv->dot_shard_rm_inode);
++  ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
++                       ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
++  if (ret < 0) {
++    if (ret == -EAGAIN) {
++      ret = 0;
++    }
++    goto out;
++  }
++  { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); }
++  syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
++  loc_wipe(&loc);
++  return ret;
++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) {
++  return 0;
++int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
++                               shard_internal_dir_type_t type) {
++  int ret = 0;
++  char *bname = NULL;
++  loc_t *loc = NULL;
++  shard_priv_t *priv = NULL;
++  uuid_t gfid = {
++      0,
++  };
++  struct iatt stbuf = {
++      0,
++  };
++  priv = this->private;
++  switch (type) {
++    loc = &local->dot_shard_loc;
++    gf_uuid_copy(gfid, priv->dot_shard_gfid);
++    bname = GF_SHARD_DIR;
++    break;
++    loc = &local->dot_shard_rm_loc;
++    gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
++    bname = GF_SHARD_REMOVE_ME_DIR;
++    break;
++  default:
++    break;
++  }
++  loc->inode = inode_find(this->itable, gfid);
++  if (!loc->inode) {
++    ret = shard_init_internal_dir_loc(this, local, type);
++    if (ret)
++      goto err;
++    ret = dict_reset(local->xattr_req);
++    if (ret) {
++      gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++             "Failed to reset "
++             "dict");
++      ret = -ENOMEM;
++      goto err;
++    }
++    ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true);
++    ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req,
++                        NULL);
++    if (ret < 0) {
++      if (ret != -ENOENT)
++        gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED,
++               "Lookup on %s failed, exiting", bname);
++      goto err;
+     } else {
+-        gf_uuid_copy(gfid, local->base_gfid);
++      shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
+     }
++  }
++  ret = 0;
++  return ret;
++int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local,
++                              gf_dirent_t *entry) {
++  int ret = 0;
++  loc_t loc = {
++      0,
++  };
++  loc.inode = inode_new(this->itable);
++  if (!loc.inode) {
++    ret = -ENOMEM;
++    goto err;
++  }
++  loc.parent = inode_ref(local->fd->inode);
++  ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++  if (ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++           "Inode path failed on %s", entry->d_name);
++    ret = -ENOMEM;
++    goto err;
++  }
++ = strrchr(loc.path, '/');
++  if (
++  ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
++  if (ret < 0) {
++    goto err;
++  }
++  entry->inode = inode_ref(loc.inode);
++  ret = 0;
++  loc_wipe(&loc);
++  return ret;
++int shard_delete_shards(void *opaque) {
++  int ret = 0;
++  off_t offset = 0;
++  loc_t loc = {
++      0,
++  };
++  inode_t *link_inode = NULL;
++  xlator_t *this = NULL;
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
++  gf_dirent_t entries;
++  gf_dirent_t *entry = NULL;
++  call_frame_t *cleanup_frame = NULL;
++  gf_boolean_t done = _gf_false;
++  this = THIS;
++  priv = this->private;
++  INIT_LIST_HEAD(&entries.list);
++  cleanup_frame = opaque;
++  local = mem_get0(this->local_pool);
++  if (!local) {
++           "Failed to create local to "
++           "delete shards");
++    ret = -ENOMEM;
++    goto err;
++  }
++  cleanup_frame->local = local;
++  local->fop = GF_FOP_UNLINK;
++  local->xattr_req = dict_new();
++  if (!local->xattr_req) {
++    ret = -ENOMEM;
++    goto err;
++  }
++  local->deletion_rate = priv->deletion_rate;
++  ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
++  if (ret == -ENOENT) {
++    gf_msg_debug(this->name, 0, ".shard absent. Nothing to"
++                                " delete. Exiting");
++    ret = 0;
++    goto err;
++  } else if (ret < 0) {
++    goto err;
++  }
+-    shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
+-    shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
+-    linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf);
+-    inode_lookup(linked_inode);
+-    list_index = block_num - local->first_block;
+-    local->inode_list[list_index] = linked_inode;
++  ret = shard_resolve_internal_dir(this, local,
++                                   SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++  if (ret == -ENOENT) {
++    gf_msg_debug(this->name, 0, ".remove_me absent. "
++                                "Nothing to delete. Exiting");
++    ret = 0;
++    goto err;
++  } else if (ret < 0) {
++    goto err;
++  }
++  local->fd = fd_anonymous(local->dot_shard_rm_loc.inode);
++  if (!local->fd) {
++    ret = -ENOMEM;
++    goto err;
++  }
++  for (;;) {
++    offset = 0;
+     LOCK(&priv->lock);
+     {
+-        fsync_inode = __shard_update_shards_inode_list(
+-            linked_inode, this, base_inode, block_num, gfid);
++      if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
++        priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
++      } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
++        priv->bg_del_state = SHARD_BG_DELETION_NONE;
++        done = _gf_true;
++      }
+     }
+     UNLOCK(&priv->lock);
+-    if (fsync_inode)
+-        shard_initiate_evicted_inode_fsync(this, fsync_inode);
+-shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie,
+-                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+-                               inode_t *inode, struct iatt *buf, dict_t *xdata,
+-                               struct iatt *postparent)
+-    int call_count = 0;
+-    int shard_block_num = (long)cookie;
+-    uuid_t gfid = {
+-        0,
+-    };
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->resolver_base_inode)
+-        gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+-    else
+-        gf_uuid_copy(gfid, local->base_gfid);
+-    if (op_ret < 0) {
+-        /* Ignore absence of shards in the backend in truncate fop. */
+-        switch (local->fop) {
+-            case GF_FOP_TRUNCATE:
+-            case GF_FOP_FTRUNCATE:
+-            case GF_FOP_RENAME:
+-            case GF_FOP_UNLINK:
+-                if (op_errno == ENOENT)
+-                    goto done;
+-                break;
+-            case GF_FOP_WRITE:
+-            case GF_FOP_READ:
+-            case GF_FOP_ZEROFILL:
+-            case GF_FOP_DISCARD:
+-            case GF_FOP_FALLOCATE:
+-                if ((!local->first_lookup_done) && (op_errno == ENOENT)) {
+-                    LOCK(&frame->lock);
+-                    {
+-                        local->create_count++;
+-                    }
+-                    UNLOCK(&frame->lock);
+-                    goto done;
+-                }
+-                break;
+-            default:
+-                break;
+-        }
+-        /* else */
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno,
+-               "Lookup on shard %d "
+-               "failed. Base file gfid = %s",
+-               shard_block_num, uuid_utoa(gfid));
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto done;
+-    }
+-    shard_link_block_inode(local, shard_block_num, inode, buf);
+-    if (local->lookup_shards_barriered) {
+-        syncbarrier_wake(&local->barrier);
+-        return 0;
+-    } else {
+-        call_count = shard_call_count_return(frame);
+-        if (call_count == 0) {
+-            if (!local->first_lookup_done)
+-                local->first_lookup_done = _gf_true;
+-            local->pls_fop_handler(frame, this);
+-        }
+-    }
+-    return 0;
+-dict_t *
+-shard_create_gfid_dict(dict_t *dict)
+-    int ret = 0;
+-    dict_t *new = NULL;
+-    unsigned char *gfid = NULL;
+-    new = dict_copy_with_ref(dict, NULL);
+-    if (!new)
+-        return NULL;
+-    gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
+-    if (!gfid) {
+-        ret = -1;
+-        goto out;
+-    }
+-    gf_uuid_generate(gfid);
+-    ret = dict_set_gfuuid(new, "gfid-req", gfid, false);
+-    if (ret) {
+-        dict_unref(new);
+-        new = NULL;
+-        GF_FREE(gfid);
+-    }
+-    return new;
++    if (done)
++      break;
++    while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
++                                  &entries, local->xattr_req, NULL))) {
++      if (ret > 0)
++        ret = 0;
++      list_for_each_entry(entry, &entries.list, list) {
++        offset = entry->d_off;
+-shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+-                           shard_post_lookup_shards_fop_handler_t handler)
+-    int i = 0;
+-    int ret = 0;
+-    int count = 0;
+-    int call_count = 0;
+-    int32_t shard_idx_iter = 0;
+-    int last_block = 0;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
+-    char *bname = NULL;
+-    uuid_t gfid = {
+-        0,
+-    };
+-    loc_t loc = {
+-        0,
+-    };
+-    shard_local_t *local = NULL;
+-    shard_priv_t *priv = NULL;
+-    gf_boolean_t wind_failed = _gf_false;
+-    dict_t *xattr_req = NULL;
+-    priv = this->private;
+-    local = frame->local;
+-    count = call_count = local->call_count;
+-    shard_idx_iter = local->first_block;
+-    last_block = local->last_block;
+-    local->pls_fop_handler = handler;
+-    if (local->lookup_shards_barriered)
+-        local->barrier.waitfor = local->call_count;
+-    if (inode)
+-        gf_uuid_copy(gfid, inode->gfid);
+-    else
+-        gf_uuid_copy(gfid, local->base_gfid);
++        if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
++          continue;
+-    while (shard_idx_iter <= last_block) {
+-        if (local->inode_list[i]) {
+-            i++;
+-            shard_idx_iter++;
++        if (!entry->inode) {
++          ret = shard_lookup_marker_entry(this, local, entry);
++          if (ret < 0)
+             continue;
+         }
++        link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name,
++                                &entry->d_stat);
+-        if (wind_failed) {
+-            shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+-                                           this, -1, ENOMEM, NULL, NULL, NULL,
+-                                           NULL);
+-            goto next;
+-        }
+-        shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+-        bname = strrchr(path, '/') + 1;
+-        loc.inode = inode_new(this->itable);
+-        loc.parent = inode_ref(priv->dot_shard_inode);
+-        gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
+-        ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-        if (ret < 0 || !(loc.inode)) {
+-            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-                   "Inode path failed"
+-                   " on %s, base file gfid = %s",
+-                   bname, uuid_utoa(gfid));
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            loc_wipe(&loc);
+-            wind_failed = _gf_true;
+-            shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+-                                           this, -1, ENOMEM, NULL, NULL, NULL,
+-                                           NULL);
+-            goto next;
+-        }
+- = strrchr(loc.path, '/');
+-        if (
+-  ;
+-        xattr_req = shard_create_gfid_dict(local->xattr_req);
+-        if (!xattr_req) {
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            wind_failed = _gf_true;
+-            loc_wipe(&loc);
+-            shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+-                                           this, -1, ENOMEM, NULL, NULL, NULL,
+-                                           NULL);
+-            goto next;
++        gf_msg_debug(this->name, 0, "Initiating deletion of "
++                                    "shards of gfid %s",
++                     entry->d_name);
++        ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
++                                           link_inode);
++        inode_unlink(link_inode, local->fd->inode, entry->d_name);
++        inode_unref(link_inode);
++        if (ret) {
++          gf_msg(this->name, GF_LOG_ERROR, -ret,
++                 "Failed to clean up shards of gfid %s", entry->d_name);
++          continue;
+         }
++        gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED,
++               "Deleted "
++               "shards of gfid=%s from backend",
++               entry->d_name);
++      }
++      gf_dirent_free(&entries);
++      if (ret)
++        break;
++    }
++  }
++  ret = 0;
++  loc_wipe(&loc);
++  return ret;
+-        STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk,
+-                          (void *)(long)shard_idx_iter, FIRST_CHILD(this),
+-                          FIRST_CHILD(this)->fops->lookup, &loc, xattr_req);
+-        loc_wipe(&loc);
+-        dict_unref(xattr_req);
+-    next:
+-        shard_idx_iter++;
+-        i++;
+-        if (!--call_count)
+-            break;
+-    }
+-    if (local->lookup_shards_barriered) {
+-        syncbarrier_wait(&local->barrier, count);
+-        local->pls_fop_handler(frame, this);
+-    }
+-    return 0;
++  LOCK(&priv->lock);
++  { priv->bg_del_state = SHARD_BG_DELETION_NONE; }
++  UNLOCK(&priv->lock);
++  loc_wipe(&loc);
++  return ret;
++int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                             int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++  if (op_ret)
++    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++           "Unlock failed. Please check brick logs for "
++           "more details");
++  return 0;
++int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) {
++  loc_t *loc = NULL;
++  call_frame_t *lk_frame = NULL;
++  shard_local_t *local = NULL;
++  shard_local_t *lk_local = NULL;
++  shard_inodelk_t *lock = NULL;
++  local = frame->local;
++  lk_frame = local->inodelk_frame;
++  lk_local = lk_frame->local;
++  local->inodelk_frame = NULL;
++  loc = &local->int_inodelk.loc;
++  lock = &lk_local->int_inodelk;
++  lock->flock.l_type = F_UNLCK;
++  STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK,
++             &lock->flock, NULL);
++  local->int_inodelk.acquired_lock = _gf_false;
++  return 0;
++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                         int32_t op_ret, int32_t op_errno, struct iatt *buf,
++                         struct iatt *preoldparent, struct iatt *postoldparent,
++                         struct iatt *prenewparent, struct iatt *postnewparent,
++                         dict_t *xdata);
++int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) {
++  int ret = 0;
++  loc_t *dst_loc = NULL;
++  loc_t tmp_loc = {
++      0,
++  };
++  shard_local_t *local = frame->local;
++  if (local->dst_block_size) {
++    tmp_loc.parent = inode_ref(local->loc2.parent);
++    ret = inode_path(tmp_loc.parent, local->, (char **)&tmp_loc.path);
++    if (ret < 0) {
++      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++             "Inode path failed"
++             " on pargfid=%s bname=%s",
++             uuid_utoa(tmp_loc.parent->gfid), local->;
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++      goto err;
++    }
++ = strrchr(tmp_loc.path, '/');
++    if (
++    dst_loc = &tmp_loc;
++  } else {
++    dst_loc = &local->loc2;
++  }
++  /* To-Do: Request open-fd count on dst base file */
++  STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc,
++             local->xattr_req);
++  loc_wipe(&tmp_loc);
++  return 0;
++  loc_wipe(&tmp_loc);
++  shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                              local->op_errno);
++  return 0;
++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this);
++int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie,
++                                            xlator_t *this, int32_t op_ret,
++                                            int32_t op_errno, dict_t *dict,
++                                            dict_t *xdata) {
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
++  priv = this->private;
++  local = frame->local;
++  if (op_ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++           "Xattrop on marker file failed "
++           "while performing %s; entry gfid=%s",
++           gf_fop_string(local->fop), local->;
++    goto err;
++  }
++  inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode,
++               local->;
++  if (local->fop == GF_FOP_UNLINK)
++    shard_unlink_base_file(frame, this);
++  else if (local->fop == GF_FOP_RENAME)
++    shard_rename_src_base_file(frame, this);
++  return 0;
++  shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
++  return 0;
++int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) {
++  int op_errno = ENOMEM;
++  uint64_t bs = 0;
++  dict_t *xdata = NULL;
++  shard_local_t *local = NULL;
++  local = frame->local;
++  xdata = dict_new();
++  if (!xdata)
++    goto err;
++  if (local->fop == GF_FOP_UNLINK)
++    bs = local->block_size;
++  else if (local->fop == GF_FOP_RENAME)
++    bs = local->dst_block_size;
++  SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc,
++                          local->prebuf.ia_size, 0, err);
++  STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->xattrop, &local->newloc,
++             GF_XATTROP_GET_AND_SET, xdata, NULL);
++  dict_unref(xdata);
++  return 0;
++  if (xdata)
++    dict_unref(xdata);
++  shard_common_failure_unwind(local->fop, frame, -1, op_errno);
++  return 0;
+ }
+-shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        if (local->op_errno == ENOENT) {
+-            /* If lookup on /.shard fails with ENOENT, it means that
+-             * the file was 0-byte in size but truncated sometime in
+-             * the past to a higher size which is reflected in the
+-             * size xattr, and now being truncated to a lower size.
+-             * In this case, the only thing that needs to be done is
+-             * to update the size xattr of the file and unwind.
+-             */
+-            local->first_block = local->last_block = 0;
+-            local->num_blocks = 1;
+-            local->call_count = 0;
+-            local->op_ret = 0;
+-            local->postbuf.ia_size = local->offset;
+-            shard_update_file_size(frame, this, local->fd, &local->loc,
+-                                   shard_post_update_size_truncate_handler);
+-            return 0;
+-        } else {
+-            shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                        local->op_errno);
+-            return 0;
+-        }
+-    }
+-    if (!local->call_count)
+-        shard_truncate_do(frame, this);
+-    else
+-        shard_common_lookup_shards(frame, this, local->loc.inode,
+-                                   shard_post_lookup_shards_truncate_handler);
+-    return 0;
++int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie,
++                                 xlator_t *this, int32_t op_ret,
++                                 int32_t op_errno, inode_t *inode,
++                                 struct iatt *buf, dict_t *xdata,
++                                 struct iatt *postparent) {
++  inode_t *linked_inode = NULL;
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
++  local = frame->local;
++  priv = this->private;
++  if (op_ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++           "Lookup on marker file failed "
++           "while performing %s; entry gfid=%s",
++           gf_fop_string(local->fop), local->;
++    goto err;
++  }
++  linked_inode =
++      inode_link(inode, priv->dot_shard_rm_inode, local->, buf);
++  inode_unref(local->newloc.inode);
++  local->newloc.inode = linked_inode;
++  shard_set_size_attrs_on_marker_file(frame, this);
++  return 0;
++  shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
++  return 0;
+ }
+-shard_truncate_begin(call_frame_t *frame, xlator_t *this)
+-    int ret = 0;
+-    shard_local_t *local = NULL;
+-    shard_priv_t *priv = NULL;
+-    priv = this->private;
+-    local = frame->local;
+-    /* First participant block here is the lowest numbered block that would
+-     * hold the last byte of the file post successful truncation.
+-     * Last participant block is the block that contains the last byte in
+-     * the current state of the file.
+-     * If (first block == last_block):
+-     *         then that means that the file only needs truncation of the
+-     *         first (or last since both are same) block.
+-     * Else
+-     *         if (new_size % block_size == 0)
+-     *                 then that means there is no truncate to be done with
+-     *                 only shards from first_block + 1 through the last
+-     *                 block needing to be unlinked.
+-     *         else
+-     *                 both truncate of the first block and unlink of the
+-     *                 remaining shards until end of file is required.
+-     */
+-    local->first_block = (local->offset == 0)
+-                             ? 0
+-                             : get_lowest_block(local->offset - 1,
+-                                                local->block_size);
+-    local->last_block = get_highest_block(0, local->prebuf.ia_size,
+-                                          local->block_size);
+-    local->num_blocks = local->last_block - local->first_block + 1;
+-    local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE)
+-                                     ? local->loc.inode
+-                                     : local->fd->inode;
+-    if ((local->first_block == 0) && (local->num_blocks == 1)) {
+-        if (local->fop == GF_FOP_TRUNCATE)
+-            STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+-                       FIRST_CHILD(this)->fops->truncate, &local->loc,
+-                       local->offset, local->xattr_req);
+-        else
+-            STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+-                       FIRST_CHILD(this)->fops->ftruncate, local->fd,
+-                       local->offset, local->xattr_req);
+-        return 0;
+-    }
++int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) {
++  int op_errno = ENOMEM;
++  dict_t *xattr_req = NULL;
++  shard_local_t *local = NULL;
+-    local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+-                                  gf_shard_mt_inode_list);
+-    if (!local->inode_list)
+-        goto err;
++  local = frame->local;
+-    local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+-    if (!local->dot_shard_loc.inode) {
+-        ret = shard_init_internal_dir_loc(this, local,
+-                                          SHARD_INTERNAL_DIR_DOT_SHARD);
+-        if (ret)
+-            goto err;
+-        shard_lookup_internal_dir(frame, this,
+-                                  shard_post_resolve_truncate_handler,
+-                                  SHARD_INTERNAL_DIR_DOT_SHARD);
+-    } else {
+-        local->post_res_handler = shard_post_resolve_truncate_handler;
+-        shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+-    }
+-    return 0;
++  xattr_req = shard_create_gfid_dict(local->xattr_req);
++  if (!xattr_req)
++    goto err;
++  STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req);
++  dict_unref(xattr_req);
++  return 0;
+ err:
+-    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-    return 0;
++  shard_common_failure_unwind(local->fop, frame, -1, op_errno);
++  return 0;
+ }
+-shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    struct iatt tmp_stbuf = {
+-        0,
+-    };
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
+-    }
+-    local->postbuf = tmp_stbuf = local->prebuf;
+-    if (local->prebuf.ia_size == local->offset) {
+-        /* If the file size is same as requested size, unwind the call
+-         * immediately.
+-         */
+-        if (local->fop == GF_FOP_TRUNCATE)
+-            SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf,
+-                               &local->postbuf, NULL);
+-        else
+-            SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf,
+-                               &local->postbuf, NULL);
+-    } else if (local->offset > local->prebuf.ia_size) {
+-        /* If the truncate is from a lower to a higher size, set the
+-         * new size xattr and unwind.
+-         */
+-        local->hole_size = local->offset - local->prebuf.ia_size;
+-        local->delta_size = 0;
+-        GF_ATOMIC_INIT(local->delta_blocks, 0);
+-        local->postbuf.ia_size = local->offset;
+-        tmp_stbuf.ia_size = local->offset;
+-        shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+-                            SHARD_INODE_WRITE_MASK);
+-        shard_update_file_size(frame, this, NULL, &local->loc,
+-                               shard_post_update_size_truncate_handler);
++int shard_create_marker_file_under_remove_me_cbk(
++    call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
++    int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent,
++    struct iatt *postparent, dict_t *xdata) {
++  inode_t *linked_inode = NULL;
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
++  local = frame->local;
++  priv = this->private;
++  SHARD_UNSET_ROOT_FS_ID(frame, local);
++  if (op_ret < 0) {
++    if ((op_errno != EEXIST) && (op_errno != ENODATA)) {
++      local->op_ret = op_ret;
++      local->op_errno = op_errno;
++      gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++             "Marker file creation "
++             "failed while performing %s; entry gfid=%s",
++             gf_fop_string(local->fop), local->;
++      goto err;
+     } else {
+-        /* ... else
+-         * i.   unlink all shards that need to be unlinked.
+-         * ii.  truncate the last of the shards.
+-         * iii. update the new size using setxattr.
+-         * and unwind the fop.
+-         */
+-        local->hole_size = 0;
+-        local->delta_size = (local->offset - local->prebuf.ia_size);
+-        GF_ATOMIC_INIT(local->delta_blocks, 0);
+-        tmp_stbuf.ia_size = local->offset;
+-        shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+-                            SHARD_INODE_WRITE_MASK);
+-        shard_truncate_begin(frame, this);
+-    }
+-    return 0;
+-/* TO-DO:
+- * Fix updates to size and block count with racing write(s) and truncate(s).
+- */
+-shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+-               dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
+-    ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size from inode ctx of %s",
+-               uuid_utoa(loc->inode->gfid));
+-        goto err;
++      shard_lookup_marker_file(frame, this);
++      return 0;
+     }
++  }
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+-        return 0;
+-    }
+-    if (!this->itable)
+-        this->itable = loc->inode->table;
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
+-    frame->local = local;
+-    ret = syncbarrier_init(&local->barrier);
+-    if (ret)
+-        goto err;
+-    loc_copy(&local->loc, loc);
+-    local->offset = offset;
+-    local->block_size = block_size;
+-    local->fop = GF_FOP_TRUNCATE;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
+-    local->resolver_base_inode = loc->inode;
+-    GF_ATOMIC_INIT(local->delta_blocks, 0);
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_truncate_handler);
+-    return 0;
++  linked_inode =
++      inode_link(inode, priv->dot_shard_rm_inode, local->, buf);
++  inode_unref(local->newloc.inode);
++  local->newloc.inode = linked_inode;
++  if (local->fop == GF_FOP_UNLINK)
++    shard_unlink_base_file(frame, this);
++  else if (local->fop == GF_FOP_RENAME)
++    shard_rename_src_base_file(frame, this);
++  return 0;
+ err:
+-    shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM);
+-    return 0;
+-shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+-                dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
+-    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size from inode ctx of %s",
+-               uuid_utoa(fd->inode->gfid));
+-        goto err;
+-    }
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+-        return 0;
+-    }
+-    if (!this->itable)
+-        this->itable = fd->inode->table;
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
+-    frame->local = local;
+-    ret = syncbarrier_init(&local->barrier);
+-    if (ret)
+-        goto err;
+-    local->fd = fd_ref(fd);
+-    local->offset = offset;
+-    local->block_size = block_size;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
+-    local->fop = GF_FOP_FTRUNCATE;
++  shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
++  return 0;
++int shard_create_marker_file_under_remove_me(call_frame_t *frame,
++                                             xlator_t *this, loc_t *loc) {
++  int ret = 0;
++  int op_errno = ENOMEM;
++  uint64_t bs = 0;
++  char g1[64] = {
++      0,
++  };
++  char g2[64] = {
++      0,
++  };
++  dict_t *xattr_req = NULL;
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
++  priv = this->private;
++  local = frame->local;
++  SHARD_SET_ROOT_FS_ID(frame, local);
++  xattr_req = shard_create_gfid_dict(local->xattr_req);
++  if (!xattr_req)
++    goto err;
++  local->newloc.inode = inode_new(this->itable);
++  local->newloc.parent = inode_ref(priv->dot_shard_rm_inode);
++  ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid),
++                   (char **)&local->newloc.path);
++  if (ret < 0) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++           "Inode path failed on "
++           "pargfid=%s bname=%s",
++           uuid_utoa_r(priv->dot_shard_rm_gfid, g1),
++           uuid_utoa_r(loc->inode->gfid, g2));
++    goto err;
++  }
++  local-> = strrchr(local->newloc.path, '/');
++  if (local->
++    local->;
++  if (local->fop == GF_FOP_UNLINK)
++    bs = local->block_size;
++  else if (local->fop == GF_FOP_RENAME)
++    bs = local->dst_block_size;
++  SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
++                          local->prebuf.ia_size, 0, err);
++  STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
++             FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc,
++             0, 0, 0644, xattr_req);
++  dict_unref(xattr_req);
++  return 0;
+-    local->loc.inode = inode_ref(fd->inode);
+-    gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+-    local->resolver_base_inode = fd->inode;
+-    GF_ATOMIC_INIT(local->delta_blocks, 0);
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_truncate_handler);
+-    return 0;
+ err:
+-    shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
+-    return 0;
++  if (xattr_req)
++    dict_unref(xattr_req);
++  shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno,
++                                               NULL, NULL, NULL, NULL, NULL);
++  return 0;
+ }
+-shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                struct iatt *buf, struct iatt *preparent,
+-                struct iatt *postparent, dict_t *xdata)
+-    int ret = -1;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (op_ret == -1)
+-        goto unwind;
+-    ret = shard_inode_ctx_set(inode, this, buf, local->block_size,
+-                              SHARD_ALL_MASK);
+-    if (ret)
+-        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
+-               "Failed to set inode "
+-               "ctx for %s",
+-               uuid_utoa(inode->gfid));
+-    SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+-                       postparent, xdata);
++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+-    return 0;
++int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie,
++                               xlator_t *this, int32_t op_ret, int32_t op_errno,
++                               struct iatt *preparent, struct iatt *postparent,
++                               dict_t *xdata) {
++  int ret = 0;
++  shard_local_t *local = NULL;
+-shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+-            dev_t rdev, mode_t umask, dict_t *xdata)
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
++  local = frame->local;
+-    priv = this->private;
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
++  if (op_ret < 0) {
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++  } else {
++    shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
++    local->preoldparent = *preparent;
++    local->postoldparent = *postparent;
++    if (xdata)
++      local->xattr_rsp = dict_ref(xdata);
++    if (local->cleanup_required)
++      shard_start_background_deletion(this);
++  }
+-    frame->local = local;
+-    local->block_size = priv->block_size;
+-    if (!__is_gsyncd_on_shard_dir(frame, loc)) {
+-        SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
++  if (local->entrylk_frame) {
++    ret = shard_unlock_entrylk(frame, this);
++    if (ret < 0) {
++      local->op_ret = -1;
++      local->op_errno = -ret;
+     }
++  }
+-    STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+-    return 0;
+-    shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM);
+-    return 0;
+-shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-               int32_t op_ret, int32_t op_errno, inode_t *inode,
+-               struct iatt *buf, struct iatt *preparent,
+-               struct iatt *postparent, dict_t *xdata)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (op_ret < 0)
+-        goto err;
+-    shard_inode_ctx_set(inode, this, buf, 0,
+-                        SHARD_MASK_NLINK | SHARD_MASK_TIMES);
+-    buf->ia_size = local->prebuf.ia_size;
+-    buf->ia_blocks = local->prebuf.ia_blocks;
+-    SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+-                       postparent, xdata);
+-    return 0;
++  ret = shard_unlock_inodelk(frame, this);
++  if (ret < 0) {
++    local->op_ret = -1;
++    local->op_errno = -ret;
++  }
++  shard_unlink_cbk(frame, this);
++  return 0;
++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = frame->local;
++  /* To-Do: Request open-fd count on base file */
++  STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
++             local->xattr_req);
++  return 0;
++int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                             int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++  if (op_ret)
++    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++           "Unlock failed. Please check brick logs for "
++           "more details");
++  return 0;
++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) {
++  loc_t *loc = NULL;
++  call_frame_t *lk_frame = NULL;
++  shard_local_t *local = NULL;
++  shard_local_t *lk_local = NULL;
++  shard_entrylk_t *lock = NULL;
++  local = frame->local;
++  lk_frame = local->entrylk_frame;
++  lk_local = lk_frame->local;
++  local->entrylk_frame = NULL;
++  lock = &lk_local->int_entrylk;
++  loc = &lock->loc;
++  STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->entrylk, this->name, loc,
++             lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK,
++             NULL);
++  local->int_entrylk.acquired_lock = _gf_false;
++  return 0;
++int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
++  local = frame->local;
++  switch (local->fop) {
++  case GF_FOP_UNLINK:
++  case GF_FOP_RENAME:
++    shard_create_marker_file_under_remove_me(frame, this,
++                                             &local->int_inodelk.loc);
++    break;
++  default:
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++           "post-entrylk handler not defined. This case should not"
++           " be hit");
++    break;
++  }
++  return 0;
++int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                              int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++  call_frame_t *main_frame = NULL;
++  shard_local_t *local = NULL;
++  shard_local_t *main_local = NULL;
++  local = frame->local;
++  main_frame = local->main_frame;
++  main_local = main_frame->local;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno);
++    return 0;
++  }
++  main_local->int_entrylk.acquired_lock = _gf_true;
++  shard_post_entrylk_fop_handler(main_frame, this);
++  return 0;
++int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
++                          uuid_t gfid) {
++  char gfid_str[GF_UUID_BUF_SIZE] = {
++      0,
++  };
++  shard_local_t *local = NULL;
++  shard_local_t *entrylk_local = NULL;
++  shard_entrylk_t *int_entrylk = NULL;
++  call_frame_t *entrylk_frame = NULL;
++  local = frame->local;
++  entrylk_frame = create_frame(this, this->ctx->pool);
++  if (!entrylk_frame) {
++           "Failed to create new frame "
++           "to lock marker file");
++    goto err;
++  }
++  entrylk_local = mem_get0(this->local_pool);
++  if (!entrylk_local) {
++    STACK_DESTROY(entrylk_frame->root);
++    goto err;
++  }
++  entrylk_frame->local = entrylk_local;
++  entrylk_local->main_frame = frame;
++  int_entrylk = &entrylk_local->int_entrylk;
++  int_entrylk->loc.inode = inode_ref(inode);
++  set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root);
++  local->entrylk_frame = entrylk_frame;
++  gf_uuid_unparse(gfid, gfid_str);
++  int_entrylk->basename = gf_strdup(gfid_str);
++  STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc,
++             int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
++  return 0;
+ err:
+-    shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno);
+-    return 0;
+-shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL,
+-                           NULL, NULL, NULL, NULL);
+-        return 0;
+-    }
+-    STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2,
+-               local->xattr_req);
+-    return 0;
++  shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+-           dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
+-    ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size from inode ctx of %s",
+-               uuid_utoa(oldloc->inode->gfid));
+-        goto err;
+-    }
+-    if (!block_size) {
+-        STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+-                        oldloc, newloc, xdata);
+-        return 0;
+-    }
+-    if (!this->itable)
+-        this->itable = oldloc->inode->table;
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
+-    frame->local = local;
+-    loc_copy(&local->loc, oldloc);
+-    loc_copy(&local->loc2, newloc);
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_link_handler);
+-    return 0;
+-    shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
+-    return 0;
+-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+-shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+-        gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+-               "failed to delete shards of %s",
+-               uuid_utoa(local->resolver_base_inode->gfid));
+-        return 0;
+-    }
+-    local->op_ret = 0;
+-    local->op_errno = 0;
+-    shard_unlink_shards_do(frame, this, local->resolver_base_inode);
+-    return 0;
+-shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    local->lookup_shards_barriered = _gf_true;
+-    if (!local->call_count)
+-        shard_unlink_shards_do(frame, this, local->resolver_base_inode);
+-    else
+-        shard_common_lookup_shards(frame, this, local->resolver_base_inode,
+-                                   shard_post_lookup_shards_unlink_handler);
+-    return 0;
+-shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
+-    char block_bname[256] = {
+-        0,
+-    };
+-    uuid_t gfid = {
+-        0,
+-    };
+-    inode_t *inode = NULL;
+-    inode_t *base_inode = NULL;
+-    xlator_t *this = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_inode_ctx_t *ctx = NULL;
+-    shard_inode_ctx_t *base_ictx = NULL;
+-    int unref_base_inode = 0;
+-    int unref_shard_inode = 0;
+-    this = THIS;
+-    priv = this->private;
+-    inode = local->inode_list[shard_block_num - local->first_block];
+-    shard_inode_ctx_get(inode, this, &ctx);
+-    base_inode = ctx->base_inode;
+-    if (base_inode)
+-        gf_uuid_copy(gfid, base_inode->gfid);
+-    else
+-        gf_uuid_copy(gfid, ctx->base_gfid);
+-    shard_make_block_bname(shard_block_num, gfid, block_bname,
+-                           sizeof(block_bname));
+-    LOCK(&priv->lock);
+-    if (base_inode)
+-        LOCK(&base_inode->lock);
+-    LOCK(&inode->lock);
+-    {
+-        __shard_inode_ctx_get(inode, this, &ctx);
+-        if (!list_empty(&ctx->ilist)) {
+-            list_del_init(&ctx->ilist);
+-            priv->inode_count--;
+-            unref_base_inode++;
+-            unref_shard_inode++;
+-            GF_ASSERT(priv->inode_count >= 0);
+-        }
+-        if (ctx->fsync_needed) {
+-            unref_base_inode++;
+-            unref_shard_inode++;
+-            list_del_init(&ctx->to_fsync_list);
+-            if (base_inode) {
+-                __shard_inode_ctx_get(base_inode, this, &base_ictx);
+-                base_ictx->fsync_count--;
+-            }
+-        }
+-    }
+-    UNLOCK(&inode->lock);
+-    if (base_inode)
+-        UNLOCK(&base_inode->lock);
+-    inode_unlink(inode, priv->dot_shard_inode, block_bname);
+-    inode_ref_reduce_by_n(inode, unref_shard_inode);
+-    inode_forget(inode, 0);
+-    if (base_inode && unref_base_inode)
+-        inode_ref_reduce_by_n(base_inode, unref_base_inode);
+-    UNLOCK(&priv->lock);
+-shard_rename_cbk(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
+-                       &local->prebuf, &local->preoldparent,
+-                       &local->postoldparent, &local->prenewparent,
+-                       &local->postnewparent, local->xattr_rsp);
+-    return 0;
+-shard_unlink_cbk(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = frame->local;
+-    SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+-                       &local->preoldparent, &local->postoldparent,
+-                       local->xattr_rsp);
+-    return 0;
+-shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                           int32_t op_ret, int32_t op_errno,
+-                           struct iatt *preparent, struct iatt *postparent,
+-                           dict_t *xdata)
+-    int shard_block_num = (long)cookie;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (op_ret < 0) {
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto done;
+-    }
+-    shard_unlink_block_inode(local, shard_block_num);
+-    syncbarrier_wake(&local->barrier);
+-    return 0;
+-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
+-    int i = 0;
+-    int ret = -1;
+-    int count = 0;
+-    uint32_t cur_block = 0;
+-    uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */
+-    char *bname = NULL;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
+-    uuid_t gfid = {
+-        0,
+-    };
+-    loc_t loc = {
+-        0,
+-    };
+-    gf_boolean_t wind_failed = _gf_false;
+-    shard_local_t *local = NULL;
+-    shard_priv_t *priv = NULL;
+-    priv = this->private;
+-    local = frame->local;
+-    if (inode)
+-        gf_uuid_copy(gfid, inode->gfid);
+-    else
+-        gf_uuid_copy(gfid, local->base_gfid);
+-    for (i = 0; i < local->num_blocks; i++) {
+-        if (!local->inode_list[i])
+-            continue;
+-        count++;
+-    }
+-    if (!count) {
+-        /* callcount = 0 implies that all of the shards that need to be
+-         * unlinked are non-existent (in other words the file is full of
+-         * holes).
+-         */
+-        gf_msg_debug(this->name, 0,
+-                     "All shards that need to be "
+-                     "unlinked are non-existent: %s",
+-                     uuid_utoa(gfid));
+-        return 0;
+-    }
+-    SHARD_SET_ROOT_FS_ID(frame, local);
+-    local->barrier.waitfor = count;
+-    cur_block = cur_block_idx + local->first_block;
+-    while (cur_block_idx < local->num_blocks) {
+-        if (!local->inode_list[cur_block_idx])
+-            goto next;
+-        if (wind_failed) {
+-            shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
+-                                       ENOMEM, NULL, NULL, NULL);
+-            goto next;
+-        }
+-        shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
+-        bname = strrchr(path, '/') + 1;
+-        loc.parent = inode_ref(priv->dot_shard_inode);
+-        ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-        if (ret < 0) {
+-            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-                   "Inode path failed"
+-                   " on %s, base file gfid = %s",
+-                   bname, uuid_utoa(gfid));
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            loc_wipe(&loc);
+-            wind_failed = _gf_true;
+-            shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
+-                                       ENOMEM, NULL, NULL, NULL);
+-            goto next;
+-        }
+- = strrchr(loc.path, '/');
+-        if (
+-  ;
+-        loc.inode = inode_ref(local->inode_list[cur_block_idx]);
+-        STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk,
+-                          (void *)(long)cur_block, FIRST_CHILD(this),
+-                          FIRST_CHILD(this)->fops->unlink, &loc, local->xflag,
+-                          local->xattr_req);
+-        loc_wipe(&loc);
+-    next:
+-        cur_block++;
+-        cur_block_idx++;
+-    }
+-    syncbarrier_wait(&local->barrier, count);
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
+-    return 0;
+-shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this,
+-                                int now, int first_block, gf_dirent_t *entry)
+-    int i = 0;
+-    int ret = 0;
+-    shard_local_t *local = NULL;
+-    uuid_t gfid = {
+-        0,
+-    };
+-    local = cleanup_frame->local;
+-    local->inode_list = GF_CALLOC(now, sizeof(inode_t *),
+-                                  gf_shard_mt_inode_list);
+-    if (!local->inode_list)
+-        return -ENOMEM;
+-    local->first_block = first_block;
+-    local->last_block = first_block + now - 1;
+-    local->num_blocks = now;
+-    gf_uuid_parse(entry->d_name, gfid);
+-    gf_uuid_copy(local->base_gfid, gfid);
+-    local->resolver_base_inode = inode_find(this->itable, gfid);
+-    local->call_count = 0;
+-    ret = syncbarrier_init(&local->barrier);
+-    if (ret) {
+-        GF_FREE(local->inode_list);
+-        local->inode_list = NULL;
+-        inode_unref(local->resolver_base_inode);
+-        local->resolver_base_inode = NULL;
+-        return -errno;
+-    }
+-    shard_common_resolve_shards(cleanup_frame, this,
+-                                shard_post_resolve_unlink_handler);
+-    for (i = 0; i < local->num_blocks; i++) {
+-        if (local->inode_list[i])
+-            inode_unref(local->inode_list[i]);
+-    }
+-    GF_FREE(local->inode_list);
+-    local->inode_list = NULL;
+-    if (local->op_ret)
+-        ret = -local->op_errno;
+-    syncbarrier_destroy(&local->barrier);
+-    inode_unref(local->resolver_base_inode);
+-    local->resolver_base_inode = NULL;
+-    STACK_RESET(cleanup_frame->root);
+-    return ret;
+-__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
+-                               gf_dirent_t *entry, inode_t *inode)
+-    int ret = 0;
+-    int shard_count = 0;
+-    int first_block = 0;
+-    int now = 0;
+-    uint64_t size = 0;
+-    uint64_t block_size = 0;
+-    uint64_t size_array[4] = {
+-        0,
+-    };
+-    void *bsize = NULL;
+-    void *size_attr = NULL;
+-    dict_t *xattr_rsp = NULL;
+-    loc_t loc = {
+-        0,
+-    };
+-    shard_local_t *local = NULL;
+-    shard_priv_t *priv = NULL;
+-    priv = this->private;
+-    local = cleanup_frame->local;
+-    ret = dict_reset(local->xattr_req);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to reset dict");
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+-    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+-    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+-    loc.inode = inode_ref(inode);
+-    loc.parent = inode_ref(priv->dot_shard_rm_inode);
+-    ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+-    if (ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-               "Inode path  failed on %s", entry->d_name);
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+- = strrchr(loc.path, '/');
+-    if (
+-    ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req,
+-                        &xattr_rsp);
+-    if (ret)
+-        goto err;
+-    ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
+-        goto err;
+-    }
+-    block_size = ntoh64(*((uint64_t *)bsize));
+-    ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
+-        goto err;
+-    }
+-    memcpy(size_array, size_attr, sizeof(size_array));
+-    size = ntoh64(size_array[0]);
+-    shard_count = (size / block_size) - 1;
+-    if (shard_count < 0) {
+-        gf_msg_debug(this->name, 0,
+-                     "Size of %s hasn't grown beyond "
+-                     "its shard-block-size. Nothing to delete. "
+-                     "Returning",
+-                     entry->d_name);
+-        /* File size < shard-block-size, so nothing to delete */
+-        ret = 0;
+-        goto delete_marker;
+-    }
+-    if ((size % block_size) > 0)
+-        shard_count++;
+-    if (shard_count == 0) {
+-        gf_msg_debug(this->name, 0,
+-                     "Size of %s is exactly equal to "
+-                     "its shard-block-size. Nothing to delete. "
+-                     "Returning",
+-                     entry->d_name);
+-        ret = 0;
+-        goto delete_marker;
+-    }
+-    gf_msg_debug(this->name, 0,
+-                 "base file = %s, "
+-                 "shard-block-size=%" PRIu64 ", file-size=%" PRIu64
+-                 ", "
+-                 "shard_count=%d",
+-                 entry->d_name, block_size, size, shard_count);
+-    /* Perform a gfid-based lookup to see if gfid corresponding to marker
+-     * file's base name exists.
+-     */
+-    loc_wipe(&loc);
+-    loc.inode = inode_new(this->itable);
+-    if (!loc.inode) {
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+-    gf_uuid_parse(entry->d_name, loc.gfid);
+-    ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+-    if (!ret) {
+-        gf_msg_debug(this->name, 0,
+-                     "Base shard corresponding to gfid "
+-                     "%s is present. Skipping shard deletion. "
+-                     "Returning",
+-                     entry->d_name);
+-        ret = 0;
+-        goto delete_marker;
+-    }
+-    first_block = 1;
+-    while (shard_count) {
+-        if (shard_count < local->deletion_rate) {
+-            now = shard_count;
+-            shard_count = 0;
+-        } else {
+-            now = local->deletion_rate;
+-            shard_count -= local->deletion_rate;
+-        }
+-        gf_msg_debug(this->name, 0,
+-                     "deleting %d shards starting from "
+-                     "block %d of gfid %s",
+-                     now, first_block, entry->d_name);
+-        ret = shard_regulated_shards_deletion(cleanup_frame, this, now,
+-                                              first_block, entry);
+-        if (ret)
+-            goto err;
+-        first_block += now;
+-    }
+-    loc_wipe(&loc);
+-    loc.inode = inode_ref(inode);
+-    loc.parent = inode_ref(priv->dot_shard_rm_inode);
+-    ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+-    if (ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-               "Inode path  failed on %s", entry->d_name);
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+- = strrchr(loc.path, '/');
+-    if (
+-    ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL);
+-    if (ret)
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED,
+-               "Failed to delete %s "
+-               "from /%s",
+-               entry->d_name, GF_SHARD_REMOVE_ME_DIR);
+-    if (xattr_rsp)
+-        dict_unref(xattr_rsp);
+-    loc_wipe(&loc);
+-    return ret;
+-shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
+-                             gf_dirent_t *entry, inode_t *inode)
+-    int ret = -1;
+-    loc_t loc = {
+-        0,
+-    };
+-    shard_priv_t *priv = NULL;
+-    priv = this->private;
+-    loc.inode = inode_ref(priv->dot_shard_rm_inode);
+-    ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
+-                         ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
+-    if (ret < 0) {
+-        if (ret == -EAGAIN) {
+-            ret = 0;
+-        }
+-        goto out;
+-    }
+-    {
+-        ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode);
+-    }
+-    syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
+-    loc_wipe(&loc);
+-    return ret;
+-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data)
+-    return 0;
+-shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
+-                           shard_internal_dir_type_t type)
+-    int ret = 0;
+-    char *bname = NULL;
+-    loc_t *loc = NULL;
+-    shard_priv_t *priv = NULL;
+-    uuid_t gfid = {
+-        0,
+-    };
+-    struct iatt stbuf = {
+-        0,
+-    };
+-    priv = this->private;
+-    switch (type) {
+-            loc = &local->dot_shard_loc;
+-            gf_uuid_copy(gfid, priv->dot_shard_gfid);
+-            bname = GF_SHARD_DIR;
+-            break;
+-            loc = &local->dot_shard_rm_loc;
+-            gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
+-            bname = GF_SHARD_REMOVE_ME_DIR;
+-            break;
+-        default:
+-            break;
+-    }
+-    loc->inode = inode_find(this->itable, gfid);
+-    if (!loc->inode) {
+-        ret = shard_init_internal_dir_loc(this, local, type);
+-        if (ret)
+-            goto err;
+-        ret = dict_reset(local->xattr_req);
+-        if (ret) {
+-            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-                   "Failed to reset "
+-                   "dict");
+-            ret = -ENOMEM;
+-            goto err;
+-        }
+-        ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true);
+-        ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL,
+-                            local->xattr_req, NULL);
+-        if (ret < 0) {
+-            if (ret != -ENOENT)
+-                gf_msg(this->name, GF_LOG_ERROR, -ret,
+-                       SHARD_MSG_SHARDS_DELETION_FAILED,
+-                       "Lookup on %s failed, exiting", bname);
+-            goto err;
+-        } else {
+-            shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
+-        }
+-    }
+-    ret = 0;
+-    return ret;
+-shard_lookup_marker_entry(xlator_t *this, shard_local_t *local,
+-                          gf_dirent_t *entry)
+-    int ret = 0;
+-    loc_t loc = {
+-        0,
+-    };
+-    loc.inode = inode_new(this->itable);
+-    if (!loc.inode) {
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+-    loc.parent = inode_ref(local->fd->inode);
+-    ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+-    if (ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-               "Inode path failed on %s", entry->d_name);
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+- = strrchr(loc.path, '/');
+-    if (
+-    ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+-    if (ret < 0) {
+-        goto err;
+-    }
+-    entry->inode = inode_ref(loc.inode);
+-    ret = 0;
+-    loc_wipe(&loc);
+-    return ret;
+-shard_delete_shards(void *opaque)
+-    int ret = 0;
+-    off_t offset = 0;
+-    loc_t loc = {
+-        0,
+-    };
+-    inode_t *link_inode = NULL;
+-    xlator_t *this = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    gf_dirent_t entries;
+-    gf_dirent_t *entry = NULL;
+-    call_frame_t *cleanup_frame = NULL;
+-    gf_boolean_t done = _gf_false;
+-    this = THIS;
+-    priv = this->private;
+-    INIT_LIST_HEAD(&entries.list);
+-    cleanup_frame = opaque;
+-    local = mem_get0(this->local_pool);
+-    if (!local) {
+-               "Failed to create local to "
+-               "delete shards");
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+-    cleanup_frame->local = local;
+-    local->fop = GF_FOP_UNLINK;
+-    local->xattr_req = dict_new();
+-    if (!local->xattr_req) {
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+-    local->deletion_rate = priv->deletion_rate;
+-    ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
+-    if (ret == -ENOENT) {
+-        gf_msg_debug(this->name, 0,
+-                     ".shard absent. Nothing to"
+-                     " delete. Exiting");
+-        ret = 0;
+-        goto err;
+-    } else if (ret < 0) {
+-        goto err;
+-    }
+-    ret = shard_resolve_internal_dir(this, local,
+-                                     SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+-    if (ret == -ENOENT) {
+-        gf_msg_debug(this->name, 0,
+-                     ".remove_me absent. "
+-                     "Nothing to delete. Exiting");
+-        ret = 0;
+-        goto err;
+-    } else if (ret < 0) {
+-        goto err;
+-    }
+-    local->fd = fd_anonymous(local->dot_shard_rm_loc.inode);
+-    if (!local->fd) {
+-        ret = -ENOMEM;
+-        goto err;
+-    }
+-    for (;;) {
+-        offset = 0;
+-        LOCK(&priv->lock);
+-        {
+-            if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
+-                priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
+-            } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
+-                priv->bg_del_state = SHARD_BG_DELETION_NONE;
+-                done = _gf_true;
+-            }
+-        }
+-        UNLOCK(&priv->lock);
+-        if (done)
+-            break;
+-        while (
+-            (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
+-                                   &entries, local->xattr_req, NULL))) {
+-            if (ret > 0)
+-                ret = 0;
+-            list_for_each_entry(entry, &entries.list, list)
+-            {
+-                offset = entry->d_off;
+-                if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+-                    continue;
+-                if (!entry->inode) {
+-                    ret = shard_lookup_marker_entry(this, local, entry);
+-                    if (ret < 0)
+-                        continue;
+-                }
+-                link_inode = inode_link(entry->inode, local->fd->inode,
+-                                        entry->d_name, &entry->d_stat);
+-                gf_msg_debug(this->name, 0,
+-                             "Initiating deletion of "
+-                             "shards of gfid %s",
+-                             entry->d_name);
+-                ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
+-                                                   link_inode);
+-                inode_unlink(link_inode, local->fd->inode, entry->d_name);
+-                inode_unref(link_inode);
+-                if (ret) {
+-                    gf_msg(this->name, GF_LOG_ERROR, -ret,
+-                           SHARD_MSG_SHARDS_DELETION_FAILED,
+-                           "Failed to clean up shards of gfid %s",
+-                           entry->d_name);
+-                    continue;
+-                }
+-                gf_msg(this->name, GF_LOG_INFO, 0,
+-                       SHARD_MSG_SHARD_DELETION_COMPLETED,
+-                       "Deleted "
+-                       "shards of gfid=%s from backend",
+-                       entry->d_name);
+-            }
+-            gf_dirent_free(&entries);
+-            if (ret)
+-                break;
+-        }
+-    }
+-    ret = 0;
+-    loc_wipe(&loc);
+-    return ret;
+-    LOCK(&priv->lock);
+-    {
+-        priv->bg_del_state = SHARD_BG_DELETION_NONE;
+-    }
+-    UNLOCK(&priv->lock);
+-    loc_wipe(&loc);
+-    return ret;
+-shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-    if (op_ret)
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-               "Unlock failed. Please check brick logs for "
+-               "more details");
+-    return 0;
+-shard_unlock_inodelk(call_frame_t *frame, xlator_t *this)
+-    loc_t *loc = NULL;
+-    call_frame_t *lk_frame = NULL;
+-    shard_local_t *local = NULL;
+-    shard_local_t *lk_local = NULL;
+-    shard_inodelk_t *lock = NULL;
+-    local = frame->local;
+-    lk_frame = local->inodelk_frame;
+-    lk_local = lk_frame->local;
+-    local->inodelk_frame = NULL;
+-    loc = &local->int_inodelk.loc;
+-    lock = &lk_local->int_inodelk;
+-    lock->flock.l_type = F_UNLCK;
+-    STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK,
+-               &lock->flock, NULL);
+-    local->int_inodelk.acquired_lock = _gf_false;
+-    return 0;
+-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                     int32_t op_ret, int32_t op_errno, struct iatt *buf,
+-                     struct iatt *preoldparent, struct iatt *postoldparent,
+-                     struct iatt *prenewparent, struct iatt *postnewparent,
+-                     dict_t *xdata);
+-shard_rename_src_base_file(call_frame_t *frame, xlator_t *this)
+-    int ret = 0;
+-    loc_t *dst_loc = NULL;
+-    loc_t tmp_loc = {
+-        0,
+-    };
+-    shard_local_t *local = frame->local;
+-    if (local->dst_block_size) {
+-        tmp_loc.parent = inode_ref(local->loc2.parent);
+-        ret = inode_path(tmp_loc.parent, local->,
+-                         (char **)&tmp_loc.path);
+-        if (ret < 0) {
+-            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-                   "Inode path failed"
+-                   " on pargfid=%s bname=%s",
+-                   uuid_utoa(tmp_loc.parent->gfid), local->;
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            goto err;
+-        }
+- = strrchr(tmp_loc.path, '/');
+-        if (
+-  ;
+-        dst_loc = &tmp_loc;
+-    } else {
+-        dst_loc = &local->loc2;
+-    }
+-    /* To-Do: Request open-fd count on dst base file */
+-    STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc,
+-               local->xattr_req);
+-    loc_wipe(&tmp_loc);
+-    return 0;
+-    loc_wipe(&tmp_loc);
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
+-shard_unlink_base_file(call_frame_t *frame, xlator_t *this);
+-shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie,
+-                                        xlator_t *this, int32_t op_ret,
+-                                        int32_t op_errno, dict_t *dict,
+-                                        dict_t *xdata)
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    priv = this->private;
+-    local = frame->local;
+-    if (op_ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-               "Xattrop on marker file failed "
+-               "while performing %s; entry gfid=%s",
+-               gf_fop_string(local->fop), local->;
+-        goto err;
+-    }
+-    inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode,
+-                 local->;
+-    if (local->fop == GF_FOP_UNLINK)
+-        shard_unlink_base_file(frame, this);
+-    else if (local->fop == GF_FOP_RENAME)
+-        shard_rename_src_base_file(frame, this);
+-    return 0;
+-    shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
+-    return 0;
+-shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this)
+-    int op_errno = ENOMEM;
+-    uint64_t bs = 0;
+-    dict_t *xdata = NULL;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    xdata = dict_new();
+-    if (!xdata)
+-        goto err;
+-    if (local->fop == GF_FOP_UNLINK)
+-        bs = local->block_size;
+-    else if (local->fop == GF_FOP_RENAME)
+-        bs = local->dst_block_size;
+-    SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc,
+-                            local->prebuf.ia_size, 0, err);
+-    STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk,
+-               FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop,
+-               &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL);
+-    dict_unref(xdata);
+-    return 0;
+-    if (xdata)
+-        dict_unref(xdata);
+-    shard_common_failure_unwind(local->fop, frame, -1, op_errno);
+-    return 0;
+-shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                             int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                             struct iatt *buf, dict_t *xdata,
+-                             struct iatt *postparent)
+-    inode_t *linked_inode = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    priv = this->private;
+-    if (op_ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-               "Lookup on marker file failed "
+-               "while performing %s; entry gfid=%s",
+-               gf_fop_string(local->fop), local->;
+-        goto err;
+-    }
+-    linked_inode = inode_link(inode, priv->dot_shard_rm_inode,
+-                              local->, buf);
+-    inode_unref(local->newloc.inode);
+-    local->newloc.inode = linked_inode;
+-    shard_set_size_attrs_on_marker_file(frame, this);
+-    return 0;
+-    shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
+-    return 0;
+-shard_lookup_marker_file(call_frame_t *frame, xlator_t *this)
+-    int op_errno = ENOMEM;
+-    dict_t *xattr_req = NULL;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    xattr_req = shard_create_gfid_dict(local->xattr_req);
+-    if (!xattr_req)
+-        goto err;
+-    STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req);
+-    dict_unref(xattr_req);
+-    return 0;
+-    shard_common_failure_unwind(local->fop, frame, -1, op_errno);
+-    return 0;
+-    call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+-    int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent,
+-    struct iatt *postparent, dict_t *xdata)
+-    inode_t *linked_inode = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    priv = this->private;
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
+-    if (op_ret < 0) {
+-        if ((op_errno != EEXIST) && (op_errno != ENODATA)) {
+-            local->op_ret = op_ret;
+-            local->op_errno = op_errno;
+-            gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-                   "Marker file creation "
+-                   "failed while performing %s; entry gfid=%s",
+-                   gf_fop_string(local->fop), local->;
+-            goto err;
+-        } else {
+-            shard_lookup_marker_file(frame, this);
+-            return 0;
+-        }
+-    }
+-    linked_inode = inode_link(inode, priv->dot_shard_rm_inode,
+-                              local->, buf);
+-    inode_unref(local->newloc.inode);
+-    local->newloc.inode = linked_inode;
+-    if (local->fop == GF_FOP_UNLINK)
+-        shard_unlink_base_file(frame, this);
+-    else if (local->fop == GF_FOP_RENAME)
+-        shard_rename_src_base_file(frame, this);
+-    return 0;
+-    shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+-    return 0;
+-shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this,
+-                                         loc_t *loc)
+-    int ret = 0;
+-    int op_errno = ENOMEM;
+-    uint64_t bs = 0;
+-    char g1[64] = {
+-        0,
+-    };
+-    char g2[64] = {
+-        0,
+-    };
+-    dict_t *xattr_req = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    priv = this->private;
+-    local = frame->local;
+-    SHARD_SET_ROOT_FS_ID(frame, local);
+-    xattr_req = shard_create_gfid_dict(local->xattr_req);
+-    if (!xattr_req)
+-        goto err;
+-    local->newloc.inode = inode_new(this->itable);
+-    local->newloc.parent = inode_ref(priv->dot_shard_rm_inode);
+-    ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid),
+-                     (char **)&local->newloc.path);
+-    if (ret < 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-               "Inode path failed on "
+-               "pargfid=%s bname=%s",
+-               uuid_utoa_r(priv->dot_shard_rm_gfid, g1),
+-               uuid_utoa_r(loc->inode->gfid, g2));
+-        goto err;
+-    }
+-    local-> = strrchr(local->newloc.path, '/');
+-    if (local->
+-        local->;
+-    if (local->fop == GF_FOP_UNLINK)
+-        bs = local->block_size;
+-    else if (local->fop == GF_FOP_RENAME)
+-        bs = local->dst_block_size;
+-    SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
+-                            local->prebuf.ia_size, 0, err);
+-    STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
+-               FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+-               &local->newloc, 0, 0, 0644, xattr_req);
+-    dict_unref(xattr_req);
+-    return 0;
+-    if (xattr_req)
+-        dict_unref(xattr_req);
+-    shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno,
+-                                                 NULL, NULL, NULL, NULL, NULL);
+-    return 0;
+-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+-shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                           int32_t op_ret, int32_t op_errno,
+-                           struct iatt *preparent, struct iatt *postparent,
+-                           dict_t *xdata)
+-    int ret = 0;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (op_ret < 0) {
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-    } else {
+-        local->preoldparent = *preparent;
+-        local->postoldparent = *postparent;
+-        if (xdata)
+-            local->xattr_rsp = dict_ref(xdata);
+-        if (local->cleanup_required)
+-            shard_start_background_deletion(this);
+-    }
+-    if (local->entrylk_frame) {
+-        ret = shard_unlock_entrylk(frame, this);
+-        if (ret < 0) {
+-            local->op_ret = -1;
+-            local->op_errno = -ret;
+-        }
+-    }
+-    ret = shard_unlock_inodelk(frame, this);
+-    if (ret < 0) {
+-        local->op_ret = -1;
+-        local->op_errno = -ret;
+-    }
+-    shard_unlink_cbk(frame, this);
+-    return 0;
+-shard_unlink_base_file(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = frame->local;
+-    /* To-Do: Request open-fd count on base file */
+-    STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
+-               local->xattr_req);
+-    return 0;
+-shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-    if (op_ret)
+-        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-               "Unlock failed. Please check brick logs for "
+-               "more details");
+-    return 0;
+-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this)
+-    loc_t *loc = NULL;
+-    call_frame_t *lk_frame = NULL;
+-    shard_local_t *local = NULL;
+-    shard_local_t *lk_local = NULL;
+-    shard_entrylk_t *lock = NULL;
+-    local = frame->local;
+-    lk_frame = local->entrylk_frame;
+-    lk_local = lk_frame->local;
+-    local->entrylk_frame = NULL;
+-    lock = &lk_local->int_entrylk;
+-    loc = &lock->loc;
+-    STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->entrylk, this->name, loc,
+-               lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK,
+-               NULL);
+-    local->int_entrylk.acquired_lock = _gf_false;
+-    return 0;
+-shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    switch (local->fop) {
+-        case GF_FOP_UNLINK:
+-        case GF_FOP_RENAME:
+-            shard_create_marker_file_under_remove_me(frame, this,
+-                                                     &local->int_inodelk.loc);
+-            break;
+-        default:
+-            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-                   "post-entrylk handler not defined. This case should not"
+-                   " be hit");
+-            break;
+-    }
+-    return 0;
+-shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-    call_frame_t *main_frame = NULL;
+-    shard_local_t *local = NULL;
+-    shard_local_t *main_local = NULL;
+-    local = frame->local;
+-    main_frame = local->main_frame;
+-    main_local = main_frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(main_local->fop, main_frame, op_ret,
+-                                    op_errno);
+-        return 0;
+-    }
+-    main_local->int_entrylk.acquired_lock = _gf_true;
+-    shard_post_entrylk_fop_handler(main_frame, this);
+-    return 0;
+-shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+-                      uuid_t gfid)
+-    char gfid_str[GF_UUID_BUF_SIZE] = {
+-        0,
+-    };
+-    shard_local_t *local = NULL;
+-    shard_local_t *entrylk_local = NULL;
+-    shard_entrylk_t *int_entrylk = NULL;
+-    call_frame_t *entrylk_frame = NULL;
+-    local = frame->local;
+-    entrylk_frame = create_frame(this, this->ctx->pool);
+-    if (!entrylk_frame) {
+-               "Failed to create new frame "
+-               "to lock marker file");
+-        goto err;
+-    }
+-    entrylk_local = mem_get0(this->local_pool);
+-    if (!entrylk_local) {
+-        STACK_DESTROY(entrylk_frame->root);
+-        goto err;
+-    }
+-    entrylk_frame->local = entrylk_local;
+-    entrylk_local->main_frame = frame;
+-    int_entrylk = &entrylk_local->int_entrylk;
+-    int_entrylk->loc.inode = inode_ref(inode);
+-    set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root);
+-    local->entrylk_frame = entrylk_frame;
+-    gf_uuid_unparse(gfid, gfid_str);
+-    int_entrylk->basename = gf_strdup(gfid_str);
+-    STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc,
+-               int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+-    return 0;
+-    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-    return 0;
+-shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    shard_priv_t *priv = NULL;
+-    priv = this->private;
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+-        return 0;
+-    }
+-    if (local->prebuf.ia_nlink > 1) {
+-        gf_msg_debug(this->name, 0,
+-                     "link count on %s > 1:%d, "
+-                     "performing rename()/unlink()",
+-                     local->int_inodelk.loc.path, local->prebuf.ia_nlink);
+-        if (local->fop == GF_FOP_RENAME)
+-            shard_rename_src_base_file(frame, this);
+-        else if (local->fop == GF_FOP_UNLINK)
+-            shard_unlink_base_file(frame, this);
+-    } else {
+-        gf_msg_debug(this->name, 0,
+-                     "link count on %s = 1, creating "
+-                     "file under .remove_me",
+-                     local->int_inodelk.loc.path);
+-        local->cleanup_required = _gf_true;
+-        shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode,
+-                              local->prebuf.ia_gfid);
+-    }
+-    return 0;
+-shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    switch (local->fop) {
+-        case GF_FOP_UNLINK:
+-        case GF_FOP_RENAME:
+-            shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
+-                                   shard_post_lookup_base_shard_rm_handler);
+-            break;
+-        default:
+-            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-                   "post-inodelk handler not defined. This case should not"
+-                   " be hit");
+-            break;
+-    }
+-    return 0;
+-shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-    call_frame_t *main_frame = NULL;
+-    shard_local_t *local = NULL;
+-    shard_local_t *main_local = NULL;
+-    local = frame->local;
+-    main_frame = local->main_frame;
+-    main_local = main_frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(main_local->fop, main_frame, op_ret,
+-                                    op_errno);
+-        return 0;
+-    }
+-    main_local->int_inodelk.acquired_lock = _gf_true;
+-    shard_post_inodelk_fop_handler(main_frame, this);
+-    return 0;
+-shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc)
+-    call_frame_t *lk_frame = NULL;
+-    shard_local_t *local = NULL;
+-    shard_local_t *lk_local = NULL;
+-    shard_inodelk_t *int_inodelk = NULL;
+-    local = frame->local;
+-    lk_frame = create_frame(this, this->ctx->pool);
+-    if (!lk_frame) {
+-               "Failed to create new frame "
+-               "to lock base shard");
+-        goto err;
+-    }
+-    lk_local = mem_get0(this->local_pool);
+-    if (!lk_local) {
+-        STACK_DESTROY(lk_frame->root);
+-        goto err;
+-    }
+-    lk_frame->local = lk_local;
+-    lk_local->main_frame = frame;
+-    int_inodelk = &lk_local->int_inodelk;
+-    int_inodelk->flock.l_len = 0;
+-    int_inodelk->flock.l_start = 0;
+-    int_inodelk->domain = this->name;
+-    int_inodelk->flock.l_type = F_WRLCK;
+-    loc_copy(&local->int_inodelk.loc, loc);
+-    set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root);
+-    local->inodelk_frame = lk_frame;
+-    STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain,
+-               &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL);
+-    return 0;
+-    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-    return 0;
+-shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this)
+-    loc_t *loc = NULL;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+-        return 0;
+-    }
+-    if (local->fop == GF_FOP_UNLINK)
+-        loc = &local->loc;
+-    else if (local->fop == GF_FOP_RENAME)
+-        loc = &local->loc2;
+-    shard_acquire_inodelk(frame, this, loc);
+-    return 0;
+-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+-                         shard_post_resolve_fop_handler_t handler,
+-                         shard_internal_dir_type_t type);
+-shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+-        return 0;
+-    }
+-    shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler,
+-                             SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+-    return 0;
+-shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this)
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    priv = this->private;
+-    local = frame->local;
+-    local->dot_shard_rm_loc.inode = inode_find(this->itable,
+-                                               priv->dot_shard_rm_gfid);
+-    if (!local->dot_shard_rm_loc.inode) {
+-        local->dot_shard_loc.inode = inode_find(this->itable,
+-                                                priv->dot_shard_gfid);
+-        if (!local->dot_shard_loc.inode) {
+-            shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler,
+-                                     SHARD_INTERNAL_DIR_DOT_SHARD);
+-        } else {
+-            local->post_res_handler = shard_pre_mkdir_rm_handler;
+-            shard_refresh_internal_dir(frame, this,
+-                                       SHARD_INTERNAL_DIR_DOT_SHARD);
+-        }
+-    } else {
+-        local->post_res_handler = shard_post_mkdir_rm_handler;
+-        shard_refresh_internal_dir(frame, this,
+-                                   SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+-    }
+-shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+-             dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
+-    ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+-    if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size from inode ctx of %s",
+-               uuid_utoa(loc->inode->gfid));
+-        goto err;
+-    }
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+-        return 0;
+-    }
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
+-    frame->local = local;
+-    loc_copy(&local->loc, loc);
+-    local->xflag = xflag;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    local->block_size = block_size;
+-    local->resolver_base_inode = loc->inode;
+-    local->fop = GF_FOP_UNLINK;
+-    if (!this->itable)
+-        this->itable = (local->loc.inode)->table;
+-    local->resolve_not = _gf_true;
+-    shard_begin_rm_resolution(frame, this);
+-    return 0;
+-    shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM);
+-    return 0;
+-shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this)
+-    shard_rename_cbk(frame, this);
+-    return 0;
+-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                     int32_t op_ret, int32_t op_errno, struct iatt *buf,
+-                     struct iatt *preoldparent, struct iatt *postoldparent,
+-                     struct iatt *prenewparent, struct iatt *postnewparent,
+-                     dict_t *xdata)
+-    int ret = 0;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (op_ret < 0) {
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto err;
+-    }
+-    /* Set ctx->refresh to TRUE to force a lookup on disk when
+-     * shard_lookup_base_file() is called next to refresh the hard link
+-     * count in ctx. Note that this is applicable only to the case where
+-     * the rename dst is already existent and sharded.
+-     */
+-    if ((local->dst_block_size) && (!local->cleanup_required))
+-        shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
+-    local->prebuf = *buf;
+-    local->preoldparent = *preoldparent;
+-    local->postoldparent = *postoldparent;
+-    local->prenewparent = *prenewparent;
+-    local->postnewparent = *postnewparent;
+-    if (xdata)
+-        local->xattr_rsp = dict_ref(xdata);
+-    if (local->dst_block_size) {
+-        if (local->entrylk_frame) {
+-            ret = shard_unlock_entrylk(frame, this);
+-            if (ret < 0) {
+-                local->op_ret = -1;
+-                local->op_errno = -ret;
+-            }
+-        }
+-        ret = shard_unlock_inodelk(frame, this);
+-        if (ret < 0) {
+-            local->op_ret = -1;
+-            local->op_errno = -ret;
+-            goto err;
+-        }
+-        if (local->cleanup_required)
+-            shard_start_background_deletion(this);
+-    }
+-    /* Now the base file of src, if sharded, is looked up to gather ia_size
+-     * and ia_blocks.*/
+-    if (local->block_size) {
+-        local->tmp_loc.inode = inode_new(this->itable);
+-        gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
+-        shard_lookup_base_file(frame, this, &local->tmp_loc,
+-                               shard_post_rename_lookup_handler);
+-    } else {
+-        shard_rename_cbk(frame, this);
+-    }
+-    return 0;
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
+-shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
+-    }
+-    /* Save dst base file attributes into postbuf so the information is not
+-     * lost when it is overwritten after lookup on base file of src in
+-     * shard_lookup_base_file_cbk().
+-     */
+-    local->postbuf = local->prebuf;
+-    shard_rename_src_base_file(frame, this);
+-    return 0;
+-shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+-             dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    uint64_t dst_block_size = 0;
+-    shard_local_t *local = NULL;
+-    if (IA_ISDIR(oldloc->inode->ia_type)) {
+-        STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+-        return 0;
+-    }
+-    ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
+-    if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size from inode ctx of %s",
+-               uuid_utoa(oldloc->inode->gfid));
+-        goto err;
+-    }
+-    if (newloc->inode)
+-        ret = shard_inode_ctx_get_block_size(newloc->inode, this,
+-                                             &dst_block_size);
+-    /* The following stack_wind covers the case where:
+-     * a. the src file is not sharded and dst doesn't exist, OR
+-     * b. the src and dst both exist but are not sharded.
+-     */
+-    if (((!block_size) && (!dst_block_size)) ||
+-        frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+-        return 0;
+-    }
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
+-    frame->local = local;
+-    loc_copy(&local->loc, oldloc);
+-    loc_copy(&local->loc2, newloc);
+-    local->resolver_base_inode = newloc->inode;
+-    local->fop = GF_FOP_RENAME;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
+-    local->block_size = block_size;
+-    local->dst_block_size = dst_block_size;
+-    if (!this->itable)
+-        this->itable = (local->loc.inode)->table;
+-    local->resolve_not = _gf_true;
+-    /* The following if-block covers the case where the dst file exists
+-     * and is sharded.
+-     */
+-    if (local->dst_block_size) {
+-        shard_begin_rm_resolution(frame, this);
+-    } else {
+-        /* The following block covers the case where the dst either doesn't
+-         * exist or is NOT sharded but the src is sharded. In this case, shard
+-         * xlator would go ahead and rename src to dst. Once done, it would also
+-         * lookup the base shard of src to get the ia_size and ia_blocks xattr
+-         * values.
+-         */
+-        shard_rename_src_base_file(frame, this);
+-    }
+-    return 0;
+-    shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM);
+-    return 0;
+-shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                 int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+-                 struct iatt *stbuf, struct iatt *preparent,
+-                 struct iatt *postparent, dict_t *xdata)
+-    int ret = -1;
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (op_ret == -1)
+-        goto unwind;
+-    ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size,
+-                              SHARD_ALL_MASK);
+-    if (ret)
+-        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
+-               "Failed to set inode "
+-               "ctx for %s",
+-               uuid_utoa(inode->gfid));
+-    SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+-                       preparent, postparent, xdata);
+-    return 0;
+-shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+-             mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+-    shard_priv_t *priv = NULL;
+-    shard_local_t *local = NULL;
+-    priv = this->private;
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
+-    frame->local = local;
+-    local->block_size = priv->block_size;
+-    if (!__is_gsyncd_on_shard_dir(frame, loc)) {
+-        SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
+-    }
+-    STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+-               xdata);
+-    return 0;
+-    shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM);
+-    return 0;
+-shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-               int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+-    /* To-Do: Handle open with O_TRUNC under locks */
+-    SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+-    return 0;
+-shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+-           fd_t *fd, dict_t *xdata)
+-    STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+-    return 0;
+-shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                   int32_t op_ret, int32_t op_errno, struct iovec *vector,
+-                   int32_t count, struct iatt *stbuf, struct iobref *iobref,
+-                   dict_t *xdata)
+-    int i = 0;
+-    int call_count = 0;
+-    void *address = NULL;
+-    uint64_t block_num = 0;
+-    off_t off = 0;
+-    struct iovec vec = {
+-        0,
+-    };
+-    shard_local_t *local = NULL;
+-    fd_t *anon_fd = cookie;
+-    shard_inode_ctx_t *ctx = NULL;
+-    local = frame->local;
+-    /* If shard has already seen a failure here before, there is no point
+-     * in aggregating subsequent reads, so just go to out.
+-     */
+-    if (local->op_ret < 0)
+-        goto out;
+-    if (op_ret < 0) {
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto out;
+-    }
+-    if (local->op_ret >= 0)
+-        local->op_ret += op_ret;
+-    shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+-    block_num = ctx->block_num;
+-    if (block_num == local->first_block) {
+-        address = local->iobuf->ptr;
+-    } else {
+-        /* else
+-         * address to start writing to = beginning of buffer +
+-         *                    number of bytes until end of first block +
+-         *                    + block_size times number of blocks
+-         *                    between the current block and the first
+-         */
+-        address = (char *)local->iobuf->ptr +
+-                  (local->block_size - (local->offset % local->block_size)) +
+-                  ((block_num - local->first_block - 1) * local->block_size);
+-    }
+-    for (i = 0; i < count; i++) {
+-        address = (char *)address + off;
+-        memcpy(address, vector[i].iov_base, vector[i].iov_len);
+-        off += vector[i].iov_len;
+-    }
+-    if (anon_fd)
+-        fd_unref(anon_fd);
+-    call_count = shard_call_count_return(frame);
+-    if (call_count == 0) {
+-        SHARD_UNSET_ROOT_FS_ID(frame, local);
+-        if (local->op_ret < 0) {
+-            shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                        local->op_errno);
+-        } else {
+-            if (xdata)
+-                local->xattr_rsp = dict_ref(xdata);
+-            vec.iov_base = local->iobuf->ptr;
+-            vec.iov_len = local->total_size;
+-            local->op_ret = local->total_size;
+-            SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno,
+-                               &vec, 1, &local->prebuf, local->iobref,
+-                               local->xattr_rsp);
+-            return 0;
+-        }
+-    }
+-    return 0;
+-shard_readv_do(call_frame_t *frame, xlator_t *this)
+-    int i = 0;
+-    int call_count = 0;
+-    int last_block = 0;
+-    int cur_block = 0;
+-    off_t orig_offset = 0;
+-    off_t shard_offset = 0;
+-    size_t read_size = 0;
+-    size_t remaining_size = 0;
+-    fd_t *fd = NULL;
+-    fd_t *anon_fd = NULL;
+-    shard_local_t *local = NULL;
+-    gf_boolean_t wind_failed = _gf_false;
+-    local = frame->local;
+-    fd = local->fd;
+-    orig_offset = local->offset;
+-    cur_block = local->first_block;
+-    last_block = local->last_block;
+-    remaining_size = local->total_size;
+-    local->call_count = call_count = local->num_blocks;
+-    SHARD_SET_ROOT_FS_ID(frame, local);
+-    if (fd->flags & O_DIRECT)
+-        local->flags = O_DIRECT;
+-    while (cur_block <= last_block) {
+-        if (wind_failed) {
+-            shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL,
+-                               0, NULL, NULL, NULL);
+-            goto next;
+-        }
+-        shard_offset = orig_offset % local->block_size;
+-        read_size = local->block_size - shard_offset;
+-        if (read_size > remaining_size)
+-            read_size = remaining_size;
+-        remaining_size -= read_size;
+-        if (cur_block == 0) {
+-            anon_fd = fd_ref(fd);
+-        } else {
+-            anon_fd = fd_anonymous(local->inode_list[i]);
+-            if (!anon_fd) {
+-                local->op_ret = -1;
+-                local->op_errno = ENOMEM;
+-                wind_failed = _gf_true;
+-                shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1,
+-                                   ENOMEM, NULL, 0, NULL, NULL, NULL);
+-                goto next;
+-            }
+-        }
++int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame,
++                                            xlator_t *this) {
++  shard_local_t *local = NULL;
++  shard_priv_t *priv = NULL;
+-        STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this),
+-                          FIRST_CHILD(this)->fops->readv, anon_fd, read_size,
+-                          shard_offset, local->flags, local->xattr_req);
++  priv = this->private;
++  local = frame->local;
+-        orig_offset += read_size;
+-    next:
+-        cur_block++;
+-        i++;
+-        call_count--;
+-    }
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+     return 0;
++  }
++  if (local->prebuf.ia_nlink > 1) {
++    gf_msg_debug(this->name, 0, "link count on %s > 1:%d, "
++                                "performing rename()/unlink()",
++                 local->int_inodelk.loc.path, local->prebuf.ia_nlink);
++    if (local->fop == GF_FOP_RENAME)
++      shard_rename_src_base_file(frame, this);
++    else if (local->fop == GF_FOP_UNLINK)
++      shard_unlink_base_file(frame, this);
++  } else {
++    gf_msg_debug(this->name, 0, "link count on %s = 1, creating "
++                                "file under .remove_me",
++                 local->int_inodelk.loc.path);
++    local->cleanup_required = _gf_true;
++    shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode,
++                          local->prebuf.ia_gfid);
++  }
++  return 0;
++int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
++  local = frame->local;
++  switch (local->fop) {
++  case GF_FOP_UNLINK:
++  case GF_FOP_RENAME:
++    shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
++                           shard_post_lookup_base_shard_rm_handler);
++    break;
++  default:
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++           "post-inodelk handler not defined. This case should not"
++           " be hit");
++    break;
++  }
++  return 0;
++int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                              int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++  call_frame_t *main_frame = NULL;
++  shard_local_t *local = NULL;
++  shard_local_t *main_local = NULL;
++  local = frame->local;
++  main_frame = local->main_frame;
++  main_local = main_frame->local;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno);
++    return 0;
++  }
++  main_local->int_inodelk.acquired_lock = _gf_true;
++  shard_post_inodelk_fop_handler(main_frame, this);
++  return 0;
++int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) {
++  call_frame_t *lk_frame = NULL;
++  shard_local_t *local = NULL;
++  shard_local_t *lk_local = NULL;
++  shard_inodelk_t *int_inodelk = NULL;
++  local = frame->local;
++  lk_frame = create_frame(this, this->ctx->pool);
++  if (!lk_frame) {
++           "Failed to create new frame "
++           "to lock base shard");
++    goto err;
++  }
++  lk_local = mem_get0(this->local_pool);
++  if (!lk_local) {
++    STACK_DESTROY(lk_frame->root);
++    goto err;
++  }
++  lk_frame->local = lk_local;
++  lk_local->main_frame = frame;
++  int_inodelk = &lk_local->int_inodelk;
++  int_inodelk->flock.l_len = 0;
++  int_inodelk->flock.l_start = 0;
++  int_inodelk->domain = this->name;
++  int_inodelk->flock.l_type = F_WRLCK;
++  loc_copy(&local->int_inodelk.loc, loc);
++  set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root);
++  local->inodelk_frame = lk_frame;
++  STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain,
++             &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL);
++  return 0;
++  shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                       int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                       struct iatt *buf, struct iatt *preparent,
+-                       struct iatt *postparent, dict_t *xdata)
+-    int shard_block_num = (long)cookie;
+-    int call_count = 0;
+-    shard_local_t *local = NULL;
++int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) {
++  loc_t *loc = NULL;
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  local = frame->local;
+-    if (op_ret < 0) {
+-        if (op_errno == EEXIST) {
+-            LOCK(&frame->lock);
+-            {
+-                local->eexist_count++;
+-            }
+-            UNLOCK(&frame->lock);
+-        } else {
+-            local->op_ret = op_ret;
+-            local->op_errno = op_errno;
+-        }
+-        gf_msg_debug(this->name, 0,
+-                     "mknod of shard %d "
+-                     "failed: %s",
+-                     shard_block_num, strerror(op_errno));
+-        goto done;
+-    }
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
++    return 0;
++  }
++  if (local->fop == GF_FOP_UNLINK)
++    loc = &local->loc;
++  else if (local->fop == GF_FOP_RENAME)
++    loc = &local->loc2;
++  shard_acquire_inodelk(frame, this, loc);
++  return 0;
+-    shard_link_block_inode(local, shard_block_num, inode, buf);
++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
++                             shard_post_resolve_fop_handler_t handler,
++                             shard_internal_dir_type_t type);
++int shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-    call_count = shard_call_count_return(frame);
+-    if (call_count == 0) {
+-        SHARD_UNSET_ROOT_FS_ID(frame, local);
+-        local->create_count = 0;
+-        local->post_mknod_handler(frame, this);
+-    }
++  local = frame->local;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+     return 0;
++  }
++  shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler,
++                           SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++  return 0;
+ }
+-shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+-                          shard_post_mknod_fop_handler_t post_mknod_handler)
+-    int i = 0;
+-    int shard_idx_iter = 0;
+-    int last_block = 0;
+-    int ret = 0;
+-    int call_count = 0;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
+-    mode_t mode = 0;
+-    char *bname = NULL;
+-    shard_priv_t *priv = NULL;
+-    shard_inode_ctx_t ctx_tmp = {
+-        0,
+-    };
+-    shard_local_t *local = NULL;
+-    gf_boolean_t wind_failed = _gf_false;
+-    fd_t *fd = NULL;
+-    loc_t loc = {
+-        0,
+-    };
+-    dict_t *xattr_req = NULL;
+-    local = frame->local;
+-    priv = this->private;
+-    fd = local->fd;
+-    shard_idx_iter = local->first_block;
+-    last_block = local->last_block;
+-    call_count = local->call_count = local->create_count;
+-    local->post_mknod_handler = post_mknod_handler;
++void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) {
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
+-    SHARD_SET_ROOT_FS_ID(frame, local);
++  priv = this->private;
++  local = frame->local;
+-    ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get inode "
+-               "ctx for %s",
+-               uuid_utoa(fd->inode->gfid));
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        goto err;
+-    }
+-    mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
++  local->dot_shard_rm_loc.inode =
++      inode_find(this->itable, priv->dot_shard_rm_gfid);
++  if (!local->dot_shard_rm_loc.inode) {
++    local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++    if (!local->dot_shard_loc.inode) {
++      shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler,
++                               SHARD_INTERNAL_DIR_DOT_SHARD);
++    } else {
++      local->post_res_handler = shard_pre_mkdir_rm_handler;
++      shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++    }
++  } else {
++    local->post_res_handler = shard_post_mkdir_rm_handler;
++    shard_refresh_internal_dir(frame, this,
++                               SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++  }
++int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
++                 dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
++  ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++  if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size from inode ctx of %s",
++           uuid_utoa(loc->inode->gfid));
++    goto err;
++  }
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
++    return 0;
++  }
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
++  frame->local = local;
++  loc_copy(&local->loc, loc);
++  local->xflag = xflag;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  local->block_size = block_size;
++  local->resolver_base_inode = loc->inode;
++  local->fop = GF_FOP_UNLINK;
++  if (!this->itable)
++    this->itable = (local->loc.inode)->table;
++  local->resolve_not = _gf_true;
++  shard_begin_rm_resolution(frame, this);
++  return 0;
++  shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM);
++  return 0;
+-    while (shard_idx_iter <= last_block) {
+-        if (local->inode_list[i]) {
+-            shard_idx_iter++;
+-            i++;
+-            continue;
+-        }
++int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) {
++  shard_rename_cbk(frame, this);
++  return 0;
+-        if (wind_failed) {
+-            shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+-                                   -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+-            goto next;
+-        }
++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                         int32_t op_ret, int32_t op_errno, struct iatt *buf,
++                         struct iatt *preoldparent, struct iatt *postoldparent,
++                         struct iatt *prenewparent, struct iatt *postnewparent,
++                         dict_t *xdata) {
++  int ret = 0;
++  shard_local_t *local = NULL;
+-        shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
+-                                 sizeof(path));
+-        xattr_req = shard_create_gfid_dict(local->xattr_req);
+-        if (!xattr_req) {
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            wind_failed = _gf_true;
+-            shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+-                                   -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+-            goto next;
+-        }
++  local = frame->local;
+-        bname = strrchr(path, '/') + 1;
+-        loc.inode = inode_new(this->itable);
+-        loc.parent = inode_ref(priv->dot_shard_inode);
+-        ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-        if (ret < 0 || !(loc.inode)) {
+-            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-                   "Inode path failed"
+-                   "on %s, base file gfid = %s",
+-                   bname, uuid_utoa(fd->inode->gfid));
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            wind_failed = _gf_true;
+-            loc_wipe(&loc);
+-            dict_unref(xattr_req);
+-            shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+-                                   -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+-            goto next;
+-        }
++  if (op_ret < 0) {
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto err;
++  }
++  /* Set ctx->refresh to TRUE to force a lookup on disk when
++   * shard_lookup_base_file() is called next to refresh the hard link
++   * count in ctx. Note that this is applicable only to the case where
++   * the rename dst is already existent and sharded.
++   */
++  if ((local->dst_block_size) && (!local->cleanup_required))
++    shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
++  local->prebuf = *buf;
++  local->preoldparent = *preoldparent;
++  local->postoldparent = *postoldparent;
++  local->prenewparent = *prenewparent;
++  local->postnewparent = *postnewparent;
++  if (xdata)
++    local->xattr_rsp = dict_ref(xdata);
+- = strrchr(loc.path, '/');
+-        if (
+-  ;
+-        STACK_WIND_COOKIE(frame, shard_common_mknod_cbk,
+-                          (void *)(long)shard_idx_iter, FIRST_CHILD(this),
+-                          FIRST_CHILD(this)->fops->mknod, &loc, mode,
+-                          ctx_tmp.stat.ia_rdev, 0, xattr_req);
+-        loc_wipe(&loc);
+-        dict_unref(xattr_req);
+-    next:
+-        shard_idx_iter++;
+-        i++;
+-        if (!--call_count)
+-            break;
++  if (local->dst_block_size) {
++    if (local->entrylk_frame) {
++      ret = shard_unlock_entrylk(frame, this);
++      if (ret < 0) {
++        local->op_ret = -1;
++        local->op_errno = -ret;
++      }
+     }
+-    return 0;
++    ret = shard_unlock_inodelk(frame, this);
++    if (ret < 0) {
++      local->op_ret = -1;
++      local->op_errno = -ret;
++      goto err;
++    }
++    if (local->cleanup_required)
++      shard_start_background_deletion(this);
++  }
++  /* Now the base file of src, if sharded, is looked up to gather ia_size
++   * and ia_blocks.*/
++  if (local->block_size) {
++    local->tmp_loc.inode = inode_new(this->itable);
++    gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
++    shard_lookup_base_file(frame, this, &local->tmp_loc,
++                           shard_post_rename_lookup_handler);
++  } else {
++    shard_rename_cbk(frame, this);
++  }
++  return 0;
+ err:
+-    /*
+-     * This block is for handling failure in shard_inode_ctx_get_all().
+-     * Failures in the while-loop are handled within the loop.
+-     */
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
+-    post_mknod_handler(frame, this);
+-    return 0;
++  shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                              local->op_errno);
++  return 0;
+ }
+-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this);
+-shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
+-    }
++int shard_post_lookup_dst_base_file_handler(call_frame_t *frame,
++                                            xlator_t *this) {
++  shard_local_t *local = NULL;
+-    if (local->create_count) {
+-        shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler);
+-    } else {
+-        shard_readv_do(frame, this);
+-    }
++  local = frame->local;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
+     return 0;
++  }
++  /* Save dst base file attributes into postbuf so the information is not
++   * lost when it is overwritten after lookup on base file of src in
++   * shard_lookup_base_file_cbk().
++   */
++  local->postbuf = local->prebuf;
++  shard_rename_src_base_file(frame, this);
++  return 0;
++int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
++                 loc_t *newloc, dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  uint64_t dst_block_size = 0;
++  shard_local_t *local = NULL;
++  if (IA_ISDIR(oldloc->inode->ia_type)) {
++    STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
++    return 0;
++  }
++  ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
++  if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size from inode ctx of %s",
++           uuid_utoa(oldloc->inode->gfid));
++    goto err;
++  }
++  if (newloc->inode)
++    ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size);
++  /* The following stack_wind covers the case where:
++   * a. the src file is not sharded and dst doesn't exist, OR
++   * b. the src and dst both exist but are not sharded.
++   */
++  if (((!block_size) && (!dst_block_size)) ||
++      frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
++    return 0;
++  }
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
++  frame->local = local;
++  loc_copy(&local->loc, oldloc);
++  loc_copy(&local->loc2, newloc);
++  local->resolver_base_inode = newloc->inode;
++  local->fop = GF_FOP_RENAME;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  local->block_size = block_size;
++  local->dst_block_size = dst_block_size;
++  if (!this->itable)
++    this->itable = (local->loc.inode)->table;
++  local->resolve_not = _gf_true;
++  /* The following if-block covers the case where the dst file exists
++   * and is sharded.
++   */
++  if (local->dst_block_size) {
++    shard_begin_rm_resolution(frame, this);
++  } else {
++    /* The following block covers the case where the dst either doesn't
++     * exist or is NOT sharded but the src is sharded. In this case, shard
++     * xlator would go ahead and rename src to dst. Once done, it would also
++     * lookup the base shard of src to get the ia_size and ia_blocks xattr
++     * values.
++     */
++    shard_rename_src_base_file(frame, this);
++  }
++  return 0;
++  shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
++int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                     int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
++                     struct iatt *stbuf, struct iatt *preparent,
++                     struct iatt *postparent, dict_t *xdata) {
++  int ret = -1;
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
+-    }
++  if (op_ret == -1)
++    goto unwind;
+-    if (!local->eexist_count) {
+-        shard_readv_do(frame, this);
+-    } else {
+-        local->call_count = local->eexist_count;
+-        shard_common_lookup_shards(frame, this, local->loc.inode,
+-                                   shard_post_lookup_shards_readv_handler);
+-    }
+-    return 0;
++  ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size,
++                            SHARD_ALL_MASK);
++  if (ret)
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
++           "Failed to set inode "
++           "ctx for %s",
++           uuid_utoa(inode->gfid));
++  SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
++                     preparent, postparent, xdata);
++  return 0;
+ }
+-shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
++int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++                 mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) {
++  shard_priv_t *priv = NULL;
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  priv = this->private;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
+-    if (local->op_ret < 0) {
+-        if (local->op_errno != ENOENT) {
+-            shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                        local->op_errno);
+-            return 0;
+-        } else {
+-            struct iovec vec = {
+-                0,
+-            };
+-            vec.iov_base = local->iobuf->ptr;
+-            vec.iov_len = local->total_size;
+-            local->op_ret = local->total_size;
+-            SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1,
+-                               &local->prebuf, local->iobref, NULL);
+-            return 0;
+-        }
+-    }
++  frame->local = local;
++  local->block_size = priv->block_size;
+-    if (local->call_count) {
+-        shard_common_lookup_shards(frame, this, local->resolver_base_inode,
+-                                   shard_post_lookup_shards_readv_handler);
+-    } else {
+-        shard_readv_do(frame, this);
+-    }
++  if (!__is_gsyncd_on_shard_dir(frame, loc)) {
++    SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
++  }
+-    return 0;
++  STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
++             xdata);
++  return 0;
++  shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM);
++  return 0;
++int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                   int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) {
++  /* To-Do: Handle open with O_TRUNC under locks */
++  SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
++  return 0;
++int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++               fd_t *fd, dict_t *xdata) {
++  STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
++  return 0;
++int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                       int32_t op_ret, int32_t op_errno, struct iovec *vector,
++                       int32_t count, struct iatt *stbuf, struct iobref *iobref,
++                       dict_t *xdata) {
++  int i = 0;
++  int call_count = 0;
++  void *address = NULL;
++  uint64_t block_num = 0;
++  off_t off = 0;
++  struct iovec vec = {
++      0,
++  };
++  shard_local_t *local = NULL;
++  fd_t *anon_fd = cookie;
++  shard_inode_ctx_t *ctx = NULL;
++  local = frame->local;
++  /* If shard has already seen a failure here before, there is no point
++   * in aggregating subsequent reads, so just go to out.
++   */
++  if (local->op_ret < 0)
++    goto out;
++  if (op_ret < 0) {
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto out;
++  }
++  if (local->op_ret >= 0)
++    local->op_ret += op_ret;
+-shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this)
+-    int ret = 0;
+-    struct iobuf *iobuf = NULL;
+-    shard_local_t *local = NULL;
+-    shard_priv_t *priv = NULL;
++  shard_inode_ctx_get(anon_fd->inode, this, &ctx);
++  block_num = ctx->block_num;
++  if (block_num == local->first_block) {
++    address = local->iobuf->ptr;
++  } else {
++    /* else
++     * address to start writing to = beginning of buffer +
++     *                    number of bytes until end of first block +
++     *                    + block_size times number of blocks
++     *                    between the current block and the first
++     */
++    address = (char *)local->iobuf->ptr +
++              (local->block_size - (local->offset % local->block_size)) +
++              ((block_num - local->first_block - 1) * local->block_size);
++  }
+-    priv = this->private;
+-    local = frame->local;
++  for (i = 0; i < count; i++) {
++    address = (char *)address + off;
++    memcpy(address, vector[i].iov_base, vector[i].iov_len);
++    off += vector[i].iov_len;
++  }
++  if (anon_fd)
++    fd_unref(anon_fd);
++  call_count = shard_call_count_return(frame);
++  if (call_count == 0) {
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
+     if (local->op_ret < 0) {
+-        shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
++      shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                  local->op_errno);
++    } else {
++      if (xdata)
++        local->xattr_rsp = dict_ref(xdata);
++      vec.iov_base = local->iobuf->ptr;
++      vec.iov_len = local->total_size;
++      local->op_ret = local->total_size;
++      SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1,
++                         &local->prebuf, local->iobref, local->xattr_rsp);
++      return 0;
++    }
++  }
++  return 0;
++int shard_readv_do(call_frame_t *frame, xlator_t *this) {
++  int i = 0;
++  int call_count = 0;
++  int last_block = 0;
++  int cur_block = 0;
++  off_t orig_offset = 0;
++  off_t shard_offset = 0;
++  size_t read_size = 0;
++  size_t remaining_size = 0;
++  fd_t *fd = NULL;
++  fd_t *anon_fd = NULL;
++  shard_local_t *local = NULL;
++  gf_boolean_t wind_failed = _gf_false;
++  local = frame->local;
++  fd = local->fd;
++  orig_offset = local->offset;
++  cur_block = local->first_block;
++  last_block = local->last_block;
++  remaining_size = local->total_size;
++  local->call_count = call_count = local->num_blocks;
++  SHARD_SET_ROOT_FS_ID(frame, local);
++  if (fd->flags & O_DIRECT)
++    local->flags = O_DIRECT;
++  while (cur_block <= last_block) {
++    if (wind_failed) {
++      shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0,
++                         NULL, NULL, NULL);
++      goto next;
++    }
++    shard_offset = orig_offset % local->block_size;
++    read_size = local->block_size - shard_offset;
++    if (read_size > remaining_size)
++      read_size = remaining_size;
++    remaining_size -= read_size;
++    if (cur_block == 0) {
++      anon_fd = fd_ref(fd);
++    } else {
++      anon_fd = fd_anonymous(local->inode_list[i]);
++      if (!anon_fd) {
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        wind_failed = _gf_true;
++        shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL,
++                           0, NULL, NULL, NULL);
++        goto next;
++      }
+     }
+-    if (local->offset >= local->prebuf.ia_size) {
+-        /* If the read is being performed past the end of the file,
+-         * unwind the FOP with 0 bytes read as status.
+-         */
+-        struct iovec vec = {
+-            0,
+-        };
+-        iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size);
+-        if (!iobuf)
+-            goto err;
+-        vec.iov_base = iobuf->ptr;
+-        vec.iov_len = 0;
+-        local->iobref = iobref_new();
+-        iobref_add(local->iobref, iobuf);
+-        iobuf_unref(iobuf);
+-        SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf,
+-                           local->iobref, NULL);
+-        return 0;
+-    }
++    STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this),
++                      FIRST_CHILD(this)->fops->readv, anon_fd, read_size,
++                      shard_offset, local->flags, local->xattr_req);
++    orig_offset += read_size;
++  next:
++    cur_block++;
++    i++;
++    call_count--;
++  }
++  return 0;
+-    local->first_block = get_lowest_block(local->offset, local->block_size);
++int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno, inode_t *inode,
++                           struct iatt *buf, struct iatt *preparent,
++                           struct iatt *postparent, dict_t *xdata) {
++  int shard_block_num = (long)cookie;
++  int call_count = 0;
++  shard_local_t *local = NULL;
++  local = frame->local;
++  if (op_ret < 0) {
++    if (op_errno == EEXIST) {
++      LOCK(&frame->lock);
++      { local->eexist_count++; }
++      UNLOCK(&frame->lock);
++    } else {
++      local->op_ret = op_ret;
++      local->op_errno = op_errno;
++    }
++    gf_msg_debug(this->name, 0, "mknod of shard %d "
++                                "failed: %s",
++                 shard_block_num, strerror(op_errno));
++    goto done;
++  }
+-    local->total_size = local->req_size;
++  shard_link_block_inode(local, shard_block_num, inode, buf);
+-    local->last_block = get_highest_block(local->offset, local->total_size,
+-                                          local->block_size);
++  call_count = shard_call_count_return(frame);
++  if (call_count == 0) {
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
++    local->create_count = 0;
++    local->post_mknod_handler(frame, this);
++  }
++  return 0;
++int shard_common_resume_mknod(
++    call_frame_t *frame, xlator_t *this,
++    shard_post_mknod_fop_handler_t post_mknod_handler) {
++  int i = 0;
++  int shard_idx_iter = 0;
++  int last_block = 0;
++  int ret = 0;
++  int call_count = 0;
++  char path[PATH_MAX] = {
++      0,
++  };
++  mode_t mode = 0;
++  char *bname = NULL;
++  shard_priv_t *priv = NULL;
++  shard_inode_ctx_t ctx_tmp = {
++      0,
++  };
++  shard_local_t *local = NULL;
++  gf_boolean_t wind_failed = _gf_false;
++  fd_t *fd = NULL;
++  loc_t loc = {
++      0,
++  };
++  dict_t *xattr_req = NULL;
++  local = frame->local;
++  priv = this->private;
++  fd = local->fd;
++  shard_idx_iter = local->first_block;
++  last_block = local->last_block;
++  call_count = local->call_count = local->create_count;
++  local->post_mknod_handler = post_mknod_handler;
++  SHARD_SET_ROOT_FS_ID(frame, local);
++  ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get inode "
++           "ctx for %s",
++           uuid_utoa(fd->inode->gfid));
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    goto err;
++  }
++  mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
+-    local->num_blocks = local->last_block - local->first_block + 1;
+-    local->resolver_base_inode = local->loc.inode;
++  while (shard_idx_iter <= last_block) {
++    if (local->inode_list[i]) {
++      shard_idx_iter++;
++      i++;
++      continue;
++    }
+-    local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+-                                  gf_shard_mt_inode_list);
+-    if (!local->inode_list)
+-        goto err;
++    if (wind_failed) {
++      shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
++                             ENOMEM, NULL, NULL, NULL, NULL, NULL);
++      goto next;
++    }
+-    iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size);
+-    if (!iobuf)
+-        goto err;
++    shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
++                             sizeof(path));
+-    local->iobref = iobref_new();
+-    if (!local->iobref) {
+-        iobuf_unref(iobuf);
+-        goto err;
++    xattr_req = shard_create_gfid_dict(local->xattr_req);
++    if (!xattr_req) {
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++      wind_failed = _gf_true;
++      shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
++                             ENOMEM, NULL, NULL, NULL, NULL, NULL);
++      goto next;
+     }
+-    if (iobref_add(local->iobref, iobuf) != 0) {
+-        iobuf_unref(iobuf);
+-        goto err;
++    bname = strrchr(path, '/') + 1;
++    loc.inode = inode_new(this->itable);
++    loc.parent = inode_ref(priv->dot_shard_inode);
++    ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++    if (ret < 0 || !(loc.inode)) {
++      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++             "Inode path failed"
++             "on %s, base file gfid = %s",
++             bname, uuid_utoa(fd->inode->gfid));
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++      wind_failed = _gf_true;
++      loc_wipe(&loc);
++      dict_unref(xattr_req);
++      shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
++                             ENOMEM, NULL, NULL, NULL, NULL, NULL);
++      goto next;
+     }
+-    memset(iobuf->ptr, 0, local->total_size);
+-    iobuf_unref(iobuf);
+-    local->iobuf = iobuf;
++ = strrchr(loc.path, '/');
++    if (
+-    local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+-    if (!local->dot_shard_loc.inode) {
+-        ret = shard_init_internal_dir_loc(this, local,
+-                                          SHARD_INTERNAL_DIR_DOT_SHARD);
+-        if (ret)
+-            goto err;
+-        shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler,
+-                                  SHARD_INTERNAL_DIR_DOT_SHARD);
+-    } else {
+-        local->post_res_handler = shard_post_resolve_readv_handler;
+-        shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+-    }
+-    return 0;
++    STACK_WIND_COOKIE(frame, shard_common_mknod_cbk,
++                      (void *)(long)shard_idx_iter, FIRST_CHILD(this),
++                      FIRST_CHILD(this)->fops->mknod, &loc, mode,
++                      ctx_tmp.stat.ia_rdev, 0, xattr_req);
++    loc_wipe(&loc);
++    dict_unref(xattr_req);
++  next:
++    shard_idx_iter++;
++    i++;
++    if (!--call_count)
++      break;
++  }
++  return 0;
+ err:
+-    shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
+-    return 0;
++  /*
++   * This block is for handling failure in shard_inode_ctx_get_all().
++   * Failures in the while-loop are handled within the loop.
++   */
++  SHARD_UNSET_ROOT_FS_ID(frame, local);
++  post_mknod_handler(frame, this);
++  return 0;
+ }
+-shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+-            off_t offset, uint32_t flags, dict_t *xdata)
+-    int ret = 0;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this);
+-    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size for %s from its inode ctx",
+-               uuid_utoa(fd->inode->gfid));
+-        goto err;
+-    }
++int shard_post_lookup_shards_readv_handler(call_frame_t *frame,
++                                           xlator_t *this) {
++  shard_local_t *local = NULL;
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        /* block_size = 0 means that the file was created before
+-         * sharding was enabled on the volume.
+-         */
+-        STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+-                   xdata);
+-        return 0;
+-    }
++  local = frame->local;
+-    if (!this->itable)
+-        this->itable = fd->inode->table;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                local->op_errno);
++    return 0;
++  }
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
++  if (local->create_count) {
++    shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler);
++  } else {
++    shard_readv_do(frame, this);
++  }
+-    frame->local = local;
++  return 0;
+-    ret = syncbarrier_init(&local->barrier);
+-    if (ret)
+-        goto err;
+-    local->fd = fd_ref(fd);
+-    local->block_size = block_size;
+-    local->offset = offset;
+-    local->req_size = size;
+-    local->flags = flags;
+-    local->fop = GF_FOP_READ;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-    local->loc.inode = inode_ref(fd->inode);
+-    gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++  local = frame->local;
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_readv_handler);
+-    return 0;
+-    shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                local->op_errno);
+     return 0;
++  }
++  if (!local->eexist_count) {
++    shard_readv_do(frame, this);
++  } else {
++    local->call_count = local->eexist_count;
++    shard_common_lookup_shards(frame, this, local->loc.inode,
++                               shard_post_lookup_shards_readv_handler);
++  }
++  return 0;
+ }
+-shard_common_inode_write_post_update_size_handler(call_frame_t *frame,
+-                                                  xlator_t *this)
+-    shard_local_t *local = NULL;
++int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                    local->op_errno);
++  if (local->op_ret < 0) {
++    if (local->op_errno != ENOENT) {
++      shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                  local->op_errno);
++      return 0;
+     } else {
+-        shard_common_inode_write_success_unwind(local->fop, frame,
+-                                                local->written_size);
++      struct iovec vec = {
++          0,
++      };
++      vec.iov_base = local->iobuf->ptr;
++      vec.iov_len = local->total_size;
++      local->op_ret = local->total_size;
++      SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1,
++                         &local->prebuf, local->iobref, NULL);
++      return 0;
+     }
+-    return 0;
++  }
+-static gf_boolean_t
+-shard_is_appending_write(shard_local_t *local)
+-    if (local->fop != GF_FOP_WRITE)
+-        return _gf_false;
+-    if (local->flags & O_APPEND)
+-        return _gf_true;
+-    if (local->fd->flags & O_APPEND)
+-        return _gf_true;
+-    return _gf_false;
++  if (local->call_count) {
++    shard_common_lookup_shards(frame, this, local->resolver_base_inode,
++                               shard_post_lookup_shards_readv_handler);
++  } else {
++    shard_readv_do(frame, this);
++  }
++  return 0;
+ }
+-__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
+-                                      xlator_t *this)
+-    int ret = -1;
+-    uint64_t ctx_uint = 0;
+-    shard_inode_ctx_t *ctx = NULL;
++int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) {
++  int ret = 0;
++  struct iobuf *iobuf = NULL;
++  shard_local_t *local = NULL;
++  shard_priv_t *priv = NULL;
++  priv = this->private;
++  local = frame->local;
+-    ret = __inode_ctx_get(inode, this, &ctx_uint);
+-    if (ret < 0)
+-        return ret;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                local->op_errno);
++    return 0;
++  }
+-    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++  if (local->offset >= local->prebuf.ia_size) {
++    /* If the read is being performed past the end of the file,
++     * unwind the FOP with 0 bytes read as status.
++     */
++    struct iovec vec = {
++        0,
++    };
+-    if (shard_is_appending_write(local)) {
+-        local->delta_size = local->total_size;
+-    } else if (local->offset + local->total_size > ctx->stat.ia_size) {
+-        local->delta_size = (local->offset + local->total_size) -
+-                            ctx->stat.ia_size;
+-    } else {
+-        local->delta_size = 0;
+-    }
+-    ctx->stat.ia_size += (local->delta_size);
+-    local->postbuf = ctx->stat;
++    iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size);
++    if (!iobuf)
++      goto err;
++    vec.iov_base = iobuf->ptr;
++    vec.iov_len = 0;
++    local->iobref = iobref_new();
++    iobref_add(local->iobref, iobuf);
++    iobuf_unref(iobuf);
++    SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf,
++                       local->iobref, NULL);
+     return 0;
++  }
+-shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
+-                                    xlator_t *this)
+-    int ret = -1;
++  local->first_block = get_lowest_block(local->offset, local->block_size);
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_get_delta_size_from_inode_ctx(local, inode, this);
+-    }
+-    UNLOCK(&inode->lock);
++  local->total_size = local->req_size;
+-    return ret;
++  local->last_block =
++      get_highest_block(local->offset, local->total_size, local->block_size);
+-shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
+-                                xlator_t *this, int32_t op_ret,
+-                                int32_t op_errno, struct iatt *pre,
+-                                struct iatt *post, dict_t *xdata)
+-    int call_count = 0;
+-    fd_t *anon_fd = cookie;
+-    shard_local_t *local = NULL;
+-    glusterfs_fop_t fop = 0;
++  local->num_blocks = local->last_block - local->first_block + 1;
++  local->resolver_base_inode = local->loc.inode;
+-    local = frame->local;
+-    fop = local->fop;
++  local->inode_list =
++      GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
++  if (!local->inode_list)
++    goto err;
+-    LOCK(&frame->lock);
+-    {
+-        if (op_ret < 0) {
+-            local->op_ret = op_ret;
+-            local->op_errno = op_errno;
+-        } else {
+-            local->written_size += op_ret;
+-            GF_ATOMIC_ADD(local->delta_blocks,
+-                          post->ia_blocks - pre->ia_blocks);
+-            local->delta_size += (post->ia_size - pre->ia_size);
+-            shard_inode_ctx_set(local->fd->inode, this, post, 0,
+-                                SHARD_MASK_TIMES);
+-            if (local->fd->inode != anon_fd->inode)
+-                shard_inode_ctx_add_to_fsync_list(local->fd->inode, this,
+-                                                  anon_fd->inode);
+-        }
+-    }
+-    UNLOCK(&frame->lock);
++  iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size);
++  if (!iobuf)
++    goto err;
+-    if (anon_fd)
+-        fd_unref(anon_fd);
++  local->iobref = iobref_new();
++  if (!local->iobref) {
++    iobuf_unref(iobuf);
++    goto err;
++  }
+-    call_count = shard_call_count_return(frame);
+-    if (call_count == 0) {
+-        SHARD_UNSET_ROOT_FS_ID(frame, local);
+-        if (local->op_ret < 0) {
+-            shard_common_failure_unwind(fop, frame, local->op_ret,
+-                                        local->op_errno);
+-        } else {
+-            shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this);
+-            local->hole_size = 0;
+-            if (xdata)
+-                local->xattr_rsp = dict_ref(xdata);
+-            shard_update_file_size(
+-                frame, this, local->fd, NULL,
+-                shard_common_inode_write_post_update_size_handler);
+-        }
+-    }
++  if (iobref_add(local->iobref, iobuf) != 0) {
++    iobuf_unref(iobuf);
++    goto err;
++  }
+-    return 0;
++  memset(iobuf->ptr, 0, local->total_size);
++  iobuf_unref(iobuf);
++  local->iobuf = iobuf;
++  local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++  if (!local->dot_shard_loc.inode) {
++    ret =
++        shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
++    if (ret)
++      goto err;
++    shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler,
++                              SHARD_INTERNAL_DIR_DOT_SHARD);
++  } else {
++    local->post_res_handler = shard_post_resolve_readv_handler;
++    shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++  }
++  return 0;
++  shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++  return 0;
++int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++                off_t offset, uint32_t flags, dict_t *xdata) {
++  int ret = 0;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
++  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size for %s from its inode ctx",
++           uuid_utoa(fd->inode->gfid));
++    goto err;
++  }
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    /* block_size = 0 means that the file was created before
++     * sharding was enabled on the volume.
++     */
++    STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
++    return 0;
++  }
++  if (!this->itable)
++    this->itable = fd->inode->table;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
++  frame->local = local;
++  ret = syncbarrier_init(&local->barrier);
++  if (ret)
++    goto err;
++  local->fd = fd_ref(fd);
++  local->block_size = block_size;
++  local->offset = offset;
++  local->req_size = size;
++  local->flags = flags;
++  local->fop = GF_FOP_READ;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  local->loc.inode = inode_ref(fd->inode);
++  gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++  shard_lookup_base_file(frame, this, &local->loc,
++                         shard_post_lookup_readv_handler);
++  return 0;
++  shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                              struct iovec *vec, int count, off_t shard_offset,
+-                              size_t size)
+-    shard_local_t *local = NULL;
++int shard_common_inode_write_post_update_size_handler(call_frame_t *frame,
++                                                      xlator_t *this) {
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  local = frame->local;
+-    switch (local->fop) {
+-        case GF_FOP_WRITE:
+-            STACK_WIND_COOKIE(
+-                frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
+-                FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset,
+-                local->flags, local->iobref, local->xattr_req);
+-            break;
+-        case GF_FOP_FALLOCATE:
+-            STACK_WIND_COOKIE(
+-                frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
+-                FIRST_CHILD(this)->fops->fallocate, fd, local->flags,
+-                shard_offset, size, local->xattr_req);
+-            break;
+-        case GF_FOP_ZEROFILL:
+-            STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+-                              FIRST_CHILD(this),
+-                              FIRST_CHILD(this)->fops->zerofill, fd,
+-                              shard_offset, size, local->xattr_req);
+-            break;
+-        case GF_FOP_DISCARD:
+-            STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+-                              FIRST_CHILD(this),
+-                              FIRST_CHILD(this)->fops->discard, fd,
+-                              shard_offset, size, local->xattr_req);
+-            break;
+-        default:
+-            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-                   "Invalid fop id = %d", local->fop);
+-            break;
+-    }
+-    return 0;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
++  } else {
++    shard_common_inode_write_success_unwind(local->fop, frame,
++                                            local->written_size);
++  }
++  return 0;
+ }
+-shard_common_inode_write_do(call_frame_t *frame, xlator_t *this)
+-    int i = 0;
+-    int count = 0;
+-    int call_count = 0;
+-    int last_block = 0;
+-    uint32_t cur_block = 0;
+-    fd_t *fd = NULL;
+-    fd_t *anon_fd = NULL;
+-    shard_local_t *local = NULL;
+-    struct iovec *vec = NULL;
+-    gf_boolean_t wind_failed = _gf_false;
+-    gf_boolean_t odirect = _gf_false;
+-    off_t orig_offset = 0;
+-    off_t shard_offset = 0;
+-    off_t vec_offset = 0;
+-    size_t remaining_size = 0;
+-    size_t shard_write_size = 0;
+-    local = frame->local;
+-    fd = local->fd;
+-    orig_offset = local->offset;
+-    remaining_size = local->total_size;
+-    cur_block = local->first_block;
+-    local->call_count = call_count = local->num_blocks;
+-    last_block = local->last_block;
+-    SHARD_SET_ROOT_FS_ID(frame, local);
+-    if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC
+-               " into "
+-               "dict: %s",
+-               uuid_utoa(fd->inode->gfid));
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        local->call_count = 1;
+-        shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+-                                        ENOMEM, NULL, NULL, NULL);
+-        return 0;
+-    }
++static gf_boolean_t shard_is_appending_write(shard_local_t *local) {
++  if (local->fop != GF_FOP_WRITE)
++    return _gf_false;
++  if (local->flags & O_APPEND)
++    return _gf_true;
++  if (local->fd->flags & O_APPEND)
++    return _gf_true;
++  return _gf_false;
+-    if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
+-        odirect = _gf_true;
++int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
++                                          xlator_t *this) {
++  int ret = -1;
++  uint64_t ctx_uint = 0;
++  shard_inode_ctx_t *ctx = NULL;
+-    while (cur_block <= last_block) {
+-        if (wind_failed) {
+-            shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+-                                            ENOMEM, NULL, NULL, NULL);
+-            goto next;
+-        }
++  ret = __inode_ctx_get(inode, this, &ctx_uint);
++  if (ret < 0)
++    return ret;
+-        shard_offset = orig_offset % local->block_size;
+-        shard_write_size = local->block_size - shard_offset;
+-        if (shard_write_size > remaining_size)
+-            shard_write_size = remaining_size;
+-        remaining_size -= shard_write_size;
+-        if (local->fop == GF_FOP_WRITE) {
+-            count = iov_subset(local->vector, local->count, vec_offset,
+-                               vec_offset + shard_write_size, NULL);
+-            vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
+-            if (!vec) {
+-                local->op_ret = -1;
+-                local->op_errno = ENOMEM;
+-                wind_failed = _gf_true;
+-                GF_FREE(vec);
+-                shard_common_inode_write_do_cbk(frame, (void *)(long)0, this,
+-                                                -1, ENOMEM, NULL, NULL, NULL);
+-                goto next;
+-            }
+-            count = iov_subset(local->vector, local->count, vec_offset,
+-                               vec_offset + shard_write_size, vec);
+-        }
++  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-        if (cur_block == 0) {
+-            anon_fd = fd_ref(fd);
+-        } else {
+-            anon_fd = fd_anonymous(local->inode_list[i]);
+-            if (!anon_fd) {
+-                local->op_ret = -1;
+-                local->op_errno = ENOMEM;
+-                wind_failed = _gf_true;
+-                GF_FREE(vec);
+-                shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd,
+-                                                this, -1, ENOMEM, NULL, NULL,
+-                                                NULL);
+-                goto next;
+-            }
+-            if (local->fop == GF_FOP_WRITE) {
+-                if (odirect)
+-                    local->flags = O_DIRECT;
+-                else
+-                    local->flags = GF_ANON_FD_FLAGS;
+-            }
+-        }
++  if (shard_is_appending_write(local)) {
++    local->delta_size = local->total_size;
++  } else if (local->offset + local->total_size > ctx->stat.ia_size) {
++    local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size;
++  } else {
++    local->delta_size = 0;
++  }
++  ctx->stat.ia_size += (local->delta_size);
++  local->postbuf = ctx->stat;
+-        shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
+-                                      shard_offset, shard_write_size);
+-        if (vec)
+-            vec_offset += shard_write_size;
+-        orig_offset += shard_write_size;
+-        GF_FREE(vec);
+-        vec = NULL;
+-    next:
+-        cur_block++;
+-        i++;
+-        call_count--;
+-    }
+-    return 0;
++  return 0;
+ }
+-shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
+-                                            xlator_t *this);
++int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
++                                        xlator_t *this) {
++  int ret = -1;
++  LOCK(&inode->lock);
++  { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); }
++  UNLOCK(&inode->lock);
+-shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+-                                                    xlator_t *this)
+-    shard_local_t *local = NULL;
++  return ret;
+-    local = frame->local;
++int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
++                                    xlator_t *this, int32_t op_ret,
++                                    int32_t op_errno, struct iatt *pre,
++                                    struct iatt *post, dict_t *xdata) {
++  int call_count = 0;
++  fd_t *anon_fd = cookie;
++  shard_local_t *local = NULL;
++  glusterfs_fop_t fop = 0;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
+-    }
++  local = frame->local;
++  fop = local->fop;
+-    if (local->create_count) {
+-        shard_common_resume_mknod(frame, this,
+-                                  shard_common_inode_write_post_mknod_handler);
++  LOCK(&frame->lock);
++  {
++    if (op_ret < 0) {
++      local->op_ret = op_ret;
++      local->op_errno = op_errno;
+     } else {
+-        shard_common_inode_write_do(frame, this);
++      local->written_size += op_ret;
++      GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks);
++      local->delta_size += (post->ia_size - pre->ia_size);
++      shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES);
++      if (local->fd->inode != anon_fd->inode)
++        shard_inode_ctx_add_to_fsync_list(local->fd->inode, this,
++                                          anon_fd->inode);
++    }
++  }
++  UNLOCK(&frame->lock);
++  if (anon_fd)
++    fd_unref(anon_fd);
++  call_count = shard_call_count_return(frame);
++  if (call_count == 0) {
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
++    if (local->op_ret < 0) {
++      shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno);
++    } else {
++      shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this);
++      local->hole_size = 0;
++      if (xdata)
++        local->xattr_rsp = dict_ref(xdata);
++      shard_update_file_size(frame, this, local->fd, NULL,
++                             shard_common_inode_write_post_update_size_handler);
+     }
++  }
+-    return 0;
++  return 0;
+ }
+-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
++int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                                  struct iovec *vec, int count,
++                                  off_t shard_offset, size_t size) {
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  local = frame->local;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
+-    }
++  switch (local->fop) {
++  case GF_FOP_WRITE:
++    STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd,
++                      vec, count, shard_offset, local->flags, local->iobref,
++                      local->xattr_req);
++    break;
++    STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd,
++                      local->flags, shard_offset, size, local->xattr_req);
++    break;
++    STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd,
++                      shard_offset, size, local->xattr_req);
++    break;
++  case GF_FOP_DISCARD:
++    STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd,
++                      shard_offset, size, local->xattr_req);
++    break;
++  default:
++    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++           "Invalid fop id = %d", local->fop);
++    break;
++  }
++  return 0;
++int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) {
++  int i = 0;
++  int count = 0;
++  int call_count = 0;
++  int last_block = 0;
++  uint32_t cur_block = 0;
++  fd_t *fd = NULL;
++  fd_t *anon_fd = NULL;
++  shard_local_t *local = NULL;
++  struct iovec *vec = NULL;
++  gf_boolean_t wind_failed = _gf_false;
++  gf_boolean_t odirect = _gf_false;
++  off_t orig_offset = 0;
++  off_t shard_offset = 0;
++  off_t vec_offset = 0;
++  size_t remaining_size = 0;
++  size_t shard_write_size = 0;
++  local = frame->local;
++  fd = local->fd;
++  orig_offset = local->offset;
++  remaining_size = local->total_size;
++  cur_block = local->first_block;
++  local->call_count = call_count = local->num_blocks;
++  last_block = local->last_block;
++  SHARD_SET_ROOT_FS_ID(frame, local);
++  if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into "
++           "dict: %s",
++           uuid_utoa(fd->inode->gfid));
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    local->call_count = 1;
++    shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM,
++                                    NULL, NULL, NULL);
++    return 0;
++  }
+-    if (!local->eexist_count) {
+-        shard_common_inode_write_do(frame, this);
+-    } else {
+-        local->call_count = local->eexist_count;
+-        shard_common_lookup_shards(
+-            frame, this, local->loc.inode,
+-            shard_common_inode_write_post_lookup_shards_handler);
++  if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
++    odirect = _gf_true;
++  while (cur_block <= last_block) {
++    if (wind_failed) {
++      shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM,
++                                      NULL, NULL, NULL);
++      goto next;
+     }
+-    return 0;
++    shard_offset = orig_offset % local->block_size;
++    shard_write_size = local->block_size - shard_offset;
++    if (shard_write_size > remaining_size)
++      shard_write_size = remaining_size;
+-shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
+-                                              xlator_t *this)
+-    shard_local_t *local = NULL;
++    remaining_size -= shard_write_size;
+-    local = frame->local;
++    if (local->fop == GF_FOP_WRITE) {
++      count = iov_subset(local->vector, local->count, vec_offset,
++                         vec_offset + shard_write_size, NULL);
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
++      vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
++      if (!vec) {
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        wind_failed = _gf_true;
++        GF_FREE(vec);
++        shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
++                                        ENOMEM, NULL, NULL, NULL);
++        goto next;
++      }
++      count = iov_subset(local->vector, local->count, vec_offset,
++                         vec_offset + shard_write_size, vec);
+     }
+-    if (local->call_count) {
+-        shard_common_lookup_shards(
+-            frame, this, local->resolver_base_inode,
+-            shard_common_inode_write_post_lookup_shards_handler);
++    if (cur_block == 0) {
++      anon_fd = fd_ref(fd);
+     } else {
+-        shard_common_inode_write_do(frame, this);
+-    }
++      anon_fd = fd_anonymous(local->inode_list[i]);
++      if (!anon_fd) {
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        wind_failed = _gf_true;
++        GF_FREE(vec);
++        shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1,
++                                        ENOMEM, NULL, NULL, NULL);
++        goto next;
++      }
+-    return 0;
++      if (local->fop == GF_FOP_WRITE) {
++        if (odirect)
++          local->flags = O_DIRECT;
++        else
++          local->flags = GF_ANON_FD_FLAGS;
++      }
++    }
++    shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
++                                  shard_offset, shard_write_size);
++    if (vec)
++      vec_offset += shard_write_size;
++    orig_offset += shard_write_size;
++    GF_FREE(vec);
++    vec = NULL;
++  next:
++    cur_block++;
++    i++;
++    call_count--;
++  }
++  return 0;
+ }
+-shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
+-                                             xlator_t *this)
+-    shard_local_t *local = frame->local;
+-    shard_priv_t *priv = this->private;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
+-    }
+-    local->postbuf = local->prebuf;
+-    /*Adjust offset to EOF so that correct shard is chosen for append*/
+-    if (shard_is_appending_write(local))
+-        local->offset = local->prebuf.ia_size;
+-    local->first_block = get_lowest_block(local->offset, local->block_size);
+-    local->last_block = get_highest_block(local->offset, local->total_size,
+-                                          local->block_size);
+-    local->num_blocks = local->last_block - local->first_block + 1;
+-    local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+-                                  gf_shard_mt_inode_list);
+-    if (!local->inode_list) {
+-        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-        return 0;
+-    }
++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
++                                                xlator_t *this);
+-    gf_msg_trace(this->name, 0,
+-                 "%s: gfid=%s first_block=%" PRIu32
+-                 " "
+-                 "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64
+-                 " total_size=%zu flags=%" PRId32 "",
+-                 gf_fop_list[local->fop],
+-                 uuid_utoa(local->resolver_base_inode->gfid),
+-                 local->first_block, local->last_block, local->num_blocks,
+-                 local->offset, local->total_size, local->flags);
++int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
++                                                        xlator_t *this) {
++  shard_local_t *local = NULL;
+-    local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++  local = frame->local;
+-    if (!local->dot_shard_loc.inode) {
+-        /*change handler*/
+-        shard_mkdir_internal_dir(frame, this,
+-                                 shard_common_inode_write_post_resolve_handler,
+-                                 SHARD_INTERNAL_DIR_DOT_SHARD);
+-    } else {
+-        /*change handler*/
+-        local->post_res_handler = shard_common_inode_write_post_resolve_handler;
+-        shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+-    }
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
+     return 0;
+-shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                             int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                             struct iatt *buf, struct iatt *preparent,
+-                             struct iatt *postparent, dict_t *xdata)
+-    inode_t *link_inode = NULL;
+-    shard_local_t *local = NULL;
+-    shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++  }
+-    local = frame->local;
++  if (local->create_count) {
++    shard_common_resume_mknod(frame, this,
++                              shard_common_inode_write_post_mknod_handler);
++  } else {
++    shard_common_inode_write_do(frame, this);
++  }
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
++  return 0;
+-    if (op_ret == -1) {
+-        if (op_errno != EEXIST) {
+-            local->op_ret = op_ret;
+-            local->op_errno = op_errno;
+-            goto unwind;
+-        } else {
+-            gf_msg_debug(this->name, 0,
+-                         "mkdir on %s failed "
+-                         "with EEXIST. Attempting lookup now",
+-                         shard_internal_dir_string(type));
+-            shard_lookup_internal_dir(frame, this, local->post_res_handler,
+-                                      type);
+-            return 0;
+-        }
+-    }
++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
++                                                xlator_t *this) {
++  shard_local_t *local = NULL;
+-    link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+-    if (link_inode != inode) {
+-        shard_refresh_internal_dir(frame, this, type);
+-    } else {
+-        shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+-        shard_common_resolve_shards(frame, this, local->post_res_handler);
+-    }
+-    return 0;
+-    shard_common_resolve_shards(frame, this, local->post_res_handler);
+-    return 0;
++  local = frame->local;
+-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+-                         shard_post_resolve_fop_handler_t handler,
+-                         shard_internal_dir_type_t type)
+-    int ret = -1;
+-    shard_local_t *local = NULL;
+-    shard_priv_t *priv = NULL;
+-    dict_t *xattr_req = NULL;
+-    uuid_t *gfid = NULL;
+-    loc_t *loc = NULL;
+-    gf_boolean_t free_gfid = _gf_true;
+-    local = frame->local;
+-    priv = this->private;
+-    local->post_res_handler = handler;
+-    gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+-    if (!gfid)
+-        goto err;
+-    switch (type) {
+-            gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+-            loc = &local->dot_shard_loc;
+-            break;
+-            gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+-            loc = &local->dot_shard_rm_loc;
+-            break;
+-        default:
+-            bzero(*gfid, sizeof(uuid_t));
+-            break;
+-    }
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
++    return 0;
++  }
+-    xattr_req = dict_new();
+-    if (!xattr_req)
+-        goto err;
++  if (!local->eexist_count) {
++    shard_common_inode_write_do(frame, this);
++  } else {
++    local->call_count = local->eexist_count;
++    shard_common_lookup_shards(
++        frame, this, local->loc.inode,
++        shard_common_inode_write_post_lookup_shards_handler);
++  }
+-    ret = shard_init_internal_dir_loc(this, local, type);
+-    if (ret)
+-        goto err;
++  return 0;
+-    ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-               "Failed to set gfid-req for %s",
+-               shard_internal_dir_string(type));
+-        goto err;
+-    } else {
+-        free_gfid = _gf_false;
+-    }
++int shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
++                                                  xlator_t *this) {
++  shard_local_t *local = NULL;
+-    SHARD_SET_ROOT_FS_ID(frame, local);
++  local = frame->local;
+-    STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
+-                      0755, 0, xattr_req);
+-    dict_unref(xattr_req);
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
+     return 0;
++  }
+-    if (xattr_req)
+-        dict_unref(xattr_req);
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    if (free_gfid)
+-        GF_FREE(gfid);
+-    handler(frame, this);
+-    return 0;
++  if (local->call_count) {
++    shard_common_lookup_shards(
++        frame, this, local->resolver_base_inode,
++        shard_common_inode_write_post_lookup_shards_handler);
++  } else {
++    shard_common_inode_write_do(frame, this);
++  }
+-shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-    /* To-Do: Wind flush on all shards of the file */
+-    SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+-    return 0;
++  return 0;
+ }
+-shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+-    STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->flush, fd, xdata);
++int shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
++                                                 xlator_t *this) {
++  shard_local_t *local = frame->local;
++  shard_priv_t *priv = this->private;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
+     return 0;
++  }
+-__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+-                                      xlator_t *this)
+-    int ret = -1;
+-    uint64_t ctx_uint = 0;
+-    shard_inode_ctx_t *ctx = NULL;
++  local->postbuf = local->prebuf;
+-    ret = __inode_ctx_get(inode, this, &ctx_uint);
+-    if (ret < 0)
+-        return ret;
++  /*Adjust offset to EOF so that correct shard is chosen for append*/
++  if (shard_is_appending_write(local))
++    local->offset = local->prebuf.ia_size;
+-    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++  local->first_block = get_lowest_block(local->offset, local->block_size);
++  local->last_block =
++      get_highest_block(local->offset, local->total_size, local->block_size);
++  local->num_blocks = local->last_block - local->first_block + 1;
++  local->inode_list =
++      GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
++  if (!local->inode_list) {
++    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++    return 0;
++  }
+-    local->postbuf.ia_ctime = ctx->stat.ia_ctime;
+-    local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
+-    local->postbuf.ia_atime = ctx->stat.ia_atime;
+-    local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
+-    local->postbuf.ia_mtime = ctx->stat.ia_mtime;
+-    local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
++  gf_msg_trace(
++      this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " "
++                     "last_block=%" PRIu32 " num_blocks=%" PRIu32
++                     " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "",
++      gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid),
++      local->first_block, local->last_block, local->num_blocks, local->offset,
++      local->total_size, local->flags);
+-    return 0;
++  local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+-shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+-                                    xlator_t *this)
+-    int ret = 0;
++  if (!local->dot_shard_loc.inode) {
++    /*change handler*/
++    shard_mkdir_internal_dir(frame, this,
++                             shard_common_inode_write_post_resolve_handler,
++                             SHARD_INTERNAL_DIR_DOT_SHARD);
++  } else {
++    /*change handler*/
++    local->post_res_handler = shard_common_inode_write_post_resolve_handler;
++    shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++  }
++  return 0;
+-    LOCK(&inode->lock);
+-    {
+-        ret = __shard_get_timestamps_from_inode_ctx(local, inode, this);
+-    }
+-    UNLOCK(&inode->lock);
++int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie,
++                                 xlator_t *this, int32_t op_ret,
++                                 int32_t op_errno, inode_t *inode,
++                                 struct iatt *buf, struct iatt *preparent,
++                                 struct iatt *postparent, dict_t *xdata) {
++  inode_t *link_inode = NULL;
++  shard_local_t *local = NULL;
++  shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++  local = frame->local;
++  SHARD_UNSET_ROOT_FS_ID(frame, local);
++  if (op_ret == -1) {
++    if (op_errno != EEXIST) {
++      local->op_ret = op_ret;
++      local->op_errno = op_errno;
++      goto unwind;
++    } else {
++      gf_msg_debug(this->name, 0, "mkdir on %s failed "
++                                  "with EEXIST. Attempting lookup now",
++                   shard_internal_dir_string(type));
++      shard_lookup_internal_dir(frame, this, local->post_res_handler, type);
++      return 0;
++    }
++  }
++  link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++  if (link_inode != inode) {
++    shard_refresh_internal_dir(frame, this, type);
++  } else {
++    shard_inode_ctx_mark_dir_refreshed(link_inode, this);
++    shard_common_resolve_shards(frame, this, local->post_res_handler);
++  }
++  return 0;
++  shard_common_resolve_shards(frame, this, local->post_res_handler);
++  return 0;
++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
++                             shard_post_resolve_fop_handler_t handler,
++                             shard_internal_dir_type_t type) {
++  int ret = -1;
++  shard_local_t *local = NULL;
++  shard_priv_t *priv = NULL;
++  dict_t *xattr_req = NULL;
++  uuid_t *gfid = NULL;
++  loc_t *loc = NULL;
++  gf_boolean_t free_gfid = _gf_true;
++  local = frame->local;
++  priv = this->private;
++  local->post_res_handler = handler;
++  gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
++  if (!gfid)
++    goto err;
++  switch (type) {
++    gf_uuid_copy(*gfid, priv->dot_shard_gfid);
++    loc = &local->dot_shard_loc;
++    break;
++    gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
++    loc = &local->dot_shard_rm_loc;
++    break;
++  default:
++    bzero(*gfid, sizeof(uuid_t));
++    break;
++  }
++  xattr_req = dict_new();
++  if (!xattr_req)
++    goto err;
++  ret = shard_init_internal_dir_loc(this, local, type);
++  if (ret)
++    goto err;
++  ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++           "Failed to set gfid-req for %s", shard_internal_dir_string(type));
++    goto err;
++  } else {
++    free_gfid = _gf_false;
++  }
++  SHARD_SET_ROOT_FS_ID(frame, local);
++  STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
++                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
++                    0755, 0, xattr_req);
++  dict_unref(xattr_req);
++  return 0;
+-    return ret;
++  if (xattr_req)
++    dict_unref(xattr_req);
++  local->op_ret = -1;
++  local->op_errno = ENOMEM;
++  if (free_gfid)
++    GF_FREE(gfid);
++  handler(frame, this);
++  return 0;
+ }
+-shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                       int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+-                       struct iatt *postbuf, dict_t *xdata)
+-    int call_count = 0;
+-    uint64_t fsync_count = 0;
+-    fd_t *anon_fd = cookie;
+-    shard_local_t *local = NULL;
+-    shard_inode_ctx_t *ctx = NULL;
+-    shard_inode_ctx_t *base_ictx = NULL;
+-    inode_t *base_inode = NULL;
+-    gf_boolean_t unref_shard_inode = _gf_false;
++int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                    int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++  /* To-Do: Wind flush on all shards of the file */
++  SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
++  return 0;
+-    local = frame->local;
+-    base_inode = local->fd->inode;
++int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) {
++  STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->flush, fd, xdata);
++  return 0;
+-    if (local->op_ret < 0)
+-        goto out;
++int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
++                                          xlator_t *this) {
++  int ret = -1;
++  uint64_t ctx_uint = 0;
++  shard_inode_ctx_t *ctx = NULL;
+-    LOCK(&frame->lock);
+-    {
+-        if (op_ret < 0) {
+-            local->op_ret = op_ret;
+-            local->op_errno = op_errno;
+-            UNLOCK(&frame->lock);
+-            goto out;
+-        }
+-        shard_inode_ctx_set(local->fd->inode, this, postbuf, 0,
+-                            SHARD_MASK_TIMES);
+-    }
+-    UNLOCK(&frame->lock);
+-    fd_ctx_get(anon_fd, this, &fsync_count);
+-    if (anon_fd && (base_inode != anon_fd->inode)) {
+-        LOCK(&base_inode->lock);
+-        LOCK(&anon_fd->inode->lock);
+-        {
+-            __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+-            __shard_inode_ctx_get(base_inode, this, &base_ictx);
+-            if (op_ret == 0)
+-                ctx->fsync_needed -= fsync_count;
+-            GF_ASSERT(ctx->fsync_needed >= 0);
+-            if (ctx->fsync_needed != 0) {
+-                list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
+-                base_ictx->fsync_count++;
+-            } else {
+-                unref_shard_inode = _gf_true;
+-            }
+-        }
+-        UNLOCK(&anon_fd->inode->lock);
+-        UNLOCK(&base_inode->lock);
+-    }
++  ret = __inode_ctx_get(inode, this, &ctx_uint);
++  if (ret < 0)
++    return ret;
+-    if (unref_shard_inode)
+-        inode_unref(anon_fd->inode);
+-    if (anon_fd)
+-        fd_unref(anon_fd);
++  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-    call_count = shard_call_count_return(frame);
+-    if (call_count != 0)
+-        return 0;
++  local->postbuf.ia_ctime = ctx->stat.ia_ctime;
++  local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
++  local->postbuf.ia_atime = ctx->stat.ia_atime;
++  local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
++  local->postbuf.ia_mtime = ctx->stat.ia_mtime;
++  local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+-                                    local->op_errno);
+-    } else {
+-        shard_get_timestamps_from_inode_ctx(local, base_inode, this);
+-        SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
+-                           &local->prebuf, &local->postbuf, local->xattr_rsp);
+-    }
+-    return 0;
++  return 0;
+ }
+-shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this)
+-    int ret = 0;
+-    int call_count = 0;
+-    int fsync_count = 0;
+-    fd_t *anon_fd = NULL;
+-    inode_t *base_inode = NULL;
+-    shard_local_t *local = NULL;
+-    shard_inode_ctx_t *ctx = NULL;
+-    shard_inode_ctx_t *iter = NULL;
+-    struct list_head copy = {
+-        0,
+-    };
+-    shard_inode_ctx_t *tmp = NULL;
++int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
++                                        xlator_t *this) {
++  int ret = 0;
+-    local = frame->local;
+-    base_inode = local->fd->inode;
+-    local->postbuf = local->prebuf;
+-    INIT_LIST_HEAD(&copy);
++  LOCK(&inode->lock);
++  { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); }
++  UNLOCK(&inode->lock);
+-    if (local->op_ret < 0) {
+-        shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+-                                    local->op_errno);
+-        return 0;
+-    }
++  return ret;
++int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno,
++                           struct iatt *prebuf, struct iatt *postbuf,
++                           dict_t *xdata) {
++  int call_count = 0;
++  uint64_t fsync_count = 0;
++  fd_t *anon_fd = cookie;
++  shard_local_t *local = NULL;
++  shard_inode_ctx_t *ctx = NULL;
++  shard_inode_ctx_t *base_ictx = NULL;
++  inode_t *base_inode = NULL;
++  gf_boolean_t unref_shard_inode = _gf_false;
++  local = frame->local;
++  base_inode = local->fd->inode;
++  if (local->op_ret < 0)
++    goto out;
++  LOCK(&frame->lock);
++  {
++    if (op_ret < 0) {
++      local->op_ret = op_ret;
++      local->op_errno = op_errno;
++      UNLOCK(&frame->lock);
++      goto out;
++    }
++    shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES);
++  }
++  UNLOCK(&frame->lock);
++  fd_ctx_get(anon_fd, this, &fsync_count);
++  if (anon_fd && (base_inode != anon_fd->inode)) {
+     LOCK(&base_inode->lock);
++    LOCK(&anon_fd->inode->lock);
+     {
+-        __shard_inode_ctx_get(base_inode, this, &ctx);
+-        list_splice_init(&ctx->to_fsync_list, &copy);
+-        call_count = ctx->fsync_count;
+-        ctx->fsync_count = 0;
+-    }
++      __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
++      __shard_inode_ctx_get(base_inode, this, &base_ictx);
++      if (op_ret == 0)
++        ctx->fsync_needed -= fsync_count;
++      GF_ASSERT(ctx->fsync_needed >= 0);
++      if (ctx->fsync_needed != 0) {
++        list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
++        base_ictx->fsync_count++;
++      } else {
++        unref_shard_inode = _gf_true;
++      }
++    }
++    UNLOCK(&anon_fd->inode->lock);
+     UNLOCK(&base_inode->lock);
++  }
++  if (unref_shard_inode)
++    inode_unref(anon_fd->inode);
++  if (anon_fd)
++    fd_unref(anon_fd);
++  call_count = shard_call_count_return(frame);
++  if (call_count != 0)
++    return 0;
+-    local->call_count = ++call_count;
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
++                                local->op_errno);
++  } else {
++    shard_get_timestamps_from_inode_ctx(local, base_inode, this);
++    SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
++                       &local->prebuf, &local->postbuf, local->xattr_rsp);
++  }
++  return 0;
++int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) {
++  int ret = 0;
++  int call_count = 0;
++  int fsync_count = 0;
++  fd_t *anon_fd = NULL;
++  inode_t *base_inode = NULL;
++  shard_local_t *local = NULL;
++  shard_inode_ctx_t *ctx = NULL;
++  shard_inode_ctx_t *iter = NULL;
++  struct list_head copy = {
++      0,
++  };
++  shard_inode_ctx_t *tmp = NULL;
++  local = frame->local;
++  base_inode = local->fd->inode;
++  local->postbuf = local->prebuf;
++  INIT_LIST_HEAD(&copy);
++  if (local->op_ret < 0) {
++    shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
++                                local->op_errno);
++    return 0;
++  }
++  LOCK(&base_inode->lock);
++  {
++    __shard_inode_ctx_get(base_inode, this, &ctx);
++    list_splice_init(&ctx->to_fsync_list, &copy);
++    call_count = ctx->fsync_count;
++    ctx->fsync_count = 0;
++  }
++  UNLOCK(&base_inode->lock);
++  local->call_count = ++call_count;
++  /* Send fsync() on the base shard first */
++  anon_fd = fd_ref(local->fd);
++  STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
++                    FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
++                    local->xattr_req);
++  call_count--;
++  anon_fd = NULL;
++  list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list) {
++    list_del_init(&iter->to_fsync_list);
++    fsync_count = 0;
++    shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
++    GF_ASSERT(fsync_count > 0);
++    anon_fd = fd_anonymous(iter->inode);
++    if (!anon_fd) {
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++             "Failed to create "
++             "anon fd to fsync shard");
++      shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM,
++                             NULL, NULL, NULL);
++      continue;
++    }
+-    /* Send fsync() on the base shard first */
+-    anon_fd = fd_ref(local->fd);
++    ret = fd_ctx_set(anon_fd, this, fsync_count);
++    if (ret) {
++      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
++             "Failed to set fd "
++             "ctx for shard inode gfid=%s",
++             uuid_utoa(iter->inode->gfid));
++      local->op_ret = -1;
++      local->op_errno = ENOMEM;
++      shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM,
++                             NULL, NULL, NULL);
++      continue;
++    }
+     STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
+                       FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
+                       local->xattr_req);
+     call_count--;
+-    anon_fd = NULL;
+-    list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list)
+-    {
+-        list_del_init(&iter->to_fsync_list);
+-        fsync_count = 0;
+-        shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
+-        GF_ASSERT(fsync_count > 0);
+-        anon_fd = fd_anonymous(iter->inode);
+-        if (!anon_fd) {
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+-                   SHARD_MSG_MEMALLOC_FAILED,
+-                   "Failed to create "
+-                   "anon fd to fsync shard");
+-            shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
+-                                   ENOMEM, NULL, NULL, NULL);
+-            continue;
+-        }
+-        ret = fd_ctx_set(anon_fd, this, fsync_count);
+-        if (ret) {
+-            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
+-                   "Failed to set fd "
+-                   "ctx for shard inode gfid=%s",
+-                   uuid_utoa(iter->inode->gfid));
+-            local->op_ret = -1;
+-            local->op_errno = ENOMEM;
+-            shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
+-                                   ENOMEM, NULL, NULL, NULL);
+-            continue;
+-        }
+-        STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd,
+-                          FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+-                          anon_fd, local->datasync, local->xattr_req);
+-        call_count--;
+-    }
++  }
+-    return 0;
++  return 0;
+ }
+-shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+-            dict_t *xdata)
+-    int ret = 0;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
++int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
++                dict_t *xdata) {
++  int ret = 0;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
+-    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size for %s from its inode ctx",
+-               uuid_utoa(fd->inode->gfid));
+-        goto err;
+-    }
++  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size for %s from its inode ctx",
++           uuid_utoa(fd->inode->gfid));
++    goto err;
++  }
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+-        return 0;
+-    }
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
++    return 0;
++  }
+-    if (!this->itable)
+-        this->itable = fd->inode->table;
++  if (!this->itable)
++    this->itable = fd->inode->table;
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
+-    frame->local = local;
++  frame->local = local;
+-    local->fd = fd_ref(fd);
+-    local->fop = GF_FOP_FSYNC;
+-    local->datasync = datasync;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
++  local->fd = fd_ref(fd);
++  local->fop = GF_FOP_FSYNC;
++  local->datasync = datasync;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
+-    local->loc.inode = inode_ref(fd->inode);
+-    gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++  local->loc.inode = inode_ref(fd->inode);
++  gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_fsync_handler);
+-    return 0;
++  shard_lookup_base_file(frame, this, &local->loc,
++                         shard_post_lookup_fsync_handler);
++  return 0;
+ err:
+-    shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
+-                                 xlator_t *this, int32_t op_ret,
+-                                 int32_t op_errno, gf_dirent_t *orig_entries,
+-                                 dict_t *xdata)
+-    gf_dirent_t *entry = NULL;
+-    gf_dirent_t *tmp = NULL;
+-    shard_local_t *local = NULL;
++int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
++                                     xlator_t *this, int32_t op_ret,
++                                     int32_t op_errno,
++                                     gf_dirent_t *orig_entries, dict_t *xdata) {
++  gf_dirent_t *entry = NULL;
++  gf_dirent_t *tmp = NULL;
++  shard_local_t *local = NULL;
+-    local = frame->local;
++  local = frame->local;
+-    if (op_ret < 0)
+-        goto unwind;
++  if (op_ret < 0)
++    goto unwind;
+-    list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
+-    {
+-        list_del_init(&entry->list);
+-        list_add_tail(&entry->list, &local->entries_head.list);
++  list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) {
++    list_del_init(&entry->list);
++    list_add_tail(&entry->list, &local->entries_head.list);
+-        if (!entry->dict)
+-            continue;
++    if (!entry->dict)
++      continue;
+-        if (IA_ISDIR(entry->d_stat.ia_type))
+-            continue;
++    if (IA_ISDIR(entry->d_stat.ia_type))
++      continue;
+-        if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
+-            shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+-        if (!entry->inode)
+-            continue;
++    if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
++      shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
++    if (!entry->inode)
++      continue;
+-        shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+-    }
+-    local->op_ret += op_ret;
++    shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
++  }
++  local->op_ret += op_ret;
+ unwind:
+-    if (local->fop == GF_FOP_READDIR)
+-        SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
+-                           &local->entries_head, xdata);
+-    else
+-        SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
+-                           &local->entries_head, xdata);
+-    return 0;
++  if (local->fop == GF_FOP_READDIR)
++    SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
++                       &local->entries_head, xdata);
++  else
++    SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head,
++                       xdata);
++  return 0;
+ }
+-shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                  int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
+-                  dict_t *xdata)
+-    fd_t *fd = NULL;
+-    gf_dirent_t *entry = NULL;
+-    gf_dirent_t *tmp = NULL;
+-    shard_local_t *local = NULL;
+-    gf_boolean_t last_entry = _gf_false;
++int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                          int32_t op_ret, int32_t op_errno,
++                          gf_dirent_t *orig_entries, dict_t *xdata) {
++  fd_t *fd = NULL;
++  gf_dirent_t *entry = NULL;
++  gf_dirent_t *tmp = NULL;
++  shard_local_t *local = NULL;
++  gf_boolean_t last_entry = _gf_false;
+-    local = frame->local;
+-    fd = local->fd;
++  local = frame->local;
++  fd = local->fd;
+-    if (op_ret < 0)
+-        goto unwind;
++  if (op_ret < 0)
++    goto unwind;
+-    list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
+-    {
+-        if (last_entry)
+-            last_entry = _gf_false;
+-        if (__is_root_gfid(fd->inode->gfid) &&
+-            !(strcmp(entry->d_name, GF_SHARD_DIR))) {
+-            local->offset = entry->d_off;
+-            op_ret--;
+-            last_entry = _gf_true;
+-            continue;
+-        }
++  list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) {
++    if (last_entry)
++      last_entry = _gf_false;
+-        list_del_init(&entry->list);
+-        list_add_tail(&entry->list, &local->entries_head.list);
++    if (__is_root_gfid(fd->inode->gfid) &&
++        !(strcmp(entry->d_name, GF_SHARD_DIR))) {
++      local->offset = entry->d_off;
++      op_ret--;
++      last_entry = _gf_true;
++      continue;
++    }
+-        if (!entry->dict)
+-            continue;
++    list_del_init(&entry->list);
++    list_add_tail(&entry->list, &local->entries_head.list);
+-        if (IA_ISDIR(entry->d_stat.ia_type))
+-            continue;
++    if (!entry->dict)
++      continue;
+-        if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
+-            frame->root->pid != GF_CLIENT_PID_GSYNCD)
+-            shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
++    if (IA_ISDIR(entry->d_stat.ia_type))
++      continue;
+-        if (!entry->inode)
+-            continue;
++    if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
++        frame->root->pid != GF_CLIENT_PID_GSYNCD)
++      shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+-        shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+-    }
++    if (!entry->inode)
++      continue;
+-    local->op_ret = op_ret;
++    shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
++  }
+-    if (last_entry) {
+-        if (local->fop == GF_FOP_READDIR)
+-            STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
+-                       FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+-                       local->fd, local->readdir_size, local->offset,
+-                       local->xattr_req);
+-        else
+-            STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
+-                       FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+-                       local->fd, local->readdir_size, local->offset,
+-                       local->xattr_req);
+-        return 0;
+-    }
++  local->op_ret = op_ret;
++  if (last_entry) {
+     if (local->fop == GF_FOP_READDIR)
+-        SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno,
+-                           &local->entries_head, xdata);
++      STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this),
++                 FIRST_CHILD(this)->fops->readdir, local->fd,
++                 local->readdir_size, local->offset, local->xattr_req);
+     else
+-        SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
+-                           &local->entries_head, xdata);
++      STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this),
++                 FIRST_CHILD(this)->fops->readdirp, local->fd,
++                 local->readdir_size, local->offset, local->xattr_req);
+     return 0;
++  }
+-shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+-                 off_t offset, int whichop, dict_t *xdata)
+-    int ret = 0;
+-    shard_local_t *local = NULL;
+-    local = mem_get0(this->local_pool);
+-    if (!local) {
+-        goto err;
++  if (local->fop == GF_FOP_READDIR)
++    SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head,
++                       xdata);
++  else
++    SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head,
++                       xdata);
++  return 0;
++int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++                     off_t offset, int whichop, dict_t *xdata) {
++  int ret = 0;
++  shard_local_t *local = NULL;
++  local = mem_get0(this->local_pool);
++  if (!local) {
++    goto err;
++  }
++  frame->local = local;
++  local->fd = fd_ref(fd);
++  local->fop = whichop;
++  local->readdir_size = size;
++  INIT_LIST_HEAD(&local->entries_head.list);
++  local->list_inited = _gf_true;
++  if (whichop == GF_FOP_READDIR) {
++    STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
++  } else {
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++                                    local, err);
++    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
++    if (ret) {
++      gf_log(this->name, GF_LOG_WARNING,
++             "Failed to set "
++             "dict value: key:%s, directory gfid=%s",
++             GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
++      goto err;
+     }
+-    frame->local = local;
+-    local->fd = fd_ref(fd);
+-    local->fop = whichop;
+-    local->readdir_size = size;
+-    INIT_LIST_HEAD(&local->entries_head.list);
+-    local->list_inited = _gf_true;
+-    if (whichop == GF_FOP_READDIR) {
+-        STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+-    } else {
+-        local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-        SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+-                                        local, err);
+-        ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+-        if (ret) {
+-            gf_log(this->name, GF_LOG_WARNING,
+-                   "Failed to set "
+-                   "dict value: key:%s, directory gfid=%s",
+-                   GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
+-            goto err;
+-        }
+-        STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
+-                   local->xattr_req);
+-    }
++    STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
++               local->xattr_req);
++  }
+-    return 0;
++  return 0;
+ err:
+-    STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+-    return 0;
++  STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
++  return 0;
+ }
+-shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+-              off_t offset, dict_t *xdata)
+-    shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+-    return 0;
++int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                      size_t size, off_t offset, dict_t *xdata) {
++  shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
++  return 0;
+ }
+-shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+-               off_t offset, dict_t *xdata)
+-    shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
+-    return 0;
++int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                       size_t size, off_t offset, dict_t *xdata) {
++  shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
++  return 0;
+ }
+-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                  const char *name, dict_t *xdata)
+-    int op_errno = EINVAL;
++int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                          const char *name, dict_t *xdata) {
++  int op_errno = EINVAL;
+-    if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+-        GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+-    }
++  if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++    GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
++  }
+-    if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+-        dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+-        dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+-    }
++  if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++    dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
++    dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
++  }
+-    STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                    FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+-    return 0;
++                  FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
++  return 0;
+ out:
+-    shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
++  return 0;
+ }
+-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                   const char *name, dict_t *xdata)
+-    int op_errno = EINVAL;
++int32_t shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                           const char *name, dict_t *xdata) {
++  int op_errno = EINVAL;
+-    if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+-        GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+-    }
++  if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++    GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
++  }
+-    if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+-        dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+-        dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+-    }
++  if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++    dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
++    dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
++  }
+-    STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                    FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+-    return 0;
++                  FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
++  return 0;
+ out:
+-    shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
++  return 0;
+ }
+-shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                    int32_t op_ret, int32_t op_errno, dict_t *dict,
+-                    dict_t *xdata)
+-    if (op_ret < 0)
+-        goto unwind;
++int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                            int32_t op_ret, int32_t op_errno, dict_t *dict,
++                            dict_t *xdata) {
++  if (op_ret < 0)
++    goto unwind;
+-    if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+-        dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+-        dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+-    }
++  if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++    dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
++    dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
++  }
+ unwind:
+-    SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+-    return 0;
++  SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
++  return 0;
+ }
+-shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+-                dict_t *xdata)
+-    int op_errno = EINVAL;
++int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                        const char *name, dict_t *xdata) {
++  int op_errno = EINVAL;
+-    if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+-        (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
+-        op_errno = ENODATA;
+-        goto out;
+-    }
++  if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
++      (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
++    op_errno = ENODATA;
++    goto out;
++  }
+-    STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+-    return 0;
++  STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
++  return 0;
+ out:
+-    shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
++  return 0;
+ }
+-shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                   int32_t op_ret, int32_t op_errno, dict_t *dict,
+-                   dict_t *xdata)
+-    if (op_ret < 0)
+-        goto unwind;
++int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno, dict_t *dict,
++                           dict_t *xdata) {
++  if (op_ret < 0)
++    goto unwind;
+-    if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+-        dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+-        dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+-    }
++  if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++    dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
++    dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
++  }
+ unwind:
+-    SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+-    return 0;
++  SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
++  return 0;
+ }
+-shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-               const char *name, dict_t *xdata)
+-    int op_errno = EINVAL;
++int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                       const char *name, dict_t *xdata) {
++  int op_errno = EINVAL;
+-    if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+-        (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
+-        op_errno = ENODATA;
+-        goto out;
+-    }
++  if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
++      (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
++    op_errno = ENODATA;
++    goto out;
++  }
+-    STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+-    return 0;
++  STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
++  return 0;
+ out:
+-    shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
++  return 0;
+ }
+-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+-                int32_t flags, dict_t *xdata)
+-    int op_errno = EINVAL;
++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                        dict_t *dict, int32_t flags, dict_t *xdata) {
++  int op_errno = EINVAL;
+-    if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+-        GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+-    }
++  if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++    GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
++  }
+-    STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                    FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+-    return 0;
++  STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr,
++                  fd, dict, flags, xdata);
++  return 0;
+ out:
+-    shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
++  return 0;
+ }
+-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+-               int32_t flags, dict_t *xdata)
+-    int op_errno = EINVAL;
++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                       dict_t *dict, int32_t flags, dict_t *xdata) {
++  int op_errno = EINVAL;
+-    if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+-        GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+-    }
++  if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++    GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
++  }
+-    STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+-                    loc, dict, flags, xdata);
+-    return 0;
++  STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
++                  loc, dict, flags, xdata);
++  return 0;
+ out:
+-    shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
++  return 0;
+ }
+-shard_post_setattr_handler(call_frame_t *frame, xlator_t *this)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (local->fop == GF_FOP_SETATTR) {
+-        if (local->op_ret >= 0)
+-            shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
+-                                SHARD_LOOKUP_MASK);
+-        SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+-                           &local->prebuf, &local->postbuf, local->xattr_rsp);
+-    } else if (local->fop == GF_FOP_FSETATTR) {
+-        if (local->op_ret >= 0)
+-            shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
+-                                SHARD_LOOKUP_MASK);
+-        SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
+-                           &local->prebuf, &local->postbuf, local->xattr_rsp);
+-    }
+-    return 0;
++int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) {
++  shard_local_t *local = NULL;
+-shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                         int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+-                         struct iatt *postbuf, dict_t *xdata)
+-    shard_local_t *local = NULL;
+-    local = frame->local;
+-    if (op_ret < 0) {
+-        local->op_ret = op_ret;
+-        local->op_errno = op_errno;
+-        goto unwind;
+-    }
++  local = frame->local;
+-    local->prebuf = *prebuf;
+-    if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+-        local->op_ret = -1;
+-        local->op_errno = EINVAL;
+-        goto unwind;
+-    }
+-    if (xdata)
+-        local->xattr_rsp = dict_ref(xdata);
+-    local->postbuf = *postbuf;
+-    local->postbuf.ia_size = local->prebuf.ia_size;
+-    local->postbuf.ia_blocks = local->prebuf.ia_blocks;
++  if (local->fop == GF_FOP_SETATTR) {
++    if (local->op_ret >= 0)
++      shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
++                          SHARD_LOOKUP_MASK);
++    SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
++                       &local->prebuf, &local->postbuf, local->xattr_rsp);
++  } else if (local->fop == GF_FOP_FSETATTR) {
++    if (local->op_ret >= 0)
++      shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
++                          SHARD_LOOKUP_MASK);
++    SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
++                       &local->prebuf, &local->postbuf, local->xattr_rsp);
++  }
+-    local->handler(frame, this);
+-    return 0;
++  return 0;
+ }
+-shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-              struct iatt *stbuf, int32_t valid, dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
+-    if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+-        STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+-        return 0;
+-    }
+-    ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block size from inode ctx of %s",
+-               uuid_utoa(loc->inode->gfid));
+-        goto err;
+-    }
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+-        return 0;
+-    }
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
++int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                             int32_t op_ret, int32_t op_errno,
++                             struct iatt *prebuf, struct iatt *postbuf,
++                             dict_t *xdata) {
++  shard_local_t *local = NULL;
+-    frame->local = local;
++  local = frame->local;
+-    local->handler = shard_post_setattr_handler;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
+-    local->fop = GF_FOP_SETATTR;
+-    loc_copy(&local->loc, loc);
++  if (op_ret < 0) {
++    local->op_ret = op_ret;
++    local->op_errno = op_errno;
++    goto unwind;
++  }
+-    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+-                                    local, err);
++  local->prebuf = *prebuf;
++  if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++    local->op_ret = -1;
++    local->op_errno = EINVAL;
++    goto unwind;
++  }
++  if (xdata)
++    local->xattr_rsp = dict_ref(xdata);
++  local->postbuf = *postbuf;
++  local->postbuf.ia_size = local->prebuf.ia_size;
++  local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+-    STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+-               local->xattr_req);
+-    return 0;
+-    shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
+-    return 0;
++  local->handler(frame, this);
++  return 0;
+ }
+-shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-               struct iatt *stbuf, int32_t valid, dict_t *xdata)
+-    int ret = -1;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
+-    if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+-        STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+-        return 0;
+-    }
++int shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                  struct iatt *stbuf, int32_t valid, dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
+-    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block size from inode ctx of %s",
+-               uuid_utoa(fd->inode->gfid));
+-        goto err;
+-    }
++  if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
++    STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++    return 0;
++  }
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+-        return 0;
+-    }
++  ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block size from inode ctx of %s",
++           uuid_utoa(loc->inode->gfid));
++    goto err;
++  }
+-    if (!this->itable)
+-        this->itable = fd->inode->table;
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++    return 0;
++  }
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto err;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
+-    frame->local = local;
++  frame->local = local;
+-    local->handler = shard_post_setattr_handler;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto err;
+-    local->fop = GF_FOP_FSETATTR;
+-    local->fd = fd_ref(fd);
++  local->handler = shard_post_setattr_handler;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  local->fop = GF_FOP_SETATTR;
++  loc_copy(&local->loc, loc);
+-    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+-                                    local, err);
++  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
++                                  local, err);
+-    STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
+-               local->xattr_req);
+-    return 0;
++  STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
++             local->xattr_req);
++  return 0;
+ err:
+-    shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
+-                               glusterfs_fop_t fop, fd_t *fd,
+-                               struct iovec *vector, int32_t count,
+-                               off_t offset, uint32_t flags, size_t len,
+-                               struct iobref *iobref, dict_t *xdata)
+-    int ret = 0;
+-    int i = 0;
+-    uint64_t block_size = 0;
+-    shard_local_t *local = NULL;
++int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                   struct iatt *stbuf, int32_t valid, dict_t *xdata) {
++  int ret = -1;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
+-    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-    if (ret) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-               "Failed to get block "
+-               "size for %s from its inode ctx",
+-               uuid_utoa(fd->inode->gfid));
+-        goto out;
+-    }
++  if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
++    STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++    return 0;
++  }
+-    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-        /* block_size = 0 means that the file was created before
+-         * sharding was enabled on the volume.
+-         */
+-        switch (fop) {
+-            case GF_FOP_WRITE:
+-                STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                                FIRST_CHILD(this)->fops->writev, fd, vector,
+-                                count, offset, flags, iobref, xdata);
+-                break;
+-            case GF_FOP_FALLOCATE:
+-                STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                                FIRST_CHILD(this)->fops->fallocate, fd, flags,
+-                                offset, len, xdata);
+-                break;
+-            case GF_FOP_ZEROFILL:
+-                STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                                FIRST_CHILD(this)->fops->zerofill, fd, offset,
+-                                len, xdata);
+-                break;
+-            case GF_FOP_DISCARD:
+-                STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                                FIRST_CHILD(this)->fops->discard, fd, offset,
+-                                len, xdata);
+-                break;
+-            default:
+-                gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-                       "Invalid fop id = %d", fop);
+-                break;
+-        }
+-        return 0;
+-    }
++  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block size from inode ctx of %s",
++           uuid_utoa(fd->inode->gfid));
++    goto err;
++  }
+-    if (!this->itable)
+-        this->itable = fd->inode->table;
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++    return 0;
++  }
+-    local = mem_get0(this->local_pool);
+-    if (!local)
+-        goto out;
++  if (!this->itable)
++    this->itable = fd->inode->table;
+-    frame->local = local;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto err;
+-    ret = syncbarrier_init(&local->barrier);
+-    if (ret)
+-        goto out;
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    if (!local->xattr_req)
+-        goto out;
+-    if (vector) {
+-        local->vector = iov_dup(vector, count);
+-        if (!local->vector)
+-            goto out;
+-        for (i = 0; i < count; i++)
+-            local->total_size += vector[i].iov_len;
+-        local->count = count;
+-    } else {
+-        local->total_size = len;
+-    }
++  frame->local = local;
+-    local->fop = fop;
+-    local->offset = offset;
+-    local->flags = flags;
+-    if (iobref)
+-        local->iobref = iobref_ref(iobref);
+-    local->fd = fd_ref(fd);
+-    local->block_size = block_size;
+-    local->resolver_base_inode = local->fd->inode;
+-    GF_ATOMIC_INIT(local->delta_blocks, 0);
++  local->handler = shard_post_setattr_handler;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto err;
++  local->fop = GF_FOP_FSETATTR;
++  local->fd = fd_ref(fd);
+-    local->loc.inode = inode_ref(fd->inode);
+-    gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++                                  local, err);
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_common_inode_write_post_lookup_handler);
+-    return 0;
++  STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
++             FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
++             local->xattr_req);
++  return 0;
++  shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
++  return 0;
++int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
++                                   glusterfs_fop_t fop, fd_t *fd,
++                                   struct iovec *vector, int32_t count,
++                                   off_t offset, uint32_t flags, size_t len,
++                                   struct iobref *iobref, dict_t *xdata) {
++  int ret = 0;
++  int i = 0;
++  uint64_t block_size = 0;
++  shard_local_t *local = NULL;
++  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++  if (ret) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++           "Failed to get block "
++           "size for %s from its inode ctx",
++           uuid_utoa(fd->inode->gfid));
++    goto out;
++  }
++  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++    /* block_size = 0 means that the file was created before
++     * sharding was enabled on the volume.
++     */
++    switch (fop) {
++    case GF_FOP_WRITE:
++      STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
++                      fd, vector, count, offset, flags, iobref, xdata);
++      break;
++    case GF_FOP_FALLOCATE:
++      STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++                      FIRST_CHILD(this)->fops->fallocate, fd, flags, offset,
++                      len, xdata);
++      break;
++    case GF_FOP_ZEROFILL:
++      STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++                      FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
++                      xdata);
++      break;
++    case GF_FOP_DISCARD:
++      STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++                      FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
++      break;
++    default:
++      gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++             "Invalid fop id = %d", fop);
++      break;
++    }
++    return 0;
++  }
++  if (!this->itable)
++    this->itable = fd->inode->table;
++  local = mem_get0(this->local_pool);
++  if (!local)
++    goto out;
++  frame->local = local;
++  ret = syncbarrier_init(&local->barrier);
++  if (ret)
++    goto out;
++  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++  if (!local->xattr_req)
++    goto out;
++  if (vector) {
++    local->vector = iov_dup(vector, count);
++    if (!local->vector)
++      goto out;
++    for (i = 0; i < count; i++)
++      local->total_size += vector[i].iov_len;
++    local->count = count;
++  } else {
++    local->total_size = len;
++  }
++  local->fop = fop;
++  local->offset = offset;
++  local->flags = flags;
++  if (iobref)
++    local->iobref = iobref_ref(iobref);
++  local->fd = fd_ref(fd);
++  local->block_size = block_size;
++  local->resolver_base_inode = local->fd->inode;
++  GF_ATOMIC_INIT(local->delta_blocks, 0);
++  local->loc.inode = inode_ref(fd->inode);
++  gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++  shard_lookup_base_file(frame, this, &local->loc,
++                         shard_common_inode_write_post_lookup_handler);
++  return 0;
+ out:
+-    shard_common_failure_unwind(fop, frame, -1, ENOMEM);
+-    return 0;
++  shard_common_failure_unwind(fop, frame, -1, ENOMEM);
++  return 0;
+ }
+-shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-             struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+-             struct iobref *iobref, dict_t *xdata)
+-    shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count,
+-                                   offset, flags, 0, iobref, xdata);
+-    return 0;
++int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                 struct iovec *vector, int32_t count, off_t offset,
++                 uint32_t flags, struct iobref *iobref, dict_t *xdata) {
++  shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count,
++                                 offset, flags, 0, iobref, xdata);
++  return 0;
+ }
+-shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+-    if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) &&
+-        (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
+-        goto out;
++int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                    int32_t keep_size, off_t offset, size_t len,
++                    dict_t *xdata) {
++  if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) &&
++      (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
++    goto out;
+-    shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0,
+-                                   offset, keep_size, len, NULL, xdata);
+-    return 0;
++  shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0,
++                                 offset, keep_size, len, NULL, xdata);
++  return 0;
+ out:
+-    shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP);
+-    return 0;
++  shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP);
++  return 0;
+ }
+-shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+-               off_t len, dict_t *xdata)
+-    shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0,
+-                                   offset, 0, len, NULL, xdata);
+-    return 0;
++int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++                   off_t len, dict_t *xdata) {
++  shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0,
++                                 offset, 0, len, NULL, xdata);
++  return 0;
+ }
+-shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+-              size_t len, dict_t *xdata)
+-    shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0,
+-                                   offset, 0, len, NULL, xdata);
+-    return 0;
++int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++                  size_t len, dict_t *xdata) {
++  shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0,
++                                 offset, 0, len, NULL, xdata);
++  return 0;
+ }
+-shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+-           gf_seek_what_t what, dict_t *xdata)
+-    /* TBD */
+-           "seek called on %s.", uuid_utoa(fd->inode->gfid));
+-    shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP);
+-    return 0;
++int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++                   gf_seek_what_t what, dict_t *xdata) {
++  /* TBD */
++         "seek called on %s.", uuid_utoa(fd->inode->gfid));
++  shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP);
++  return 0;
+ }
+-mem_acct_init(xlator_t *this)
+-    int ret = -1;
+-    if (!this)
+-        return ret;
++int32_t mem_acct_init(xlator_t *this) {
++  int ret = -1;
+-    ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1);
++  if (!this)
++    return ret;
+-    if (ret != 0) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED,
+-               "Memory accounting init"
+-               "failed");
+-        return ret;
+-    }
++  ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1);
++  if (ret != 0) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED,
++           "Memory accounting init"
++           "failed");
+     return ret;
++  }
++  return ret;
+ }
+-init(xlator_t *this)
+-    int ret = -1;
+-    shard_priv_t *priv = NULL;
++int init(xlator_t *this) {
++  int ret = -1;
++  shard_priv_t *priv = NULL;
+-    if (!this) {
+-        gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS,
+-               "this is NULL. init() failed");
+-        return -1;
+-    }
++  if (!this) {
++    gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS,
++           "this is NULL. init() failed");
++    return -1;
++  }
+-    if (!this->parents) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+-               "Dangling volume. Check volfile");
+-        goto out;
+-    }
++  if (!this->parents) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
++           "Dangling volume. Check volfile");
++    goto out;
++  }
+-    if (!this->children || this->children->next) {
+-        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+-               "shard not configured with exactly one sub-volume. "
+-               "Check volfile");
+-        goto out;
+-    }
++  if (!this->children || this->children->next) {
++    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
++           "shard not configured with exactly one sub-volume. "
++           "Check volfile");
++    goto out;
++  }
+-    priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t);
+-    if (!priv)
+-        goto out;
++  priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t);
++  if (!priv)
++    goto out;
+-    GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out);
++  GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out);
+-    GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out);
++  GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out);
+-    GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out);
++  GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out);
+-    this->local_pool = mem_pool_new(shard_local_t, 128);
+-    if (!this->local_pool) {
+-        ret = -1;
+-        goto out;
+-    }
+-    gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid);
+-    gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid);
++  this->local_pool = mem_pool_new(shard_local_t, 128);
++  if (!this->local_pool) {
++    ret = -1;
++    goto out;
++  }
++  gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid);
++  gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid);
+-    this->private = priv;
+-    LOCK_INIT(&priv->lock);
+-    INIT_LIST_HEAD(&priv->ilist_head);
+-    ret = 0;
++  this->private = priv;
++  LOCK_INIT(&priv->lock);
++  INIT_LIST_HEAD(&priv->ilist_head);
++  ret = 0;
+ out:
+-    if (ret) {
+-        GF_FREE(priv);
+-        mem_pool_destroy(this->local_pool);
+-    }
++  if (ret) {
++    GF_FREE(priv);
++    mem_pool_destroy(this->local_pool);
++  }
+-    return ret;
++  return ret;
+ }
+-fini(xlator_t *this)
+-    shard_priv_t *priv = NULL;
++void fini(xlator_t *this) {
++  shard_priv_t *priv = NULL;
+-    GF_VALIDATE_OR_GOTO("shard", this, out);
++  GF_VALIDATE_OR_GOTO("shard", this, out);
+-    mem_pool_destroy(this->local_pool);
+-    this->local_pool = NULL;
++  mem_pool_destroy(this->local_pool);
++  this->local_pool = NULL;
+-    priv = this->private;
+-    if (!priv)
+-        goto out;
++  priv = this->private;
++  if (!priv)
++    goto out;
+-    this->private = NULL;
+-    LOCK_DESTROY(&priv->lock);
+-    GF_FREE(priv);
++  this->private = NULL;
++  LOCK_DESTROY(&priv->lock);
++  GF_FREE(priv);
+ out:
+-    return;
++  return;
+ }
+-reconfigure(xlator_t *this, dict_t *options)
+-    int ret = -1;
+-    shard_priv_t *priv = NULL;
++int reconfigure(xlator_t *this, dict_t *options) {
++  int ret = -1;
++  shard_priv_t *priv = NULL;
+-    priv = this->private;
++  priv = this->private;
+-    GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out);
++  GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out);
+-    GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options,
+-                     uint32, out);
+-    ret = 0;
++  GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32,
++                   out);
++  ret = 0;
+ out:
+-    return ret;
++  return ret;
+ }
+-shard_forget(xlator_t *this, inode_t *inode)
+-    uint64_t ctx_uint = 0;
+-    shard_inode_ctx_t *ctx = NULL;
+-    shard_priv_t *priv = NULL;
++int shard_forget(xlator_t *this, inode_t *inode) {
++  uint64_t ctx_uint = 0;
++  shard_inode_ctx_t *ctx = NULL;
++  shard_priv_t *priv = NULL;
+-    priv = this->private;
+-    if (!priv)
+-        return 0;
++  priv = this->private;
++  if (!priv)
++    return 0;
+-    inode_ctx_del(inode, this, &ctx_uint);
+-    if (!ctx_uint)
+-        return 0;
++  inode_ctx_del(inode, this, &ctx_uint);
++  if (!ctx_uint)
++    return 0;
+-    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-    /* When LRU limit reaches inode will be forcefully removed from the
+-     * table, inode needs to be removed from LRU of shard as well.
+-     */
+-    if (!list_empty(&ctx->ilist)) {
+-        LOCK(&priv->lock);
+-        {
+-            list_del_init(&ctx->ilist);
+-            priv->inode_count--;
+-        }
+-        UNLOCK(&priv->lock);
++  /* When LRU limit reaches inode will be forcefully removed from the
++   * table, inode needs to be removed from LRU of shard as well.
++   */
++  if (!list_empty(&ctx->ilist)) {
++    LOCK(&priv->lock);
++    {
++      list_del_init(&ctx->ilist);
++      priv->inode_count--;
+     }
+-    GF_FREE(ctx);
++    UNLOCK(&priv->lock);
++  }
++  GF_FREE(ctx);
+-    return 0;
++  return 0;
+ }
+-shard_release(xlator_t *this, fd_t *fd)
+-    /* TBD */
+-    return 0;
++int shard_release(xlator_t *this, fd_t *fd) {
++  /* TBD */
++  return 0;
+ }
+-shard_priv_dump(xlator_t *this)
+-    shard_priv_t *priv = NULL;
+-    char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+-        0,
+-    };
+-    char *str = NULL;
++int shard_priv_dump(xlator_t *this) {
++  shard_priv_t *priv = NULL;
++  char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
++      0,
++  };
++  char *str = NULL;
+-    priv = this->private;
++  priv = this->private;
+-    snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+-    gf_proc_dump_add_section("%s", key_prefix);
+-    str = gf_uint64_2human_readable(priv->block_size);
+-    gf_proc_dump_write("shard-block-size", "%s", str);
+-    gf_proc_dump_write("inode-count", "%d", priv->inode_count);
+-    gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head);
+-    gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit);
++  snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
++  gf_proc_dump_add_section("%s", key_prefix);
++  str = gf_uint64_2human_readable(priv->block_size);
++  gf_proc_dump_write("shard-block-size", "%s", str);
++  gf_proc_dump_write("inode-count", "%d", priv->inode_count);
++  gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head);
++  gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit);
+-    GF_FREE(str);
++  GF_FREE(str);
+-    return 0;
++  return 0;
+ }
+-shard_releasedir(xlator_t *this, fd_t *fd)
+-    return 0;
++int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; }
+ struct xlator_fops fops = {
+     .lookup = shard_lookup,
diff --git a/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch b/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch
new file mode 100644
index 0000000..df971b8
--- /dev/null
+++ b/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch
@@ -0,0 +1,162 @@
+From 562283ad34021bbf4fc540127ee7072d5152d34d Mon Sep 17 00:00:00 2001
+From: Yuval Turgeman <>
+Date: Wed, 24 Jul 2019 16:42:22 +0300
+Subject: [PATCH 336/336] spec: check and return exit code in rpm scripts
+lua's error() call expects a value as its second argument, and this is
+taken from the `val` variable, while the `ok` is boolean.  This causes
+the rpm scripts to fail on:
+bad argument #2 to 'error' (number expected, got boolean)
+BUG: 1768786
+Change-Id: I9c6b1f62ebf15dbc93196d018bc1fd628b36fc33
+>Signed-off-by: Yuval Turgeman <>
+Reviewed-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ glusterfs.spec.in | 55 +++++++++++++++++++++++++++++++++----------------------
+ 1 file changed, 33 insertions(+), 22 deletions(-)
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 91180db..1b975b2 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1572,8 +1572,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+@@ -1606,8 +1607,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+@@ -1640,8 +1642,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+@@ -1674,8 +1677,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+@@ -1707,8 +1711,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+@@ -1740,8 +1745,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+@@ -1775,8 +1781,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+ %endif
+@@ -1810,8 +1817,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+@@ -1845,8 +1853,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+ %endif
+@@ -1881,8 +1890,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+ %endif
+@@ -1916,8 +1926,9 @@ fi
+ ]]
+ ok, how, val = os.execute(script)
+-if not (ok == 0) then
+-   error("Detected running glusterfs processes", ok)
++rc = val or ok
++if not (rc == 0) then
++   error("Detected running glusterfs processes", rc)
+ end
+ %posttrans server
diff --git a/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch b/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch
new file mode 100644
index 0000000..b18ef4f
--- /dev/null
+++ b/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch
@@ -0,0 +1,455 @@
+From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001
+From: N Balachandran <>
+Date: Fri, 9 Aug 2019 14:34:22 +0530
+Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size
+If the glusterfs fuse client process is unable to
+process the invalidate requests quickly enough, the
+number of such requests quickly grows large enough
+to use a significant amount of memory.
+We are now introducing another option to set an upper
+limit on these to prevent runaway memory usage.
+> Upstream
+> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788
+> Fixes: bz#1732717
+> Signed-off-by: N Balachandran <>
+BUG: 1763208
+Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4
+Signed-off-by: Csaba Henk <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ doc/mount.glusterfs.8                       |  5 +++
+ glusterfsd/src/glusterfsd.c                 | 21 ++++++++++
+ glusterfsd/src/glusterfsd.h                 |  3 +-
+ libglusterfs/src/glusterfs/glusterfs.h      |  1 +
+ libglusterfs/src/glusterfs/inode.h          |  1 +
+ libglusterfs/src/inode.c                    | 31 +++++++++++----
+ xlators/mount/fuse/src/fuse-bridge.c        | 60 ++++++++++++++++++++++-------
+ xlators/mount/fuse/src/fuse-bridge.h        |  3 +-
+ xlators/mount/fuse/utils/ |  7 ++++
+ 9 files changed, 108 insertions(+), 24 deletions(-)
+diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
+index 286631b..b35b362 100644
+--- a/doc/mount.glusterfs.8
++++ b/doc/mount.glusterfs.8
+@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None]
+ Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
+ .TP
+ .TP
++Suspend fuse invalidations implied by 'lru-limit' if  number of outstanding
++invalidations reaches N
+ \fBbackground-qlen=\fRN
+ Set fuse module's background queue length to N [default: 64]
+ .TP
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 5b5e996..0856471 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = {
+     {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
+      "Set fuse module's limit for number of inodes kept in LRU list to N "
+      "[default: 131072]"},
++    {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
++     "Suspend inode invalidations implied by 'lru-limit' if the number of "
++     "outstanding invalidations reaches N"},
+     {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
+      "Set fuse module's background queue length to N "
+      "[default: 64]"},
+@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
+         }
+     }
++    if (cmd_args->invalidate_limit >= 0) {
++        ret = dict_set_int32(options, "invalidate-limit",
++                             cmd_args->invalidate_limit);
++        if (ret < 0) {
++            gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
++                   "invalidate-limit");
++            goto err;
++        }
++    }
+     if (cmd_args->background_qlen) {
+         ret = dict_set_int32(options, "background-qlen",
+                              cmd_args->background_qlen);
+@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state)
+             argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
+             break;
++            if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
++                break;
++            argp_failure(state, -1, 0, "unknown invalidate limit option %s",
++                         arg);
++            break;
+             if (!gf_string2int(arg, &cmd_args->background_qlen))
+                 break;
+diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
+index fa55789..ee655f0 100644
+--- a/glusterfsd/src/glusterfsd.h
++++ b/glusterfsd/src/glusterfsd.h
+@@ -111,7 +111,8 @@ enum argp_option_keys {
+-    ARGP_BRICK_MUX_KEY = 192
++    ARGP_BRICK_MUX_KEY = 192,
+ };
+ struct _gfd_vol_top_priv {
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index 79c93ae..3b594c0 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -541,6 +541,7 @@ struct _cmd_args {
+     int client_pid_set;
+     unsigned uid_map_root;
+     int32_t lru_limit;
++    int32_t invalidate_limit;
+     int background_qlen;
+     int congestion_threshold;
+     char *fuse_mountopts;
+diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
+index 52efdd8..4421c47 100644
+--- a/libglusterfs/src/glusterfs/inode.h
++++ b/libglusterfs/src/glusterfs/inode.h
+@@ -107,6 +107,7 @@ struct _inode {
+     struct list_head list;        /* active/lru/purge */
+     struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
++    bool in_invalidate_list; /* Set if inode is in table invalidate list */
+     bool invalidate_sent;    /* Set it if invalidator_fn is called for inode */
+ };
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 96ddea5..5331e93 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear)
+     this = THIS;
+-    if (clear && inode->invalidate_sent) {
+-        inode->invalidate_sent = false;
++    if (clear && inode->in_invalidate_list) {
++        inode->in_invalidate_list = false;
+         inode->table->invalidate_size--;
+         __inode_activate(inode);
+     }
+@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear)
+         inode->_ctx[index].ref--;
+     }
+-    if (!inode->ref && !inode->invalidate_sent) {
++    if (!inode->ref && !inode->in_invalidate_list) {
+         inode->table->active_size--;
+         nlookup = GF_ATOMIC_GET(inode->nlookup);
+@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate)
+         return inode;
+     if (!inode->ref) {
+-        if (inode->invalidate_sent) {
+-            inode->invalidate_sent = false;
++        if (inode->in_invalidate_list) {
++            inode->in_invalidate_list = false;
+             inode->table->invalidate_size--;
+         } else {
+             inode->table->lru_size--;
+         }
+         if (is_invalidate) {
+-            inode->invalidate_sent = true;
++            inode->in_invalidate_list = true;
+             inode->table->invalidate_size++;
+             list_move_tail(&inode->list, &inode->table->invalidate);
+         } else {
+@@ -1609,6 +1609,7 @@ static int
+ inode_table_prune(inode_table_t *table)
+ {
+     int ret = 0;
++    int ret1 = 0;
+     struct list_head purge = {
+         0,
+     };
+@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table)
+                 /* check for valid inode with 'nlookup' */
+                 nlookup = GF_ATOMIC_GET(entry->nlookup);
+                 if (nlookup) {
++                    if (entry->invalidate_sent) {
++                        list_move_tail(&entry->list, &table->lru);
++                        continue;
++                    }
+                     __inode_ref(entry, true);
+                     tmp = entry;
+                     break;
+@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table)
+     if (tmp) {
+         xlator_t *old_THIS = THIS;
+         THIS = table->invalidator_xl;
+-        table->invalidator_fn(table->invalidator_xl, tmp);
++        ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
+         THIS = old_THIS;
+-        inode_unref(tmp);
++        pthread_mutex_lock(&table->lock);
++        {
++            if (!ret1) {
++                tmp->invalidate_sent = true;
++                __inode_unref(tmp, false);
++            } else {
++                /* Move this back to the lru list*/
++                __inode_unref(tmp, true);
++            }
++        }
++        pthread_mutex_unlock(&table->lock);
+     }
+     /* Just so that if purge list is handled too, then clear it off */
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 1c946a2..8b2e7f0 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log;
+ void
+ fini(xlator_t *this_xl);
+-static void
++static int32_t
+ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
+ /*
+@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
+ #define send_fuse_obj(this, finh, obj)                                         \
+     send_fuse_data(this, finh, obj, sizeof(*(obj)))
+-static void
++static int32_t
+ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
+ {
+@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
+     priv = this->private;
+     if (!priv->reverse_fuse_thread_started)
+-        return;
++        return -1;
++    if (priv->invalidate_limit &&
++        (priv->invalidate_count >= priv->invalidate_limit)) {
++        return -1;
++    }
+     inode = (inode_t *)(unsigned long)fuse_ino;
+     if (inode == NULL)
+-        return;
++        return -1;
+     list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
+     {
+         node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
+         if (node == NULL)
+-            break;
++            return -1;
+         INIT_LIST_HEAD(&node->next);
+@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
+         pthread_mutex_lock(&priv->invalidate_mutex);
+         {
+             list_add_tail(&node->next, &priv->invalidate_list);
++            priv->invalidate_count++;
+             pthread_cond_signal(&priv->invalidate_cond);
+         }
+         pthread_mutex_unlock(&priv->invalidate_mutex);
+     }
+ #endif
+-    return;
++    return 0;
+ }
+ /*
+  * Send an inval inode notification to fuse. This causes an invalidation of the
+  * entire page cache mapping on the inode.
+  */
+-static void
++static int32_t
+ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+ {
+@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+     priv = this->private;
+     if (!priv->reverse_fuse_thread_started)
+-        return;
++        return -1;
++    if (priv->invalidate_limit &&
++        (priv->invalidate_count >= priv->invalidate_limit)) {
++        return -1;
++    }
+     inode = (inode_t *)(unsigned long)fuse_ino;
+     if (inode == NULL)
+-        return;
++        return -1;
+     node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
+     if (node == NULL)
+-        return;
++        return -1;
+     INIT_LIST_HEAD(&node->next);
+@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+     pthread_mutex_lock(&priv->invalidate_mutex);
+     {
+         list_add_tail(&node->next, &priv->invalidate_list);
++        priv->invalidate_count++;
+         pthread_cond_signal(&priv->invalidate_cond);
+     }
+     pthread_mutex_unlock(&priv->invalidate_mutex);
+@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+     gf_log("glusterfs-fuse", GF_LOG_WARNING,
+            "fuse_invalidate_inode not implemented on this system");
+ #endif
+-    return;
++    return 0;
+ }
+@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+ static int32_t
+ fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
+ {
+-    fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
+-    return 0;
++    int32_t ret = 0;
++    ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
++    return ret;
+ }
+ #endif
+@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
+         gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
+                finh->nodeid);
+-        fuse_invalidate_entry(this, finh->nodeid);
++        ret = fuse_invalidate_entry(this, finh->nodeid);
++        if (ret)
++            op_errno = EBUSY;
+ #endif
+         goto done;
+     }
+@@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data)
+                               fuse_invalidate_node_t, next);
+             list_del_init(&node->next);
++            priv->invalidate_count--;
+         }
+         pthread_mutex_unlock(&priv->invalidate_mutex);
+@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data)
+             list_del_init(&node->next);
+             GF_FREE(node);
+         }
++        priv->invalidate_count = 0;
+     }
+     pthread_mutex_unlock(&priv->invalidate_mutex);
+@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this)
+                        (int)private->timed_response_fuse_thread_started);
+     gf_proc_dump_write("reverse_thread_started", "%d",
+                        (int)private->reverse_fuse_thread_started);
++    gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
++    gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
++                       private->invalidate_count);
+     gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
+     return 0;
+@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl)
+     GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
++    GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
++                   cleanup_exit);
+     GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
+     GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
+@@ -6955,6 +6978,15 @@ struct volume_options options[] = {
+                        "reaching this limit (0 means 'unlimited')",
+     },
+     {
++        .key = {"invalidate-limit"},
++        .type = GF_OPTION_TYPE_INT,
++        .default_value = "0",
++        .min = 0,
++        .description = "suspend invalidations as of 'lru-limit' if the number "
++                       "of outstanding invalidations reaches this limit "
++                       "(0 means 'unlimited')",
++    },
++    {
+         .key = {"auto-invalidation"},
+         .type = GF_OPTION_TYPE_BOOL,
+         .default_value = "true",
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
+index 697bd88..2311582 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.h
++++ b/xlators/mount/fuse/src/fuse-bridge.h
+@@ -139,7 +139,7 @@ struct fuse_private {
+     pthread_cond_t invalidate_cond;
+     pthread_mutex_t invalidate_mutex;
+     gf_boolean_t reverse_fuse_thread_started;
++    uint64_t invalidate_count;
+     /* For communicating with separate mount thread. */
+     int status_pipe[2];
+@@ -191,6 +191,7 @@ struct fuse_private {
+     /* LRU Limit, if not set, default is 128k for now */
+     uint32_t lru_limit;
++    uint32_t invalidate_limit;
+ };
+ typedef struct fuse_private fuse_private_t;
+diff --git a/xlators/mount/fuse/utils/ b/xlators/mount/fuse/utils/
+index cbde42d..61d7422 100755
+--- a/xlators/mount/fuse/utils/
++++ b/xlators/mount/fuse/utils/
+@@ -257,6 +257,10 @@ start_glusterfs ()
+         cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
+     fi
++    if [ -n "$invalidate_limit" ]; then
++        cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit");
++    fi
+     if [ -n "$bg_qlen" ]; then
+         cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
+     fi
+@@ -505,6 +509,9 @@ with_options()
+         "lru-limit")
+             lru_limit=$value
+             ;;
++        "invalidate-limit")
++            invalidate_limit=$value
++            ;;
+         "background-qlen")
+             bg_qlen=$value
+             ;;
diff --git a/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch b/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch
new file mode 100644
index 0000000..b108bd0
--- /dev/null
+++ b/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch
@@ -0,0 +1,83 @@
+From 6d2e12a53ef0bcbeea274c47537a0c707a3f7b1e Mon Sep 17 00:00:00 2001
+From: N Balachandran <>
+Date: Fri, 20 Sep 2019 13:30:42 +0530
+Subject: [PATCH 338/344] glusterfs/fuse: Reduce the default lru-limit value
+The current lru-limit value still uses memory for
+up to 128K inodes.
+Reduce the default value of lru-limit to 64K.
+> Upstream
+> Change-Id: Ica2dd4f8f5fde45cb5180d8f02c3d86114ac52b3
+> Fixes: bz#1753880
+> Signed-off-by: N Balachandran <>
+> Signed-off-by: Csaba Henk <>
+BUG: 1763208
+Change-Id: I04ab39b5278e702aacdceebfa5b63702b9f9703b
+Signed-off-by: Csaba Henk <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ doc/mount.glusterfs.8                | 2 +-
+ glusterfsd/src/glusterfsd.c          | 2 +-
+ xlators/mount/fuse/src/fuse-bridge.c | 2 +-
+ xlators/mount/fuse/src/fuse-bridge.h | 2 +-
+ 4 files changed, 4 insertions(+), 4 deletions(-)
+diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
+index b35b362..87a5669 100644
+--- a/doc/mount.glusterfs.8
++++ b/doc/mount.glusterfs.8
+@@ -123,7 +123,7 @@ Provide list of backup volfile servers in the following format [default: None]
+ .TP
+ .TP
+ \fBlru-limit=\fRN
+-Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
++Set fuse module's limit for number of inodes kept in LRU list to N [default: 65536]
+ .TP
+ .TP
+ \fBinvalidate-limit=\fRN
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
+index 0856471..974fb88 100644
+--- a/glusterfsd/src/glusterfsd.c
++++ b/glusterfsd/src/glusterfsd.c
+@@ -211,7 +211,7 @@ static struct argp_option gf_options[] = {
+      "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"},
+     {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
+      "Set fuse module's limit for number of inodes kept in LRU list to N "
+-     "[default: 131072]"},
++     "[default: 65536]"},
+     {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
+      "Suspend inode invalidations implied by 'lru-limit' if the number of "
+      "outstanding invalidations reaches N"},
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 8b2e7f0..ebe5c28 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -6972,7 +6972,7 @@ struct volume_options options[] = {
+     {
+         .key = {"lru-limit"},
+         .type = GF_OPTION_TYPE_INT,
+-        .default_value = "131072",
++        .default_value = "65536",
+         .min = 0,
+         .description = "makes glusterfs invalidate kernel inodes after "
+                        "reaching this limit (0 means 'unlimited')",
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
+index 2311582..cf4479c 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.h
++++ b/xlators/mount/fuse/src/fuse-bridge.h
+@@ -189,7 +189,7 @@ struct fuse_private {
+     gf_boolean_t flush_handle_interrupt;
+     gf_boolean_t fuse_auto_inval;
+-    /* LRU Limit, if not set, default is 128k for now */
++    /* LRU Limit, if not set, default is 64k for now */
+     uint32_t lru_limit;
+     uint32_t invalidate_limit;
+ };
diff --git a/SOURCES/0339-geo-rep-fix-integer-config-validation.patch b/SOURCES/0339-geo-rep-fix-integer-config-validation.patch
new file mode 100644
index 0000000..45f3ede
--- /dev/null
+++ b/SOURCES/0339-geo-rep-fix-integer-config-validation.patch
@@ -0,0 +1,93 @@
+From 8b5b3b247a00515d3188453c27b0ba749e93d325 Mon Sep 17 00:00:00 2001
+From: Aravinda VK <>
+Date: Tue, 26 Mar 2019 13:20:13 +0530
+Subject: [PATCH 339/344] geo-rep: fix integer config validation
+ssh-port validation is specified as `validation=int` in the template
+`gsyncd.conf`, but this was not handled during geo-rep config set.
+upstream patch:
+Backport of:
+    >Fixes: bz#1692666
+    >Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6
+    >Signed-off-by: Aravinda VK <>
+    >Signed-off-by: Sunny Kumar <>
+BUG: 1782162
+Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6
+Signed-off-by: Sunny Kumar <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ geo-replication/syncdaemon/ | 23 ++++++++++++++++++-----
+ tests/00-geo-rep/georep-basic-dr-rsync.t   |  3 +++
+ 2 files changed, 21 insertions(+), 5 deletions(-)
+diff --git a/geo-replication/syncdaemon/ b/geo-replication/syncdaemon/
+index f823311..8848071 100644
+--- a/geo-replication/syncdaemon/
++++ b/geo-replication/syncdaemon/
+@@ -329,6 +329,9 @@ class Gconf(object):
+         if item["validation"] == "unixtime":
+             return validate_unixtime(value)
++        if item["validation"] == "int":
++            return validate_int(value)
+         return False
+     def _is_config_changed(self):
+@@ -381,6 +384,14 @@ def config_upgrade(config_file, ret):
+         config.write(configfile)
++def validate_int(value):
++    try:
++        _ = int(value)
++        return True
++    except ValueError:
++        return False
+ def validate_unixtime(value):
+     try:
+         y = datetime.fromtimestamp(int(value)).strftime("%Y")
+@@ -393,11 +404,13 @@ def validate_unixtime(value):
+ def validate_minmax(value, minval, maxval):
+-    value = int(value)
+-    minval = int(minval)
+-    maxval = int(maxval)
+-    return value >= minval and value <= maxval
++    try:
++        value = int(value)
++        minval = int(minval)
++        maxval = int(maxval)
++        return value >= minval and value <= maxval
++    except ValueError:
++        return False
+ def validate_choice(value, allowed_values):
+diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
+index b432635..b6fbf18 100644
+--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
+@@ -71,6 +71,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created"
+ #Config gluster-command-dir
+ TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
++#Config Set ssh-port to validate int validation
++TEST $GEOREP_CLI $master $slave config ssh-port 22
+ #Config gluster-command-dir
+ TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
diff --git a/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch b/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch
new file mode 100644
index 0000000..54b2706
--- /dev/null
+++ b/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch
@@ -0,0 +1,46 @@
+From 0c996d6c40c625f8a0ee6be2c220c89aaf70c840 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Tue, 10 Dec 2019 08:35:23 +0530
+Subject: [PATCH 340/344] rpc: event_slot_alloc converted infinite loop after
+ reach slot_used to 1024
+Problem: Commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2 missed one
+         condition for exiting the loop, so once slots_used reaches
+         1024 the loop becomes an infinite loop
+Solution: Correct the code path to avoid the infinite loop
+> Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db
+> Fixes: bz#1781440
+> Credits: Xavi Hernandez <>
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry picked from commit 8030f9c0f092170ceb50cedf59b9c330022825b7)
+> (Reviewed on upstream link
+Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db
+BUG: 1781444
+Credits: Xavi Hernandez <>
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Xavi Hernandez Juan <>
+ libglusterfs/src/event-epoll.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
+index 65f5efd..5afb2f2 100644
+--- a/libglusterfs/src/event-epoll.c
++++ b/libglusterfs/src/event-epoll.c
+@@ -92,7 +92,7 @@ retry:
+     while (i < EVENT_EPOLL_TABLES) {
+         switch (event_pool->slots_used[i]) {
+             case EVENT_EPOLL_SLOTS:
+-                continue;
++                break;
+             case 0:
+                 if (!event_pool->ereg[i]) {
+                     table = __event_newtable(event_pool, i);
diff --git a/SOURCES/0341-socket-fix-error-handling.patch b/SOURCES/0341-socket-fix-error-handling.patch
new file mode 100644
index 0000000..0eb68d1
--- /dev/null
+++ b/SOURCES/0341-socket-fix-error-handling.patch
@@ -0,0 +1,742 @@
+From 2c99b7db00a6238fd43053dd672c8ce519d8fd27 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <>
+Date: Wed, 11 Dec 2019 18:21:14 +0100
+Subject: [PATCH 341/344] socket: fix error handling
+When __socket_proto_state_machine() detected a problem in the size of
+the request or it couldn't allocate an iobuf of the requested size, it
+returned -ENOMEM (-12). However the caller was expecting only -1 in
+case of error. For this reason the error passes undetected initially,
+adding back the socket to the epoll object. On further processing,
+however, the error is finally detected and the connection terminated.
+Meanwhile, another thread could receive a poll_in event from the same
+connection, which could cause races with the connection destruction.
+When this happened, the process crashed.
+To fix this, all error detection conditions have been hardened to be
+more strict on what is valid and what not. Also, we don't return
+-ENOMEM anymore. We always return -1 in case of error.
+An additional change has been done to prevent destruction of the
+transport object while it may still be needed.
+Upstream patch:
+> Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d
+> Upstream patch link:
+> Fixes: bz#1782495
+> Signed-off-by: Xavi Hernandez <>
+Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d
+BUG: 1779696
+Signed-off-by: Xavi Hernandez <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Raghavendra Gowdappa <>
+ rpc/rpc-transport/socket/src/socket.c | 173 ++++++++++++++++++----------------
+ 1 file changed, 90 insertions(+), 83 deletions(-)
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index bf2fa71..f54ca83 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -173,7 +173,7 @@ ssl_setup_connection_params(rpc_transport_t *this);
+                                                                                \
+         ret = __socket_readv(this, in->pending_vector, 1, &in->pending_vector, \
+                              &in->pending_count, &bytes_read);                 \
+-        if (ret == -1)                                                         \
++        if (ret < 0)                                                           \
+             break;                                                             \
+         __socket_proto_update_priv_after_read(priv, ret, bytes_read);          \
+     }
+@@ -739,7 +739,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
+                 ret = sys_writev(sock, opvector, IOV_MIN(opcount));
+             }
+-            if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
++            if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) {
+                 /* done for now */
+                 break;
+             } else if (ret > 0)
+@@ -754,7 +754,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
+                 errno = ENODATA;
+                 ret = -1;
+             }
+-            if (ret == -1 && errno == EAGAIN) {
++            if ((ret < 0) && (errno == EAGAIN)) {
+                 /* done for now */
+                 break;
+             } else if (ret > 0)
+@@ -770,7 +770,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
+             errno = ENOTCONN;
+             break;
+         }
+-        if (ret == -1) {
++        if (ret < 0) {
+             if (errno == EINTR)
+                 continue;
+@@ -907,7 +907,7 @@ __socket_disconnect(rpc_transport_t *this)
+     gf_log(this->name, GF_LOG_TRACE, "disconnecting %p, sock=%d", this,
+            priv->sock);
+-    if (priv->sock != -1) {
++    if (priv->sock >= 0) {
+         gf_log_callingfn(this->name, GF_LOG_TRACE,
+                          "tearing down socket connection");
+         ret = __socket_teardown_connection(this);
+@@ -942,7 +942,7 @@ __socket_server_bind(rpc_transport_t *this)
+     ret = setsockopt(priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+-    if (ret == -1) {
++    if (ret != 0) {
+         gf_log(this->name, GF_LOG_ERROR,
+                "setsockopt() for SO_REUSEADDR failed (%s)", strerror(errno));
+     }
+@@ -955,7 +955,7 @@ __socket_server_bind(rpc_transport_t *this)
+         if (reuse_check_sock >= 0) {
+             ret = connect(reuse_check_sock, SA(&unix_addr),
+                           this->myinfo.sockaddr_len);
+-            if ((ret == -1) && (ECONNREFUSED == errno)) {
++            if ((ret != 0) && (ECONNREFUSED == errno)) {
+                 sys_unlink(((struct sockaddr_un *)&unix_addr)->sun_path);
+             }
+             gf_log(this->name, GF_LOG_INFO,
+@@ -967,7 +967,7 @@ __socket_server_bind(rpc_transport_t *this)
+     ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+                this->myinfo.sockaddr_len);
+-    if (ret == -1) {
++    if (ret != 0) {
+         gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
+                this->myinfo.identifier, strerror(errno));
+         if (errno == EADDRINUSE) {
+@@ -976,7 +976,7 @@ __socket_server_bind(rpc_transport_t *this)
+     }
+     if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+         if (getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+-                        &this->myinfo.sockaddr_len) == -1) {
++                        &this->myinfo.sockaddr_len) != 0) {
+             gf_log(this->name, GF_LOG_WARNING,
+                    "getsockname on (%d) failed (%s)", priv->sock,
+                    strerror(errno));
+@@ -1004,7 +1004,7 @@ __socket_nonblock(int fd)
+     flags = fcntl(fd, F_GETFL);
+-    if (flags != -1)
++    if (flags >= 0)
+         ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+     return ret;
+@@ -1034,7 +1034,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ #endif
+     ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+-    if (ret == -1) {
++    if (ret != 0) {
+         gf_log("socket", GF_LOG_WARNING,
+                "failed to set keep alive option on socket %d", fd);
+         goto err;
+@@ -1051,7 +1051,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+     ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepaliveintvl,
+                      sizeof(keepaliveintvl));
+ #endif
+-    if (ret == -1) {
++    if (ret != 0) {
+         gf_log("socket", GF_LOG_WARNING,
+                "failed to set keep alive interval on socket %d", fd);
+         goto err;
+@@ -1062,7 +1062,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+     ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepaliveidle,
+                      sizeof(keepaliveidle));
+-    if (ret == -1) {
++    if (ret != 0) {
+         gf_log("socket", GF_LOG_WARNING,
+                "failed to set keep idle %d on socket %d, %s", keepaliveidle, fd,
+                strerror(errno));
+@@ -1070,7 +1070,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+     }
+     ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepaliveintvl,
+                      sizeof(keepaliveintvl));
+-    if (ret == -1) {
++    if (ret != 0) {
+         gf_log("socket", GF_LOG_WARNING,
+                "failed to set keep interval %d on socket %d, %s",
+                keepaliveintvl, fd, strerror(errno));
+@@ -1082,7 +1082,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+         goto done;
+     ret = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_ms,
+                      sizeof(timeout_ms));
+-    if (ret == -1) {
++    if (ret != 0) {
+         gf_log("socket", GF_LOG_WARNING,
+                "failed to set "
+                "TCP_USER_TIMEOUT %d on socket %d, %s",
+@@ -1093,7 +1093,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ #if defined(TCP_KEEPCNT)
+     ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepalivecnt,
+                      sizeof(keepalivecnt));
+-    if (ret == -1) {
++    if (ret != 0) {
+         gf_log("socket", GF_LOG_WARNING,
+                "failed to set "
+                "TCP_KEEPCNT %d on socket %d, %s",
+@@ -1366,7 +1366,7 @@ socket_event_poll_err(rpc_transport_t *this, int gen, int idx)
+     pthread_mutex_lock(&priv->out_lock);
+     {
+-        if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock != -1)) {
++        if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock >= 0)) {
+             __socket_ioq_flush(this);
+             __socket_reset(this);
+             socket_closed = _gf_true;
+@@ -1405,7 +1405,7 @@ socket_event_poll_out(rpc_transport_t *this)
+         if (priv->connected == 1) {
+             ret = __socket_ioq_churn(this);
+-            if (ret == -1) {
++            if (ret < 0) {
+                 gf_log(this->name, GF_LOG_TRACE,
+                        "__socket_ioq_churn returned -1; "
+                        "disconnecting socket");
+@@ -1463,7 +1463,7 @@ __socket_read_simple_msg(rpc_transport_t *this)
+                                      &bytes_read);
+             }
+-            if (ret == -1) {
++            if (ret < 0) {
+                 gf_log(this->name, GF_LOG_WARNING,
+                        "reading from socket failed. Error (%s), "
+                        "peer (%s)",
+@@ -1661,8 +1661,8 @@ __socket_read_vectored_request(rpc_transport_t *this,
+             remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+-            if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+-                                RPC_LASTFRAG(in->fraghdr))) {
++            if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++                              RPC_LASTFRAG(in->fraghdr))) {
+                 request->vector_state = SP_STATE_VECTORED_REQUEST_INIT;
+                 in->payload_vector.iov_len = ((unsigned long)frag->fragcurrent -
+                                               (unsigned long)
+@@ -1739,8 +1739,8 @@ __socket_read_request(rpc_transport_t *this)
+             remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+-            if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+-                                (RPC_LASTFRAG(in->fraghdr)))) {
++            if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++                              (RPC_LASTFRAG(in->fraghdr)))) {
+                 request->header_state = SP_STATE_REQUEST_HEADER_INIT;
+             }
+@@ -1870,8 +1870,8 @@ __socket_read_accepted_successful_reply(rpc_transport_t *this)
+             /* now read the entire remaining msg into new iobuf */
+             ret = __socket_read_simple_msg(this);
+             remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+-            if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+-                                RPC_LASTFRAG(in->fraghdr))) {
++            if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++                              RPC_LASTFRAG(in->fraghdr))) {
+                 frag->call_body.reply.accepted_success_state =
+             }
+@@ -2003,8 +2003,8 @@ __socket_read_accepted_successful_reply_v2(rpc_transport_t *this)
+             /* now read the entire remaining msg into new iobuf */
+             ret = __socket_read_simple_msg(this);
+             remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+-            if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+-                                RPC_LASTFRAG(in->fraghdr))) {
++            if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++                              RPC_LASTFRAG(in->fraghdr))) {
+                 frag->call_body.reply.accepted_success_state =
+             }
+@@ -2103,8 +2103,8 @@ __socket_read_accepted_reply(rpc_transport_t *this)
+             remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+-            if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+-                                (RPC_LASTFRAG(in->fraghdr)))) {
++            if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++                              (RPC_LASTFRAG(in->fraghdr)))) {
+                 frag->call_body.reply
+                     .accepted_state = SP_STATE_ACCEPTED_REPLY_INIT;
+             }
+@@ -2169,8 +2169,8 @@ __socket_read_vectored_reply(rpc_transport_t *this)
+             remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+-            if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+-                                (RPC_LASTFRAG(in->fraghdr)))) {
++            if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++                              (RPC_LASTFRAG(in->fraghdr)))) {
+                 frag->call_body.reply
+                     .status_state = SP_STATE_VECTORED_REPLY_STATUS_INIT;
+                 in->payload_vector.iov_len = (unsigned long)frag->fragcurrent -
+@@ -2237,7 +2237,7 @@ __socket_read_reply(rpc_transport_t *this)
+         /* Transition back to externally visible state. */
+         frag->state = SP_STATE_READ_MSGTYPE;
+-        if (ret == -1) {
++        if (ret < 0) {
+             gf_log(this->name, GF_LOG_WARNING,
+                    "notify for event MAP_XID failed for %s",
+                    this->peerinfo.identifier);
+@@ -2315,8 +2315,8 @@ __socket_read_frag(rpc_transport_t *this)
+             remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+-            if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
+-                                (RPC_LASTFRAG(in->fraghdr)))) {
++            if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
++                              (RPC_LASTFRAG(in->fraghdr)))) {
+                 /* frag->state = SP_STATE_NADA; */
+                 frag->state = SP_STATE_RPCFRAG_INIT;
+             }
+@@ -2400,7 +2400,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
+                 ret = __socket_readv(this, in->pending_vector, 1,
+                                      &in->pending_vector, &in->pending_count,
+                                      NULL);
+-                if (ret == -1)
++                if (ret < 0)
+                     goto out;
+                 if (ret > 0) {
+@@ -2422,7 +2422,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
+                 in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr);
+                 if (in->total_bytes_read >= GF_UNIT_GB) {
+-                    ret = -ENOMEM;
++                    ret = -1;
+                     goto out;
+                 }
+@@ -2430,7 +2430,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
+                     this->ctx->iobuf_pool,
+                     (in->total_bytes_read + sizeof(in->fraghdr)));
+                 if (!iobuf) {
+-                    ret = -ENOMEM;
++                    ret = -1;
+                     goto out;
+                 }
+@@ -2457,7 +2457,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
+             case SP_STATE_READING_FRAG:
+                 ret = __socket_read_frag(this);
+-                if ((ret == -1) ||
++                if ((ret < 0) ||
+                     (frag->bytes_read != RPC_FRAGSIZE(in->fraghdr))) {
+                     goto out;
+                 }
+@@ -2575,7 +2575,7 @@ socket_event_poll_in(rpc_transport_t *this, gf_boolean_t notify_handled)
+         pthread_mutex_unlock(&priv->notify.lock);
+     }
+-    if (notify_handled && (ret != -1))
++    if (notify_handled && (ret >= 0))
+         event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen);
+     if (pollin) {
+@@ -2618,10 +2618,10 @@ socket_connect_finish(rpc_transport_t *this)
+         ret = __socket_connect_finish(priv->sock);
+-        if (ret == -1 && errno == EINPROGRESS)
++        if ((ret < 0) && (errno == EINPROGRESS))
+             ret = 1;
+-        if (ret == -1 && errno != EINPROGRESS) {
++        if ((ret < 0) && (errno != EINPROGRESS)) {
+             if (!priv->connect_finish_log) {
+                 gf_log(this->name, GF_LOG_ERROR,
+                        "connection to %s failed (%s); "
+@@ -2640,7 +2640,7 @@ socket_connect_finish(rpc_transport_t *this)
+             ret = getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+                               &this->myinfo.sockaddr_len);
+-            if (ret == -1) {
++            if (ret != 0) {
+                 gf_log(this->name, GF_LOG_WARNING,
+                        "getsockname on (%d) failed (%s) - "
+                        "disconnecting socket",
+@@ -2924,6 +2924,13 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+         return;
+     }
++    /* At this point we are sure no other thread is using the transport because
++     * we cannot receive more events until we call event_handled(). However,
++     * this function may itself call event_handled() in some cases. Once that
++     * happens, the transport may be destroyed at any moment if another thread
++     * handles an error event. To prevent that, we take a reference here. */
++    rpc_transport_ref(this);
+     GF_VALIDATE_OR_GOTO("socket", this, out);
+     GF_VALIDATE_OR_GOTO("socket", this->private, out);
+     GF_VALIDATE_OR_GOTO("socket", this->xl, out);
+@@ -2960,7 +2967,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+             if (ret > 0) {
+                 gf_log(this->name, GF_LOG_TRACE,
+                        "(sock:%d) returning to wait on socket", priv->sock);
+-                return;
++                goto out;
+             }
+         } else {
+             char *sock_type = (priv->is_server ? "Server" : "Client");
+@@ -3015,7 +3022,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+     }
+ out:
+-    return;
++    rpc_transport_unref(this);
+ }
+ static void
+@@ -3074,7 +3081,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+         event_handled(ctx->event_pool, fd, idx, gen);
+-        if (new_sock == -1) {
++        if (new_sock < 0) {
+             gf_log(this->name, GF_LOG_WARNING, "accept on %d failed (%s)",
+                    priv->sock, strerror(errno));
+             goto out;
+@@ -3082,7 +3089,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+         if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) {
+             ret = __socket_nodelay(new_sock);
+-            if (ret == -1) {
++            if (ret != 0) {
+                 gf_log(this->name, GF_LOG_WARNING,
+                        "setsockopt() failed for "
+                        "NODELAY (%s)",
+@@ -3094,7 +3101,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+             ret = __socket_keepalive(new_sock, new_sockaddr.ss_family,
+                                      priv->keepaliveintvl, priv->keepaliveidle,
+                                      priv->keepalivecnt, priv->timeout);
+-            if (ret == -1)
++            if (ret != 0)
+                 gf_log(this->name, GF_LOG_WARNING,
+                        "Failed to set keep-alive: %s", strerror(errno));
+         }
+@@ -3110,7 +3117,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+         }
+         ret = pthread_mutex_init(&new_trans->lock, NULL);
+-        if (ret == -1) {
++        if (ret != 0) {
+             gf_log(this->name, GF_LOG_WARNING,
+                    "pthread_mutex_init() failed: %s; closing newly accepted "
+                    "socket %d",
+@@ -3130,7 +3137,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+         ret = getsockname(new_sock, SA(&new_trans->myinfo.sockaddr),
+                           &new_trans->myinfo.sockaddr_len);
+-        if (ret == -1) {
++        if (ret != 0) {
+             gf_log(this->name, GF_LOG_WARNING,
+                    "getsockname on socket %d "
+                    "failed (errno:%s); closing newly accepted socket",
+@@ -3237,7 +3244,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+              */
+             ret = rpc_transport_notify(this, RPC_TRANSPORT_ACCEPT, new_trans);
+-            if (ret != -1) {
++            if (ret >= 0) {
+                 new_priv->idx = event_register(
+                     ctx->event_pool, new_sock, socket_event_handler, new_trans,
+                     1, 0, new_trans->notify_poller_death);
+@@ -3275,7 +3282,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+             rpc_transport_unref(new_trans);
+         }
+-        if (ret == -1) {
++        if (ret < 0) {
+             gf_log(this->name, GF_LOG_WARNING, "closing newly accepted socket");
+             sys_close(new_sock);
+             /* this unref is to actually cause the destruction of
+@@ -3406,7 +3413,7 @@ socket_connect(rpc_transport_t *this, int port)
+     pthread_mutex_lock(&priv->out_lock);
+     {
+-        if (priv->sock != -1) {
++        if (priv->sock >= 0) {
+             gf_log_callingfn(this->name, GF_LOG_TRACE,
+                              "connect () called on transport "
+                              "already connected");
+@@ -3420,7 +3427,7 @@ socket_connect(rpc_transport_t *this, int port)
+         ret = socket_client_get_remote_sockaddr(this, &,
+                                                 &sockaddr_len, &sa_family);
+-        if (ret == -1) {
++        if (ret < 0) {
+             /* logged inside client_get_remote_sockaddr */
+             goto unlock;
+         }
+@@ -3439,7 +3446,7 @@ socket_connect(rpc_transport_t *this, int port)
+         this->peerinfo.sockaddr_len = sockaddr_len;
+         priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
+-        if (priv->sock == -1) {
++        if (priv->sock < 0) {
+             gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
+                    strerror(errno));
+             ret = -1;
+@@ -3451,7 +3458,7 @@ socket_connect(rpc_transport_t *this, int port)
+          */
+         if (priv->windowsize != 0) {
+             if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
+-                           sizeof(priv->windowsize)) < 0) {
++                           sizeof(priv->windowsize)) != 0) {
+                 gf_log(this->name, GF_LOG_ERROR,
+                        "setting receive window "
+                        "size failed: %d: %d: %s",
+@@ -3459,7 +3466,7 @@ socket_connect(rpc_transport_t *this, int port)
+             }
+             if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
+-                           sizeof(priv->windowsize)) < 0) {
++                           sizeof(priv->windowsize)) != 0) {
+                 gf_log(this->name, GF_LOG_ERROR,
+                        "setting send window size "
+                        "failed: %d: %d: %s",
+@@ -3484,7 +3491,7 @@ socket_connect(rpc_transport_t *this, int port)
+         if (priv->nodelay && (sa_family != AF_UNIX)) {
+             ret = __socket_nodelay(priv->sock);
+-            if (ret == -1) {
++            if (ret != 0) {
+                 gf_log(this->name, GF_LOG_ERROR, "NODELAY on %d failed (%s)",
+                        priv->sock, strerror(errno));
+             }
+@@ -3494,7 +3501,7 @@ socket_connect(rpc_transport_t *this, int port)
+             ret = __socket_keepalive(priv->sock, sa_family,
+                                      priv->keepaliveintvl, priv->keepaliveidle,
+                                      priv->keepalivecnt, priv->timeout);
+-            if (ret == -1)
++            if (ret != 0)
+                 gf_log(this->name, GF_LOG_ERROR, "Failed to set keep-alive: %s",
+                        strerror(errno));
+         }
+@@ -3516,7 +3523,7 @@ socket_connect(rpc_transport_t *this, int port)
+         ret = client_bind(this, SA(&this->myinfo.sockaddr),
+                           &this->myinfo.sockaddr_len, priv->sock);
+-        if (ret == -1) {
++        if (ret < 0) {
+             gf_log(this->name, GF_LOG_WARNING, "client bind failed: %s",
+                    strerror(errno));
+             goto handler;
+@@ -3525,7 +3532,7 @@ socket_connect(rpc_transport_t *this, int port)
+         /* make socket non-blocking for all types of sockets */
+         if (!priv->bio) {
+             ret = __socket_nonblock(priv->sock);
+-            if (ret == -1) {
++            if (ret != 0) {
+                 gf_log(this->name, GF_LOG_ERROR, "NBIO on %d failed (%s)",
+                        priv->sock, strerror(errno));
+                 goto handler;
+@@ -3552,7 +3559,7 @@ socket_connect(rpc_transport_t *this, int port)
+         connect_attempted = _gf_true;
+-        if (ret == -1 && errno == ENOENT && ign_enoent) {
++        if ((ret != 0) && (errno == ENOENT) && ign_enoent) {
+             gf_log(this->name, GF_LOG_WARNING,
+                    "Ignore failed connection attempt on %s, (%s) ",
+                    this->peerinfo.identifier, strerror(errno));
+@@ -3570,7 +3577,7 @@ socket_connect(rpc_transport_t *this, int port)
+             goto handler;
+         }
+-        if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
++        if ((ret != 0) && (errno != EINPROGRESS) && (errno != ENOENT)) {
+             /* For unix path based sockets, the socket path is
+              * cryptic (md5sum of path) and may not be useful for
+              * the user in debugging so log it in DEBUG
+@@ -3634,8 +3641,8 @@ socket_connect(rpc_transport_t *this, int port)
+     pthread_mutex_unlock(&priv->out_lock);
+ err:
+-    /* if sock != -1, then cleanup is done from the event handler */
+-    if (ret == -1 && sock == -1) {
++    /* if sock >= 0, then cleanup is done from the event handler */
++    if ((ret < 0) && (sock < 0)) {
+         /* Cleaup requires to send notification to upper layer which
+            intern holds the big_lock. There can be dead-lock situation
+            if big_lock is already held by the current thread.
+@@ -3689,20 +3696,20 @@ socket_listen(rpc_transport_t *this)
+     }
+     pthread_mutex_unlock(&priv->out_lock);
+-    if (sock != -1) {
++    if (sock >= 0) {
+         gf_log_callingfn(this->name, GF_LOG_DEBUG, "already listening");
+         return ret;
+     }
+     ret = socket_server_get_local_sockaddr(this, SA(&sockaddr), &sockaddr_len,
+                                            &sa_family);
+-    if (ret == -1) {
++    if (ret < 0) {
+         return ret;
+     }
+     pthread_mutex_lock(&priv->out_lock);
+     {
+-        if (priv->sock != -1) {
++        if (priv->sock >= 0) {
+             gf_log(this->name, GF_LOG_DEBUG, "already listening");
+             goto unlock;
+         }
+@@ -3712,7 +3719,7 @@ socket_listen(rpc_transport_t *this)
+         priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
+-        if (priv->sock == -1) {
++        if (priv->sock < 0) {
+             gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
+                    strerror(errno));
+             goto unlock;
+@@ -3723,7 +3730,7 @@ socket_listen(rpc_transport_t *this)
+          */
+         if (priv->windowsize != 0) {
+             if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
+-                           sizeof(priv->windowsize)) < 0) {
++                           sizeof(priv->windowsize)) != 0) {
+                 gf_log(this->name, GF_LOG_ERROR,
+                        "setting receive window size "
+                        "failed: %d: %d: %s",
+@@ -3731,7 +3738,7 @@ socket_listen(rpc_transport_t *this)
+             }
+             if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
+-                           sizeof(priv->windowsize)) < 0) {
++                           sizeof(priv->windowsize)) != 0) {
+                 gf_log(this->name, GF_LOG_ERROR,
+                        "setting send window size failed:"
+                        " %d: %d: %s",
+@@ -3741,7 +3748,7 @@ socket_listen(rpc_transport_t *this)
+         if (priv->nodelay && (sa_family != AF_UNIX)) {
+             ret = __socket_nodelay(priv->sock);
+-            if (ret == -1) {
++            if (ret != 0) {
+                 gf_log(this->name, GF_LOG_ERROR,
+                        "setsockopt() failed for NODELAY (%s)", strerror(errno));
+             }
+@@ -3750,7 +3757,7 @@ socket_listen(rpc_transport_t *this)
+         if (!priv->bio) {
+             ret = __socket_nonblock(priv->sock);
+-            if (ret == -1) {
++            if (ret != 0) {
+                 gf_log(this->name, GF_LOG_ERROR,
+                        "NBIO on socket %d failed "
+                        "(errno:%s); closing socket",
+@@ -3763,7 +3770,7 @@ socket_listen(rpc_transport_t *this)
+         ret = __socket_server_bind(this);
+-        if ((ret == -EADDRINUSE) || (ret == -1)) {
++        if (ret < 0) {
+             /* logged inside __socket_server_bind() */
+             gf_log(this->name, GF_LOG_ERROR,
+                    "__socket_server_bind failed;"
+@@ -3779,7 +3786,7 @@ socket_listen(rpc_transport_t *this)
+         ret = listen(priv->sock, priv->backlog);
+-        if (ret == -1) {
++        if (ret != 0) {
+             gf_log(this->name, GF_LOG_ERROR,
+                    "could not set socket %d to listen mode (errno:%s); "
+                    "closing socket",
+@@ -4025,7 +4032,7 @@ reconfigure(rpc_transport_t *this, dict_t *options)
+     priv = this->private;
+     if (dict_get_str(options, "transport.socket.keepalive", &optstr) == 0) {
+-        if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++        if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+             gf_log(this->name, GF_LOG_ERROR,
+                    "'transport.socket.keepalive' takes only "
+                    "boolean options, not taking any action");
+@@ -4094,7 +4101,7 @@ reconfigure(rpc_transport_t *this, dict_t *options)
+     if (dict_get(options, "non-blocking-io")) {
+         optstr = data_to_str(dict_get(options, "non-blocking-io"));
+-        if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++        if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+             gf_log(this->name, GF_LOG_ERROR,
+                    "'non-blocking-io' takes only boolean options,"
+                    " not taking any action");
+@@ -4109,7 +4116,7 @@ reconfigure(rpc_transport_t *this, dict_t *options)
+     if (!priv->bio) {
+         ret = __socket_nonblock(priv->sock);
+-        if (ret == -1) {
++        if (ret != 0) {
+             gf_log(this->name, GF_LOG_WARNING, "NBIO on %d failed (%s)",
+                    priv->sock, strerror(errno));
+             goto out;
+@@ -4508,7 +4515,7 @@ socket_init(rpc_transport_t *this)
+     if (dict_get(this->options, "non-blocking-io")) {
+         optstr = data_to_str(dict_get(this->options, "non-blocking-io"));
+-        if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++        if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+             gf_log(this->name, GF_LOG_ERROR,
+                    "'non-blocking-io' takes only boolean options,"
+                    " not taking any action");
+@@ -4528,7 +4535,7 @@ socket_init(rpc_transport_t *this)
+         optstr = data_to_str(
+             dict_get(this->options, "transport.socket.nodelay"));
+-        if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++        if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+             gf_log(this->name, GF_LOG_ERROR,
+                    "'transport.socket.nodelay' takes only "
+                    "boolean options, not taking any action");
+@@ -4559,7 +4566,7 @@ socket_init(rpc_transport_t *this)
+     priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+     if (dict_get_str(this->options, "transport.socket.keepalive", &optstr) ==
+         0) {
+-        if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++        if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+             gf_log(this->name, GF_LOG_ERROR,
+                    "'transport.socket.keepalive' takes only "
+                    "boolean options, not taking any action");
+@@ -4609,7 +4616,7 @@ socket_init(rpc_transport_t *this)
+     if (dict_get(this->options, "")) {
+         optstr = data_to_str(
+             dict_get(this->options, ""));
+-        if (gf_string2boolean(optstr, &tmp_bool) == -1) {
++        if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+             gf_log(this->name, GF_LOG_WARNING,
+                    "'' takes only "
+                    "boolean options; logging socket read fails");
+@@ -4646,7 +4653,7 @@ fini(rpc_transport_t *this)
+     priv = this->private;
+     if (priv) {
+-        if (priv->sock != -1) {
++        if (priv->sock >= 0) {
+             pthread_mutex_lock(&priv->out_lock);
+             {
+                 __socket_ioq_flush(this);
+@@ -4683,7 +4690,7 @@ init(rpc_transport_t *this)
+     ret = socket_init(this);
+-    if (ret == -1) {
++    if (ret < 0) {
+         gf_log(this->name, GF_LOG_DEBUG, "socket_init() failed");
+     }
diff --git a/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch b/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch
new file mode 100644
index 0000000..028a227
--- /dev/null
+++ b/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch
@@ -0,0 +1,120 @@
+From eb37a3b57415d2d4206ecdd2db10530366a0d1b1 Mon Sep 17 00:00:00 2001
+From: Anoop C S <>
+Date: Fri, 13 Dec 2019 15:20:27 +0530
+Subject: [PATCH 342/344] Revert "hooks: remove selinux hooks"
+This reverts commit 421743b7cfa6a249544f6abb4cca5a612bd20ea1.
+Note:- We are not bringing back features.selinux but just the hooks for
+       setting SELinux context on bricks
+Change-Id: Iccc10428361cac59b294e1d7aa1ba8187c20029e
+BUG: 1686800
+Signed-off-by: Anoop C S <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Niels de Vos <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+                                | 4 ++++
+ extras/hook-scripts/             | 2 +-
+ extras/hook-scripts/create/      | 1 +
+ extras/hook-scripts/create/post/ | 6 ++++++
+ extras/hook-scripts/delete/      | 1 +
+ extras/hook-scripts/delete/pre/  | 6 ++++++
+                           | 2 ++
+ 7 files changed, 21 insertions(+), 1 deletion(-)
+ create mode 100644 extras/hook-scripts/create/
+ create mode 100644 extras/hook-scripts/create/post/
+ create mode 100644 extras/hook-scripts/delete/
+ create mode 100644 extras/hook-scripts/delete/pre/
+diff --git a/ b/
+index 327733e..98ee311 100644
+--- a/
++++ b/
+@@ -221,6 +221,10 @@ AC_CONFIG_FILES([Makefile
+                 extras/hook-scripts/add-brick/Makefile
+                 extras/hook-scripts/add-brick/pre/Makefile
+                 extras/hook-scripts/add-brick/post/Makefile
++                extras/hook-scripts/create/Makefile
++                extras/hook-scripts/create/post/Makefile
++                extras/hook-scripts/delete/Makefile
++                extras/hook-scripts/delete/pre/Makefile
+                 extras/hook-scripts/start/Makefile
+                 extras/hook-scripts/start/post/Makefile
+                 extras/hook-scripts/set/Makefile
+diff --git a/extras/hook-scripts/ b/extras/hook-scripts/
+index 771b37e..26059d7 100644
+--- a/extras/hook-scripts/
++++ b/extras/hook-scripts/
+@@ -1,5 +1,5 @@
+-SUBDIRS = add-brick set start stop reset
++SUBDIRS = add-brick create delete set start stop reset
+ scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/
+diff --git a/extras/hook-scripts/create/ b/extras/hook-scripts/create/
+new file mode 100644
+index 0000000..b083a91
+--- /dev/null
++++ b/extras/hook-scripts/create/
+@@ -0,0 +1 @@
++SUBDIRS = post
+diff --git a/extras/hook-scripts/create/post/ b/extras/hook-scripts/create/post/
+new file mode 100644
+index 0000000..919801a
+--- /dev/null
++++ b/extras/hook-scripts/create/post/
+@@ -0,0 +1,6 @@
++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/
++scripts_SCRIPTS =
+diff --git a/extras/hook-scripts/delete/ b/extras/hook-scripts/delete/
+new file mode 100644
+index 0000000..c98a05d
+--- /dev/null
++++ b/extras/hook-scripts/delete/
+@@ -0,0 +1 @@
++SUBDIRS = pre
+diff --git a/extras/hook-scripts/delete/pre/ b/extras/hook-scripts/delete/pre/
+new file mode 100644
+index 0000000..93a6b85
+--- /dev/null
++++ b/extras/hook-scripts/delete/pre/
+@@ -0,0 +1,6 @@
++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/
++scripts_SCRIPTS =
+diff --git a/ b/
+index 1b975b2..012989a 100644
+--- a/
++++ b/
+@@ -1453,6 +1453,7 @@ exit 0
+             %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
++            %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+@@ -1461,6 +1462,7 @@ exit 0
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+                             %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
++            %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
diff --git a/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch b/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch
new file mode 100644
index 0000000..77d2f64
--- /dev/null
+++ b/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch
@@ -0,0 +1,155 @@
+From 8a8c508b529f7609fc5caa10bc79ba817f5d274a Mon Sep 17 00:00:00 2001
+From: Milan Zink <>
+Date: Mon, 5 Feb 2018 15:04:37 +0100
+Subject: [PATCH 343/344] extras/hooks: syntactical errors in SELinux hooks,
+ scipt logic improved
+Backport of
+Change-Id: Ia5fa1df81bbaec3a84653d136a331c76b457f42c
+BUG: 1686800
+Signed-off-by: Anoop C S <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Niels de Vos <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ .../create/post/          | 13 +++--
+ .../delete/pre/          | 60 +++++++++++++---------
+ tests/bugs/glusterfs-server/bug-877992.t           |  4 +-
+ 3 files changed, 46 insertions(+), 31 deletions(-)
+diff --git a/extras/hook-scripts/create/post/ b/extras/hook-scripts/create/post/
+index de242d2..f9b4b1a 100755
+--- a/extras/hook-scripts/create/post/
++++ b/extras/hook-scripts/create/post/
+@@ -34,18 +34,21 @@ parse_args () {
+ set_brick_labels()
+ {
+-  volname=${1}
++  volname="${1}"
+   # grab the path for each local brick
+-  brickpath="/var/lib/glusterd/vols/${volname}/bricks/*"
+-  brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u)
++  brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
++  brickdirs=$(
++    find "${brickpath}" -type f -exec grep '^path=' {} \; | \
++    cut -d= -f 2 | \
++    sort -u
++  )
+   for b in ${brickdirs}; do
+     # Add a file context for each brick path and associate with the
+     # glusterd_brick_t SELinux type.
+-    pattern="${b}\(/.*\)?"
++    pattern="${b}(/.*)?"
+     semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}"
+     # Set the labels on the new brick path.
+     restorecon -R "${b}"
+   done
+diff --git a/extras/hook-scripts/delete/pre/ b/extras/hook-scripts/delete/pre/
+index 6eba66f..e7f4e8f 100755
+--- a/extras/hook-scripts/delete/pre/
++++ b/extras/hook-scripts/delete/pre/
+@@ -15,45 +15,55 @@ OPTSPEC="volname:"
+ VOL=
+ function parse_args () {
+-        ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
+-        eval set -- "$ARGS"
+-        while true; do
+-        case $1 in
+-        --volname)
+-         shift
+-         VOL=$1
+-         ;;
+-        *)
+-         shift
+-         break
+-         ;;
+-        esac
++  ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
++  eval set -- "${ARGS}"
++  while true; do
++    case ${1} in
++      --volname)
++        shift
++        VOL=${1}
++      ;;
++      *)
+         shift
+-        done
++        break
++      ;;
++    esac
++    shift
++  done
+ }
+ function delete_brick_fcontext()
+ {
+-        volname=$1
++  volname="${1}"
++  # grab the path for each local brick
++  brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
++  brickdirs=$(
++    find "${brickpath}" -type f -exec grep '^path=' {} \; | \
++    cut -d= -f 2 | \
++    sort -u
++  )
++  for b in ${brickdirs}
++  do
++    # remove the file context associated with the brick path
++    pattern="${b}(/.*)?"
++    semanage fcontext --delete "${pattern}"
+-        # grab the path for each local brick
+-        brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2)
++    # remove the labels on brick path.
++    restorecon -R "${b}"
++ done
+-        for b in $brickdirs
+-        do
+-                # remove the file context associated with the brick path
+-                semanage fcontext --delete $b\(/.*\)?
+-        done
+ }
+ SELINUX_STATE=$(which getenforce && getenforce)
+ [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+ parse_args "$@"
+-[ -z "$VOL" ] && exit 1
++[ -z "${VOL}" ] && exit 1
+-delete_brick_fcontext $VOL
++delete_brick_fcontext "${VOL}"
+ # failure to delete the fcontext is not fatal
+ exit 0
+diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t
+index aeb73ed..300000b 100755
+--- a/tests/bugs/glusterfs-server/bug-877992.t
++++ b/tests/bugs/glusterfs-server/bug-877992.t
+@@ -46,7 +46,9 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+ EXPECT "$V0" volinfo_field $V0 'Volume Name';
+ EXPECT 'Created' volinfo_field $V0 'Status';
+ EXPECT 'createPre' cat /tmp/pre.out;
+-EXPECT 'createPost' cat /tmp/post.out;
++# comes after under create post hook script
++# list. So consider the delay in setting SELinux context on bricks
++EXPECT_WITHIN 5 'createPost' cat /tmp/post.out;
+ hooks_cleanup 'create'
diff --git a/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch b/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch
new file mode 100644
index 0000000..341aeae
--- /dev/null
+++ b/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch
@@ -0,0 +1,412 @@
+From 02a93265fe4e78e7fc3fa8c6caa773cbe02f50b6 Mon Sep 17 00:00:00 2001
+From: Anoop C S <>
+Date: Fri, 20 Dec 2019 16:01:59 +0530
+Subject: [PATCH 344/344] Revert all fixes to include SELinux hook scripts
+Following are the reverts included with this change:
+Revert "extras/hooks: syntactical errors in SELinux hooks, scipt logic improved"
+Revert "Revert "hooks: remove selinux hooks""
+Revert "tests: subdir-mount.t is failing for brick_mux regrssion"
+Revert "extras/hooks: Install and package newly added post add-brick hook script"
+Revert "extras/hooks: Add SELinux label on new bricks during add-brick"
+See bug for more details.
+Change-Id: I5c9b9e0e6446568ce16af17257fa39338198a827
+BUG: 1686800
+Signed-off-by: Anoop C S <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+                                       |   4 -
+ extras/hook-scripts/                    |   2 +-
+ extras/hook-scripts/add-brick/post/     |   4 +-
+ .../add-brick/post/       | 100 ---------------------
+ extras/hook-scripts/create/             |   1 -
+ extras/hook-scripts/create/post/        |   6 --
+ .../create/post/          |  13 ++-
+ extras/hook-scripts/delete/             |   1 -
+ extras/hook-scripts/delete/pre/         |   6 --
+ .../delete/pre/          |  60 ++++++-------
+                                  |   3 -
+ tests/bugs/glusterfs-server/bug-877992.t           |   4 +-
+ tests/features/subdir-mount.t                      |  11 +--
+ 13 files changed, 37 insertions(+), 178 deletions(-)
+ delete mode 100755 extras/hook-scripts/add-brick/post/
+ delete mode 100644 extras/hook-scripts/create/
+ delete mode 100644 extras/hook-scripts/create/post/
+ delete mode 100644 extras/hook-scripts/delete/
+ delete mode 100644 extras/hook-scripts/delete/pre/
+diff --git a/ b/
+index 98ee311..327733e 100644
+--- a/
++++ b/
+@@ -221,10 +221,6 @@ AC_CONFIG_FILES([Makefile
+                 extras/hook-scripts/add-brick/Makefile
+                 extras/hook-scripts/add-brick/pre/Makefile
+                 extras/hook-scripts/add-brick/post/Makefile
+-                extras/hook-scripts/create/Makefile
+-                extras/hook-scripts/create/post/Makefile
+-                extras/hook-scripts/delete/Makefile
+-                extras/hook-scripts/delete/pre/Makefile
+                 extras/hook-scripts/start/Makefile
+                 extras/hook-scripts/start/post/Makefile
+                 extras/hook-scripts/set/Makefile
+diff --git a/extras/hook-scripts/ b/extras/hook-scripts/
+index 26059d7..771b37e 100644
+--- a/extras/hook-scripts/
++++ b/extras/hook-scripts/
+@@ -1,5 +1,5 @@
+-SUBDIRS = add-brick create delete set start stop reset
++SUBDIRS = add-brick set start stop reset
+ scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/
+diff --git a/extras/hook-scripts/add-brick/post/ b/extras/hook-scripts/add-brick/post/
+index 9b236df..bfc0c1c 100644
+--- a/extras/hook-scripts/add-brick/post/
++++ b/extras/hook-scripts/add-brick/post/
+@@ -1,6 +1,6 @@
+ hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/
+-hook_SCRIPTS =
++hook_SCRIPTS =
+ endif
+diff --git a/extras/hook-scripts/add-brick/post/ b/extras/hook-scripts/add-brick/post/
+deleted file mode 100755
+index 4a17c99..0000000
+--- a/extras/hook-scripts/add-brick/post/
++++ /dev/null
+@@ -1,100 +0,0 @@
+-# Install to hooks/<HOOKS_VER>/add-brick/post
+-# Add an SELinux file context for each brick using the glusterd_brick_t type.
+-# This ensures that the brick is relabeled correctly on an SELinux restart or
+-# restore. Subsequently, run a restore on the brick path to set the selinux
+-# labels.
+-parse_args () {
+-  ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+-  eval set -- "${ARGS}"
+-  while true; do
+-    case ${1} in
+-      --volname)
+-        shift
+-        VOL=${1}
+-        ;;
+-      --gd-workdir)
+-          shift
+-          GLUSTERD_WORKDIR=$1
+-          ;;
+-      --version)
+-          shift
+-          ;;
+-      --volume-op)
+-          shift
+-          ;;
+-      *)
+-          shift
+-          break
+-          ;;
+-    esac
+-    shift
+-  done
+-  local volname="${1}"
+-  local fctx
+-  local list=()
+-  fctx="$(semanage fcontext --list -C)"
+-  # wait for new brick path to be updated under
+-  # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/
+-  sleep 5
+-  # grab the path for each local brick
+-  brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/"
+-  brickdirs=$(
+-    find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+-    cut -d= -f 2 | \
+-    sort -u
+-  )
+-  # create a list of bricks for which custom SELinux
+-  # label doesn't exist
+-  for b in ${brickdirs}; do
+-    pattern="${b}(/.*)?"
+-    echo "${fctx}" | grep "^${pattern}\s" >/dev/null
+-    if [[ $? -ne 0 ]]; then
+-      list+=("${pattern}")
+-    fi
+-  done
+-  # Add a file context for each brick path in the list and associate with the
+-  # glusterd_brick_t SELinux type.
+-  for p in ${list[@]}
+-  do
+-    semanage fcontext --add -t glusterd_brick_t -r s0 "${p}"
+-  done
+-  # Set the labels for which SELinux label was added above
+-  for b in ${brickdirs}
+-  do
+-    echo "${list[@]}" | grep "${b}" >/dev/null
+-    if [[ $? -eq 0 ]]; then
+-      restorecon -R "${b}"
+-    fi
+-  done
+-SELINUX_STATE=$(which getenforce && getenforce)
+-[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+-parse_args "$@"
+-[ -z "${VOL}" ] && exit 1
+-set_brick_labels "${VOL}"
+-exit 0
+diff --git a/extras/hook-scripts/create/ b/extras/hook-scripts/create/
+deleted file mode 100644
+index b083a91..0000000
+--- a/extras/hook-scripts/create/
++++ /dev/null
+@@ -1 +0,0 @@
+-SUBDIRS = post
+diff --git a/extras/hook-scripts/create/post/ b/extras/hook-scripts/create/post/
+deleted file mode 100644
+index 919801a..0000000
+--- a/extras/hook-scripts/create/post/
++++ /dev/null
+@@ -1,6 +0,0 @@
+-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/
+-scripts_SCRIPTS =
+diff --git a/extras/hook-scripts/create/post/ b/extras/hook-scripts/create/post/
+index f9b4b1a..de242d2 100755
+--- a/extras/hook-scripts/create/post/
++++ b/extras/hook-scripts/create/post/
+@@ -34,21 +34,18 @@ parse_args () {
+ set_brick_labels()
+ {
+-  volname="${1}"
++  volname=${1}
+   # grab the path for each local brick
+-  brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+-  brickdirs=$(
+-    find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+-    cut -d= -f 2 | \
+-    sort -u
+-  )
++  brickpath="/var/lib/glusterd/vols/${volname}/bricks/*"
++  brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u)
+   for b in ${brickdirs}; do
+     # Add a file context for each brick path and associate with the
+     # glusterd_brick_t SELinux type.
+-    pattern="${b}(/.*)?"
++    pattern="${b}\(/.*\)?"
+     semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}"
+     # Set the labels on the new brick path.
+     restorecon -R "${b}"
+   done
+diff --git a/extras/hook-scripts/delete/ b/extras/hook-scripts/delete/
+deleted file mode 100644
+index c98a05d..0000000
+--- a/extras/hook-scripts/delete/
++++ /dev/null
+@@ -1 +0,0 @@
+-SUBDIRS = pre
+diff --git a/extras/hook-scripts/delete/pre/ b/extras/hook-scripts/delete/pre/
+deleted file mode 100644
+index 93a6b85..0000000
+--- a/extras/hook-scripts/delete/pre/
++++ /dev/null
+@@ -1,6 +0,0 @@
+-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/
+-scripts_SCRIPTS =
+diff --git a/extras/hook-scripts/delete/pre/ b/extras/hook-scripts/delete/pre/
+index e7f4e8f..6eba66f 100755
+--- a/extras/hook-scripts/delete/pre/
++++ b/extras/hook-scripts/delete/pre/
+@@ -15,55 +15,45 @@ OPTSPEC="volname:"
+ VOL=
+ function parse_args () {
+-  ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+-  eval set -- "${ARGS}"
+-  while true; do
+-    case ${1} in
+-      --volname)
+-        shift
+-        VOL=${1}
+-      ;;
+-      *)
++        ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
++        eval set -- "$ARGS"
++        while true; do
++        case $1 in
++        --volname)
++         shift
++         VOL=$1
++         ;;
++        *)
++         shift
++         break
++         ;;
++        esac
+         shift
+-        break
+-      ;;
+-    esac
+-    shift
+-  done
++        done
+ }
+ function delete_brick_fcontext()
+ {
+-  volname="${1}"
+-  # grab the path for each local brick
+-  brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+-  brickdirs=$(
+-    find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+-    cut -d= -f 2 | \
+-    sort -u
+-  )
+-  for b in ${brickdirs}
+-  do
+-    # remove the file context associated with the brick path
+-    pattern="${b}(/.*)?"
+-    semanage fcontext --delete "${pattern}"
++        volname=$1
+-    # remove the labels on brick path.
+-    restorecon -R "${b}"
+- done
++        # grab the path for each local brick
++        brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2)
++        for b in $brickdirs
++        do
++                # remove the file context associated with the brick path
++                semanage fcontext --delete $b\(/.*\)?
++        done
+ }
+ SELINUX_STATE=$(which getenforce && getenforce)
+ [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+ parse_args "$@"
+-[ -z "${VOL}" ] && exit 1
++[ -z "$VOL" ] && exit 1
+-delete_brick_fcontext "${VOL}"
++delete_brick_fcontext $VOL
+ # failure to delete the fcontext is not fatal
+ exit 0
+diff --git a/ b/
+index 012989a..671ee27 100644
+--- a/
++++ b/
+@@ -1447,13 +1447,11 @@ exit 0
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+             %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/
+-            %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/
+             %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+             %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
+-            %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+@@ -1462,7 +1460,6 @@ exit 0
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+                             %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post
+        %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
+-            %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t
+index 300000b..aeb73ed 100755
+--- a/tests/bugs/glusterfs-server/bug-877992.t
++++ b/tests/bugs/glusterfs-server/bug-877992.t
+@@ -46,9 +46,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+ EXPECT "$V0" volinfo_field $V0 'Volume Name';
+ EXPECT 'Created' volinfo_field $V0 'Status';
+ EXPECT 'createPre' cat /tmp/pre.out;
+-# comes after under create post hook script
+-# list. So consider the delay in setting SELinux context on bricks
+-EXPECT_WITHIN 5 'createPost' cat /tmp/post.out;
++EXPECT 'createPost' cat /tmp/post.out;
+ hooks_cleanup 'create'
+diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t
+index a02bd6b..8401946 100644
+--- a/tests/features/subdir-mount.t
++++ b/tests/features/subdir-mount.t
+@@ -85,17 +85,12 @@ TEST $CLI volume start $V0
+ TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2
+ TEST stat $M2
+-initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log  | wc -l`
+ # mount shouldn't fail even after add-brick
+ TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6};
+-# Wait to execute script by glusterd
+-newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log  | wc -l`
+-while [ $newcnt -eq $initcnt ]
+-   newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log  | wc -l`
+-   sleep 1
++# Give time for client process to get notified and use the new
++# volfile after add-brick
++sleep 1
+ # Existing mount should still be active
+ mount_inode=$(stat --format "%i" "$M2")
diff --git a/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch b/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch
new file mode 100644
index 0000000..48b0cc8
--- /dev/null
+++ b/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch
@@ -0,0 +1,82 @@
+From d45c64e17e1eb8003ac1086cbd3abea32414c7f9 Mon Sep 17 00:00:00 2001
+From: Raghavendra Gowdappa <>
+Date: Tue, 12 Feb 2019 18:33:44 +0530
+Subject: [PATCH 345/346] read-ahead/io-cache: turn off by default
+We've found perf xlators io-cache and read-ahead not adding any
+performance improvement. At best read-ahead is redundant due to kernel
+read-ahead and at worst io-cache is degrading the performance for
+workloads that don't involve re-read. Given that the VFS already has
+both of these functionalities, this patch turns these two
+translators off by default for native fuse mounts.
+For non-native fuse mounts like gfapi (NFS-ganesha/samba) we can have
+these xlators on by having custom profiles.
+>Change-Id: Ie7535788909d4c741844473696f001274dc0bb60
+>Signed-off-by: Raghavendra Gowdappa <>
+>fixes: bz#1676479
+Upstream fix link:
+BUG: 1788656
+Change-Id: Ie7535788909d4c741844473696f001274dc0bb60
+Signed-off-by: Sunil Kumar Acharya <>
+Tested-by: RHGS Build Bot <>
+ tests/basic/ec/self-heal.t                      | 2 ++
+ tests/basic/glusterd/volfile_server_switch.t    | 2 +-
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 ++--
+ 3 files changed, 5 insertions(+), 3 deletions(-)
+diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t
+index d217559..6329bb6 100644
+--- a/tests/basic/ec/self-heal.t
++++ b/tests/basic/ec/self-heal.t
+@@ -131,6 +131,8 @@ TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+ TEST $CLI volume set $V0 client-log-level DEBUG
+ #Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp'
+ TEST $CLI volume set $V0 performance.write-behind off
++#md-cache can return stale stat due to default timeout being 1 sec
++TEST $CLI volume set $V0 performance.stat-prefetch off
+ EXPECT "Created" volinfo_field $V0 'Status'
+ TEST $CLI volume start $V0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t
+index 3090609..e11cfed 100644
+--- a/tests/basic/glusterd/volfile_server_switch.t
++++ b/tests/basic/glusterd/volfile_server_switch.t
+@@ -34,7 +34,7 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --vol
+ TEST kill_glusterd 1
+-TEST $CLI_2 volume set $V0 off
++TEST $CLI_2 volume set $V0 performance.write-behind off
+ # make sure by this time directory will be created
+ # TODO: suggest ideal time to wait
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 16601a2..9001b88 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2235,7 +2235,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+     {.key = "",
+      .voltype = "performance/read-ahead",
+      .option = "!perf",
+-     .value = "on",
++     .value = "off",
+      .op_version = 1,
+      .description = "enable/disable read-ahead translator in the volume.",
+@@ -2249,7 +2249,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+     {.key = "",
+      .voltype = "performance/io-cache",
+      .option = "!perf",
+-     .value = "on",
++     .value = "off",
+      .op_version = 1,
+      .description = "enable/disable io-cache translator in the volume.",
+      .flags = VOLOPT_FLAG_CLIENT_OPT},
diff --git a/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch b/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch
new file mode 100644
index 0000000..9fca79e
--- /dev/null
+++ b/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch
@@ -0,0 +1,223 @@
+From e2af9793014ad67859aa73088765a52307cbe466 Mon Sep 17 00:00:00 2001
+From: Csaba Henk <>
+Date: Tue, 7 Jan 2020 19:43:05 +0100
+Subject: [PATCH 346/346] fuse: degrade logging of write failure to fuse device
+FUSE uses failures of communicating with /dev/fuse with various
+errnos to indicate in-kernel conditions to userspace. Some of these
+shouldn't be handled as an application error. Also, the standard
+POSIX errno descriptions should not be shown, as they are misleading
+in this context.
+When writing to the fuse device, the caller of the respective
+convenience routine can mask those errnos which don't qualify to
+be an error for the application in that context, so then those
+shall be reported at DEBUG level.
+The possible non-standard errnos are reported with their
+POSIX name instead of their description to avoid confusion.
+(Eg. for ENOENT we don't log "no such file or directory",
+we log indeed literal "ENOENT".)
+Upstream on
+> Change-Id: I510158843e4b1d482bdc496c2e97b1860dc1ba93
+> updates: bz#1193929
+> Signed-off-by: Csaba Henk <>
+BUG: 1763208
+Change-Id: Ib1676bb334ed153ce74ae1c0413fc0e58fb388c7
+Signed-off-by: Csaba Henk <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ xlators/mount/fuse/src/fuse-bridge.c | 78 +++++++++++++++++++++++++++++++++---
+ xlators/mount/fuse/src/fuse-bridge.h |  9 ++++-
+ 2 files changed, 80 insertions(+), 7 deletions(-)
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index ebe5c28..6e99053 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -198,7 +198,7 @@ fusedump_setup_meta(struct iovec *iovs, char *dir,
+ static int
+ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
+-                      ssize_t res)
++                      ssize_t res, errnomask_t errnomask)
+ {
+     char w = 'W';
+     struct iovec diov[4] = {
+@@ -216,8 +216,59 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
+     struct fuse_out_header *fouh = NULL;
+     if (res == -1) {
+-        gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR,
+-                         "writing to fuse device failed: %s", strerror(errno));
++        const char *errdesc = NULL;
++        gf_loglevel_t loglevel = GF_LOG_ERROR;
++        /* If caller masked the errno, then it
++         * does not indicate an error at the application
++         * level, so we degrade the log severity to DEBUG.
++         */
++        if (errnomask && errno < ERRNOMASK_MAX &&
++            GET_ERRNO_MASK(errnomask, errno))
++            loglevel = GF_LOG_DEBUG;
++        switch (errno) {
++            /* The listed errnos are FUSE status indicators,
++             * not legit values according to POSIX (see write(3p)),
++             * so resolving them according to the standard
++             * POSIX interpretation would be misleading.
++             */
++            case ENOENT:
++                errdesc = "ENOENT";
++                break;
++            case ENOTDIR:
++                errdesc = "ENOTDIR";
++                break;
++            case ENODEV:
++                errdesc = "ENODEV";
++                break;
++            case EPERM:
++                errdesc = "EPERM";
++                break;
++            case ENOMEM:
++                errdesc = "ENOMEM";
++                break;
++            case ENOTCONN:
++                errdesc = "ENOTCONN";
++                break;
++            case ECONNREFUSED:
++                errdesc = "ECONNREFUSED";
++                break;
++            case EOVERFLOW:
++                errdesc = "EOVERFLOW";
++                break;
++            case EBUSY:
++                errdesc = "EBUSY";
++                break;
++            case ENOTEMPTY:
++                errdesc = "ENOTEMPTY";
++                break;
++            default:
++                errdesc = strerror(errno);
++        }
++        gf_log_callingfn("glusterfs-fuse", loglevel,
++                         "writing to fuse device failed: %s", errdesc);
+         return errno;
+     }
+@@ -282,7 +333,7 @@ send_fuse_iov(xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
+     gf_log("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s", res,
+            fouh->len, res == -1 ? strerror(errno) : "");
+-    return check_and_dump_fuse_W(priv, iov_out, count, res);
++    return check_and_dump_fuse_W(priv, iov_out, count, res, NULL);
+ }
+ static int
+@@ -353,6 +404,15 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
+         fouh->unique = 0;
+         fouh->error = FUSE_NOTIFY_INVAL_ENTRY;
++        if (ENOENT < ERRNOMASK_MAX)
++            MASK_ERRNO(node->errnomask, ENOENT);
++        if (ENOTDIR < ERRNOMASK_MAX)
++            MASK_ERRNO(node->errnomask, ENOTDIR);
++        if (EBUSY < ERRNOMASK_MAX)
+            MASK_ERRNO(node->errnomask, EBUSY);
+        if (ENOTEMPTY < ERRNOMASK_MAX)
+            MASK_ERRNO(node->errnomask, ENOTEMPTY);
+         if (dentry->name) {
+             nlen = strlen(dentry->name);
+             fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1;
+@@ -437,6 +497,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
+     fniio->off = 0;
+     fniio->len = -1;
+    if (ENOENT < ERRNOMASK_MAX)
+        MASK_ERRNO(node->errnomask, ENOENT);
+     fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino,
+                 uuid_utoa(inode->gfid));
+     gf_log("glusterfs-fuse", GF_LOG_TRACE,
+@@ -482,6 +545,7 @@ fuse_timed_message_new(void)
+     /* should be NULL if not set */
+     dmsg->fuse_message_body = NULL;
+     INIT_LIST_HEAD(&dmsg->next);
++    memset(dmsg->errnomask, 0, sizeof(dmsg->errnomask));
+     return dmsg;
+ }
+@@ -680,6 +744,8 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg,
+         dmsg->fuse_out_header.unique = finh->unique;
+         dmsg->fuse_out_header.len = sizeof(dmsg->fuse_out_header);
+         dmsg->fuse_out_header.error = -EAGAIN;
++        if (ENOENT < ERRNOMASK_MAX)
++            MASK_ERRNO(dmsg->errnomask, ENOENT);
+         timespec_now(&dmsg->scheduled_ts);
+         timespec_adjust_delta(&dmsg->scheduled_ts,
+                               (struct timespec){0, 10000000});
+@@ -4848,7 +4914,7 @@ notify_kernel_loop(void *data)
+         iov_out.iov_base = node->inval_buf;
+         iov_out.iov_len = len;
+         rv = sys_writev(priv->fd, &iov_out, 1);
+-        check_and_dump_fuse_W(priv, &iov_out, 1, rv);
++        check_and_dump_fuse_W(priv, &iov_out, 1, rv, node->errnomask);
+         GF_FREE(node);
+@@ -4940,7 +5006,7 @@ timed_response_loop(void *data)
+         iovs[1] = (struct iovec){dmsg->fuse_message_body,
+                                  len - sizeof(struct fuse_out_header)};
+         rv = sys_writev(priv->fd, iovs, 2);
+-        check_and_dump_fuse_W(priv, iovs, 2, rv);
++        check_and_dump_fuse_W(priv, iovs, 2, rv, dmsg->errnomask);
+         fuse_timed_message_free(dmsg);
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
+index cf4479c..d2d462c 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.h
++++ b/xlators/mount/fuse/src/fuse-bridge.h
+@@ -195,14 +195,20 @@ struct fuse_private {
+ };
+ typedef struct fuse_private fuse_private_t;
++typedef uint64_t errnomask_t[2];
++#define MASK_ERRNO(mask, n) ((mask)[(n) >> 6] |= ((uint64_t)1 << ((n)&63)))
++#define GET_ERRNO_MASK(mask, n) ((mask)[(n) >> 6] & ((uint64_t)1 << ((n)&63)))
++#define ERRNOMASK_MAX (64 * (sizeof(errnomask_t) / sizeof(uint64_t)))
+ #define INVAL_BUF_SIZE                                                         \
+     (sizeof(struct fuse_out_header) +                                          \
+      max(sizeof(struct fuse_notify_inval_inode_out),                           \
+          sizeof(struct fuse_notify_inval_entry_out) + NAME_MAX + 1))
+ struct fuse_invalidate_node {
+-    char inval_buf[INVAL_BUF_SIZE];
++    errnomask_t errnomask;
+     struct list_head next;
++    char inval_buf[INVAL_BUF_SIZE];
+ };
+ typedef struct fuse_invalidate_node fuse_invalidate_node_t;
+@@ -210,6 +216,7 @@ struct fuse_timed_message {
+     struct fuse_out_header fuse_out_header;
+     void *fuse_message_body;
+     struct timespec scheduled_ts;
++    errnomask_t errnomask;
+     struct list_head next;
+ };
+ typedef struct fuse_timed_message fuse_timed_message_t;
diff --git a/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch b/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch
new file mode 100644
index 0000000..ff5251d
--- /dev/null
+++ b/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch
@@ -0,0 +1,236 @@
+From 87e6ea2cd63898c5d243b0f0c719f4f6347fb829 Mon Sep 17 00:00:00 2001
+From: Milind Changire <>
+Date: Thu, 5 Jan 2017 19:53:19 +0530
+Subject: [PATCH 347/349] tools/glusterfind: handle offline bricks
+glusterfind is unable to copy remote output file to local node when a
+remove-brick is in progress on the remote node. After copying remote
+files, in the --full output listing path, a "sort -u" command is run on
+the collected files. However, "sort" exits with an error code if it
+finds any file missing.
+Maintain a map of (pid, output file) when the node commands are started
+and remove the mapping for the pid for which the command returns an
+error. Use the list of files present in the map for the "sort" command.
+Backport of:
+> Patch:
+> Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b
+> fixes: bz#1410439
+> Signed-off-by: Milind Changire <>
+> Signed-off-by: Shwetha K Acharya <>
+BUG: 1789447
+Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunny Kumar <>
+ tools/glusterfind/src/gfind_py2py3.py | 25 ++++++++++++++
+ tools/glusterfind/src/main.py         | 61 +++++++++++++++++++++--------------
+ 2 files changed, 61 insertions(+), 25 deletions(-)
+diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py
+index 1d41ec5..87324fb 100644
+--- a/tools/glusterfind/src/gfind_py2py3.py
++++ b/tools/glusterfind/src/gfind_py2py3.py
+@@ -40,6 +40,19 @@ if sys.version_info >= (3,):
+     def gfind_history_changelog_done(libgfc, clfile):
+         return libgfc.gf_history_changelog_done(clfile.encode())
++    def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
++        f.write(u"{0}{1}{2}{3}{4}\n".format(row,
++                                            field_separator,
++                                            p_rep,
++                                            field_separator,
++                                            row_2_rep))
++    def gfind_write(f, row, field_separator, p_rep):
++        f.write(u"{0}{1}{2}\n".format(row,
++                                      field_separator,
++                                      p_rep))
+ else:
+     # Raw conversion of bytearray to string
+@@ -61,3 +74,15 @@ else:
+     def gfind_history_changelog_done(libgfc, clfile):
+         return libgfc.gf_history_changelog_done(clfile)
++    def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
++        f.write(u"{0}{1}{2}{3}{4}\n".format(row,
++                                            field_separator,
++                                            p_rep,
++                                            field_separator,
++                                            row_2_rep).encode())
++    def gfind_write(f, row, field_separator, p_rep):
++        f.write(u"{0}{1}{2}\n".format(row,
++                                      field_separator,
++                                      p_rep).encode())
+diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
+index cc5a86f..fefe4a3 100644
+--- a/tools/glusterfind/src/main.py
++++ b/tools/glusterfind/src/main.py
+@@ -16,6 +16,7 @@ from multiprocessing import Process
+ import os
+ import xml.etree.cElementTree as etree
+ from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action
++from gfind_py2py3 import gfind_write_row, gfind_write
+ import logging
+ import shutil
+ import tempfile
+@@ -35,9 +36,9 @@ GlusterFS Incremental API
+ ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+ logger = logging.getLogger()
+-node_outfiles = []
+ vol_statusStr = ""
+ gtmpfilename = None
++g_pid_nodefile_map = {}
+ class StoreAbsPath(Action):
+@@ -111,7 +112,7 @@ def node_cmd(host, host_uuid, task, cmd, args, opts):
+ def run_cmd_nodes(task, args, **kwargs):
+-    global node_outfiles
++    global g_pid_nodefile_map
+     nodes = get_nodes(args.volume)
+     pool = []
+     for num, node in enumerate(nodes):
+@@ -142,7 +143,6 @@ def run_cmd_nodes(task, args, **kwargs):
+                 if tag == "":
+                     tag = '""' if not is_host_local(host_uuid) else ""
+-            node_outfiles.append(node_outfile)
+             # remote file will be copied into this directory
+             mkdirp(os.path.dirname(node_outfile),
+                    exit_on_err=True, logger=logger)
+@@ -180,7 +180,6 @@ def run_cmd_nodes(task, args, **kwargs):
+                 if tag == "":
+                     tag = '""' if not is_host_local(host_uuid) else ""
+-            node_outfiles.append(node_outfile)
+             # remote file will be copied into this directory
+             mkdirp(os.path.dirname(node_outfile),
+                    exit_on_err=True, logger=logger)
+@@ -264,6 +263,7 @@ def run_cmd_nodes(task, args, **kwargs):
+                         args=(host, host_uuid, task, cmd, args, opts))
+             p.start()
+             pool.append(p)
++            g_pid_nodefile_map[p.pid] = node_outfile
+     for num, p in enumerate(pool):
+         p.join()
+@@ -271,8 +271,11 @@ def run_cmd_nodes(task, args, **kwargs):
+             logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
+             if task in ["create", "delete"]:
+                 fail("Command %s failed in %s" % (task, nodes[num][1]))
+-            elif task == "pre" and args.disable_partial:
+-                sys.exit(1)
++            elif task == "pre" or task == "query":
++                if args.disable_partial:
++                    sys.exit(1)
++                else:
++                    del g_pid_nodefile_map[p.pid]
+ @cache_output
+@@ -512,16 +515,10 @@ def write_output(outfile, outfilemerger, field_separator):
+                     continue
+                 if row_2_rep and row_2_rep != "":
+-                    f.write(u"{0}{1}{2}{3}{4}\n".format(row[0],
+-                                                        field_separator,
+-                                                        p_rep,
+-                                                        field_separator,
+-                                                        row_2_rep).encode())
+-                else:
+-                    f.write(u"{0}{1}{2}\n".format(row[0],
+-                                                  field_separator,
+-                                                  p_rep).encode())
++                    gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep)
++                else:
++                    gfind_write(f, row[0], field_separator, p_rep)
+ def mode_create(session_dir, args):
+     logger.debug("Init is called - Session: %s, Volume: %s"
+@@ -571,6 +568,7 @@ def mode_create(session_dir, args):
+ def mode_query(session_dir, args):
+     global gtmpfilename
++    global g_pid_nodefile_map
+     # Verify volume status
+     cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
+@@ -634,14 +632,20 @@ def mode_query(session_dir, args):
+     # Merger
+     if args.full:
+-        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
+-        execute(cmd,
+-                exit_msg="Failed to merge output files "
+-                "collected from nodes", logger=logger)
++        if len(g_pid_nodefile_map) > 0:
++            cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \
++                  ["-o", args.outfile]
++            execute(cmd,
++                    exit_msg="Failed to merge output files "
++                    "collected from nodes", logger=logger)
++        else:
++            fail("Failed to collect any output files from peers. "
++                 "Looks like all bricks are offline.", logger=logger)
+     else:
+         # Read each Changelogs db and generate finaldb
+         create_file(args.outfile, exit_on_err=True, logger=logger)
+-        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
++        outfilemerger = OutputMerger(args.outfile + ".db",
++                                     g_pid_nodefile_map.values())
+         write_output(args.outfile, outfilemerger, args.field_separator)
+     try:
+@@ -656,6 +660,7 @@ def mode_query(session_dir, args):
+ def mode_pre(session_dir, args):
+     global gtmpfilename
++    global g_pid_nodefile_map
+     """
+     Read from Session file and write to session.pre file
+@@ -696,14 +701,20 @@ def mode_pre(session_dir, args):
+     # Merger
+     if args.full:
+-        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
+-        execute(cmd,
+-                exit_msg="Failed to merge output files "
+-                "collected from nodes", logger=logger)
++        if len(g_pid_nodefile_map) > 0:
++            cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \
++                  ["-o", args.outfile]
++            execute(cmd,
++                    exit_msg="Failed to merge output files "
++                    "collected from nodes", logger=logger)
++        else:
++            fail("Failed to collect any output files from peers. "
++                 "Looks like all bricks are offline.", logger=logger)
+     else:
+         # Read each Changelogs db and generate finaldb
+         create_file(args.outfile, exit_on_err=True, logger=logger)
+-        outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
++        outfilemerger = OutputMerger(args.outfile + ".db",
++                                     g_pid_nodefile_map.values())
+         write_output(args.outfile, outfilemerger, args.field_separator)
+     try:
diff --git a/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch b/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch
new file mode 100644
index 0000000..e1f89f9
--- /dev/null
+++ b/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch
@@ -0,0 +1,113 @@
+From 1ca8a545833e0a6e674984245338b8675ddc58bc Mon Sep 17 00:00:00 2001
+From: Kotresh HR <>
+Date: Fri, 10 Jan 2020 16:48:14 +0530
+Subject: [PATCH 348/349] glusterfind: Fix py2/py3 issues
+1. dict.values() returns a list in py2 but a view object in py3.
+   So explicitly convert it into a list.
+2. The xattr module returns values as bytes. So explicitly convert
+   them to str to work with both py2 and py3.
+Backport of:
+ > Patch:
+ > fixes: bz#1789439
+ > Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94
+ > Signed-off-by: Kotresh HR <>
+BUG: 1789447
+Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94
+Signed-off-by: Kotresh HR <>
+Reviewed-by: Shwetha Acharya <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Hari Gowtham Gopal <>
+Reviewed-by: Sunny Kumar <>
+ tools/glusterfind/src/changelog.py | 14 +++++++++-----
+ tools/glusterfind/src/main.py      |  8 ++++----
+ 2 files changed, 13 insertions(+), 9 deletions(-)
+diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
+index d8f97e0..d972fb5 100644
+--- a/tools/glusterfind/src/changelog.py
++++ b/tools/glusterfind/src/changelog.py
+@@ -14,6 +14,7 @@ import sys
+ import time
+ import xattr
+ import logging
++from gfind_py2py3 import bytearray_to_str
+ from argparse import ArgumentParser, RawDescriptionHelpFormatter
+ import hashlib
+ try:
+@@ -105,9 +106,10 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
+                 changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
+                 file_xattrs = xattr.list(p)
+                 for x in file_xattrs:
+-                    if x.startswith("trusted.pgfid."):
++                    x_str = bytearray_to_str(x)
++                    if x_str.startswith("trusted.pgfid."):
+                         # PGFID in pgfid table
+-                        changelog_data.pgfid_add(x.split(".")[-1])
++                        changelog_data.pgfid_add(x_str.split(".")[-1])
+             except (IOError, OSError):
+                 # All OS Errors ignored, since failures will be logged
+                 # in End. All GFIDs present in gfidpath table
+@@ -122,10 +124,12 @@ def enum_hard_links_using_gfid2path(brick, gfid, args):
+         try:
+             file_xattrs = xattr.list(p)
+             for x in file_xattrs:
+-                if x.startswith("trusted.gfid2path."):
++                x_str = bytearray_to_str(x)
++                if x_str.startswith("trusted.gfid2path."):
+                     # get the value for the xattr i.e. <PGFID>/<BN>
+-                    v = xattr.getxattr(p, x)
+-                    pgfid, bn = v.split(os.sep)
++                    v = xattr.getxattr(p, x_str)
++                    v_str = bytearray_to_str(v)
++                    pgfid, bn = v_str.split(os.sep)
+                     try:
+                         path = symlink_gfid_to_path(brick, pgfid)
+                         fullpath = os.path.join(path, bn)
+diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
+index fefe4a3..dfc9d07 100644
+--- a/tools/glusterfind/src/main.py
++++ b/tools/glusterfind/src/main.py
+@@ -633,7 +633,7 @@ def mode_query(session_dir, args):
+     # Merger
+     if args.full:
+         if len(g_pid_nodefile_map) > 0:
+-            cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \
++            cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+                   ["-o", args.outfile]
+             execute(cmd,
+                     exit_msg="Failed to merge output files "
+@@ -645,7 +645,7 @@ def mode_query(session_dir, args):
+         # Read each Changelogs db and generate finaldb
+         create_file(args.outfile, exit_on_err=True, logger=logger)
+         outfilemerger = OutputMerger(args.outfile + ".db",
+-                                     g_pid_nodefile_map.values())
++                                     list(g_pid_nodefile_map.values()))
+         write_output(args.outfile, outfilemerger, args.field_separator)
+     try:
+@@ -702,7 +702,7 @@ def mode_pre(session_dir, args):
+     # Merger
+     if args.full:
+         if len(g_pid_nodefile_map) > 0:
+-            cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \
++            cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+                   ["-o", args.outfile]
+             execute(cmd,
+                     exit_msg="Failed to merge output files "
+@@ -714,7 +714,7 @@ def mode_pre(session_dir, args):
+         # Read each Changelogs db and generate finaldb
+         create_file(args.outfile, exit_on_err=True, logger=logger)
+         outfilemerger = OutputMerger(args.outfile + ".db",
+-                                     g_pid_nodefile_map.values())
++                                     list(g_pid_nodefile_map.values()))
+         write_output(args.outfile, outfilemerger, args.field_separator)
+     try:
diff --git a/SOURCES/0349-glusterfind-python3-compatibility.patch b/SOURCES/0349-glusterfind-python3-compatibility.patch
new file mode 100644
index 0000000..7f1c274
--- /dev/null
+++ b/SOURCES/0349-glusterfind-python3-compatibility.patch
@@ -0,0 +1,56 @@
+From 1354a492cbc758f9801568153380ca896fab7765 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <>
+Date: Fri, 10 Jan 2020 14:28:35 +0000
+Subject: [PATCH 349/349] glusterfind: python3 compatibility
+When a gluster volume is deleted, the hook script 'S57glusterfind-delete-post'
+fails to execute and an error message can be observed in the glusterd log.
+  File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 69, in <module>
+    main()
+  File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 39, in main
+    glusterfind_dir = os.path.join(get_glusterd_workdir(), "glusterfind")
+  File "/usr/lib64/python3.7/posixpath.py", line 94, in join
+    genericpath._check_arg_types('join', a, *p)
+  File "/usr/lib64/python3.7/genericpath.py", line 155, in _check_arg_types
+    raise TypeError("Can't mix strings and bytes in path components") from None
+TypeError: Can't mix strings and bytes in path components
+Added the 'universal_newlines' flag to Popen to support backward compatibility.
+Backport of:
+ > Patch:
+ > Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f
+ > Fixes: bz#1789478
+ > Signed-off-by: Sunny Kumar <>
+Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f
+BUG: 1789447
+Signed-off-by: Sunny Kumar <>
+Signed-off-by: Kotresh HR <>
+Tested-by: RHGS Build Bot <>
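+ tools/glusterfind/S57glusterfind-delete-post.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py
+index 5b5142d..5beece2 100755
+--- a/tools/glusterfind/S57glusterfind-delete-post.py
++++ b/tools/glusterfind/S57glusterfind-delete-post.py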
+@@ -18,7 +18,7 @@ def handle_rm_error(func, path, exc_info):
+ def get_glusterd_workdir():
+     p = Popen(["gluster", "system::", "getwd"],
+-              stdout=PIPE, stderr=PIPE)
++              stdout=PIPE, stderr=PIPE, universal_newlines=True)
+     out, _ = p.communicate()
diff --git a/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch b/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch
new file mode 100644
index 0000000..08f70a7
--- /dev/null
+++ b/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch
@@ -0,0 +1,37 @@
+From 6c06ac0571fb6bf0734b173cc3a75badc7554601 Mon Sep 17 00:00:00 2001
+From: Shwetha K Acharya <>
+Date: Tue, 14 Jan 2020 10:51:06 +0530
+Subject: [PATCH 350/350] tools/glusterfind: Remove an extra argument
+Backport of:
+>    Upstream Patch:
+>    fixes: bz#1790748
+>    Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1
+>    Signed-off-by: Shwetha K Acharya <>
+Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1
+BUG: 1789447
+Signed-off-by: Sunny Kumar <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tools/glusterfind/src/main.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
+index dfc9d07..5ca1fec 100644
+--- a/tools/glusterfind/src/main.py
++++ b/tools/glusterfind/src/main.py
+@@ -515,7 +515,7 @@ def write_output(outfile, outfilemerger, field_separator):
+                     continue
+                 if row_2_rep and row_2_rep != "":
+-                    gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep)
++                    gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep)
+                 else:
+                     gfind_write(f, row[0], field_separator, p_rep)
diff --git a/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch b/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch
new file mode 100644
index 0000000..51dc3bb
--- /dev/null
+++ b/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch
@@ -0,0 +1,131 @@
+From f38f0988eb6c0d72677abceba5ebeb51ea8d44ad Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <>
+Date: Tue, 21 Jan 2020 21:09:56 +0530
+Subject: [PATCH 351/351] server: Mount fails after reboot 1/3 gluster nodes
+Problem: When one server node (1x3) comes back up after a reboot, the
+client is unmounted. The client is unmounted because it receives an
+AUTH_FAILED event and calls fini for the graph. The client receives
+AUTH_FAILED because the brick is not yet attached to a graph at that
+moment.
+Solution: To avoid unmounting the client graph, return an ENOENT error
+          from the server if the brick is not attached to the server at
+          the time clients are authenticated.
+> Credits: Xavi Hernandez <>
+> Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e
+> Fixes: bz#1793852
+> Signed-off-by: Mohit Agrawal <>
+> (Cherry picked from commit e4f776308d5ee7ffeb07de0fd9e1edae6944030d)
+> (Reviewed on upstream link)
+Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e
+BUG: 1793035
+Signed-off-by: Mohit Agrawal <>
+Tested-by: RHGS Build Bot <>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <>
+ tests/bugs/protocol/bug-1433815-auth-allow.t   |  1 +
+ xlators/protocol/client/src/client-handshake.c |  3 +-
+ xlators/protocol/server/src/server-handshake.c | 41 +++++++++++++++++---------
+ 3 files changed, 29 insertions(+), 16 deletions(-)
+diff --git a/tests/bugs/protocol/bug-1433815-auth-allow.t b/tests/bugs/protocol/bug-1433815-auth-allow.t
+index fa22ad8..a78c0eb 100644
+--- a/tests/bugs/protocol/bug-1433815-auth-allow.t
++++ b/tests/bugs/protocol/bug-1433815-auth-allow.t
+@@ -17,6 +17,7 @@ TEST $CLI volume create $V0 $H0:$B0/$V0
+ # Set auth.allow so it *doesn't* include ourselves.
+ TEST $CLI volume set $V0 auth.allow
+ TEST $CLI volume start $V0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+ # "System getspec" will include the username and password if the request comes
+ # from a server (which we are).  Unfortunately, this will cause authentication
+diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
+index c43756a..0002361 100644
+--- a/xlators/protocol/client/src/client-handshake.c
++++ b/xlators/protocol/client/src/client-handshake.c
+@@ -1031,8 +1031,7 @@ client_setvolume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+                "SETVOLUME on remote-host failed: %s", remote_error);
+         errno = op_errno;
+-        if (remote_error &&
+-            (strcmp("Authentication failed", remote_error) == 0)) {
++        if (remote_error && (op_errno == EACCES)) {
+             auth_fail = _gf_true;
+             op_ret = 0;
+         }
+diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
+index 382f241..1d1177d 100644
+--- a/xlators/protocol/server/src/server-handshake.c
++++ b/xlators/protocol/server/src/server-handshake.c
+@@ -250,6 +250,7 @@ server_setvolume(rpcsvc_request_t *req)
+     char *subdir_mount = NULL;
+     char *client_name = NULL;
+     gf_boolean_t cleanup_starting = _gf_false;
++    gf_boolean_t xlator_in_graph = _gf_true;
+     params = dict_new();
+     reply = dict_new();
+@@ -311,8 +312,10 @@ server_setvolume(rpcsvc_request_t *req)
+     LOCK(&ctx->volfile_lock);
+     {
+         xl = get_xlator_by_name(this, name);
+-        if (!xl)
++        if (!xl) {
++            xlator_in_graph = _gf_false;
+             xl = this;
++        }
+     }
+     UNLOCK(&ctx->volfile_lock);
+     if (xl == NULL) {
+@@ -568,20 +571,30 @@ server_setvolume(rpcsvc_request_t *req)
+                          "failed to set error "
+                          "msg");
+     } else {
+-        gf_event(EVENT_CLIENT_AUTH_REJECT,
+-                 "client_uid=%s;"
+-                 "client_identifier=%s;server_identifier=%s;"
+-                 "brick_path=%s",
+-                 client->client_uid, req->trans->peerinfo.identifier,
+-                 req->trans->myinfo.identifier, name);
+-        gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR,
+-               "Cannot authenticate client"
+-               " from %s %s",
+-               client->client_uid, (clnt_version) ? clnt_version : "old");
+         op_ret = -1;
+-        op_errno = EACCES;
+-        ret = dict_set_str(reply, "ERROR", "Authentication failed");
++        if (!xlator_in_graph) {
++            gf_msg(this->name, GF_LOG_ERROR, ENOENT, PS_MSG_AUTHENTICATE_ERROR,
++                   "Cannot authenticate client"
++                   " from %s %s because brick is not attached in graph",
++                   client->client_uid, (clnt_version) ? clnt_version : "old");
++            op_errno = ENOENT;
++            ret = dict_set_str(reply, "ERROR", "Brick not found");
++        } else {
++            gf_event(EVENT_CLIENT_AUTH_REJECT,
++                     "client_uid=%s;"
++                     "client_identifier=%s;server_identifier=%s;"
++                     "brick_path=%s",
++                     client->client_uid, req->trans->peerinfo.identifier,
++                     req->trans->myinfo.identifier, name);
++            gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR,
++                   "Cannot authenticate client"
++                   " from %s %s",
++                   client->client_uid, (clnt_version) ? clnt_version : "old");
++            op_errno = EACCES;
++            ret = dict_set_str(reply, "ERROR", "Authentication failed");
++        }
+         if (ret < 0)
+             gf_msg_debug(this->name, 0,
+                          "failed to set error "
diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec
index 4e2fa5b..84a0141 100644
--- a/SPECS/glusterfs.spec
+++ b/SPECS/glusterfs.spec
@@ -231,7 +231,7 @@ Release:          0.1%{?prereltag:.%{prereltag}}%{?dist}
 Name:             glusterfs
 Version:          6.0
-Release:          12%{?dist}
+Release:          29%{?dist}
 ExcludeArch:      i686
 License:          GPLv2 or LGPLv3+
@@ -585,6 +585,81 @@ Patch0273: 0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch
 Patch0274: 0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch
 Patch0275: 0275-cluster-ec-Create-heal-task-with-heal-process-id.patch
 Patch0276: 0276-features-utime-always-update-ctime-at-setattr.patch
+Patch0277: 0277-geo-rep-Fix-Config-Get-Race.patch
+Patch0278: 0278-geo-rep-Fix-worker-connection-issue.patch
+Patch0279: 0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch
+Patch0280: 0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch
+Patch0281: 0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch
+Patch0282: 0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch
+Patch0283: 0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch
+Patch0284: 0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch
+Patch0285: 0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch
+Patch0287: 0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch
+Patch0288: 0288-cluster-ec-Fix-coverity-issues.patch
+Patch0289: 0289-cluster-ec-quorum-count-implementation.patch
+Patch0290: 0290-glusterd-tag-disperse.quorum-count-for-31306.patch
+Patch0291: 0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch
+Patch0292: 0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch
+Patch0293: 0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch
+Patch0294: 0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch
+Patch0295: 0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch
+Patch0296: 0296-glusterfind-pre-command-failure-on-a-modify.patch
+Patch0297: 0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
+Patch0298: 0298-geo-rep-fix-sub-command-during-worker-connection.patch
+Patch0299: 0299-geo-rep-performance-improvement-while-syncing-rename.patch
+Patch0300: 0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
+Patch0301: 0301-posix-Brick-is-going-down-unexpectedly.patch
+Patch0302: 0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
+Patch0303: 0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch
+Patch0304: 0304-cluster-dht-Correct-fd-processing-loop.patch
+Patch0305: 0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch
+Patch0306: 0306-cli-fix-distCount-value.patch
+Patch0307: 0307-ssl-fix-RHEL8-regression-failure.patch
+Patch0308: 0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch
+Patch0309: 0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch
+Patch0310: 0310-tests-test-case-for-non-root-geo-rep-setup.patch
+Patch0311: 0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch
+Patch0312: 0312-Scripts-quota_fsck-script-KeyError-contri_size.patch
+Patch0313: 0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch
+Patch0314: 0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
+Patch0315: 0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch
+Patch0316: 0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch
+Patch0319: 0319-features-snapview-server-obtain-the-list-of-snapshot.patch
+Patch0320: 0320-gf-event-Handle-unix-volfile-servers.patch
+Patch0321: 0321-Adding-white-spaces-to-description-of-set-group.patch
+Patch0322: 0322-glusterd-display-correct-rebalance-data-size-after-g.patch
+Patch0323: 0323-cli-display-detailed-rebalance-info.patch
+Patch0324: 0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch
+Patch0325: 0325-extras-hooks-Install-and-package-newly-added-post-ad.patch
+Patch0326: 0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch
+Patch0327: 0327-glusterfind-integrate-with-gfid2path.patch
+Patch0328: 0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch
+Patch0329: 0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch
+Patch0330: 0330-mount.glusterfs-change-the-error-message.patch
+Patch0331: 0331-features-locks-Do-special-handling-for-op-version-3..patch
+Patch0332: 0332-Removing-one-top-command-from-gluster-v-help.patch
+Patch0333: 0333-rpc-Synchronize-slot-allocation-code.patch
+Patch0334: 0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch
+Patch0335: 0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch
+Patch0336: 0336-spec-check-and-return-exit-code-in-rpm-scripts.patch
+Patch0337: 0337-fuse-Set-limit-on-invalidate-queue-size.patch
+Patch0338: 0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch
+Patch0339: 0339-geo-rep-fix-integer-config-validation.patch
+Patch0340: 0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch
+Patch0341: 0341-socket-fix-error-handling.patch
+Patch0342: 0342-Revert-hooks-remove-selinux-hooks.patch
+Patch0343: 0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch
+Patch0344: 0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch
+Patch0345: 0345-read-ahead-io-cache-turn-off-by-default.patch
+Patch0346: 0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch
+Patch0347: 0347-tools-glusterfind-handle-offline-bricks.patch
+Patch0348: 0348-glusterfind-Fix-py2-py3-issues.patch
+Patch0349: 0349-glusterfind-python3-compatibility.patch
+Patch0350: 0350-tools-glusterfind-Remove-an-extra-argument.patch
+Patch0351: 0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch
 GlusterFS is a distributed file-system capable of scaling to several
@@ -984,7 +1059,7 @@ This package provides the translators needed on any GlusterFS client.
 %package events
 Summary:          GlusterFS Events
 Requires:         %{name}-server%{?_isa} = %{version}-%{release}
-Requires:         python%{_pythonver} python%{_pythonver}-prettytable
+Requires:         python%{_pythonver}
 Requires:         python%{_pythonver}-gluster = %{version}-%{release}
 %if ( 0%{?rhel} && 0%{?rhel} < 8 )
 Requires:         python-requests
@@ -992,7 +1067,10 @@ Requires:         python-requests
 Requires:         python%{_pythonver}-requests
 %if ( 0%{?rhel} && 0%{?rhel} < 7 )
+Requires:         python-prettytable
 Requires:         python-argparse
+Requires:         python%{_pythonver}-prettytable
 %if ( 0%{?_with_systemd:1} )
@@ -1458,6 +1536,9 @@ exit 0
 # xlators that are needed on the client- and on the server-side
 %dir %{_libdir}/glusterfs
 %dir %{_libdir}/glusterfs/%{version}%{?prereltag}
@@ -1703,6 +1784,8 @@ exit 0
 %if ( 0%{!?_without_server:1} )
 %files server
 %doc extras/
 # sysconf
 %config(noreplace) %{_sysconfdir}/glusterfs
 %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
@@ -1914,8 +1997,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -1948,8 +2032,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -1982,8 +2067,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -2016,8 +2102,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -2049,8 +2136,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -2082,8 +2170,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -2117,8 +2206,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -2152,8 +2242,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -2187,8 +2278,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -2223,8 +2315,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
@@ -2258,8 +2351,9 @@ fi
 ok, how, val = os.execute(script)
-if not (ok == 0) then
-   error("Detected running glusterfs processes", ok)
+rc = val or ok
+if not (rc == 0) then
+   error("Detected running glusterfs processes", rc)
 %posttrans server
@@ -2293,8 +2387,59 @@ fi
-* Tue Oct 29 2019 CentOS Sources <> - 6.0-12.el7.centos
-- remove vendor and/or packager lines
+* Thu Jan 23 2020 Rinku Kothiya <> - 6.0-29
+- fixes bugs bz#1793035
+* Tue Jan 14 2020 Rinku Kothiya <> - 6.0-28
+- fixes bugs bz#1789447
+* Mon Jan 13 2020 Rinku Kothiya <> - 6.0-27
+- fixes bugs bz#1789447
+* Fri Jan 10 2020 Rinku Kothiya <> - 6.0-26
+- fixes bugs bz#1763208 bz#1788656
+* Mon Dec 23 2019 Rinku Kothiya <> - 6.0-25
+- fixes bugs bz#1686800 bz#1763208 bz#1779696 bz#1781444 bz#1782162
+* Thu Nov 28 2019 Rinku Kothiya <> - 6.0-24
+- fixes bugs bz#1768786
+* Thu Nov 21 2019 Rinku Kothiya <> - 6.0-23
+- fixes bugs bz#1344758 bz#1599802 bz#1685406 bz#1686800 bz#1724021 
+  bz#1726058 bz#1727755 bz#1731513 bz#1741193 bz#1758923 bz#1761326 bz#1761486 
+  bz#1762180 bz#1764095 bz#1766640
+* Thu Nov 14 2019 Rinku Kothiya <> - 6.0-22
+- fixes bugs bz#1771524 bz#1771614
+* Fri Oct 25 2019 Rinku Kothiya <> - 6.0-21
+- fixes bugs bz#1765555
+* Wed Oct 23 2019 Rinku Kothiya <> - 6.0-20
+- fixes bugs bz#1719171 bz#1763412 bz#1764202
+* Thu Oct 17 2019 Rinku Kothiya <> - 6.0-19
+- fixes bugs bz#1760939
+* Wed Oct 16 2019 Rinku Kothiya <> - 6.0-18
+- fixes bugs bz#1758432
+* Fri Oct 11 2019 Rinku Kothiya <> - 6.0-17
+- fixes bugs bz#1704562 bz#1758618 bz#1760261
+* Wed Oct 09 2019 Rinku Kothiya <> - 6.0-16
+- fixes bugs bz#1752713 bz#1756325
+* Fri Sep 27 2019 Rinku Kothiya <> - 6.0-15
+- fixes bugs bz#1726000 bz#1731826 bz#1754407 bz#1754790 bz#1755227
+* Fri Sep 20 2019 Sunil Kumar Acharya <> - 6.0-14
+- fixes bugs bz#1719171 bz#1728673 bz#1731896 bz#1732443 bz#1733970 
+  bz#1745107 bz#1746027 bz#1748688 bz#1750241 bz#1572163
+* Fri Aug 23 2019 Rinku Kothiya <> - 6.0-13
+- fixes bugs bz#1729915 bz#1732376 bz#1743611 bz#1743627 bz#1743634 bz#1744518
 * Fri Aug 09 2019 Sunil Kumar Acharya <> - 6.0-12
 - fixes bugs bz#1730914 bz#1731448 bz#1732770 bz#1732792 bz#1733531