diff --git a/SOURCES/0544-RHGS-3.5.4-rebuild-to-ship-with-RHEL-8.5.patch b/SOURCES/0544-RHGS-3.5.4-rebuild-to-ship-with-RHEL-8.5.patch
deleted file mode 100644
index 171ed10..0000000
--- a/SOURCES/0544-RHGS-3.5.4-rebuild-to-ship-with-RHEL-8.5.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 840f437d232fbafac9f4448b0f8d0e9976ea1e1d Mon Sep 17 00:00:00 2001
-From: Tamar Shacked <tshacked@redhat.com>
-Date: Mon, 23 Aug 2021 20:46:13 +0300
-Subject: [PATCH 544/544] RHGS-3.5.4: rebuild to ship with RHEL-8.5
-
-Label: DOWNSTREAM ONLY
-BUG: 1996984
-
-Signed-off-by: Tamar Shacked <tshacked@redhat.com>
-Change-Id: Idafc64b8ee5da165c87428b8a5166cf319ef7660
-Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/267350
-Tested-by: RHGS Build Bot <nigelb@redhat.com>
-Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
----
- glusterfs.spec.in | 2 ++
- rfc.sh            | 2 +-
- 2 files changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/glusterfs.spec.in b/glusterfs.spec.in
-index 2be7677..4511979 100644
---- a/glusterfs.spec.in
-+++ b/glusterfs.spec.in
-@@ -1982,6 +1982,8 @@ fi
- %endif
- 
- %changelog
-+* Tue Aug 24 2021 Tamar Shacked <tshacked@redhat.com>
-+- build RGHS client for RHEL-8.5 (#1996984)
- 
- * Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
- - added requires policycoreutils-python-utils on rhel8 for geo-replication
-diff --git a/rfc.sh b/rfc.sh
-index c0559b9..b1153be 100755
---- a/rfc.sh
-+++ b/rfc.sh
-@@ -315,7 +315,7 @@ main()
-     if [ -z "${reference}" ]; then
-         $drier git push $ORIGIN HEAD:refs/for/$branch/rfc;
-     else
--        $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference};
-+        $drier git push $ORIGIN HEAD:refs/for/$branch;
-     fi
- }
- 
--- 
-1.8.3.1
-
diff --git a/SOURCES/0544-tests-avoid-empty-paths-in-environment-variables.patch b/SOURCES/0544-tests-avoid-empty-paths-in-environment-variables.patch
new file mode 100644
index 0000000..cb5e80b
--- /dev/null
+++ b/SOURCES/0544-tests-avoid-empty-paths-in-environment-variables.patch
@@ -0,0 +1,86 @@
+From 3eaf937e69fe4219738c93d39af1cc909b1ee3f8 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Fri, 23 Apr 2021 09:30:35 +0000
+Subject: [PATCH 544/584] tests: avoid empty paths in environment variables
+
+Many variables containing paths in env.rc.in are defined in a way
+that leaves a trailing ':' in the variable when the previous value
+was empty or undefined.
+
+In the particular case of the 'LD_LIBRARY_PATH' variable, this causes
+the system to look for dynamic libraries in the current working
+directory. When this directory is inside a Gluster mount point, a
+significant delay is caused each time a program is run (and the testing
+framework can run lots of programs for each test).
+
+This patch prevents variables containing paths from ending with
+a trailing ':'.
+
+Backport of :
+>Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2349
+>Fixes: #2348
+>Change-Id: I669f5a78e14f176c0a58824ba577330989d84769
+>Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+>Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+
+Change-Id: Ie903ca443aa4789553ac4687818a7f69c113af41
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ tests/env.rc.in | 17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+diff --git a/tests/env.rc.in b/tests/env.rc.in
+index 1f0ca88..2d8ff0e 100644
+--- a/tests/env.rc.in
++++ b/tests/env.rc.in
+@@ -2,34 +2,31 @@ prefix=@prefix@
+ exec_prefix=@exec_prefix@
+ libdir=@libdir@
+ 
+-PATH=@sbindir@:$PATH
++PATH=@bindir@:@sbindir@${PATH:+:${PATH}}
+ export PATH
+ 
+ GLUSTERD_PIDFILEDIR=@localstatedir@/run/gluster
+ export GLUSTERD_PIDFILEDIR
+ 
+-LD_LIBRARY_PATH=@libdir@:$LD_LIBRARY_PATH
++LD_LIBRARY_PATH=@libdir@${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+ export LD_LIBRARY_PATH
+ 
+-LIBRARY_PATH=@libdir@:$LIBRARY_PATH
++LIBRARY_PATH=@libdir@${LIBRARY_PATH:+:${LIBRARY_PATH}}
+ export LIBRARY_PATH
+ 
+-CPATH=@includedir@:$CPATH
++CPATH=@includedir@${CPATH:+:${CPATH}}
+ export CPATH
+ 
+ GLUSTERD_WORKDIR=@GLUSTERD_WORKDIR@
+ export GLUSTERD_WORKDIR
+ 
+-PKG_CONFIG_PATH=@pkgconfigdir@:$PKG_CONFIG_PATH
++PKG_CONFIG_PATH=@pkgconfigdir@${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}
+ export PKG_CONFIG_PATH
+ 
+-PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
+-export PYTHONPATH
+-
+ PYTHON=@PYTHON@
+ export PYTHON
+ 
+-PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
++PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@${PYTHONPATH:+:${PYTHONPATH}}
+ export PYTHONPATH
+ 
+ GLUSTER_CMD_DIR=@sbindir@
+@@ -42,4 +39,4 @@ RUN_NFS_TESTS=@BUILD_GNFS@
+ export RUN_NFS_TESTS
+ 
+ GLUSTER_XLATOR_DIR=@libdir@/glusterfs/@PACKAGE_VERSION@/xlator
+-export GLUSTER_XLATOR_DIR
+\ No newline at end of file
++export GLUSTER_XLATOR_DIR
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0545-tests-Excluded-tests-for-unsupported-components.patch b/SOURCES/0545-tests-Excluded-tests-for-unsupported-components.patch
new file mode 100644
index 0000000..add8025
--- /dev/null
+++ b/SOURCES/0545-tests-Excluded-tests-for-unsupported-components.patch
@@ -0,0 +1,32 @@
+From 6b340470e01dc177767fae990cf19037202140b7 Mon Sep 17 00:00:00 2001
+From: Tamar Shacked <tshacked@redhat.com>
+Date: Mon, 31 May 2021 21:27:41 +0300
+Subject: [PATCH 545/584] tests: Excluded tests for unsupported components
+
+Quota and Tier are deprecated from RHGS-3.5.5.
+Stop running regression tests for them.
+
+Label: DOWNSTREAM ONLY
+
+Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+Change-Id: I3ca1aacba9a31129f5e68fcffdd80e69e51f7bcc
+---
+ run-tests.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/run-tests.sh b/run-tests.sh
+index c835d93..5cc18b0 100755
+--- a/run-tests.sh
++++ b/run-tests.sh
+@@ -349,7 +349,7 @@ function run_tests()
+     fi
+ 
+     for t in $(find ${regression_testsdir}/tests -name '*.t' \
+-               | LC_COLLATE=C sort) ; do
++               | egrep -v "tier|quota" | LC_COLLATE=C sort) ; do
+         old_cores=$(ls /*-*.core 2> /dev/null | wc -l)
+         total_tests=$((total_tests+1))
+         if match $t "$@" ; then
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0546-Update-rfc.sh-to-rhgs-3.5.5.patch b/SOURCES/0546-Update-rfc.sh-to-rhgs-3.5.5.patch
new file mode 100644
index 0000000..935f533
--- /dev/null
+++ b/SOURCES/0546-Update-rfc.sh-to-rhgs-3.5.5.patch
@@ -0,0 +1,36 @@
+From 6ff3314f24687c8224a5520f9c4d2b3c39e730b7 Mon Sep 17 00:00:00 2001
+From: Tamar Shacked <tshacked@redhat.com>
+Date: Tue, 1 Jun 2021 13:02:24 +0300
+Subject: [PATCH 546/584] Update rfc.sh to rhgs-3.5.5
+
+Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+Change-Id: Iff543dc77174f983dd39f9fb7cc5005b49594750
+---
+ rfc.sh | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/rfc.sh b/rfc.sh
+index c0559b9..daeff32 100755
+--- a/rfc.sh
++++ b/rfc.sh
+@@ -18,7 +18,7 @@ done
+ shift $((OPTIND-1))
+ 
+ 
+-branch="rhgs-3.5.4";
++branch="rhgs-3.5.5";
+ 
+ set_hooks_commit_msg()
+ {
+@@ -315,7 +315,7 @@ main()
+     if [ -z "${reference}" ]; then
+         $drier git push $ORIGIN HEAD:refs/for/$branch/rfc;
+     else
+-        $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference};
++        $drier git push $ORIGIN HEAD:refs/for/$branch;
+     fi
+ }
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0547-perf-write-behind-Clear-frame-local-on-conflict-erro.patch b/SOURCES/0547-perf-write-behind-Clear-frame-local-on-conflict-erro.patch
new file mode 100644
index 0000000..2bd8e28
--- /dev/null
+++ b/SOURCES/0547-perf-write-behind-Clear-frame-local-on-conflict-erro.patch
@@ -0,0 +1,47 @@
+From 08c57926118b1ab8fa1fcd5b16913ff22d97d065 Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Wed, 25 Sep 2019 19:50:27 +0530
+Subject: [PATCH 547/584] perf/write-behind: Clear frame->local on conflict
+ error
+
+WB saves the wb_inode in frame->local for the truncate and
+ftruncate fops. This value is not cleared in case of error
+on a conflicting write request. FRAME_DESTROY finds a non-null
+frame->local and tries to free it using mem_put. However,
+wb_inode is allocated using GF_CALLOC, causing the
+process to crash.
+
+credit: vpolakis@gmail.com
+
+Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/23485/
+>Change-Id: I217f61470445775e05145aebe44c814731c1b8c5
+>Fixes: bz#1753592
+>Signed-off-by: N Balachandran <nbalacha@redhat.com>
+
+BUG: 1917488
+Change-Id: I217f61470445775e05145aebe44c814731c1b8c5
+Signed-off-by: Sunil Kumar H G <sheggodu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244277
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/performance/write-behind/src/write-behind.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
+index 90a0bcf..31ab723 100644
+--- a/xlators/performance/write-behind/src/write-behind.c
++++ b/xlators/performance/write-behind/src/write-behind.c
+@@ -1523,6 +1523,10 @@ __wb_handle_failed_conflict(wb_request_t *req, wb_request_t *conflict,
+              */
+             req->op_ret = -1;
+             req->op_errno = conflict->op_errno;
++            if ((req->stub->fop == GF_FOP_TRUNCATE) ||
++                (req->stub->fop == GF_FOP_FTRUNCATE)) {
++                req->stub->frame->local = NULL;
++            }
+ 
+             list_del_init(&req->todo);
+             list_add_tail(&req->winds, tasks);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0548-Add-tar-as-dependency-to-geo-rep-rpm-for-RHEL-8.3-an.patch b/SOURCES/0548-Add-tar-as-dependency-to-geo-rep-rpm-for-RHEL-8.3-an.patch
new file mode 100644
index 0000000..aed347c
--- /dev/null
+++ b/SOURCES/0548-Add-tar-as-dependency-to-geo-rep-rpm-for-RHEL-8.3-an.patch
@@ -0,0 +1,49 @@
+From cb7e72bce8b6a46605753b72919c1c839ecb4cc9 Mon Sep 17 00:00:00 2001
+From: root <root@sacharya.remote.csb>
+Date: Thu, 3 Jun 2021 12:08:24 +0530
+Subject: [PATCH 548/584] Add tar as dependency to geo-rep rpm for RHEL 8.3 and
+ above
+
+Reason: from RHEL 8.3, tar is not bundled by default
+
+>Fixes: #1849
+>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+>Change-Id: Ic1424e0550cef6a78e3e9e7b42665ab01016436f
+Upstream Patch: https://github.com/gluster/glusterfs/pull/1850
+
+BUG: 1901468
+Change-Id: Ic1424e0550cef6a78e3e9e7b42665ab01016436f
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244896
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Srijan Sivakumar <ssivakum@redhat.com>
+---
+ glusterfs.spec.in | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 2be7677..424f4ab 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -521,6 +521,9 @@ Requires:         python%{_pythonver}-gluster = %{version}-%{release}
+ Requires:         rsync
+ Requires:         util-linux
+ Requires:         %{name}-libs%{?_isa} = %{version}-%{release}
++%if ( 0%{?rhel} && ( ( 0%{?rhel} == 8 && 0%{?rhel_minor_version} >= 3 ) || 0%{?rhel} >= 9 ) )
++Requires:         tar
++%endif
+ # required for setting selinux bools
+ %if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+ Requires(post):      policycoreutils-python-utils
+@@ -1982,6 +1985,8 @@ fi
+ %endif
+ 
+ %changelog
++* Thu Nov 26 2020 Shwetha K Acharya <sacharya@redhat.com>
++- Add tar as dependency to georeplication rpm for RHEL version >= 8.3
+ 
+ * Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
+ - added requires policycoreutils-python-utils on rhel8 for geo-replication
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0549-geo-rep-Change-in-attribute-for-getting-function-nam.patch b/SOURCES/0549-geo-rep-Change-in-attribute-for-getting-function-nam.patch
new file mode 100644
index 0000000..b61e5ea
--- /dev/null
+++ b/SOURCES/0549-geo-rep-Change-in-attribute-for-getting-function-nam.patch
@@ -0,0 +1,45 @@
+From f90c13912a9c64e4479b55fee4ba4ac50e509302 Mon Sep 17 00:00:00 2001
+From: schaffung <ssivakum@redhat.com>
+Date: Sat, 9 Jan 2021 15:41:15 +0530
+Subject: [PATCH 549/584] geo-rep : Change in attribute for getting function
+ name in py 3 (#1900)
+
+Issue: The schedule_geo-rep script uses `func_name` to obtain
+the name of the function being referred to but from python3
+onwards, the attribute has been changed to `__name__`.
+
+Code Change:
+ Changing `func_name` to `__name__`.
+
+>Fixes: #1898
+>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+>Change-Id: I4ed69a06cffed9db17c8f8949b8000c74be1d717
+Upstream Patch : https://github.com/gluster/glusterfs/pull/1900
+
+BUG: 1903911
+Change-Id: I4ed69a06cffed9db17c8f8949b8000c74be1d717
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244570
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Shwetha Acharya <sacharya@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/geo-rep/schedule_georep.py.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in
+index ac93716..9bb3df5 100644
+--- a/extras/geo-rep/schedule_georep.py.in
++++ b/extras/geo-rep/schedule_georep.py.in
+@@ -102,7 +102,7 @@ def cache_output_with_args(func):
+     """
+     def wrapper(*args, **kwargs):
+         global cache_data
+-        key = "_".join([func.func_name] + list(args))
++        key = "_".join([func.__name__] + list(args))
+         if cache_data.get(key, None) is None:
+             cache_data[key] = func(*args, **kwargs)
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0550-common-ha-stability-fixes-for-ganesha_grace-and-gane.patch b/SOURCES/0550-common-ha-stability-fixes-for-ganesha_grace-and-gane.patch
new file mode 100644
index 0000000..8bc6694
--- /dev/null
+++ b/SOURCES/0550-common-ha-stability-fixes-for-ganesha_grace-and-gane.patch
@@ -0,0 +1,184 @@
+From 053bb9c7356eae82b1089582bb2844388ae4df57 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 2 Jun 2021 07:49:12 -0400
+Subject: [PATCH 550/584] common-ha: stability fixes for ganesha_grace and
+ ganesha_mon RAs
+
+Include fixes suggested by ClusterHA devs.
+
+1) It turns out that crm_attribute attrs and attrd_updater attrs really
+are one and the same, despite what I was told years ago.
+
+attrs created with crm_attribute ... --lifetime=reboot ... or
+attrd_updater are one and the same. As per ClusterHA devs, having an attr
+created with crm_attribute ... --lifetime=forever and also
+creating/updating the same attr with attrd_updater is a recipe for
+weird things to happen that will be difficult to debug.
+
+2) using hostname -s or hostname for node names in crm_attribute and
+attrd_updater potentially could use the wrong name if the host has
+been renamed; use ocf_local_nodename() (in ocf-shellfuncs) instead.
+
+https://github.com/gluster/glusterfs/issues/2276
+https://github.com/gluster/glusterfs/pull/2283
+commit 9bd2c697686ec40e2c4f711df961860c8a735baa
+
+Change-Id:If572d396fae9206628714fb2ce00f72e94f2258f
+BUG: 1945143
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244593
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/ocf/ganesha_grace | 28 +++++++++---------------
+ extras/ganesha/ocf/ganesha_mon   | 47 ++++++++++++++--------------------------
+ 2 files changed, 26 insertions(+), 49 deletions(-)
+
+diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
+index 825f716..edc6fa2 100644
+--- a/extras/ganesha/ocf/ganesha_grace
++++ b/extras/ganesha/ocf/ganesha_grace
+@@ -94,25 +94,21 @@ esac
+ ganesha_grace_start()
+ {
+ 	local rc=${OCF_ERR_GENERIC}
+-	local host=$(hostname -s)
++	local host=$(ocf_local_nodename)
+ 
+-	ocf_log debug "ganesha_grace_start()"
+-	# give ganesha_mon RA a chance to set the crm_attr first
++	ocf_log debug "ganesha_grace_start ${host}"
++	# give ganesha_mon RA a chance to set the attr first
+ 	# I mislike the sleep, but it's not clear that looping
+ 	# with a small sleep is necessarily better
+ 	# start has a 40sec timeout, so a 5sec sleep here is okay
+         sleep 5
+-	attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
++	attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+         if [ $? -ne 0 ]; then
+-		host=$(hostname)
+-		attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null )
+-                if [ $? -ne 0 ]; then
+-	                ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+-                fi
++	        ocf_log info "grace start: attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+         fi
+ 
+ 	# Three possibilities:
+-	# 1. There is no attribute at all and attr_updater returns
++	# 1. There is no attribute at all and attrd_updater returns
+ 	#    a zero length string. This happens when
+ 	#    ganesha_mon::monitor hasn't run at least once to set
+ 	#    the attribute. The assumption here is that the system
+@@ -164,17 +160,13 @@ ganesha_grace_notify()
+ 
+ ganesha_grace_monitor()
+ {
+-	local host=$(hostname -s)
++	local host=$(ocf_local_nodename)
+ 
+-	ocf_log debug "monitor"
++	ocf_log debug "ganesha_grace monitor ${host}"
+ 
+-	attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
++	attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+         if [ $? -ne 0 ]; then
+-		host=$(hostname)
+-	        attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+-                if [ $? -ne 0 ]; then
+-	                ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+-                fi
++	        ocf_log info "attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+         fi
+ 
+ 	# if there is no attribute (yet), maybe it's because
+diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
+index 2b4a9d6..7fbbf70 100644
+--- a/extras/ganesha/ocf/ganesha_mon
++++ b/extras/ganesha/ocf/ganesha_mon
+@@ -124,7 +124,6 @@ ganesha_mon_stop()
+ 
+ ganesha_mon_monitor()
+ {
+-	local host=$(hostname -s)
+ 	local pid_file="/var/run/ganesha.pid"
+ 	local rhel6_pid_file="/var/run/ganesha.nfsd.pid"
+ 	local proc_pid="/proc/"
+@@ -141,31 +140,27 @@ ganesha_mon_monitor()
+ 
+ 	if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then
+ 
+-		attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1
++		attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1
+ 		if [ $? -ne 0 ]; then
+-			ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed"
++			ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1 failed"
+ 		fi
+ 
+ 		# ganesha_grace (nfs-grace) RA follows grace-active attr
+ 		# w/ constraint location
+-		attrd_updater -n ${OCF_RESKEY_grace_active} -v 1
++		attrd_updater --name ${OCF_RESKEY_grace_active} -v 1
+ 		if [ $? -ne 0 ]; then
+-			ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed"
++			ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_grace_active} -v 1 failed"
+ 		fi
+ 
+ 		# ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace)
+-		# track grace-active crm_attr (attr != crm_attr)
+-		# we can't just use the attr as there's no way to query
+-		# its value in RHEL6 pacemaker
+-
+-		crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
+-		if [ $? -ne 0 ]; then
+-			host=$(hostname)
+-			crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
+-			if [ $? -ne 0 ]; then
+-				ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed"
+-			fi
+-		fi
++		# track grace-active attr.
++		#
++		# Originally we were told that attrs set with attrd_updater
++		# are different/distinct than attrs set with crm_attribute.
++		# Now, years later, we are told that they are the same and
++		# that the values of attrs set with attrd_updater can be
++		# retrieved with crm_attribute. Or with attrd_updater -Q
++		# now that we no longer have to deal with rhel6.
+ 
+ 		return ${OCF_SUCCESS}
+ 	fi
+@@ -182,26 +177,16 @@ ganesha_mon_monitor()
+ 	# the remaining ganesha.nfsds into grace before
+ 	# initiating the VIP fail-over.
+ 
+-	attrd_updater -D -n ${OCF_RESKEY_grace_active}
+-	if [ $? -ne 0 ]; then
+-		ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed"
+-	fi
+-
+-	host=$(hostname -s)
+-	crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
++	attrd_updater --delete --name ${OCF_RESKEY_grace_active}
+ 	if [ $? -ne 0 ]; then
+-		host=$(hostname)
+-		crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
+-		if [ $? -ne 0 ]; then
+-			ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed"
+-		fi
++		ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_grace_active} failed"
+ 	fi
+ 
+ 	sleep ${OCF_RESKEY_grace_delay}
+ 
+-	attrd_updater -D -n ${OCF_RESKEY_ganesha_active}
++	attrd_updater --delete --name ${OCF_RESKEY_ganesha_active}
+ 	if [ $? -ne 0 ]; then
+-		ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed"
++		ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_ganesha_active} failed"
+ 	fi
+ 
+ 	return ${OCF_SUCCESS}
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0551-common-ha-ensure-shared_storage-is-mounted-before-se.patch b/SOURCES/0551-common-ha-ensure-shared_storage-is-mounted-before-se.patch
new file mode 100644
index 0000000..e3a107f
--- /dev/null
+++ b/SOURCES/0551-common-ha-ensure-shared_storage-is-mounted-before-se.patch
@@ -0,0 +1,52 @@
+From fcfd40132624df5e888d53b4a8c4ce1cf7087413 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 2 Jun 2021 07:40:04 -0400
+Subject: [PATCH 551/584] common-ha: ensure shared_storage is mounted before
+ setup (#2296)
+
+If gluster shared-storage isn't mounted, ganesha will fail to start
+
+commit a249b9020d281d0482db0aeb52e8856acd931e02
+https://github.com/gluster/glusterfs/issues/2278
+https://github.com/gluster/glusterfs/pull/2296
+
+Change-Id: I6ed7044ea6b6c61b013ebe17088bfde311b109b7
+BUG: 1918018
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244592
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 491c61d..012084f 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -195,9 +195,22 @@ setup_cluster()
+     local servers=${3}
+     local unclean=""
+     local quorum_policy="stop"
++    local dfresult=""
+ 
+     logger "setting up cluster ${name} with the following ${servers}"
+ 
++    # check that shared_storage is mounted
++    dfresult=$(df -T ${HA_VOL_MNT})
++    if [[ -z "${dfresult}" ]]; then
++        logger "gluster shared_storage is not mounted, exiting..."
++        exit 1
++    fi
++
++    if [[ "${dfresult}" != *"fuse.glusterfs"* ]]; then
++        logger "gluster shared_storage is not mounted, exiting..."
++        exit 1
++    fi
++
+     # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
+     pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
+     if [ $? -ne 0 ]; then
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0552-cluster-afr-Change-default-self-heal-window-size-to-.patch b/SOURCES/0552-cluster-afr-Change-default-self-heal-window-size-to-.patch
new file mode 100644
index 0000000..41b94cd
--- /dev/null
+++ b/SOURCES/0552-cluster-afr-Change-default-self-heal-window-size-to-.patch
@@ -0,0 +1,67 @@
+From e9e1b0bc6e2deaf44190636ab6826065ed3c0392 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar Karampuri <pranith.karampuri@phonepe.com>
+Date: Wed, 3 Feb 2021 18:10:40 +0530
+Subject: [PATCH 552/584] cluster/afr: Change default self-heal-window-size to
+ 1MB (#2068)
+
+At the moment self-heal-window-size is 128KB. This leads to healing data
+in 128KB chunks. With the growth of data and the avg file sizes
+nowadays, 1MB seems like a better default.
+
+Upstream patch details:
+> https://github.com/gluster/glusterfs/pull/2111
+> Change-Id: I70c42c83b16c7adb53d6b5762969e878477efb5c
+> Fixes: #2067
+> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
+
+BUG: 1946171
+Change-Id: Icd6a5c02ca16a1a6095f7bc10feed8ddc2505f41
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244557
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heal-data.c | 6 ++++++
+ xlators/cluster/afr/src/afr.c                | 6 +++---
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
+index b97c66b..156cb18 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-data.c
++++ b/xlators/cluster/afr/src/afr-self-heal-data.c
+@@ -337,6 +337,12 @@ afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+     }
+ 
+     block = 128 * 1024 * priv->data_self_heal_window_size;
++    if (HAS_HOLES((&replies[source].poststat))) {
++        /*Reduce the possibility of data-block allocations in case of files
++         * with holes. Correct way to fix it would be to use seek fop while
++         * healing data*/
++        block = 128 * 1024;
++    }
+ 
+     type = afr_data_self_heal_type_get(priv, healed_sinks, source, replies);
+ 
+diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
+index 33fe4d8..0956e5a 100644
+--- a/xlators/cluster/afr/src/afr.c
++++ b/xlators/cluster/afr/src/afr.c
+@@ -910,12 +910,12 @@ struct volume_options options[] = {
+      .type = GF_OPTION_TYPE_INT,
+      .min = 1,
+      .max = 1024,
+-     .default_value = "1",
++     .default_value = "8",
+      .op_version = {1},
+      .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+      .tags = {"replicate"},
+-     .description = "Maximum number blocks per file for which self-heal "
+-                    "process would be applied simultaneously."},
++     .description = "Maximum number of 128KB blocks per file for which "
++                    "self-heal process would be applied simultaneously."},
+     {.key = {"metadata-self-heal"},
+      .type = GF_OPTION_TYPE_BOOL,
+      .default_value = "off",
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0553-cluster-ec-Change-self-heal-window-size-to-4MiB-by-d.patch b/SOURCES/0553-cluster-ec-Change-self-heal-window-size-to-4MiB-by-d.patch
new file mode 100644
index 0000000..2144845
--- /dev/null
+++ b/SOURCES/0553-cluster-ec-Change-self-heal-window-size-to-4MiB-by-d.patch
@@ -0,0 +1,46 @@
+From 1fa01865eb9bf6a1113669c262fc526ef11f61f2 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@users.noreply.github.com>
+Date: Sat, 6 Feb 2021 01:53:28 +0100
+Subject: [PATCH 553/584] cluster/ec: Change self-heal-window-size to 4MiB by
+ default (#2071)
+
+The current block size used for self-heal by default is 128 KiB. This
+requires a significant amount of management requests for a very small
+portion of data healed.
+
+With this patch the block size is increased to 4 MiB. For a standard
+EC volume configuration of 4+2, this means that each healed block of
+a file will update 1 MiB on each brick.
+
+Upstream patch details:
+> https://github.com/gluster/glusterfs/pull/2071
+> Change-Id: Ifeec4a2d54988017d038085720513c121b03445b
+> Updates: #2067
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1946171
+Change-Id: I9e3eed2d83c9de54242e6161b2e3951c2f6f8000
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244558
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 4118c3b..a930089 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -1644,7 +1644,7 @@ struct volume_options options[] = {
+      .type = GF_OPTION_TYPE_INT,
+      .min = 1,
+      .max = 1024,
+-     .default_value = "1",
++     .default_value = "32",
+      .op_version = {GD_OP_VERSION_3_11_0},
+      .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+      .tags = {"disperse"},
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0554-dht-fix-rebalance-of-sparse-files.patch b/SOURCES/0554-dht-fix-rebalance-of-sparse-files.patch
new file mode 100644
index 0000000..935303b
--- /dev/null
+++ b/SOURCES/0554-dht-fix-rebalance-of-sparse-files.patch
@@ -0,0 +1,245 @@
+From 2cb90b7798fa469f2d7d938ae88733eb1962d63d Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@gmail.com>
+Date: Fri, 9 Apr 2021 18:13:30 +0200
+Subject: [PATCH 554/584] dht: fix rebalance of sparse files
+
+Current implementation of rebalance for sparse files has a bug that,
+in some cases, causes a read of 0 bytes from the source subvolume.
+Posix xlator doesn't allow 0 byte reads and fails them with EINVAL,
+which causes rebalance to abort the migration.
+
+This patch implements a more robust way of finding data segments in
+a sparse file that avoids 0 byte reads, allowing the file to be
+migrated successfully.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2318
+> Fixes: #2317
+> Change-Id: Iff168dda2fb0f2edf716b21eb04cc2cc8ac3915c
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1957641
+Change-Id: Iff168dda2fb0f2edf716b21eb04cc2cc8ac3915c
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244551
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/distribute/issue-2317.t      |  29 ++++++++
+ tests/volume.rc                         |   4 ++
+ xlators/cluster/dht/src/dht-rebalance.c | 116 +++++++++++++++++---------------
+ 3 files changed, 93 insertions(+), 56 deletions(-)
+ create mode 100755 tests/bugs/distribute/issue-2317.t
+
+diff --git a/tests/bugs/distribute/issue-2317.t b/tests/bugs/distribute/issue-2317.t
+new file mode 100755
+index 0000000..e29d003
+--- /dev/null
++++ b/tests/bugs/distribute/issue-2317.t
+@@ -0,0 +1,29 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++TESTS_EXPECTED_IN_LOOP=126
++
++cleanup
++
++TEST glusterd
++TEST ${CLI} volume create ${V0} replica 3 ${H0}:/$B0/${V0}_{0..2}
++TEST ${CLI} volume start ${V0}
++
++TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
++
++# Create several files to make sure that at least some of them should be
++# migrated by rebalance.
++for i in {0..63}; do
++    TEST dd if=/dev/urandom of=${M0}/file.${i} bs=4k count=1
++    TEST dd if=/dev/urandom of=${M0}/file.${i} bs=4k count=1 seek=128
++done
++
++TEST ${CLI} volume add-brick ${V0} ${H0}:${B0}/${V0}_{3..5}
++TEST ${CLI} volume rebalance ${V0} start force
++EXPECT_WITHIN ${REBALANCE_TIMEOUT} "completed" rebalance_status_field "${V0}"
++
++EXPECT "^0$" rebalance_failed_field "${V0}"
++
++cleanup
+diff --git a/tests/volume.rc b/tests/volume.rc
+index 9a002d9..f5dd0b1 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -75,6 +75,10 @@ function rebalance_status_field {
+         $CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p
+ }
+ 
++function rebalance_failed_field {
++        $CLI volume rebalance $1 status | awk '{print $5}' | sed -n 3p
++}
++
+ function fix-layout_status_field {
+         #The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4.
+         #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00').
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 072896d..eab7558 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -1024,6 +1024,46 @@ out:
+     return ret;
+ }
+ 
++static int32_t
++dht_rebalance_sparse_segment(xlator_t *subvol, fd_t *fd, off_t *offset,
++                             size_t *size)
++{
++    off_t hole;
++    int32_t ret;
++
++    do {
++        ret = syncop_seek(subvol, fd, *offset, GF_SEEK_DATA, NULL, offset);
++        if (ret >= 0) {
++            /* Starting at the offset of the last data segment, find the
++             * next hole. After a data segment there should always be a
++             * hole, since EOF is considered a hole. */
++            ret = syncop_seek(subvol, fd, *offset, GF_SEEK_HOLE, NULL, &hole);
++        }
++
++        if (ret < 0) {
++            if (ret == -ENXIO) {
++                /* This can happen if there are no more data segments (i.e.
++                 * the offset is at EOF), or there was a data segment but the
++                 * file has been truncated to a smaller size between both
++                 * seek requests. In both cases we are done. The file doesn't
++                 * contain more data. */
++                ret = 0;
++            }
++            return ret;
++        }
++
++        /* It could happen that, at the same offset where the first seek
++         * detected data, the second seek finds a hole if the user is
++         * modifying the file concurrently. In this case we need to find a
++         * new data segment to migrate. */
++    } while (hole <= *offset);
++
++    /* Calculate the total size of the current data block */
++    *size = hole - *offset;
++
++    return 1;
++}
++
+ static int
+ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+                              xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+@@ -1032,8 +1072,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+     int ret = 0;
+     int count = 0;
+     off_t offset = 0;
+-    off_t data_offset = 0;
+-    off_t hole_offset = 0;
+     struct iovec *vector = NULL;
+     struct iobref *iobref = NULL;
+     uint64_t total = 0;
+@@ -1048,71 +1086,36 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+     while (total < ia_size) {
+         /* This is a regular file - read it sequentially */
+         if (!hole_exists) {
+-            read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+-                             ? DHT_REBALANCE_BLKSIZE
+-                             : (ia_size - total));
++            data_block_size = ia_size - total;
+         } else {
+             /* This is a sparse file - read only the data segments in the file
+              */
+ 
+             /* If the previous data block is fully copied, find the next data
+-             * segment
+-             * starting at the offset of the last read and written byte,  */
++             * segment starting at the offset of the last read and written
++             * byte. */
+             if (data_block_size <= 0) {
+-                ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+-                                  &data_offset);
+-                if (ret) {
+-                    if (ret == -ENXIO)
+-                        ret = 0; /* No more data segments */
+-                    else
+-                        *fop_errno = -ret; /* Error occurred */
+-
++                ret = dht_rebalance_sparse_segment(from, src, &offset,
++                                                   &data_block_size);
++                if (ret <= 0) {
++                    *fop_errno = -ret;
+                     break;
+                 }
+-
+-                /* If the position of the current data segment is greater than
+-                 * the position of the next hole, find the next hole in order to
+-                 * calculate the length of the new data segment */
+-                if (data_offset > hole_offset) {
+-                    /* Starting at the offset of the last data segment, find the
+-                     * next hole */
+-                    ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+-                                      NULL, &hole_offset);
+-                    if (ret) {
+-                        /* If an error occurred here it's a real error because
+-                         * if the seek for a data segment was successful then
+-                         * necessarily another hole must exist (EOF is a hole)
+-                         */
+-                        *fop_errno = -ret;
+-                        break;
+-                    }
+-
+-                    /* Calculate the total size of the current data block */
+-                    data_block_size = hole_offset - data_offset;
+-                }
+-            } else {
+-                /* There is still data in the current segment, move the
+-                 * data_offset to the position of the last written byte */
+-                data_offset = offset;
+             }
+-
+-            /* Calculate how much data needs to be read and written. If the data
+-             * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+-             * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+-             * next iteration(s) */
+-            read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+-                             ? DHT_REBALANCE_BLKSIZE
+-                             : data_block_size);
+-
+-            /* Calculate the remaining size of the data block - maybe there's no
+-             * need to seek for data in the next iteration */
+-            data_block_size -= read_size;
+-
+-            /* Set offset to the offset of the data segment so read and write
+-             * will have the correct position */
+-            offset = data_offset;
+         }
+ 
++        /* Calculate how much data needs to be read and written. If the data
++         * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
++         * write DHT_REBALANCE_BLKSIZE data length and the rest in the
++         * next iteration(s) */
++        read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
++                         ? DHT_REBALANCE_BLKSIZE
++                         : data_block_size);
++
++        /* Calculate the remaining size of the data block - maybe there's no
++         * need to seek for data in the next iteration */
++        data_block_size -= read_size;
++
+         ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
+                            &iobref, NULL, NULL, NULL);
+ 
+@@ -1177,6 +1180,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+         iobref = NULL;
+         vector = NULL;
+     }
++
+     if (iobref)
+         iobref_unref(iobref);
+     GF_FREE(vector);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0555-geo-rep-Improve-handling-of-gfid-mismatches.patch b/SOURCES/0555-geo-rep-Improve-handling-of-gfid-mismatches.patch
new file mode 100644
index 0000000..85b19e0
--- /dev/null
+++ b/SOURCES/0555-geo-rep-Improve-handling-of-gfid-mismatches.patch
@@ -0,0 +1,79 @@
+From f2d3866e617d25ea62cda01afddc81ef0db3356e Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Tue, 4 May 2021 22:39:03 +0200
+Subject: [PATCH 555/584] geo-rep: Improve handling of gfid mismatches
+
+In some circumstances geo-replication can detect mismatching gfids
+between primary and secondary. These entries are fixed in an iterative
+way, assuming that after a fix, a previously failing entry could
+succeed.
+
+Previous code was trying to fix them in a loop that can be executed
+up to 10 times. If some entry cannot be fixed after 10 attempts, it's
+discarded. These fixes are very slow, so trying to do them many times
+causes geo-replication to get out of sync.
+
+To minimize the number of iterations done, this patch checks if the
+number of entries and failures remains constant after each iteration.
+If they are constant, it means that nothing else can be fixed, so it
+makes no sense to do more iterations. This reduces the number of
+iterations to 2 or 3 in most of the cases, improving geo-replication
+performance.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2389
+> Fixes: #2388
+> Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1957191
+Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244550
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/master.py | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
+index 98637e7..aef9373 100644
+--- a/geo-replication/syncdaemon/master.py
++++ b/geo-replication/syncdaemon/master.py
+@@ -1224,9 +1224,11 @@ class GMasterChangelogMixin(GMasterCommon):
+ 
+             if gconf.get("gfid-conflict-resolution"):
+                 count = 0
++                num_entries = len(entries)
++                num_failures = len(failures)
+                 if failures:
+                     logging.info(lf('Entry ops failed with gfid mismatch',
+-                                count=len(failures)))
++                                    count=num_failures))
+                 while failures and count < self.MAX_OE_RETRIES:
+                     count += 1
+                     self.handle_entry_failures(failures, entries)
+@@ -1237,6 +1239,20 @@ class GMasterChangelogMixin(GMasterCommon):
+                                      "gfid mismatch")
+                         break
+ 
++                    # If this iteration has not removed any entry or reduced
++                    # the number of failures compared to the previous one, we
++                    # don't need to keep iterating because we'll get the same
++                    # result in all other attempts.
++                    if ((num_entries == len(entries)) and
++                        (num_failures == len(failures))):
++                        logging.info(lf("No more gfid mismatches can be fixed",
++                                        entries=num_entries,
++                                        failures=num_failures))
++                        break
++
++                    num_entries = len(entries)
++                    num_failures = len(failures)
++
+             self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
+             self.status.dec_value("entry", len(entries))
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0556-dht-don-t-ignore-xdata-in-fgetxattr.patch b/SOURCES/0556-dht-don-t-ignore-xdata-in-fgetxattr.patch
new file mode 100644
index 0000000..0cf3545
--- /dev/null
+++ b/SOURCES/0556-dht-don-t-ignore-xdata-in-fgetxattr.patch
@@ -0,0 +1,52 @@
+From a7f6ad0c617a36414c8232cb692471703923b16d Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@users.noreply.github.com>
+Date: Tue, 19 Jan 2021 18:03:33 +0100
+Subject: [PATCH 556/584] dht: don't ignore xdata in fgetxattr
+
+DHT was passing NULL for xdata in fgetxattr() request, ignoring any
+data sent by upper xlators.
+
+This patch fixes the issue by sending the received xdata to lower
+xlators, as it's currently done for getxattr().
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2020
+> Fixes: #1991
+> Change-Id: If3d3f1f2ce6215f3b1acc46480e133cb4294eaec
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1919132
+Change-Id: If3d3f1f2ce6215f3b1acc46480e133cb4294eaec
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244538
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 7425c1a..0773092 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -5262,7 +5262,7 @@ dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ 
+         if (!ret && key && local->mds_subvol && dht_match_xattr(key)) {
+             STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol,
+-                       local->mds_subvol->fops->fgetxattr, fd, key, NULL);
++                       local->mds_subvol->fops->fgetxattr, fd, key, xdata);
+ 
+             return 0;
+         }
+@@ -5274,7 +5274,7 @@ dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+     for (i = 0; i < cnt; i++) {
+         subvol = layout->list[i].xlator;
+         STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr, fd,
+-                   key, NULL);
++                   key, xdata);
+     }
+     return 0;
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0557-cluster-dht-Fix-stack-overflow-in-readdir-p.patch b/SOURCES/0557-cluster-dht-Fix-stack-overflow-in-readdir-p.patch
new file mode 100644
index 0000000..2add6cb
--- /dev/null
+++ b/SOURCES/0557-cluster-dht-Fix-stack-overflow-in-readdir-p.patch
@@ -0,0 +1,306 @@
+From ba57b043db1e19196cf860baeeeb1acfc9985cd2 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@users.noreply.github.com>
+Date: Wed, 24 Feb 2021 15:04:23 +0100
+Subject: [PATCH 557/584] cluster/dht: Fix stack overflow in readdir(p)
+
+When parallel-readdir is enabled, readdir(p) requests sent by DHT can be
+immediately processed and answered in the same thread before the call to
+STACK_WIND_COOKIE() completes.
+
+This means that the readdir(p) cbk is processed synchronously. In some
+cases it may decide to send another readdir(p) request, which causes a
+recursive call.
+
+When some special conditions happen and the directories are big, it's
+possible that the number of nested calls is so high that the process
+crashes because of a stack overflow.
+
+This patch fixes this by not allowing nested readdir(p) calls. When a
+nested call is detected, it's queued instead of sending it. The queued
+request is processed when the current call finishes by the top level
+stack function.
+
+Backport of 3 patches:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2170
+> Fixes: #2169
+> Change-Id: Id763a8a51fb3c3314588ec7c162f649babf33099
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2202
+> Updates: #2169
+> Change-Id: I97e73c0aae74fc5d80c975f56f2f7a64e3e1ae95
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2242
+> Fixes: #2239
+> Change-Id: I6b2e48e87c85de27fad67a12d97abd91fa27c0c1
+> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
+
+BUG: 1798897
+Change-Id: Id763a8a51fb3c3314588ec7c162f649babf33099
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244549
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/distribute/issue-2169.t   |  33 +++++++++
+ xlators/cluster/dht/src/dht-common.c | 134 ++++++++++++++++++++++++++++++++---
+ xlators/cluster/dht/src/dht-common.h |   5 ++
+ 3 files changed, 162 insertions(+), 10 deletions(-)
+ create mode 100755 tests/bugs/distribute/issue-2169.t
+
+diff --git a/tests/bugs/distribute/issue-2169.t b/tests/bugs/distribute/issue-2169.t
+new file mode 100755
+index 0000000..91fa72a
+--- /dev/null
++++ b/tests/bugs/distribute/issue-2169.t
+@@ -0,0 +1,33 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup
++
++TEST glusterd
++TEST ${CLI} volume create ${V0} ${H0}:/$B0/${V0}_0
++TEST ${CLI} volume set ${V0} readdir-ahead on
++TEST ${CLI} volume set ${V0} parallel-readdir on
++TEST ${CLI} volume start ${V0}
++
++TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
++
++TEST mkdir -p ${M0}/d/d.{000..999}
++
++EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
++
++TEST ${CLI} volume add-brick ${V0} ${H0}:${B0}/${V0}_{1..7}
++
++TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
++
++ls -l ${M0}/d/ | wc -l
++
++EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
++TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
++
++ls -l ${M0}/d/ | wc -l
++
++TEST ls ${M0}/d
++
++cleanup
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 0773092..ce0fbbf 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -24,8 +24,15 @@
+ #include <libgen.h>
+ #include <signal.h>
+ 
++#include <urcu/uatomic.h>
++
+ int run_defrag = 0;
+ 
++static int
++dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                       int op_ret, int op_errno, gf_dirent_t *entries,
++                       dict_t *xdata);
++
+ int
+ dht_link2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+ 
+@@ -6681,6 +6688,94 @@ out:
+     return;
+ }
+ 
++/* Execute a READDIR request if no other request is in progress. Otherwise
++ * queue it to be executed when the current one finishes.
++ *
++ * When parallel-readdir is enabled and directory contents are cached, the
++ * callback of a readdirp will be called before returning from STACK_WIND.
++ * If the returned contents are not useful for DHT, and the buffer is not
++ * yet full, a nested readdirp request will be sent. This means that there
++ * will be many recursive calls. In the worst case there might be a stack
++ * overflow.
++ *
++ * To avoid this, we only wind a request if no other request is being wound.
++ * If there's another request, we simply store the values for the next call.
++ * When the thread processing the current wind completes it, it will take
++ * the new arguments and send the request from the top level stack. */
++static void
++dht_queue_readdir(call_frame_t *frame, xlator_t *xl, off_t offset,
++                  fop_readdir_cbk_t cbk)
++{
++    dht_local_t *local;
++    int32_t queue;
++    xlator_t *this = NULL;
++
++    local = frame->local;
++    this = frame->this;
++
++    local->queue_xl = xl;
++    local->queue_offset = offset;
++
++    if (uatomic_add_return(&local->queue, 1) == 1) {
++        /* If we are here it means that we are the first one to send a
++         * readdir request. Any attempt to send more readdir requests will
++         * find local->queue > 1, so it won't do anything. The needed data
++         * to send the request has been stored into local->queue_*.
++         *
++         * Note: this works because we will only have 1 additional request
++         *       at most (the one called by the cbk function) while we are
++         *       processing another readdir. */
++        do {
++            STACK_WIND_COOKIE(frame, cbk, local->queue_xl, local->queue_xl,
++                              local->queue_xl->fops->readdir, local->fd,
++                              local->size, local->queue_offset, local->xattr);
++
++            /* If a new readdirp request has been added before returning
++             * from winding, we process it. */
++        } while ((queue = uatomic_sub_return(&local->queue, 1)) > 0);
++
++        if (queue < 0) {
++            /* A negative value means that an unwind has been called before
++             * returning from the previous wind. This means that 'local' is
++             * not needed anymore and must be destroyed. */
++            dht_local_wipe(this, local);
++        }
++    }
++}
++
++/* Execute a READDIRP request if no other request is in progress. Otherwise
++ * queue it to be executed when the current one finishes. */
++static void
++dht_queue_readdirp(call_frame_t *frame, xlator_t *xl, off_t offset,
++                   fop_readdirp_cbk_t cbk)
++{
++    dht_local_t *local;
++    int32_t queue;
++    xlator_t *this = NULL;
++
++    local = frame->local;
++    this = frame->this;
++
++    local->queue_xl = xl;
++    local->queue_offset = offset;
++
++    /* Check dht_queue_readdir() comments for an explanation of this. */
++    if (uatomic_add_return(&local->queue, 1) == 1) {
++        do {
++            STACK_WIND_COOKIE(frame, cbk, local->queue_xl, local->queue_xl,
++                              local->queue_xl->fops->readdirp, local->fd,
++                              local->size, local->queue_offset, local->xattr);
++        } while ((queue = uatomic_sub_return(&local->queue, 1)) > 0);
++
++        if (queue < 0) {
++            /* A negative value means that an unwind has been called before
++             * returning from the previous wind. This means that 'local' is
++             * not needed anymore and must be destroyed. */
++            dht_local_wipe(this, local);
++        }
++    }
++}
++
+ /* Posix returns op_errno = ENOENT to indicate that there are no more
+  * entries
+  */
+@@ -6950,9 +7045,8 @@ done:
+             }
+         }
+ 
+-        STACK_WIND_COOKIE(frame, dht_readdirp_cbk, next_subvol, next_subvol,
+-                          next_subvol->fops->readdirp, local->fd, local->size,
+-                          next_offset, local->xattr);
++        dht_queue_readdirp(frame, next_subvol, next_offset, dht_readdirp_cbk);
++
+         return 0;
+     }
+ 
+@@ -6970,6 +7064,17 @@ unwind:
+     if (prev != dht_last_up_subvol(this))
+         op_errno = 0;
+ 
++    /* If we are inside a recursive call (or not inside a recursive call but
++     * the cbk is completed before the wind returns), local->queue will be 1.
++     * In this case we cannot destroy 'local' because it will be needed by
++     * the caller of STACK_WIND. In this case, we decrease the value to let
++     * the caller know that the operation has terminated and it must destroy
++     * 'local'. If local->queue is 0, we can destroy it here because there are
++     * no other users. */
++    if (uatomic_sub_return(&local->queue, 1) >= 0) {
++        frame->local = NULL;
++    }
++
+     DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
+ 
+     gf_dirent_free(&entries);
+@@ -7071,9 +7176,8 @@ done:
+             goto unwind;
+         }
+ 
+-        STACK_WIND_COOKIE(frame, dht_readdir_cbk, next_subvol, next_subvol,
+-                          next_subvol->fops->readdir, local->fd, local->size,
+-                          next_offset, NULL);
++        dht_queue_readdir(frame, next_subvol, next_offset, dht_readdir_cbk);
++
+         return 0;
+     }
+ 
+@@ -7089,6 +7193,17 @@ unwind:
+     if (prev != dht_last_up_subvol(this))
+         op_errno = 0;
+ 
++    /* If we are inside a recursive call (or not inside a recursive call but
++     * the cbk is completed before the wind returns), local->queue will be 1.
++     * In this case we cannot destroy 'local' because it will be needed by
++     * the caller of STACK_WIND. In this case, we decrease the value to let
++     * the caller know that the operation has terminated and it must destroy
++     * 'local'. If local->queue is 0, we can destroy it here because there are
++     * no other users. */
++    if (uatomic_sub_return(&local->queue, 1) >= 0) {
++        frame->local = NULL;
++    }
++
+     if (!skip_hashed_check) {
+         DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
+         gf_dirent_free(&entries);
+@@ -7096,6 +7211,7 @@ unwind:
+     } else {
+         DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, orig_entries, NULL);
+     }
++
+     return 0;
+ }
+ 
+@@ -7172,11 +7288,9 @@ dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+             }
+         }
+ 
+-        STACK_WIND_COOKIE(frame, dht_readdirp_cbk, xvol, xvol,
+-                          xvol->fops->readdirp, fd, size, yoff, local->xattr);
++        dht_queue_readdirp(frame, xvol, yoff, dht_readdirp_cbk);
+     } else {
+-        STACK_WIND_COOKIE(frame, dht_readdir_cbk, xvol, xvol,
+-                          xvol->fops->readdir, fd, size, yoff, local->xattr);
++        dht_queue_readdir(frame, xvol, yoff, dht_readdir_cbk);
+     }
+ 
+     return 0;
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index 92f1b89..132b3b3 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -369,6 +369,11 @@ struct dht_local {
+ 
+     dht_dir_transaction_t lock[2], *current;
+ 
++    /* for nested readdirs */
++    xlator_t *queue_xl;
++    off_t queue_offset;
++    int32_t queue;
++
+     /* inodelks during filerename for backward compatibility */
+     dht_lock_t **rename_inodelk_backward_compatible;
+     int rename_inodelk_bc_count;
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0558-afr-fix-directory-entry-count.patch b/SOURCES/0558-afr-fix-directory-entry-count.patch
new file mode 100644
index 0000000..4134f77
--- /dev/null
+++ b/SOURCES/0558-afr-fix-directory-entry-count.patch
@@ -0,0 +1,238 @@
+From 9bf6986f8ea3edd9de3d2629404f7ab11c1597de Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Tue, 9 Mar 2021 00:24:07 +0100
+Subject: [PATCH 558/584] afr: fix directory entry count
+
+AFR may hide some existing entries from a directory when reading it
+because they are generated internally for private management. However
+the returned number of entries from readdir() function is not updated
+accordingly. So it may return a number higher than the real entries
+present in the gf_dirent list.
+
+This may cause unexpected behavior of clients, including gfapi which
+incorrectly assumes that there was an entry when the list was actually
+empty.
+
+This patch also makes the check in gfapi more robust to avoid similar
+issues that could appear in the future.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2233
+> Fixes: #2232
+> Change-Id: I81ba3699248a53ebb0ee4e6e6231a4301436f763
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1927411
+Change-Id: I81ba3699248a53ebb0ee4e6e6231a4301436f763
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244535
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/glfs-fops.c                    |  3 +-
+ tests/bugs/replicate/issue-2232.c      | 85 ++++++++++++++++++++++++++++++++++
+ tests/bugs/replicate/issue-2232.t      | 34 ++++++++++++++
+ xlators/cluster/afr/src/afr-dir-read.c | 11 +++--
+ 4 files changed, 129 insertions(+), 4 deletions(-)
+ create mode 100644 tests/bugs/replicate/issue-2232.c
+ create mode 100644 tests/bugs/replicate/issue-2232.t
+
+diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
+index 6dc3b66..821d250 100644
+--- a/api/src/glfs-fops.c
++++ b/api/src/glfs-fops.c
+@@ -3748,8 +3748,9 @@ glfd_entry_refresh(struct glfs_fd *glfd, int plus)
+         errno = 0;
+     }
+ 
+-    if (ret > 0)
++    if ((ret > 0) && !list_empty(&glfd->entries)) {
+         glfd->next = list_entry(glfd->entries.next, gf_dirent_t, list);
++    }
+ 
+     gf_dirent_free(&old);
+ out:
+diff --git a/tests/bugs/replicate/issue-2232.c b/tests/bugs/replicate/issue-2232.c
+new file mode 100644
+index 0000000..df547c2
+--- /dev/null
++++ b/tests/bugs/replicate/issue-2232.c
+@@ -0,0 +1,85 @@
++
++#include <stdio.h>
++#include <errno.h>
++#include <stdlib.h>
++#include <errno.h>
++#include <string.h>
++#include <glusterfs/api/glfs.h>
++
++int main(int argc, char **argv)
++{
++    char log[128];
++    struct dirent entry;
++    struct dirent *ent;
++    glfs_xreaddirp_stat_t *xstat;
++    int ret, flags;
++
++    if (argc != 3) {
++        fprintf(stderr, "Syntax: %s <hostname> <volume>\n", argv[0]);
++        exit(1);
++    }
++    char *hostname = argv[1];
++    char *volname = argv[2];
++
++    glfs_t *fs = glfs_new(volname);
++    if (!fs) {
++        fprintf(stderr, "glfs_new() failed\n");
++        exit(1);
++    }
++
++    ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
++    if (ret < 0) {
++        fprintf(stderr, "glfs_set_volfile_server() failed\n");
++        return ret;
++    }
++
++    sprintf(log, "/tmp/logs-%d.log", getpid());
++    ret = glfs_set_logging(fs, log, 9);
++    if (ret < 0) {
++        fprintf(stderr, "glfs_set_logging() failed\n");
++        return ret;
++    }
++
++    ret = glfs_init(fs);
++    if (ret < 0) {
++        fprintf(stderr, "glfs_init() failed\n");
++        return ret;
++    }
++
++    glfs_fd_t *fd = glfs_opendir(fs, "/");
++    if (fd == NULL) {
++        fprintf(stderr, "glfs_opendir() failed\n");
++        return 1;
++    }
++
++    flags = GFAPI_XREADDIRP_STAT | GFAPI_XREADDIRP_HANDLE;
++    xstat = NULL;
++    while ((ret = glfs_xreaddirplus_r(fd, flags, &xstat, &entry, &ent)) > 0) {
++        if (xstat != NULL) {
++            glfs_free(xstat);
++        }
++        if ((strcmp(ent->d_name, ".") == 0) ||
++            (strcmp(ent->d_name, "..") == 0)) {
++            xstat = NULL;
++            continue;
++        }
++        if ((xstat == NULL) || ((ret & GFAPI_XREADDIRP_HANDLE) == 0)) {
++            fprintf(stderr, "glfs_xreaddirplus_r() failed: %s\n",
++                    strerror(errno));
++            return 1;
++        }
++
++        xstat = NULL;
++    }
++
++    if (ret < 0) {
++        fprintf(stderr, "glfs_xreaddirplus_r() failed\n");
++        return ret;
++    }
++
++    glfs_close(fd);
++
++    glfs_fini(fs);
++
++    return ret;
++}
+diff --git a/tests/bugs/replicate/issue-2232.t b/tests/bugs/replicate/issue-2232.t
+new file mode 100644
+index 0000000..66a41e0
+--- /dev/null
++++ b/tests/bugs/replicate/issue-2232.t
+@@ -0,0 +1,34 @@
++#!/bin/bash
++
++. $(dirname "${0}")/../../include.rc
++. $(dirname "${0}")/../../volume.rc
++
++cleanup;
++TEST gcc $(dirname "${0}")/issue-2232.c -o $(dirname "${0}")/issue-2232 -lgfapi
++TEST glusterd
++TEST pidof glusterd
++
++TEST $CLI volume create ${V0} replica 3 ${H0}:${B0}/${V0}{0..2}
++
++# Create a fake .glusterfs-anonymous-inode-... entry
++ANONINO=".glusterfs-anonymous-inode-aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
++TEST mkdir ${B0}/${V0}{0..2}/${ANONINO}
++gfid="$(uuidgen)"
++hex="0x$(echo "${gfid}" | tr -d '-')"
++TEST assign_gfid "${hex}" "${B0}/${V0}0/${ANONINO}"
++TEST assign_gfid "${hex}" "${B0}/${V0}1/${ANONINO}"
++TEST assign_gfid "${hex}" "${B0}/${V0}2/${ANONINO}"
++TEST mkdir -p "${B0}/${V0}0/.glusterfs/${gfid:0:2}/${gfid:2:2}"
++TEST mkdir -p "${B0}/${V0}1/.glusterfs/${gfid:0:2}/${gfid:2:2}"
++TEST mkdir -p "${B0}/${V0}2/.glusterfs/${gfid:0:2}/${gfid:2:2}"
++TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}0/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}"
++TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}1/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}"
++TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}2/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}"
++
++TEST $CLI volume start ${V0}
++
++TEST $(dirname "${0}")/issue-2232 ${H0} ${V0}
++
++TEST rm -f $(dirname $0)/issue-2232
++
++cleanup
+diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
+index d64b6a9..a98f8df 100644
+--- a/xlators/cluster/afr/src/afr-dir-read.c
++++ b/xlators/cluster/afr/src/afr-dir-read.c
+@@ -157,7 +157,7 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
+     return 0;
+ }
+ 
+-static void
++static int32_t
+ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+                               int subvol, gf_dirent_t *entries, fd_t *fd)
+ {
+@@ -168,6 +168,7 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+     afr_private_t *priv = NULL;
+     gf_boolean_t need_heal = _gf_false;
+     gf_boolean_t validate_subvol = _gf_false;
++    int32_t count = 0;
+ 
+     this = THIS;
+     priv = this->private;
+@@ -184,6 +185,7 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ 
+         list_del_init(&entry->list);
+         list_add_tail(&entry->list, &entries->list);
++        count++;
+ 
+         if (!validate_subvol)
+             continue;
+@@ -197,6 +199,8 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+             }
+         }
+     }
++
++    return count;
+ }
+ 
+ int32_t
+@@ -222,8 +226,9 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     }
+ 
+     if (op_ret >= 0)
+-        afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+-                                      &entries, local->fd);
++        op_ret = afr_readdir_transform_entries(frame, subvol_entries,
++                                               (long)cookie, &entries,
++                                               local->fd);
+ 
+     AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch b/SOURCES/0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch
new file mode 100644
index 0000000..91add36
--- /dev/null
+++ b/SOURCES/0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch
@@ -0,0 +1,163 @@
+From 2b6e6c234dffa72c9f2af747908b1e1f29080698 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Thu, 25 Mar 2021 11:52:13 +0530
+Subject: [PATCH 559/584] afr: make fsync post-op aware of inodelk count
+ (#2273)
+
+Problem:
+Since commit bd540db1e, eager-locking was enabled for fsync. But on
+certain VM workloads with sharding enabled, shard xlator keeps sending
+fsync on the base shard. This can cause blocked inodelks from other
+clients (including shd) to time out due to call bail.
+
+Fix:
+Make afr fsync aware of inodelk count and not delay post-op + unlock
+when inodelk count > 1, just like writev.
+
+Code is restructured so that any fd based AFR_DATA_TRANSACTION can be made
+aware by setting GLUSTERFS_INODELK_DOM_COUNT in xdata request.
+
+Note: We do not know yet why VMs go into a paused state because of the
+blocked inodelks, but this patch should be a first step in reducing the
+occurrence.
+
+Upstream patch details:
+> https://github.com/gluster/glusterfs/pull/2273/
+> Updates: #2198
+> Change-Id: Ib91ebdd3101d590c326e69c829cf9335003e260b
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+BUG: 1943467
+Change-Id: Id407ca54007e3bbb206a1d9431ebaf89a2167f74
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244516
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-inode-write.c | 40 ++++++++++++++++++-------------
+ xlators/features/locks/src/posix.c        |  1 +
+ 2 files changed, 24 insertions(+), 17 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
+index df82b6e..962a7b1 100644
+--- a/xlators/cluster/afr/src/afr-inode-write.c
++++ b/xlators/cluster/afr/src/afr-inode-write.c
+@@ -42,6 +42,7 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
+     struct iatt *stbuf = NULL;
+     afr_local_t *local = NULL;
+     afr_private_t *priv = NULL;
++    afr_lock_t *lock = NULL;
+     afr_read_subvol_args_t args = {
+         0,
+     };
+@@ -50,6 +51,12 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
+     priv = this->private;
+     GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
+ 
++    if (local->update_num_inodelks &&
++        local->transaction.type == AFR_DATA_TRANSACTION) {
++        lock = &local->inode_ctx->lock[local->transaction.type];
++        lock->num_inodelks = local->num_inodelks;
++    }
++
+     /*This code needs to stay till DHT sends fops on linked
+      * inodes*/
+     if (!inode_is_linked(local->inode)) {
+@@ -134,6 +141,7 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ {
+     afr_local_t *local = NULL;
+     afr_private_t *priv = NULL;
++    int num_inodelks = 0;
+ 
+     local = frame->local;
+     priv = this->private;
+@@ -146,8 +154,16 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ 
+     local->replies[child_index].op_ret = op_ret;
+     local->replies[child_index].op_errno = op_errno;
+-    if (xdata)
++    if (xdata) {
+         local->replies[child_index].xdata = dict_ref(xdata);
++        if (dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
++                                 &num_inodelks) == 0) {
++            if (num_inodelks > local->num_inodelks) {
++                local->num_inodelks = num_inodelks;
++                local->update_num_inodelks = _gf_true;
++            }
++        }
++    }
+ 
+     if (op_ret >= 0) {
+         if (prebuf)
+@@ -284,7 +300,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+     afr_local_t *local = frame->local;
+     uint32_t open_fd_count = 0;
+     uint32_t write_is_append = 0;
+-    int32_t num_inodelks = 0;
+ 
+     LOCK(&frame->lock);
+     {
+@@ -306,15 +321,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+             local->open_fd_count = open_fd_count;
+             local->update_open_fd_count = _gf_true;
+         }
+-
+-        ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
+-                                   &num_inodelks);
+-        if (ret < 0)
+-            goto unlock;
+-        if (num_inodelks > local->num_inodelks) {
+-            local->num_inodelks = num_inodelks;
+-            local->update_num_inodelks = _gf_true;
+-        }
+     }
+ unlock:
+     UNLOCK(&frame->lock);
+@@ -324,7 +330,6 @@ void
+ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
+ {
+     afr_local_t *local = NULL;
+-    afr_lock_t *lock = NULL;
+ 
+     local = frame->local;
+ 
+@@ -343,11 +348,6 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
+ 
+     if (local->update_open_fd_count)
+         local->inode_ctx->open_fd_count = local->open_fd_count;
+-    if (local->update_num_inodelks &&
+-        local->transaction.type == AFR_DATA_TRANSACTION) {
+-        lock = &local->inode_ctx->lock[local->transaction.type];
+-        lock->num_inodelks = local->num_inodelks;
+-    }
+ }
+ 
+ int
+@@ -2516,6 +2516,12 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+     if (!local->xdata_req)
+         goto out;
+ 
++    if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
++                           this->name)) {
++        op_errno = ENOMEM;
++        goto out;
++    }
++
+     local->fd = fd_ref(fd);
+     ret = afr_set_inode_local(this, local, fd->inode);
+     if (ret)
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index cdd1ff7..22ef5b8 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -4943,6 +4943,7 @@ struct xlator_fops fops = {
+     .rchecksum = pl_rchecksum,
+     .statfs = pl_statfs,
+     .fsyncdir = pl_fsyncdir,
++    .fsync = pl_fsync,
+     .readdir = pl_readdir,
+     .symlink = pl_symlink,
+     .link = pl_link,
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0560-posix-Avoid-dict_del-logs-in-posix_is_layout_stale-w.patch b/SOURCES/0560-posix-Avoid-dict_del-logs-in-posix_is_layout_stale-w.patch
new file mode 100644
index 0000000..cccac36
--- /dev/null
+++ b/SOURCES/0560-posix-Avoid-dict_del-logs-in-posix_is_layout_stale-w.patch
@@ -0,0 +1,73 @@
+From e56605d5808b41335026a5470fa10f5e5b5389f3 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Mon, 6 Apr 2020 21:58:03 +0530
+Subject: [PATCH 560/584] posix: Avoid dict_del logs in posix_is_layout_stale
+ while key is NULL
+
+Problem: The key "GF_PREOP_PARENT_KEY" is populated by dht, so for a
+         non-distribute volume (e.g. 1x3) the key is not populated and
+         posix_is_layout_stale throws a message whenever a file is created
+
+Solution: To avoid the log, put a condition before deleting the key
+
+Upstream patch details:
+> https://review.gluster.org/#/c/glusterfs/+/24297/
+> Change-Id: I813ee7960633e7f9f5e9ad2f42f288053d9eb71f
+> Fixes: #1150
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+
+BUG: 1942816
+Change-Id: I746a2619989265f3bc9bb648c4b8e4bbefaedc56
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244925
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/brick-mux-validation.t | 4 ++--
+ xlators/storage/posix/src/posix-helpers.c  | 5 +++--
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/tests/bugs/glusterd/brick-mux-validation.t b/tests/bugs/glusterd/brick-mux-validation.t
+index 03a4768..61b0455 100644
+--- a/tests/bugs/glusterd/brick-mux-validation.t
++++ b/tests/bugs/glusterd/brick-mux-validation.t
+@@ -24,7 +24,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}
+ TEST $CLI volume start $V0
+ 
+ EXPECT 1 count_brick_processes
+-EXPECT 1 count_brick_pids
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
+ 
+ pkill gluster
+@@ -101,4 +101,4 @@ TEST $CLI_IGNORE_PARTITION volume reset-brick $V1 $H0:$B0/${V1}1 $H0:$B0/${V1}1
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+ EXPECT 1 count_brick_processes
+ 
+-cleanup;
+\ No newline at end of file
++cleanup;
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 110d383..16351d8 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -3596,13 +3596,14 @@ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
+     op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
+     if (xattr_name == NULL) {
+         op_ret = 0;
+-        goto out;
++        return is_stale;
+     }
+ 
+     arg_data = dict_get(xdata, xattr_name);
+     if (!arg_data) {
+         op_ret = 0;
+-        goto out;
++        dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
++        return is_stale;
+     }
+ 
+     size = sys_lgetxattr(par_path, xattr_name, value_buf,
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0561-cluster-ec-Inform-failure-when-some-bricks-are-unava.patch b/SOURCES/0561-cluster-ec-Inform-failure-when-some-bricks-are-unava.patch
new file mode 100644
index 0000000..4f191cc
--- /dev/null
+++ b/SOURCES/0561-cluster-ec-Inform-failure-when-some-bricks-are-unava.patch
@@ -0,0 +1,202 @@
+From 488a5aa4932842334e2749224e9c39f8b6fd379c Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Wed, 20 May 2020 11:30:17 +0530
+Subject: [PATCH 561/584] cluster/ec: Inform failure when some bricks are
+ unavailable.
+
+Provide proper information about failure when a fop
+fails on some of the bricks.
+Also provide information about parent fop and
+the map of the bricks on which it is failing.
+
+Upstream patch details:
+>Change-Id: If812739617df65cd146c8e667fbacff653717248
+>updates #1434
+>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+>https://review.gluster.org/#/c/glusterfs/+/24858/
+
+Change-Id: I3549d637e7345f05f21ac1c0e8106973c69d1be9
+BUG: 1908635
+Signed-off-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244926
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-common.c | 76 +++++++++++++++++++++++---------------
+ xlators/cluster/ec/src/ec.c        | 14 ++++++-
+ 2 files changed, 58 insertions(+), 32 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index e3f8769..a9624d8 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
+         }
+     }
+ 
+-    gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+-           "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+-           "remaining=%s, good=%s, bad=%s, %s)",
+-           gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+-           ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+-           ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+-           ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+-           ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+-           ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+-                  ec->nodes),
+-           ec_msg_str(fop));
++    gf_msg(
++        fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
++        "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
++        "remaining=%s, good=%s, bad=%s,"
++        "(Least significant bit represents first client/brick of subvol), %s)",
++        gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
++        ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
++        ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
++        ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
++        ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
++        ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
++               ec->nodes),
++        ec_msg_str(fop));
+     if (fop->use_fd) {
+         if (fop->fd != NULL) {
+             ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
+@@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
+     loc_t *loc2 = NULL;
+     char gfid1[64] = {0};
+     char gfid2[64] = {0};
++    ec_fop_data_t *parent = fop->parent;
+ 
+     if (fop->errstr)
+         return fop->errstr;
+-
+     if (!fop->use_fd) {
+         loc1 = &fop->loc[0];
+         loc2 = &fop->loc[1];
+@@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop)
+         if (fop->id == GF_FOP_RENAME) {
+             gf_asprintf(&fop->errstr,
+                         "FOP : '%s' failed on '%s' and '%s' with gfids "
+-                        "%s and %s respectively",
++                        "%s and %s respectively. Parent FOP: %s",
+                         ec_fop_name(fop->id), loc1->path, loc2->path,
+                         uuid_utoa_r(loc1->gfid, gfid1),
+-                        uuid_utoa_r(loc2->gfid, gfid2));
++                        uuid_utoa_r(loc2->gfid, gfid2),
++                        parent ? ec_fop_name(parent->id) : "No Parent");
+         } else {
+-            gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
+-                        ec_fop_name(fop->id), loc1->path,
+-                        uuid_utoa_r(loc1->gfid, gfid1));
++            gf_asprintf(
++                &fop->errstr,
++                "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
++                ec_fop_name(fop->id), loc1->path,
++                uuid_utoa_r(loc1->gfid, gfid1),
++                parent ? ec_fop_name(parent->id) : "No Parent");
+         }
+     } else {
+-        gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
+-                    ec_fop_name(fop->id),
+-                    uuid_utoa_r(fop->fd->inode->gfid, gfid1));
++        gf_asprintf(
++            &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
++            ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
++            parent ? ec_fop_name(parent->id) : "No Parent");
+     }
+     return fop->errstr;
+ }
+ 
++static void
++ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
++                        int32_t loglevel)
++{
++    ec_t *ec = fop->xl->private;
++    char str1[32], str2[32], str3[32];
++
++    gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
++           "Insufficient available children for this request: "
++           "Have : %d, Need : %u : Child UP : %s "
++           "Mask: %s, Healing : %s : %s ",
++           have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
++           ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
++           ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
++           ec_msg_str(fop));
++}
++
+ static int32_t
+ ec_child_select(ec_fop_data_t *fop)
+ {
+@@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop)
+     ec_trace("SELECT", fop, "");
+ 
+     if ((num < fop->minimum) && (num < ec->fragments)) {
+-        gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
+-               "Insufficient available children "
+-               "for this request (have %d, need "
+-               "%d). %s",
+-               num, fop->minimum, ec_msg_str(fop));
++        ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
+         return 0;
+     }
+ 
+@@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop)
+         (fop->locks[0].update[EC_DATA_TXN] ||
+          fop->locks[0].update[EC_METADATA_TXN])) {
+         if (ec->quorum_count && (num < ec->quorum_count)) {
+-            gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
+-                   "Insufficient available children "
+-                   "for this request (have %d, need "
+-                   "%d). %s",
+-                   num, ec->quorum_count, ec_msg_str(fop));
++            ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
+             return 0;
+         }
+     }
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index a930089..047cdd8 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
+ void
+ ec_up(xlator_t *this, ec_t *ec)
+ {
++    char str1[32], str2[32];
++
+     if (ec->timer != NULL) {
+         gf_timer_call_cancel(this->ctx, ec->timer);
+         ec->timer = NULL;
+     }
+ 
+     ec->up = 1;
+-    gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
++    gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
++           "Going UP : Child UP = %s Child Notify = %s",
++           ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
++           ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
+ 
+     gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
+ }
+@@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
+ void
+ ec_down(xlator_t *this, ec_t *ec)
+ {
++    char str1[32], str2[32];
++
+     if (ec->timer != NULL) {
+         gf_timer_call_cancel(this->ctx, ec->timer);
+         ec->timer = NULL;
+     }
+ 
+     ec->up = 0;
+-    gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
++    gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
++           "Going DOWN : Child UP = %s Child Notify = %s",
++           ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
++           ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
+ 
+     gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
+ }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0562-shard.c-Fix-formatting.patch b/SOURCES/0562-shard.c-Fix-formatting.patch
new file mode 100644
index 0000000..14fbed6
--- /dev/null
+++ b/SOURCES/0562-shard.c-Fix-formatting.patch
@@ -0,0 +1,12513 @@
+From ea96fcd832de0b49f0e050f535d22a500da1503a Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Thu, 3 Jun 2021 13:14:04 +0200
+Subject: [PATCH 562/584] shard.c: Fix formatting
+
+A previous downstream change [1] had changed the formatting of the
+entire xlators/features/shard/src/shard.c. This patch reapplies the
+correct formatting. No other changes have been made.
+
+[1] https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/185716/
+
+BUG: 1925425
+Change-Id: Ie655ddaaa26aa884878e66bc0d9ce1f021f6a85f
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244956
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/shard/src/shard.c | 11701 ++++++++++++++++++-----------------
+ 1 file changed, 6084 insertions(+), 5617 deletions(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 099b062..c5cc224 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -16,5813 +16,6226 @@
+ #include <glusterfs/defaults.h>
+ #include <glusterfs/statedump.h>
+ 
+-static gf_boolean_t __is_shard_dir(uuid_t gfid) {
+-  shard_priv_t *priv = THIS->private;
++static gf_boolean_t
++__is_shard_dir(uuid_t gfid)
++{
++    shard_priv_t *priv = THIS->private;
+ 
+-  if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0)
+-    return _gf_true;
++    if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0)
++        return _gf_true;
+ 
+-  return _gf_false;
++    return _gf_false;
+ }
+ 
+-static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) {
+-  if (frame->root->pid == GF_CLIENT_PID_GSYNCD &&
+-      (__is_shard_dir(loc->pargfid) ||
+-       (loc->parent && __is_shard_dir(loc->parent->gfid))))
+-    return _gf_true;
++static gf_boolean_t
++__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc)
++{
++    if (frame->root->pid == GF_CLIENT_PID_GSYNCD &&
++        (__is_shard_dir(loc->pargfid) ||
++         (loc->parent && __is_shard_dir(loc->parent->gfid))))
++        return _gf_true;
+ 
+-  return _gf_false;
++    return _gf_false;
+ }
+ 
+-void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) {
+-  char gfid_str[GF_UUID_BUF_SIZE] = {
+-      0,
+-  };
++void
++shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len)
++{
++    char gfid_str[GF_UUID_BUF_SIZE] = {
++        0,
++    };
+ 
+-  gf_uuid_unparse(gfid, gfid_str);
+-  snprintf(buf, len, "%s.%d", gfid_str, block_num);
++    gf_uuid_unparse(gfid, gfid_str);
++    snprintf(buf, len, "%s.%d", gfid_str, block_num);
+ }
+ 
+-void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath,
+-                              size_t len) {
+-  char gfid_str[GF_UUID_BUF_SIZE] = {
+-      0,
+-  };
++void
++shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len)
++{
++    char gfid_str[GF_UUID_BUF_SIZE] = {
++        0,
++    };
+ 
+-  gf_uuid_unparse(gfid, gfid_str);
+-  snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
++    gf_uuid_unparse(gfid, gfid_str);
++    snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
+ }
+ 
+-int __shard_inode_ctx_get(inode_t *inode, xlator_t *this,
+-                          shard_inode_ctx_t **ctx) {
+-  int ret = -1;
+-  uint64_t ctx_uint = 0;
+-  shard_inode_ctx_t *ctx_p = NULL;
++int
++__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
++{
++    int ret = -1;
++    uint64_t ctx_uint = 0;
++    shard_inode_ctx_t *ctx_p = NULL;
+ 
+-  ret = __inode_ctx_get(inode, this, &ctx_uint);
+-  if (ret == 0) {
+-    *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+-    return ret;
+-  }
++    ret = __inode_ctx_get(inode, this, &ctx_uint);
++    if (ret == 0) {
++        *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++        return ret;
++    }
+ 
+-  ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t);
+-  if (!ctx_p)
+-    return ret;
++    ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t);
++    if (!ctx_p)
++        return ret;
+ 
+-  INIT_LIST_HEAD(&ctx_p->ilist);
+-  INIT_LIST_HEAD(&ctx_p->to_fsync_list);
++    INIT_LIST_HEAD(&ctx_p->ilist);
++    INIT_LIST_HEAD(&ctx_p->to_fsync_list);
+ 
+-  ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p);
+-  if (ret < 0) {
+-    GF_FREE(ctx_p);
+-    return ret;
+-  }
++    ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p);
++    if (ret < 0) {
++        GF_FREE(ctx_p);
++        return ret;
++    }
+ 
+-  *ctx = ctx_p;
++    *ctx = ctx_p;
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int shard_inode_ctx_get(inode_t *inode, xlator_t *this,
+-                        shard_inode_ctx_t **ctx) {
+-  int ret = 0;
++int
++shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
++{
++    int ret = 0;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_inode_ctx_get(inode, this, ctx); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_get(inode, this, ctx);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
+-                          uint64_t block_size, int32_t valid) {
+-  int ret = -1;
+-  shard_inode_ctx_t *ctx = NULL;
++int
++__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
++                      uint64_t block_size, int32_t valid)
++{
++    int ret = -1;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __shard_inode_ctx_get(inode, this, &ctx);
+-  if (ret)
+-    return ret;
++    ret = __shard_inode_ctx_get(inode, this, &ctx);
++    if (ret)
++        return ret;
+ 
+-  if (valid & SHARD_MASK_BLOCK_SIZE)
+-    ctx->block_size = block_size;
++    if (valid & SHARD_MASK_BLOCK_SIZE)
++        ctx->block_size = block_size;
+ 
+-  if (valid & SHARD_MASK_PROT)
+-    ctx->stat.ia_prot = stbuf->ia_prot;
++    if (valid & SHARD_MASK_PROT)
++        ctx->stat.ia_prot = stbuf->ia_prot;
+ 
+-  if (valid & SHARD_MASK_NLINK)
+-    ctx->stat.ia_nlink = stbuf->ia_nlink;
++    if (valid & SHARD_MASK_NLINK)
++        ctx->stat.ia_nlink = stbuf->ia_nlink;
+ 
+-  if (valid & SHARD_MASK_UID)
+-    ctx->stat.ia_uid = stbuf->ia_uid;
++    if (valid & SHARD_MASK_UID)
++        ctx->stat.ia_uid = stbuf->ia_uid;
+ 
+-  if (valid & SHARD_MASK_GID)
+-    ctx->stat.ia_gid = stbuf->ia_gid;
++    if (valid & SHARD_MASK_GID)
++        ctx->stat.ia_gid = stbuf->ia_gid;
+ 
+-  if (valid & SHARD_MASK_SIZE)
+-    ctx->stat.ia_size = stbuf->ia_size;
++    if (valid & SHARD_MASK_SIZE)
++        ctx->stat.ia_size = stbuf->ia_size;
+ 
+-  if (valid & SHARD_MASK_BLOCKS)
+-    ctx->stat.ia_blocks = stbuf->ia_blocks;
++    if (valid & SHARD_MASK_BLOCKS)
++        ctx->stat.ia_blocks = stbuf->ia_blocks;
+ 
+-  if (valid & SHARD_MASK_TIMES) {
+-    SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
+-                      stbuf->ia_mtime, stbuf->ia_mtime_nsec);
+-    SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
+-                      stbuf->ia_ctime, stbuf->ia_ctime_nsec);
+-    SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
+-                      stbuf->ia_atime, stbuf->ia_atime_nsec);
+-  }
++    if (valid & SHARD_MASK_TIMES) {
++        SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
++                          stbuf->ia_mtime, stbuf->ia_mtime_nsec);
++        SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
++                          stbuf->ia_ctime, stbuf->ia_ctime_nsec);
++        SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
++                          stbuf->ia_atime, stbuf->ia_atime_nsec);
++    }
+ 
+-  if (valid & SHARD_MASK_OTHERS) {
+-    ctx->stat.ia_ino = stbuf->ia_ino;
+-    gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid);
+-    ctx->stat.ia_dev = stbuf->ia_dev;
+-    ctx->stat.ia_type = stbuf->ia_type;
+-    ctx->stat.ia_rdev = stbuf->ia_rdev;
+-    ctx->stat.ia_blksize = stbuf->ia_blksize;
+-  }
++    if (valid & SHARD_MASK_OTHERS) {
++        ctx->stat.ia_ino = stbuf->ia_ino;
++        gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid);
++        ctx->stat.ia_dev = stbuf->ia_dev;
++        ctx->stat.ia_type = stbuf->ia_type;
++        ctx->stat.ia_rdev = stbuf->ia_rdev;
++        ctx->stat.ia_blksize = stbuf->ia_blksize;
++    }
+ 
+-  if (valid & SHARD_MASK_REFRESH_RESET)
+-    ctx->refresh = _gf_false;
++    if (valid & SHARD_MASK_REFRESH_RESET)
++        ctx->refresh = _gf_false;
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
+-                        uint64_t block_size, int32_t valid) {
+-  int ret = -1;
++int
++shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
++                    uint64_t block_size, int32_t valid)
++{
++    int ret = -1;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) {
+-  int ret = -1;
+-  shard_inode_ctx_t *ctx = NULL;
++int
++__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this)
++{
++    int ret = -1;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __shard_inode_ctx_get(inode, this, &ctx);
+-  if (ret)
+-    return ret;
++    ret = __shard_inode_ctx_get(inode, this, &ctx);
++    if (ret)
++        return ret;
+ 
+-  ctx->refresh = _gf_true;
++    ctx->refresh = _gf_true;
+ 
+-  return 0;
++    return 0;
+ }
+-int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) {
+-  int ret = -1;
++int
++shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this)
++{
++    int ret = -1;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_inode_ctx_set_refresh_flag(inode, this); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_set_refresh_flag(inode, this);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) {
+-  int ret = -1;
+-  shard_inode_ctx_t *ctx = NULL;
++int
++__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this)
++{
++    int ret = -1;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __shard_inode_ctx_get(inode, this, &ctx);
+-  if (ret)
+-    return ret;
++    ret = __shard_inode_ctx_get(inode, this, &ctx);
++    if (ret)
++        return ret;
+ 
+-  ctx->refreshed = _gf_true;
+-  return 0;
++    ctx->refreshed = _gf_true;
++    return 0;
+ }
+ 
+-int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) {
+-  int ret = -1;
++int
++shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this)
++{
++    int ret = -1;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_mark_dir_refreshed(inode, this);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
+-                                        inode_t *shard_inode) {
+-  int ret = -1;
+-  shard_inode_ctx_t *base_ictx = NULL;
+-  shard_inode_ctx_t *shard_ictx = NULL;
++int
++__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
++                                    inode_t *shard_inode)
++{
++    int ret = -1;
++    shard_inode_ctx_t *base_ictx = NULL;
++    shard_inode_ctx_t *shard_ictx = NULL;
+ 
+-  ret = __shard_inode_ctx_get(base_inode, this, &base_ictx);
+-  if (ret)
+-    return ret;
++    ret = __shard_inode_ctx_get(base_inode, this, &base_ictx);
++    if (ret)
++        return ret;
+ 
+-  ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx);
+-  if (ret)
+-    return ret;
++    ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx);
++    if (ret)
++        return ret;
+ 
+-  if (shard_ictx->fsync_needed) {
+-    shard_ictx->fsync_needed++;
+-    return 1;
+-  }
++    if (shard_ictx->fsync_needed) {
++        shard_ictx->fsync_needed++;
++        return 1;
++    }
+ 
+-  list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list);
+-  shard_ictx->inode = shard_inode;
+-  shard_ictx->fsync_needed++;
+-  base_ictx->fsync_count++;
+-  shard_ictx->base_inode = base_inode;
++    list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list);
++    shard_ictx->inode = shard_inode;
++    shard_ictx->fsync_needed++;
++    base_ictx->fsync_count++;
++    shard_ictx->base_inode = base_inode;
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
+-                                      inode_t *shard_inode) {
+-  int ret = -1;
++int
++shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
++                                  inode_t *shard_inode)
++{
++    int ret = -1;
+ 
+-  /* This ref acts as a refkeepr on the base inode. We
+-   * need to keep this inode alive as it holds the head
+-   * of the to_fsync_list.
+-   */
+-  inode_ref(base_inode);
+-  inode_ref(shard_inode);
++    /* This ref acts as a refkeepr on the base inode. We
++     * need to keep this inode alive as it holds the head
++     * of the to_fsync_list.
++     */
++    inode_ref(base_inode);
++    inode_ref(shard_inode);
+ 
+-  LOCK(&base_inode->lock);
+-  LOCK(&shard_inode->lock);
+-  { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); }
+-  UNLOCK(&shard_inode->lock);
+-  UNLOCK(&base_inode->lock);
++    LOCK(&base_inode->lock);
++    LOCK(&shard_inode->lock);
++    {
++        ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this,
++                                                  shard_inode);
++    }
++    UNLOCK(&shard_inode->lock);
++    UNLOCK(&base_inode->lock);
+ 
+-  /* Unref the base inode corresponding to the ref above, if the shard is
+-   * found to be already part of the fsync list.
+-   */
+-  if (ret != 0) {
+-    inode_unref(base_inode);
+-    inode_unref(shard_inode);
+-  }
+-  return ret;
++    /* Unref the base inode corresponding to the ref above, if the shard is
++     * found to be already part of the fsync list.
++     */
++    if (ret != 0) {
++        inode_unref(base_inode);
++        inode_unref(shard_inode);
++    }
++    return ret;
+ }
+ 
+-gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) {
+-  int ret = -1;
+-  shard_inode_ctx_t *ctx = NULL;
++gf_boolean_t
++__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this)
++{
++    int ret = -1;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __shard_inode_ctx_get(inode, this, &ctx);
+-  /* If inode ctx get fails, better to err on the side of caution and
+-   * try again? Unless the failure is due to mem-allocation.
+-   */
+-  if (ret)
+-    return _gf_true;
++    ret = __shard_inode_ctx_get(inode, this, &ctx);
++    /* If inode ctx get fails, better to err on the side of caution and
++     * try again? Unless the failure is due to mem-allocation.
++     */
++    if (ret)
++        return _gf_true;
+ 
+-  return !ctx->refreshed;
++    return !ctx->refreshed;
+ }
+ 
+-gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) {
+-  gf_boolean_t flag = _gf_false;
++gf_boolean_t
++shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this)
++{
++    gf_boolean_t flag = _gf_false;
+ 
+-  LOCK(&inode->lock);
+-  { flag = __shard_inode_ctx_needs_lookup(inode, this); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        flag = __shard_inode_ctx_needs_lookup(inode, this);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return flag;
++    return flag;
+ }
+-int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this,
+-                                 struct iatt *stbuf) {
+-  int ret = -1;
+-  shard_inode_ctx_t *ctx = NULL;
++int
++__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf)
++{
++    int ret = -1;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __shard_inode_ctx_get(inode, this, &ctx);
+-  if (ret)
+-    return ret;
++    ret = __shard_inode_ctx_get(inode, this, &ctx);
++    if (ret)
++        return ret;
+ 
+-  if ((stbuf->ia_size != ctx->stat.ia_size) ||
+-      (stbuf->ia_blocks != ctx->stat.ia_blocks))
+-    ctx->refresh = _gf_true;
++    if ((stbuf->ia_size != ctx->stat.ia_size) ||
++        (stbuf->ia_blocks != ctx->stat.ia_blocks))
++        ctx->refresh = _gf_true;
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this,
+-                               struct iatt *stbuf) {
+-  int ret = -1;
++int
++shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf)
++{
++    int ret = -1;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_invalidate(inode, this, stbuf);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int __shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
+-                                     uint64_t *block_size) {
+-  int ret = -1;
+-  uint64_t ctx_uint = 0;
+-  shard_inode_ctx_t *ctx = NULL;
++int
++__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
++                                 uint64_t *block_size)
++{
++    int ret = -1;
++    uint64_t ctx_uint = 0;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __inode_ctx_get(inode, this, &ctx_uint);
+-  if (ret < 0)
+-    return ret;
++    ret = __inode_ctx_get(inode, this, &ctx_uint);
++    if (ret < 0)
++        return ret;
+ 
+-  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+ 
+-  *block_size = ctx->block_size;
++    *block_size = ctx->block_size;
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
+-                                   uint64_t *block_size) {
+-  int ret = -1;
++int
++shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
++                               uint64_t *block_size)
++{
++    int ret = -1;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_get_block_size(inode, this, block_size);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
+-                                      int *fsync_count) {
+-  int ret = -1;
+-  uint64_t ctx_uint = 0;
+-  shard_inode_ctx_t *ctx = NULL;
++int
++__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
++                                  int *fsync_count)
++{
++    int ret = -1;
++    uint64_t ctx_uint = 0;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __inode_ctx_get(inode, this, &ctx_uint);
+-  if (ret < 0)
+-    return ret;
++    ret = __inode_ctx_get(inode, this, &ctx_uint);
++    if (ret < 0)
++        return ret;
+ 
+-  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+ 
+-  *fsync_count = ctx->fsync_needed;
++    *fsync_count = ctx->fsync_needed;
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
+-                                    int *fsync_count) {
+-  int ret = -1;
++int
++shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
++                                int *fsync_count)
++{
++    int ret = -1;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+-int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+-                              shard_inode_ctx_t *ctx_out) {
+-  int ret = -1;
+-  uint64_t ctx_uint = 0;
+-  shard_inode_ctx_t *ctx = NULL;
++int
++__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
++                          shard_inode_ctx_t *ctx_out)
++{
++    int ret = -1;
++    uint64_t ctx_uint = 0;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __inode_ctx_get(inode, this, &ctx_uint);
+-  if (ret < 0)
+-    return ret;
++    ret = __inode_ctx_get(inode, this, &ctx_uint);
++    if (ret < 0)
++        return ret;
+ 
+-  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+ 
+-  memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t));
+-  return 0;
++    memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t));
++    return 0;
+ }
+ 
+-int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+-                            shard_inode_ctx_t *ctx_out) {
+-  int ret = -1;
++int
++shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
++                        shard_inode_ctx_t *ctx_out)
++{
++    int ret = -1;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_get_all(inode, this, ctx_out);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
+-                                           struct iatt *buf,
+-                                           gf_boolean_t *need_refresh) {
+-  int ret = -1;
+-  uint64_t ctx_uint = 0;
+-  shard_inode_ctx_t *ctx = NULL;
++int
++__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
++                                       struct iatt *buf,
++                                       gf_boolean_t *need_refresh)
++{
++    int ret = -1;
++    uint64_t ctx_uint = 0;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  ret = __inode_ctx_get(inode, this, &ctx_uint);
+-  if (ret < 0)
+-    return ret;
++    ret = __inode_ctx_get(inode, this, &ctx_uint);
++    if (ret < 0)
++        return ret;
+ 
+-  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+ 
+-  if (ctx->refresh == _gf_false)
+-    *buf = ctx->stat;
+-  else
+-    *need_refresh = _gf_true;
++    if (ctx->refresh == _gf_false)
++        *buf = ctx->stat;
++    else
++        *need_refresh = _gf_true;
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
+-                                         struct iatt *buf,
+-                                         gf_boolean_t *need_refresh) {
+-  int ret = -1;
++int
++shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
++                                     struct iatt *buf,
++                                     gf_boolean_t *need_refresh)
++{
++    int ret = -1;
+ 
+-  LOCK(&inode->lock);
+-  {
+-    ret =
+-        __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh);
+-  }
+-  UNLOCK(&inode->lock);
++    LOCK(&inode->lock);
++    {
++        ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf,
++                                                     need_refresh);
++    }
++    UNLOCK(&inode->lock);
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-void shard_local_wipe(shard_local_t *local) {
+-  int i = 0;
+-  int count = 0;
++void
++shard_local_wipe(shard_local_t *local)
++{
++    int i = 0;
++    int count = 0;
++
++    count = local->num_blocks;
++
++    syncbarrier_destroy(&local->barrier);
++    loc_wipe(&local->loc);
++    loc_wipe(&local->dot_shard_loc);
++    loc_wipe(&local->dot_shard_rm_loc);
++    loc_wipe(&local->loc2);
++    loc_wipe(&local->tmp_loc);
++    loc_wipe(&local->int_inodelk.loc);
++    loc_wipe(&local->int_entrylk.loc);
++    loc_wipe(&local->newloc);
++
++    if (local->name)
++        GF_FREE(local->name);
++
++    if (local->int_entrylk.basename)
++        GF_FREE(local->int_entrylk.basename);
++    if (local->fd)
++        fd_unref(local->fd);
+ 
+-  count = local->num_blocks;
++    if (local->xattr_req)
++        dict_unref(local->xattr_req);
++    if (local->xattr_rsp)
++        dict_unref(local->xattr_rsp);
+ 
+-  syncbarrier_destroy(&local->barrier);
+-  loc_wipe(&local->loc);
+-  loc_wipe(&local->dot_shard_loc);
+-  loc_wipe(&local->dot_shard_rm_loc);
+-  loc_wipe(&local->loc2);
+-  loc_wipe(&local->tmp_loc);
+-  loc_wipe(&local->int_inodelk.loc);
+-  loc_wipe(&local->int_entrylk.loc);
+-  loc_wipe(&local->newloc);
++    for (i = 0; i < count; i++) {
++        if (!local->inode_list)
++            break;
+ 
+-  if (local->name)
+-    GF_FREE(local->name);
++        if (local->inode_list[i])
++            inode_unref(local->inode_list[i]);
++    }
+ 
+-  if (local->int_entrylk.basename)
+-    GF_FREE(local->int_entrylk.basename);
+-  if (local->fd)
+-    fd_unref(local->fd);
++    GF_FREE(local->inode_list);
+ 
+-  if (local->xattr_req)
+-    dict_unref(local->xattr_req);
+-  if (local->xattr_rsp)
+-    dict_unref(local->xattr_rsp);
++    GF_FREE(local->vector);
++    if (local->iobref)
++        iobref_unref(local->iobref);
++    if (local->list_inited)
++        gf_dirent_free(&local->entries_head);
++    if (local->inodelk_frame)
++        SHARD_STACK_DESTROY(local->inodelk_frame);
++    if (local->entrylk_frame)
++        SHARD_STACK_DESTROY(local->entrylk_frame);
++}
+ 
+-  for (i = 0; i < count; i++) {
+-    if (!local->inode_list)
+-      break;
+-
+-    if (local->inode_list[i])
+-      inode_unref(local->inode_list[i]);
+-  }
+-
+-  GF_FREE(local->inode_list);
+-
+-  GF_FREE(local->vector);
+-  if (local->iobref)
+-    iobref_unref(local->iobref);
+-  if (local->list_inited)
+-    gf_dirent_free(&local->entries_head);
+-  if (local->inodelk_frame)
+-    SHARD_STACK_DESTROY(local->inodelk_frame);
+-  if (local->entrylk_frame)
+-    SHARD_STACK_DESTROY(local->entrylk_frame);
+-}
+-
+-int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) {
+-  int ret = -1;
+-  void *size_attr = NULL;
+-  uint64_t size_array[4];
+-
+-  ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+-  if (ret) {
+-    gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
+-                     SHARD_MSG_INTERNAL_XATTR_MISSING,
+-                     "Failed to "
+-                     "get " GF_XATTR_SHARD_FILE_SIZE " for %s",
+-                     uuid_utoa(stbuf->ia_gfid));
+-    return ret;
+-  }
++int
++shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict)
++{
++    int ret = -1;
++    void *size_attr = NULL;
++    uint64_t size_array[4];
++
++    ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
++    if (ret) {
++        gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
++                         SHARD_MSG_INTERNAL_XATTR_MISSING,
++                         "Failed to "
++                         "get " GF_XATTR_SHARD_FILE_SIZE " for %s",
++                         uuid_utoa(stbuf->ia_gfid));
++        return ret;
++    }
+ 
+-  memcpy(size_array, size_attr, sizeof(size_array));
++    memcpy(size_array, size_attr, sizeof(size_array));
+ 
+-  stbuf->ia_size = ntoh64(size_array[0]);
+-  stbuf->ia_blocks = ntoh64(size_array[2]);
++    stbuf->ia_size = ntoh64(size_array[0]);
++    stbuf->ia_blocks = ntoh64(size_array[2]);
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_call_count_return(call_frame_t *frame) {
+-  int call_count = 0;
+-  shard_local_t *local = NULL;
++int
++shard_call_count_return(call_frame_t *frame)
++{
++    int call_count = 0;
++    shard_local_t *local = NULL;
+ 
+-  local = frame->local;
++    local = frame->local;
+ 
+-  LOCK(&frame->lock);
+-  { call_count = --local->call_count; }
+-  UNLOCK(&frame->lock);
++    LOCK(&frame->lock);
++    {
++        call_count = --local->call_count;
++    }
++    UNLOCK(&frame->lock);
+ 
+-  return call_count;
++    return call_count;
+ }
+ 
+-static char *shard_internal_dir_string(shard_internal_dir_type_t type) {
+-  char *str = NULL;
+-
+-  switch (type) {
+-  case SHARD_INTERNAL_DIR_DOT_SHARD:
+-    str = GF_SHARD_DIR;
+-    break;
+-  case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+-    str = GF_SHARD_REMOVE_ME_DIR;
+-    break;
+-  default:
+-    break;
+-  }
+-  return str;
++static char *
++shard_internal_dir_string(shard_internal_dir_type_t type)
++{
++    char *str = NULL;
++
++    switch (type) {
++        case SHARD_INTERNAL_DIR_DOT_SHARD:
++            str = GF_SHARD_DIR;
++            break;
++        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++            str = GF_SHARD_REMOVE_ME_DIR;
++            break;
++        default:
++            break;
++    }
++    return str;
+ }
+ 
+-static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local,
+-                                       shard_internal_dir_type_t type) {
+-  int ret = -1;
+-  char *bname = NULL;
+-  inode_t *parent = NULL;
+-  loc_t *internal_dir_loc = NULL;
+-  shard_priv_t *priv = NULL;
++static int
++shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local,
++                            shard_internal_dir_type_t type)
++{
++    int ret = -1;
++    char *bname = NULL;
++    inode_t *parent = NULL;
++    loc_t *internal_dir_loc = NULL;
++    shard_priv_t *priv = NULL;
+ 
+-  priv = this->private;
+-  if (!local)
+-    return -1;
++    priv = this->private;
++    if (!local)
++        return -1;
++
++    switch (type) {
++        case SHARD_INTERNAL_DIR_DOT_SHARD:
++            internal_dir_loc = &local->dot_shard_loc;
++            bname = GF_SHARD_DIR;
++            parent = inode_ref(this->itable->root);
++            break;
++        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++            internal_dir_loc = &local->dot_shard_rm_loc;
++            bname = GF_SHARD_REMOVE_ME_DIR;
++            parent = inode_ref(priv->dot_shard_inode);
++            break;
++        default:
++            break;
++    }
+ 
+-  switch (type) {
+-  case SHARD_INTERNAL_DIR_DOT_SHARD:
+-    internal_dir_loc = &local->dot_shard_loc;
+-    bname = GF_SHARD_DIR;
+-    parent = inode_ref(this->itable->root);
+-    break;
+-  case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+-    internal_dir_loc = &local->dot_shard_rm_loc;
+-    bname = GF_SHARD_REMOVE_ME_DIR;
+-    parent = inode_ref(priv->dot_shard_inode);
+-    break;
+-  default:
+-    break;
+-  }
+-
+-  internal_dir_loc->inode = inode_new(this->itable);
+-  internal_dir_loc->parent = parent;
+-  ret = inode_path(internal_dir_loc->parent, bname,
+-                   (char **)&internal_dir_loc->path);
+-  if (ret < 0 || !(internal_dir_loc->inode)) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-           "Inode path failed on %s", bname);
+-    goto out;
+-  }
+-
+-  internal_dir_loc->name = strrchr(internal_dir_loc->path, '/');
+-  if (internal_dir_loc->name)
+-    internal_dir_loc->name++;
+-
+-  ret = 0;
+-out:
+-  return ret;
+-}
+-
+-inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
+-                                          inode_t *base_inode, int block_num,
+-                                          uuid_t gfid) {
+-  char block_bname[256] = {
+-      0,
+-  };
+-  inode_t *lru_inode = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_inode_ctx_t *ctx = NULL;
+-  shard_inode_ctx_t *lru_inode_ctx = NULL;
+-  shard_inode_ctx_t *lru_base_inode_ctx = NULL;
+-  inode_t *fsync_inode = NULL;
+-  inode_t *lru_base_inode = NULL;
+-  gf_boolean_t do_fsync = _gf_false;
+-
+-  priv = this->private;
+-
+-  shard_inode_ctx_get(linked_inode, this, &ctx);
+-
+-  if (list_empty(&ctx->ilist)) {
+-    if (priv->inode_count + 1 <= priv->lru_limit) {
+-      /* If this inode was linked here for the first time (indicated
+-       * by empty list), and if there is still space in the priv list,
+-       * add this ctx to the tail of the list.
+-       */
+-      /* For as long as an inode is in lru list, we try to
+-       * keep it alive by holding a ref on it.
+-       */
+-      inode_ref(linked_inode);
+-      if (base_inode)
+-        gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+-      else
+-        gf_uuid_copy(ctx->base_gfid, gfid);
+-      ctx->block_num = block_num;
+-      list_add_tail(&ctx->ilist, &priv->ilist_head);
+-      priv->inode_count++;
+-      ctx->base_inode = inode_ref(base_inode);
+-    } else {
+-      /*If on the other hand there is no available slot for this inode
+-       * in the list, delete the lru inode from the head of the list,
+-       * unlink it. And in its place add this new inode into the list.
+-       */
+-      lru_inode_ctx =
+-          list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist);
+-      GF_ASSERT(lru_inode_ctx->block_num > 0);
+-      lru_base_inode = lru_inode_ctx->base_inode;
+-      list_del_init(&lru_inode_ctx->ilist);
+-      lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid);
+-      /* If the lru inode was part of the pending-fsync list,
+-       * the base inode needs to be unref'd, the lru inode
+-       * deleted from fsync list and fsync'd in a new frame,
+-       * and then unlinked in memory and forgotten.
+-       */
+-      if (!lru_base_inode)
+-        goto after_fsync_check;
+-      LOCK(&lru_base_inode->lock);
+-      LOCK(&lru_inode->lock);
+-      {
+-        if (!list_empty(&lru_inode_ctx->to_fsync_list)) {
+-          list_del_init(&lru_inode_ctx->to_fsync_list);
+-          lru_inode_ctx->fsync_needed = 0;
+-          do_fsync = _gf_true;
+-          __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx);
+-          lru_base_inode_ctx->fsync_count--;
+-        }
+-      }
+-      UNLOCK(&lru_inode->lock);
+-      UNLOCK(&lru_base_inode->lock);
+-
+-    after_fsync_check:
+-      if (!do_fsync) {
+-        shard_make_block_bname(lru_inode_ctx->block_num,
+-                               lru_inode_ctx->base_gfid, block_bname,
+-                               sizeof(block_bname));
+-        /* The following unref corresponds to the ref held at
+-         * the time the shard was added to the lru list.
+-         */
+-        inode_unref(lru_inode);
+-        inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
+-        inode_forget(lru_inode, 0);
+-      } else {
+-        /* The following unref corresponds to the ref
+-         * held when the shard was added to fsync list.
+-         */
+-        inode_unref(lru_inode);
+-        fsync_inode = lru_inode;
+-        if (lru_base_inode)
+-          inode_unref(lru_base_inode);
+-      }
+-      /* The following unref corresponds to the ref
+-       * held by inode_find() above.
+-       */
+-      inode_unref(lru_inode);
+-
+-      /* The following unref corresponds to the ref held on the base shard
+-       * at the time of adding shard inode to lru list
+-       */
+-      if (lru_base_inode)
+-        inode_unref(lru_base_inode);
+-
+-      /* For as long as an inode is in lru list, we try to
+-       * keep it alive by holding a ref on it.
+-       */
+-      inode_ref(linked_inode);
+-      if (base_inode)
+-        gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+-      else
+-        gf_uuid_copy(ctx->base_gfid, gfid);
+-      ctx->block_num = block_num;
+-      ctx->base_inode = inode_ref(base_inode);
+-      list_add_tail(&ctx->ilist, &priv->ilist_head);
+-    }
+-  } else {
+-    /* If this is not the first time this inode is being operated on, move
+-     * it to the most recently used end of the list.
+-     */
+-    list_move_tail(&ctx->ilist, &priv->ilist_head);
+-  }
+-  return fsync_inode;
+-}
+-
+-int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame,
+-                                int32_t op_ret, int32_t op_errno) {
+-  switch (fop) {
+-  case GF_FOP_LOOKUP:
+-    SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_STAT:
+-    SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL);
+-    break;
+-  case GF_FOP_FSTAT:
+-    SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL);
+-    break;
+-  case GF_FOP_TRUNCATE:
+-    SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_FTRUNCATE:
+-    SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_MKNOD:
+-    SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
+-                       NULL);
+-    break;
+-  case GF_FOP_LINK:
+-    SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
+-                       NULL);
+-    break;
+-  case GF_FOP_CREATE:
+-    SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
+-                       NULL, NULL);
+-    break;
+-  case GF_FOP_UNLINK:
+-    SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_RENAME:
+-    SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
+-                       NULL, NULL);
+-    break;
+-  case GF_FOP_WRITE:
+-    SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_FALLOCATE:
+-    SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_ZEROFILL:
+-    SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_DISCARD:
+-    SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_READ:
+-    SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL,
+-                       NULL);
+-    break;
+-  case GF_FOP_FSYNC:
+-    SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_REMOVEXATTR:
+-    SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL);
+-    break;
+-  case GF_FOP_FREMOVEXATTR:
+-    SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL);
+-    break;
+-  case GF_FOP_FGETXATTR:
+-    SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+-    break;
+-  case GF_FOP_GETXATTR:
+-    SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL);
+-    break;
+-  case GF_FOP_FSETXATTR:
+-    SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
+-    break;
+-  case GF_FOP_SETXATTR:
+-    SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
+-    break;
+-  case GF_FOP_SETATTR:
+-    SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_FSETATTR:
+-    SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL);
+-    break;
+-  case GF_FOP_SEEK:
+-    SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL);
+-    break;
+-  default:
+-    gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-           "Invalid fop id = %d", fop);
+-    break;
+-  }
+-  return 0;
+-}
+-
+-int shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
+-                                            call_frame_t *frame,
+-                                            int32_t op_ret) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  switch (fop) {
+-  case GF_FOP_WRITE:
+-    SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf,
+-                       &local->postbuf, local->xattr_rsp);
+-    break;
+-  case GF_FOP_FALLOCATE:
+-    SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf,
+-                       &local->postbuf, local->xattr_rsp);
+-    break;
+-  case GF_FOP_ZEROFILL:
+-    SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf,
+-                       &local->postbuf, local->xattr_rsp);
+-    break;
+-  case GF_FOP_DISCARD:
+-    SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf,
+-                       &local->postbuf, local->xattr_rsp);
+-    break;
+-  default:
+-    gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-           "Invalid fop id = %d", fop);
+-    break;
+-  }
+-  return 0;
+-}
+-
+-int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie,
+-                                  xlator_t *this, int32_t op_ret,
+-                                  int32_t op_errno, struct iatt *prebuf,
+-                                  struct iatt *postbuf, dict_t *xdata) {
+-  char block_bname[256] = {
+-      0,
+-  };
+-  fd_t *anon_fd = cookie;
+-  inode_t *shard_inode = NULL;
+-  shard_inode_ctx_t *ctx = NULL;
+-  shard_priv_t *priv = NULL;
+-
+-  priv = this->private;
+-
+-  if (anon_fd == NULL || op_ret < 0) {
+-    gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED,
+-           "fsync failed on shard");
+-    goto out;
+-  }
+-  shard_inode = anon_fd->inode;
+-
+-  LOCK(&priv->lock);
+-  LOCK(&shard_inode->lock);
+-  {
+-    __shard_inode_ctx_get(shard_inode, this, &ctx);
+-    if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
+-      shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname,
+-                             sizeof(block_bname));
+-      inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
+-      /* The following unref corresponds to the ref held by
+-       * inode_link() at the time the shard was created or
+-       * looked up
+-       */
+-      inode_unref(shard_inode);
+-      inode_forget(shard_inode, 0);
+-    }
+-  }
+-  UNLOCK(&shard_inode->lock);
+-  UNLOCK(&priv->lock);
++    internal_dir_loc->inode = inode_new(this->itable);
++    internal_dir_loc->parent = parent;
++    ret = inode_path(internal_dir_loc->parent, bname,
++                     (char **)&internal_dir_loc->path);
++    if (ret < 0 || !(internal_dir_loc->inode)) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++               "Inode path failed on %s", bname);
++        goto out;
++    }
++
++    internal_dir_loc->name = strrchr(internal_dir_loc->path, '/');
++    if (internal_dir_loc->name)
++        internal_dir_loc->name++;
+ 
++    ret = 0;
+ out:
+-  if (anon_fd)
+-    fd_unref(anon_fd);
+-  STACK_DESTROY(frame->root);
+-  return 0;
++    return ret;
+ }
+ 
+-int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) {
+-  fd_t *anon_fd = NULL;
+-  call_frame_t *fsync_frame = NULL;
+-
+-  fsync_frame = create_frame(this, this->ctx->pool);
+-  if (!fsync_frame) {
+-    gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+-           "Failed to create new frame "
+-           "to fsync shard");
+-    return -1;
+-  }
+-
+-  anon_fd = fd_anonymous(inode);
+-  if (!anon_fd) {
+-    gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+-           "Failed to create anon fd to"
+-           " fsync shard");
+-    STACK_DESTROY(fsync_frame->root);
+-    return -1;
+-  }
+-
+-  STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd,
+-                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd,
+-                    1, NULL);
+-  return 0;
+-}
+-
+-int shard_common_resolve_shards(
+-    call_frame_t *frame, xlator_t *this,
+-    shard_post_resolve_fop_handler_t post_res_handler) {
+-  int i = -1;
+-  uint32_t shard_idx_iter = 0;
+-  char path[PATH_MAX] = {
+-      0,
+-  };
+-  uuid_t gfid = {
+-      0,
+-  };
+-  inode_t *inode = NULL;
+-  inode_t *res_inode = NULL;
+-  inode_t *fsync_inode = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
+-
+-  priv = this->private;
+-  local = frame->local;
+-  local->call_count = 0;
+-  shard_idx_iter = local->first_block;
+-  res_inode = local->resolver_base_inode;
+-  if (res_inode)
+-    gf_uuid_copy(gfid, res_inode->gfid);
+-  else
+-    gf_uuid_copy(gfid, local->base_gfid);
+-
+-  if ((local->op_ret < 0) || (local->resolve_not))
+-    goto out;
+-
+-  while (shard_idx_iter <= local->last_block) {
+-    i++;
+-    if (shard_idx_iter == 0) {
+-      local->inode_list[i] = inode_ref(res_inode);
+-      shard_idx_iter++;
+-      continue;
+-    }
+-
+-    shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+-
+-    inode = NULL;
+-    inode = inode_resolve(this->itable, path);
+-    if (inode) {
+-      gf_msg_debug(this->name, 0, "Shard %d already "
+-                                  "present. gfid=%s. Saving inode for future.",
+-                   shard_idx_iter, uuid_utoa(inode->gfid));
+-      local->inode_list[i] = inode;
+-      /* Let the ref on the inodes that are already present
+-       * in inode table still be held so that they don't get
+-       * forgotten by the time the fop reaches the actual
+-       * write stage.
+-       */
+-      LOCK(&priv->lock);
+-      {
+-        fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode,
+-                                                       shard_idx_iter, gfid);
+-      }
+-      UNLOCK(&priv->lock);
+-      shard_idx_iter++;
+-      if (fsync_inode)
+-        shard_initiate_evicted_inode_fsync(this, fsync_inode);
+-      continue;
++inode_t *
++__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
++                                 inode_t *base_inode, int block_num,
++                                 uuid_t gfid)
++{
++    char block_bname[256] = {
++        0,
++    };
++    inode_t *lru_inode = NULL;
++    shard_priv_t *priv = NULL;
++    shard_inode_ctx_t *ctx = NULL;
++    shard_inode_ctx_t *lru_inode_ctx = NULL;
++    shard_inode_ctx_t *lru_base_inode_ctx = NULL;
++    inode_t *fsync_inode = NULL;
++    inode_t *lru_base_inode = NULL;
++    gf_boolean_t do_fsync = _gf_false;
++
++    priv = this->private;
++
++    shard_inode_ctx_get(linked_inode, this, &ctx);
++
++    if (list_empty(&ctx->ilist)) {
++        if (priv->inode_count + 1 <= priv->lru_limit) {
++            /* If this inode was linked here for the first time (indicated
++             * by empty list), and if there is still space in the priv list,
++             * add this ctx to the tail of the list.
++             */
++            /* For as long as an inode is in lru list, we try to
++             * keep it alive by holding a ref on it.
++             */
++            inode_ref(linked_inode);
++            if (base_inode)
++                gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
++            else
++                gf_uuid_copy(ctx->base_gfid, gfid);
++            ctx->block_num = block_num;
++            list_add_tail(&ctx->ilist, &priv->ilist_head);
++            priv->inode_count++;
++            ctx->base_inode = inode_ref(base_inode);
++        } else {
++            /*If on the other hand there is no available slot for this inode
++             * in the list, delete the lru inode from the head of the list,
++             * unlink it. And in its place add this new inode into the list.
++             */
++            lru_inode_ctx = list_first_entry(&priv->ilist_head,
++                                             shard_inode_ctx_t, ilist);
++            GF_ASSERT(lru_inode_ctx->block_num > 0);
++            lru_base_inode = lru_inode_ctx->base_inode;
++            list_del_init(&lru_inode_ctx->ilist);
++            lru_inode = inode_find(linked_inode->table,
++                                   lru_inode_ctx->stat.ia_gfid);
++            /* If the lru inode was part of the pending-fsync list,
++             * the base inode needs to be unref'd, the lru inode
++             * deleted from fsync list and fsync'd in a new frame,
++             * and then unlinked in memory and forgotten.
++             */
++            if (!lru_base_inode)
++                goto after_fsync_check;
++            LOCK(&lru_base_inode->lock);
++            LOCK(&lru_inode->lock);
++            {
++                if (!list_empty(&lru_inode_ctx->to_fsync_list)) {
++                    list_del_init(&lru_inode_ctx->to_fsync_list);
++                    lru_inode_ctx->fsync_needed = 0;
++                    do_fsync = _gf_true;
++                    __shard_inode_ctx_get(lru_base_inode, this,
++                                          &lru_base_inode_ctx);
++                    lru_base_inode_ctx->fsync_count--;
++                }
++            }
++            UNLOCK(&lru_inode->lock);
++            UNLOCK(&lru_base_inode->lock);
++
++        after_fsync_check:
++            if (!do_fsync) {
++                shard_make_block_bname(lru_inode_ctx->block_num,
++                                       lru_inode_ctx->base_gfid, block_bname,
++                                       sizeof(block_bname));
++                /* The following unref corresponds to the ref held at
++                 * the time the shard was added to the lru list.
++                 */
++                inode_unref(lru_inode);
++                inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
++                inode_forget(lru_inode, 0);
++            } else {
++                /* The following unref corresponds to the ref
++                 * held when the shard was added to fsync list.
++                 */
++                inode_unref(lru_inode);
++                fsync_inode = lru_inode;
++                if (lru_base_inode)
++                    inode_unref(lru_base_inode);
++            }
++            /* The following unref corresponds to the ref
++             * held by inode_find() above.
++             */
++            inode_unref(lru_inode);
++
++            /* The following unref corresponds to the ref held on the base shard
++             * at the time of adding shard inode to lru list
++             */
++            if (lru_base_inode)
++                inode_unref(lru_base_inode);
++
++            /* For as long as an inode is in lru list, we try to
++             * keep it alive by holding a ref on it.
++             */
++            inode_ref(linked_inode);
++            if (base_inode)
++                gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
++            else
++                gf_uuid_copy(ctx->base_gfid, gfid);
++            ctx->block_num = block_num;
++            ctx->base_inode = inode_ref(base_inode);
++            list_add_tail(&ctx->ilist, &priv->ilist_head);
++        }
+     } else {
+-      local->call_count++;
+-      shard_idx_iter++;
++        /* If this is not the first time this inode is being operated on, move
++         * it to the most recently used end of the list.
++         */
++        list_move_tail(&ctx->ilist, &priv->ilist_head);
+     }
+-  }
+-out:
+-  post_res_handler(frame, this);
+-  return 0;
++    return fsync_inode;
+ }
+ 
+-int shard_update_file_size_cbk(call_frame_t *frame, void *cookie,
+-                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+-                               dict_t *dict, dict_t *xdata) {
+-  inode_t *inode = NULL;
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  if ((local->fd) && (local->fd->inode))
+-    inode = local->fd->inode;
+-  else if (local->loc.inode)
+-    inode = local->loc.inode;
+-
+-  if (op_ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno,
+-           SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size"
+-                                              " xattr failed on %s",
+-           uuid_utoa(inode->gfid));
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto err;
+-  }
+-
+-  if (shard_modify_size_and_block_count(&local->postbuf, dict)) {
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto err;
+-  }
+-err:
+-  local->post_update_size_handler(frame, this);
+-  return 0;
++int
++shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame,
++                            int32_t op_ret, int32_t op_errno)
++{
++    switch (fop) {
++        case GF_FOP_LOOKUP:
++            SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL,
++                               NULL, NULL);
++            break;
++        case GF_FOP_STAT:
++            SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL);
++            break;
++        case GF_FOP_FSTAT:
++            SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL);
++            break;
++        case GF_FOP_TRUNCATE:
++            SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_FTRUNCATE:
++            SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_MKNOD:
++            SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL,
++                               NULL, NULL);
++            break;
++        case GF_FOP_LINK:
++            SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL,
++                               NULL, NULL);
++            break;
++        case GF_FOP_CREATE:
++            SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL,
++                               NULL, NULL, NULL, NULL);
++            break;
++        case GF_FOP_UNLINK:
++            SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_RENAME:
++            SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL,
++                               NULL, NULL, NULL, NULL);
++            break;
++        case GF_FOP_WRITE:
++            SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_FALLOCATE:
++            SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_ZEROFILL:
++            SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_DISCARD:
++            SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_READ:
++            SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL,
++                               NULL, NULL);
++            break;
++        case GF_FOP_FSYNC:
++            SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_REMOVEXATTR:
++            SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL);
++            break;
++        case GF_FOP_FREMOVEXATTR:
++            SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL);
++            break;
++        case GF_FOP_FGETXATTR:
++            SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
++            break;
++        case GF_FOP_GETXATTR:
++            SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL);
++            break;
++        case GF_FOP_FSETXATTR:
++            SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
++            break;
++        case GF_FOP_SETXATTR:
++            SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
++            break;
++        case GF_FOP_SETATTR:
++            SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_FSETATTR:
++            SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL,
++                               NULL);
++            break;
++        case GF_FOP_SEEK:
++            SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL);
++            break;
++        default:
++            gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++                   "Invalid fop id = %d", fop);
++            break;
++    }
++    return 0;
+ }
+ 
+-int shard_set_size_attrs(int64_t size, int64_t block_count,
+-                         int64_t **size_attr_p) {
+-  int ret = -1;
+-  int64_t *size_attr = NULL;
++int
++shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
++                                        call_frame_t *frame, int32_t op_ret)
++{
++    shard_local_t *local = NULL;
+ 
+-  if (!size_attr_p)
+-    goto out;
++    local = frame->local;
+ 
+-  size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t);
+-  if (!size_attr)
+-    goto out;
++    switch (fop) {
++        case GF_FOP_WRITE:
++            SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf,
++                               &local->postbuf, local->xattr_rsp);
++            break;
++        case GF_FOP_FALLOCATE:
++            SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf,
++                               &local->postbuf, local->xattr_rsp);
++            break;
++        case GF_FOP_ZEROFILL:
++            SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf,
++                               &local->postbuf, local->xattr_rsp);
++            break;
++        case GF_FOP_DISCARD:
++            SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf,
++                               &local->postbuf, local->xattr_rsp);
++            break;
++        default:
++            gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++                   "Invalid fop id = %d", fop);
++            break;
++    }
++    return 0;
++}
+ 
+-  size_attr[0] = hton64(size);
+-  /* As sharding evolves, it _may_ be necessary to embed more pieces of
+-   * information within the same xattr. So allocating slots for them in
+-   * advance. For now, only bytes 0-63 and 128-191 which would make up the
+-   * current size and block count respectively of the file are valid.
+-   */
+-  size_attr[2] = hton64(block_count);
++int
++shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                              int32_t op_ret, int32_t op_errno,
++                              struct iatt *prebuf, struct iatt *postbuf,
++                              dict_t *xdata)
++{
++    char block_bname[256] = {
++        0,
++    };
++    fd_t *anon_fd = cookie;
++    inode_t *shard_inode = NULL;
++    shard_inode_ctx_t *ctx = NULL;
++    shard_priv_t *priv = NULL;
+ 
+-  *size_attr_p = size_attr;
++    priv = this->private;
+ 
+-  ret = 0;
+-out:
+-  return ret;
+-}
++    if (anon_fd == NULL || op_ret < 0) {
++        gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED,
++               "fsync failed on shard");
++        goto out;
++    }
++    shard_inode = anon_fd->inode;
+ 
+-int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                           loc_t *loc,
+-                           shard_post_update_size_fop_handler_t handler) {
+-  int ret = -1;
+-  int64_t *size_attr = NULL;
+-  int64_t delta_blocks = 0;
+-  inode_t *inode = NULL;
+-  shard_local_t *local = NULL;
+-  dict_t *xattr_req = NULL;
++    LOCK(&priv->lock);
++    LOCK(&shard_inode->lock);
++    {
++        __shard_inode_ctx_get(shard_inode, this, &ctx);
++        if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
++            shard_make_block_bname(ctx->block_num, shard_inode->gfid,
++                                   block_bname, sizeof(block_bname));
++            inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
++            /* The following unref corresponds to the ref held by
++             * inode_link() at the time the shard was created or
++             * looked up
++             */
++            inode_unref(shard_inode);
++            inode_forget(shard_inode, 0);
++        }
++    }
++    UNLOCK(&shard_inode->lock);
++    UNLOCK(&priv->lock);
+ 
+-  local = frame->local;
+-  local->post_update_size_handler = handler;
++out:
++    if (anon_fd)
++        fd_unref(anon_fd);
++    STACK_DESTROY(frame->root);
++    return 0;
++}
+ 
+-  xattr_req = dict_new();
+-  if (!xattr_req) {
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto out;
+-  }
+-
+-  if (fd)
+-    inode = fd->inode;
+-  else
+-    inode = loc->inode;
+-
+-  /* If both size and block count have not changed, then skip the xattrop.
+-   */
+-  delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
+-  if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
+-    goto out;
+-  }
+-
+-  ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks,
+-                             &size_attr);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
+-           "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto out;
+-  }
+-
+-  ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to set key %s into dict. gfid=%s", GF_XATTR_SHARD_FILE_SIZE,
+-           uuid_utoa(inode->gfid));
+-    GF_FREE(size_attr);
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto out;
+-  }
++int
++shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
++{
++    fd_t *anon_fd = NULL;
++    call_frame_t *fsync_frame = NULL;
++
++    fsync_frame = create_frame(this, this->ctx->pool);
++    if (!fsync_frame) {
++        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++               "Failed to create new frame "
++               "to fsync shard");
++        return -1;
++    }
+ 
+-  if (fd)
+-    STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64,
+-               xattr_req, NULL);
+-  else
+-    STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64,
+-               xattr_req, NULL);
++    anon_fd = fd_anonymous(inode);
++    if (!anon_fd) {
++        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++               "Failed to create anon fd to"
++               " fsync shard");
++        STACK_DESTROY(fsync_frame->root);
++        return -1;
++    }
+ 
+-  dict_unref(xattr_req);
+-  return 0;
++    STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
++                      anon_fd, 1, NULL);
++    return 0;
++}
+ 
+-out:
+-  if (xattr_req)
+-    dict_unref(xattr_req);
+-  handler(frame, this);
+-  return 0;
+-}
+-
+-static inode_t *shard_link_internal_dir_inode(shard_local_t *local,
+-                                              inode_t *inode, struct iatt *buf,
+-                                              shard_internal_dir_type_t type) {
+-  inode_t *linked_inode = NULL;
+-  shard_priv_t *priv = NULL;
+-  char *bname = NULL;
+-  inode_t **priv_inode = NULL;
+-  inode_t *parent = NULL;
+-
+-  priv = THIS->private;
+-
+-  switch (type) {
+-  case SHARD_INTERNAL_DIR_DOT_SHARD:
+-    bname = GF_SHARD_DIR;
+-    priv_inode = &priv->dot_shard_inode;
+-    parent = inode->table->root;
+-    break;
+-  case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+-    bname = GF_SHARD_REMOVE_ME_DIR;
+-    priv_inode = &priv->dot_shard_rm_inode;
+-    parent = priv->dot_shard_inode;
+-    break;
+-  default:
+-    break;
+-  }
+-
+-  linked_inode = inode_link(inode, parent, bname, buf);
+-  inode_lookup(linked_inode);
+-  *priv_inode = linked_inode;
+-  return linked_inode;
+-}
+-
+-int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
+-                                   xlator_t *this, int32_t op_ret,
+-                                   int32_t op_errno, inode_t *inode,
+-                                   struct iatt *buf, dict_t *xdata,
+-                                   struct iatt *postparent) {
+-  shard_local_t *local = NULL;
+-  inode_t *linked_inode = NULL;
+-  shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+-
+-  local = frame->local;
+-
+-  if (op_ret) {
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto out;
+-  }
+-
+-  /* To-Do: Fix refcount increment per call to
+-   * shard_link_internal_dir_inode().
+-   */
+-  linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+-  shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
+-out:
+-  shard_common_resolve_shards(frame, this, local->post_res_handler);
+-  return 0;
+-}
+-
+-int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this,
+-                               shard_internal_dir_type_t type) {
+-  loc_t loc = {
+-      0,
+-  };
+-  inode_t *inode = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
+-  uuid_t gfid = {
+-      0,
+-  };
+-
+-  local = frame->local;
+-  priv = this->private;
+-
+-  switch (type) {
+-  case SHARD_INTERNAL_DIR_DOT_SHARD:
+-    gf_uuid_copy(gfid, priv->dot_shard_gfid);
+-    break;
+-  case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+-    gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
+-    break;
+-  default:
+-    break;
+-  }
+-
+-  inode = inode_find(this->itable, gfid);
+-
+-  if (!shard_inode_ctx_needs_lookup(inode, this)) {
+-    local->op_ret = 0;
+-    goto out;
+-  }
++int
++shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
++                            shard_post_resolve_fop_handler_t post_res_handler)
++{
++    int i = -1;
++    uint32_t shard_idx_iter = 0;
++    char path[PATH_MAX] = {
++        0,
++    };
++    uuid_t gfid = {
++        0,
++    };
++    inode_t *inode = NULL;
++    inode_t *res_inode = NULL;
++    inode_t *fsync_inode = NULL;
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
+ 
+-  /* Plain assignment because the ref is already taken above through
+-   * call to inode_find()
+-   */
+-  loc.inode = inode;
+-  gf_uuid_copy(loc.gfid, gfid);
++    priv = this->private;
++    local = frame->local;
++    local->call_count = 0;
++    shard_idx_iter = local->first_block;
++    res_inode = local->resolver_base_inode;
++    if (res_inode)
++        gf_uuid_copy(gfid, res_inode->gfid);
++    else
++        gf_uuid_copy(gfid, local->base_gfid);
+ 
+-  STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type,
+-                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc,
+-                    NULL);
+-  loc_wipe(&loc);
++    if ((local->op_ret < 0) || (local->resolve_not))
++        goto out;
+ 
+-  return 0;
++    while (shard_idx_iter <= local->last_block) {
++        i++;
++        if (shard_idx_iter == 0) {
++            local->inode_list[i] = inode_ref(res_inode);
++            shard_idx_iter++;
++            continue;
++        }
+ 
++        shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
++
++        inode = NULL;
++        inode = inode_resolve(this->itable, path);
++        if (inode) {
++            gf_msg_debug(this->name, 0,
++                         "Shard %d already "
++                         "present. gfid=%s. Saving inode for future.",
++                         shard_idx_iter, uuid_utoa(inode->gfid));
++            local->inode_list[i] = inode;
++            /* Let the ref on the inodes that are already present
++             * in inode table still be held so that they don't get
++             * forgotten by the time the fop reaches the actual
++             * write stage.
++             */
++            LOCK(&priv->lock);
++            {
++                fsync_inode = __shard_update_shards_inode_list(
++                    inode, this, res_inode, shard_idx_iter, gfid);
++            }
++            UNLOCK(&priv->lock);
++            shard_idx_iter++;
++            if (fsync_inode)
++                shard_initiate_evicted_inode_fsync(this, fsync_inode);
++            continue;
++        } else {
++            local->call_count++;
++            shard_idx_iter++;
++        }
++    }
+ out:
+-  shard_common_resolve_shards(frame, this, local->post_res_handler);
+-  return 0;
++    post_res_handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie,
+-                                  xlator_t *this, int32_t op_ret,
+-                                  int32_t op_errno, inode_t *inode,
+-                                  struct iatt *buf, dict_t *xdata,
+-                                  struct iatt *postparent) {
+-  inode_t *link_inode = NULL;
+-  shard_local_t *local = NULL;
+-  shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++int
++shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno, dict_t *dict,
++                           dict_t *xdata)
++{
++    inode_t *inode = NULL;
++    shard_local_t *local = NULL;
+ 
+-  local = frame->local;
++    local = frame->local;
+ 
+-  if (op_ret) {
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto unwind;
+-  }
+-
+-  if (!IA_ISDIR(buf->ia_type)) {
+-    gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR,
+-           "%s already exists and "
+-           "is not a directory. Please remove it from all bricks "
+-           "and try again",
+-           shard_internal_dir_string(type));
+-    local->op_ret = -1;
+-    local->op_errno = EIO;
+-    goto unwind;
+-  }
+-
+-  link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+-  if (link_inode != inode) {
+-    shard_refresh_internal_dir(frame, this, type);
+-  } else {
+-    shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+-    shard_common_resolve_shards(frame, this, local->post_res_handler);
+-  }
+-  return 0;
++    if ((local->fd) && (local->fd->inode))
++        inode = local->fd->inode;
++    else if (local->loc.inode)
++        inode = local->loc.inode;
+ 
+-unwind:
+-  local->post_res_handler(frame, this);
+-  return 0;
+-}
+-
+-int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this,
+-                              shard_post_resolve_fop_handler_t post_res_handler,
+-                              shard_internal_dir_type_t type) {
+-  int ret = -1;
+-  dict_t *xattr_req = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
+-  uuid_t *gfid = NULL;
+-  loc_t *loc = NULL;
+-  gf_boolean_t free_gfid = _gf_true;
+-
+-  local = frame->local;
+-  priv = this->private;
+-  local->post_res_handler = post_res_handler;
+-
+-  gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+-  if (!gfid)
+-    goto err;
+-
+-  xattr_req = dict_new();
+-  if (!xattr_req) {
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto err;
+-  }
+-
+-  switch (type) {
+-  case SHARD_INTERNAL_DIR_DOT_SHARD:
+-    gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+-    loc = &local->dot_shard_loc;
+-    break;
+-  case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+-    gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+-    loc = &local->dot_shard_rm_loc;
+-    break;
+-  default:
+-    bzero(*gfid, sizeof(uuid_t));
+-    break;
+-  }
+-
+-  ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to set gfid of %s into dict",
+-           shard_internal_dir_string(type));
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto err;
+-  } else {
+-    free_gfid = _gf_false;
+-  }
+-
+-  STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type,
+-                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc,
+-                    xattr_req);
+-
+-  dict_unref(xattr_req);
+-  return 0;
+-
+-err:
+-  if (xattr_req)
+-    dict_unref(xattr_req);
+-  if (free_gfid)
+-    GF_FREE(gfid);
+-  post_res_handler(frame, this);
+-  return 0;
+-}
+-
+-static void shard_inode_ctx_update(inode_t *inode, xlator_t *this,
+-                                   dict_t *xdata, struct iatt *buf) {
+-  int ret = 0;
+-  uint64_t size = 0;
+-  void *bsize = NULL;
+-
+-  if (shard_inode_ctx_get_block_size(inode, this, &size)) {
+-    /* Fresh lookup */
+-    ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+-    if (!ret)
+-      size = ntoh64(*((uint64_t *)bsize));
+-    /* If the file is sharded, set its block size, otherwise just
+-     * set 0.
+-     */
+-
+-    shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE);
+-  }
+-  /* If the file is sharded, also set the remaining attributes,
+-   * except for ia_size and ia_blocks.
+-   */
+-  if (size) {
+-    shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
+-    (void)shard_inode_ctx_invalidate(inode, this, buf);
+-  }
+-}
+-
+-int shard_delete_shards(void *opaque);
+-
+-int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data);
+-
+-int shard_start_background_deletion(xlator_t *this) {
+-  int ret = 0;
+-  gf_boolean_t i_cleanup = _gf_true;
+-  shard_priv_t *priv = NULL;
+-  call_frame_t *cleanup_frame = NULL;
+-
+-  priv = this->private;
+-
+-  LOCK(&priv->lock);
+-  {
+-    switch (priv->bg_del_state) {
+-    case SHARD_BG_DELETION_NONE:
+-      i_cleanup = _gf_true;
+-      priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+-      break;
+-    case SHARD_BG_DELETION_LAUNCHING:
+-      i_cleanup = _gf_false;
+-      break;
+-    case SHARD_BG_DELETION_IN_PROGRESS:
+-      priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+-      i_cleanup = _gf_false;
+-      break;
+-    default:
+-      break;
+-    }
+-  }
+-  UNLOCK(&priv->lock);
+-  if (!i_cleanup)
+-    return 0;
+-
+-  cleanup_frame = create_frame(this, this->ctx->pool);
+-  if (!cleanup_frame) {
+-    gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+-           "Failed to create "
+-           "new frame to delete shards");
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-
+-  set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
+-
+-  ret = synctask_new(this->ctx->env, shard_delete_shards,
+-                     shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
+-  if (ret < 0) {
+-    gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED,
+-           "failed to create task to do background "
+-           "cleanup of shards");
+-    STACK_DESTROY(cleanup_frame->root);
+-    goto err;
+-  }
+-  return 0;
++    if (op_ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, op_errno,
++               SHARD_MSG_UPDATE_FILE_SIZE_FAILED,
++               "Update to file size"
++               " xattr failed on %s",
++               uuid_utoa(inode->gfid));
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto err;
++    }
+ 
++    if (shard_modify_size_and_block_count(&local->postbuf, dict)) {
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto err;
++    }
+ err:
+-  LOCK(&priv->lock);
+-  { priv->bg_del_state = SHARD_BG_DELETION_NONE; }
+-  UNLOCK(&priv->lock);
+-  return ret;
++    local->post_update_size_handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                     int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                     struct iatt *buf, dict_t *xdata, struct iatt *postparent) {
+-  int ret = -1;
+-  shard_priv_t *priv = NULL;
+-  gf_boolean_t i_start_cleanup = _gf_false;
+-
+-  priv = this->private;
+-
+-  if (op_ret < 0)
+-    goto unwind;
+-
+-  if (IA_ISDIR(buf->ia_type))
+-    goto unwind;
+-
+-  /* Also, if the file is sharded, get the file size and block cnt xattr,
+-   * and store them in the stbuf appropriately.
+-   */
+-
+-  if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) &&
+-      frame->root->pid != GF_CLIENT_PID_GSYNCD)
+-    shard_modify_size_and_block_count(buf, xdata);
+-
+-  /* If this was a fresh lookup, there are two possibilities:
+-   * 1) If the file is sharded (indicated by the presence of block size
+-   *    xattr), store this block size, along with rdev and mode in its
+-   *    inode ctx.
+-   * 2) If the file is not sharded, store size along with rdev and mode
+-   *    (which are anyway don't cares) in inode ctx. Since @ctx_tmp is
+-   *    already initialised to all zeroes, nothing more needs to be done.
+-   */
++int
++shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p)
++{
++    int ret = -1;
++    int64_t *size_attr = NULL;
+ 
+-  (void)shard_inode_ctx_update(inode, this, xdata, buf);
++    if (!size_attr_p)
++        goto out;
+ 
+-  LOCK(&priv->lock);
+-  {
+-    if (priv->first_lookup_done == _gf_false) {
+-      priv->first_lookup_done = _gf_true;
+-      i_start_cleanup = _gf_true;
+-    }
+-  }
+-  UNLOCK(&priv->lock);
++    size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t);
++    if (!size_attr)
++        goto out;
+ 
+-  if (!i_start_cleanup)
+-    goto unwind;
++    size_attr[0] = hton64(size);
++    /* As sharding evolves, it _may_ be necessary to embed more pieces of
++     * information within the same xattr. So allocating slots for them in
++     * advance. For now, only bytes 0-63 and 128-191 which would make up the
++     * current size and block count respectively of the file are valid.
++     */
++    size_attr[2] = hton64(block_count);
+ 
+-  ret = shard_start_background_deletion(this);
+-  if (ret < 0) {
+-    LOCK(&priv->lock);
+-    { priv->first_lookup_done = _gf_false; }
+-    UNLOCK(&priv->lock);
+-  }
++    *size_attr_p = size_attr;
+ 
+-unwind:
+-  SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+-                     postparent);
+-  return 0;
++    ret = 0;
++out:
++    return ret;
+ }
+ 
+-int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                 dict_t *xattr_req) {
+-  int ret = -1;
+-  int32_t op_errno = ENOMEM;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
+-
+-  this->itable = loc->inode->table;
+-  if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) &&
+-      (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) {
+-    SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
+-  }
++int
++shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                       loc_t *loc, shard_post_update_size_fop_handler_t handler)
++{
++    int ret = -1;
++    int64_t *size_attr = NULL;
++    int64_t delta_blocks = 0;
++    inode_t *inode = NULL;
++    shard_local_t *local = NULL;
++    dict_t *xattr_req = NULL;
+ 
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++    local = frame->local;
++    local->post_update_size_handler = handler;
+ 
+-  frame->local = local;
++    xattr_req = dict_new();
++    if (!xattr_req) {
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto out;
++    }
+ 
+-  loc_copy(&local->loc, loc);
++    if (fd)
++        inode = fd->inode;
++    else
++        inode = loc->inode;
+ 
+-  local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
++    /* If both size and block count have not changed, then skip the xattrop.
++     */
++    delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
++    if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
++        goto out;
++    }
+ 
+-  if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) {
+-    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
++    ret = shard_set_size_attrs(local->delta_size + local->hole_size,
++                               delta_blocks, &size_attr);
+     if (ret) {
+-      gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-             "Failed to set dict"
+-             " value: key:%s for path %s",
+-             GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
+-      goto err;
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
++               "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto out;
+     }
+-  }
+ 
+-  if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+-    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
++    ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4);
+     if (ret) {
+-      gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-             "Failed to set dict value: key:%s for path %s.",
+-             GF_XATTR_SHARD_FILE_SIZE, loc->path);
+-      goto err;
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set key %s into dict. gfid=%s",
++               GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid));
++        GF_FREE(size_attr);
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto out;
+     }
+-  }
+ 
+-  if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY)))
+-    dict_del(xattr_req, GF_CONTENT_KEY);
++    if (fd)
++        STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->fxattrop, fd,
++                   GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
++    else
++        STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->xattrop, loc,
++                   GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+ 
+-  STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno);
+-  return 0;
+-}
++    dict_unref(xattr_req);
++    return 0;
+ 
+-int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie,
+-                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+-                               inode_t *inode, struct iatt *buf, dict_t *xdata,
+-                               struct iatt *postparent) {
+-  int ret = -1;
+-  int32_t mask = SHARD_INODE_WRITE_MASK;
+-  shard_local_t *local = NULL;
+-  shard_inode_ctx_t ctx = {
+-      0,
+-  };
+-
+-  local = frame->local;
+-
+-  if (op_ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno,
+-           SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file"
+-                                              " failed : %s",
+-           loc_gfid_utoa(&(local->loc)));
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto unwind;
+-  }
++out:
++    if (xattr_req)
++        dict_unref(xattr_req);
++    handler(frame, this);
++    return 0;
++}
+ 
+-  local->prebuf = *buf;
+-  if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+-    local->op_ret = -1;
+-    local->op_errno = EINVAL;
+-    goto unwind;
+-  }
+-
+-  if (shard_inode_ctx_get_all(inode, this, &ctx))
+-    mask = SHARD_ALL_MASK;
+-
+-  ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0,
+-                            (mask | SHARD_MASK_REFRESH_RESET));
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0,
+-           "Failed to set inode"
+-           " write params into inode ctx for %s",
+-           uuid_utoa(buf->ia_gfid));
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto unwind;
+-  }
++static inode_t *
++shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode,
++                              struct iatt *buf, shard_internal_dir_type_t type)
++{
++    inode_t *linked_inode = NULL;
++    shard_priv_t *priv = NULL;
++    char *bname = NULL;
++    inode_t **priv_inode = NULL;
++    inode_t *parent = NULL;
++
++    priv = THIS->private;
++
++    switch (type) {
++        case SHARD_INTERNAL_DIR_DOT_SHARD:
++            bname = GF_SHARD_DIR;
++            priv_inode = &priv->dot_shard_inode;
++            parent = inode->table->root;
++            break;
++        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++            bname = GF_SHARD_REMOVE_ME_DIR;
++            priv_inode = &priv->dot_shard_rm_inode;
++            parent = priv->dot_shard_inode;
++            break;
++        default:
++            break;
++    }
+ 
+-unwind:
+-  local->handler(frame, this);
+-  return 0;
+-}
+-
+-int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                           shard_post_fop_handler_t handler) {
+-  int ret = -1;
+-  shard_local_t *local = NULL;
+-  dict_t *xattr_req = NULL;
+-  gf_boolean_t need_refresh = _gf_false;
+-
+-  local = frame->local;
+-  local->handler = handler;
+-
+-  ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
+-                                             &need_refresh);
+-  /* By this time, inode ctx should have been created either in create,
+-   * mknod, readdirp or lookup. If not it is a bug!
+-   */
+-  if ((ret == 0) && (need_refresh == _gf_false)) {
+-    gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s"
+-                                "Serving prebuf off the inode ctx cache",
+-                 uuid_utoa(loc->gfid));
+-    goto out;
+-  }
+-
+-  xattr_req = dict_new();
+-  if (!xattr_req) {
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto out;
+-  }
++    linked_inode = inode_link(inode, parent, bname, buf);
++    inode_lookup(linked_inode);
++    *priv_inode = linked_inode;
++    return linked_inode;
++}
+ 
+-  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
++int
++shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
++                               xlator_t *this, int32_t op_ret, int32_t op_errno,
++                               inode_t *inode, struct iatt *buf, dict_t *xdata,
++                               struct iatt *postparent)
++{
++    shard_local_t *local = NULL;
++    inode_t *linked_inode = NULL;
++    shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+ 
+-  STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
++    local = frame->local;
+ 
+-  dict_unref(xattr_req);
+-  return 0;
++    if (op_ret) {
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto out;
++    }
+ 
++    /* To-Do: Fix refcount increment per call to
++     * shard_link_internal_dir_inode().
++     */
++    linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++    shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
+ out:
+-  if (xattr_req)
+-    dict_unref(xattr_req);
+-  handler(frame, this);
+-  return 0;
++    shard_common_resolve_shards(frame, this, local->post_res_handler);
++    return 0;
+ }
+ 
+-int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
++int
++shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this,
++                           shard_internal_dir_type_t type)
++{
++    loc_t loc = {
++        0,
++    };
++    inode_t *inode = NULL;
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++    uuid_t gfid = {
++        0,
++    };
+ 
+-  local = frame->local;
++    local = frame->local;
++    priv = this->private;
++
++    switch (type) {
++        case SHARD_INTERNAL_DIR_DOT_SHARD:
++            gf_uuid_copy(gfid, priv->dot_shard_gfid);
++            break;
++        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++            gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
++            break;
++        default:
++            break;
++    }
+ 
+-  if (local->op_ret >= 0)
+-    shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0,
+-                        SHARD_LOOKUP_MASK);
++    inode = inode_find(this->itable, gfid);
+ 
+-  SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
+-                     &local->prebuf, local->xattr_rsp);
+-  return 0;
+-}
++    if (!shard_inode_ctx_needs_lookup(inode, this)) {
++        local->op_ret = 0;
++        goto out;
++    }
+ 
+-int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
++    /* Plain assignment because the ref is already taken above through
++     * call to inode_find()
++     */
++    loc.inode = inode;
++    gf_uuid_copy(loc.gfid, gfid);
+ 
+-  local = frame->local;
++    STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc,
++                      NULL);
++    loc_wipe(&loc);
+ 
+-  if (local->op_ret >= 0)
+-    shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0,
+-                        SHARD_LOOKUP_MASK);
++    return 0;
+ 
+-  SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
+-                     &local->prebuf, local->xattr_rsp);
+-  return 0;
++out:
++    shard_common_resolve_shards(frame, this, local->post_res_handler);
++    return 0;
+ }
+ 
+-int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                          int32_t op_ret, int32_t op_errno, struct iatt *buf,
+-                          dict_t *xdata) {
+-  inode_t *inode = NULL;
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
++int
++shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                              int32_t op_ret, int32_t op_errno, inode_t *inode,
++                              struct iatt *buf, dict_t *xdata,
++                              struct iatt *postparent)
++{
++    inode_t *link_inode = NULL;
++    shard_local_t *local = NULL;
++    shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+ 
+-  if (op_ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED,
+-           "stat failed: %s", local->fd ? uuid_utoa(local->fd->inode->gfid)
+-                                        : uuid_utoa((local->loc.inode)->gfid));
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto unwind;
+-  }
++    local = frame->local;
+ 
+-  local->prebuf = *buf;
+-  if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+-    local->op_ret = -1;
+-    local->op_errno = EINVAL;
+-    goto unwind;
+-  }
+-  local->xattr_rsp = dict_ref(xdata);
++    if (op_ret) {
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto unwind;
++    }
+ 
+-  if (local->loc.inode)
+-    inode = local->loc.inode;
+-  else
+-    inode = local->fd->inode;
++    if (!IA_ISDIR(buf->ia_type)) {
++        gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR,
++               "%s already exists and "
++               "is not a directory. Please remove it from all bricks "
++               "and try again",
++               shard_internal_dir_string(type));
++        local->op_ret = -1;
++        local->op_errno = EIO;
++        goto unwind;
++    }
+ 
+-  shard_inode_ctx_invalidate(inode, this, &local->prebuf);
++    link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++    if (link_inode != inode) {
++        shard_refresh_internal_dir(frame, this, type);
++    } else {
++        shard_inode_ctx_mark_dir_refreshed(link_inode, this);
++        shard_common_resolve_shards(frame, this, local->post_res_handler);
++    }
++    return 0;
+ 
+ unwind:
+-  local->handler(frame, this);
+-  return 0;
++    local->post_res_handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
++int
++shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this,
++                          shard_post_resolve_fop_handler_t post_res_handler,
++                          shard_internal_dir_type_t type)
++{
++    int ret = -1;
++    dict_t *xattr_req = NULL;
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++    uuid_t *gfid = NULL;
++    loc_t *loc = NULL;
++    gf_boolean_t free_gfid = _gf_true;
+ 
+-  if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+-    STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->stat, loc, xdata);
+-    return 0;
+-  }
++    local = frame->local;
++    priv = this->private;
++    local->post_res_handler = post_res_handler;
+ 
+-  ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size from inode ctx of %s",
+-           uuid_utoa(loc->inode->gfid));
+-    goto err;
+-  }
++    gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
++    if (!gfid)
++        goto err;
+ 
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->stat, loc, xdata);
+-    return 0;
+-  }
++    xattr_req = dict_new();
++    if (!xattr_req) {
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto err;
++    }
+ 
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++    switch (type) {
++        case SHARD_INTERNAL_DIR_DOT_SHARD:
++            gf_uuid_copy(*gfid, priv->dot_shard_gfid);
++            loc = &local->dot_shard_loc;
++            break;
++        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++            gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
++            loc = &local->dot_shard_rm_loc;
++            break;
++        default:
++            bzero(*gfid, sizeof(uuid_t));
++            break;
++    }
+ 
+-  frame->local = local;
++    ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set gfid of %s into dict",
++               shard_internal_dir_string(type));
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto err;
++    } else {
++        free_gfid = _gf_false;
++    }
+ 
+-  local->handler = shard_post_stat_handler;
+-  loc_copy(&local->loc, loc);
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
++    STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc,
++                      xattr_req);
+ 
+-  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+-                                  local, err);
++    dict_unref(xattr_req);
++    return 0;
+ 
+-  STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
+-  return 0;
+ err:
+-  shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM);
+-  return 0;
++    if (xattr_req)
++        dict_unref(xattr_req);
++    if (free_gfid)
++        GF_FREE(gfid);
++    post_res_handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
++static void
++shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata,
++                       struct iatt *buf)
++{
++    int ret = 0;
++    uint64_t size = 0;
++    void *bsize = NULL;
++
++    if (shard_inode_ctx_get_block_size(inode, this, &size)) {
++        /* Fresh lookup */
++        ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
++        if (!ret)
++            size = ntoh64(*((uint64_t *)bsize));
++        /* If the file is sharded, set its block size, otherwise just
++         * set 0.
++         */
++
++        shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE);
++    }
++    /* If the file is sharded, also set the remaining attributes,
++     * except for ia_size and ia_blocks.
++     */
++    if (size) {
++        shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
++        (void)shard_inode_ctx_invalidate(inode, this, buf);
++    }
++}
+ 
+-  if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+-    STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fstat, fd, xdata);
+-    return 0;
+-  }
++int
++shard_delete_shards(void *opaque);
+ 
+-  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size from inode ctx of %s",
+-           uuid_utoa(fd->inode->gfid));
+-    goto err;
+-  }
++int
++shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data);
+ 
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fstat, fd, xdata);
+-    return 0;
+-  }
++int
++shard_start_background_deletion(xlator_t *this)
++{
++    int ret = 0;
++    gf_boolean_t i_cleanup = _gf_true;
++    shard_priv_t *priv = NULL;
++    call_frame_t *cleanup_frame = NULL;
+ 
+-  if (!this->itable)
+-    this->itable = fd->inode->table;
++    priv = this->private;
+ 
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++    LOCK(&priv->lock);
++    {
++        switch (priv->bg_del_state) {
++            case SHARD_BG_DELETION_NONE:
++                i_cleanup = _gf_true;
++                priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
++                break;
++            case SHARD_BG_DELETION_LAUNCHING:
++                i_cleanup = _gf_false;
++                break;
++            case SHARD_BG_DELETION_IN_PROGRESS:
++                priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
++                i_cleanup = _gf_false;
++                break;
++            default:
++                break;
++        }
++    }
++    UNLOCK(&priv->lock);
++    if (!i_cleanup)
++        return 0;
+ 
+-  frame->local = local;
++    cleanup_frame = create_frame(this, this->ctx->pool);
++    if (!cleanup_frame) {
++        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++               "Failed to create "
++               "new frame to delete shards");
++        ret = -ENOMEM;
++        goto err;
++    }
+ 
+-  local->handler = shard_post_fstat_handler;
+-  local->fd = fd_ref(fd);
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
++    set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
+ 
+-  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+-                                  local, err);
++    ret = synctask_new(this->ctx->env, shard_delete_shards,
++                       shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_WARNING, errno,
++               SHARD_MSG_SHARDS_DELETION_FAILED,
++               "failed to create task to do background "
++               "cleanup of shards");
++        STACK_DESTROY(cleanup_frame->root);
++        goto err;
++    }
++    return 0;
+ 
+-  STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
+-  return 0;
+ err:
+-  shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM);
+-  return 0;
++    LOCK(&priv->lock);
++    {
++        priv->bg_del_state = SHARD_BG_DELETION_NONE;
++    }
++    UNLOCK(&priv->lock);
++    return ret;
+ }
+ 
+-int shard_post_update_size_truncate_handler(call_frame_t *frame,
+-                                            xlator_t *this) {
+-  shard_local_t *local = NULL;
++int
++shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                 int32_t op_ret, int32_t op_errno, inode_t *inode,
++                 struct iatt *buf, dict_t *xdata, struct iatt *postparent)
++{
++    int ret = -1;
++    shard_priv_t *priv = NULL;
++    gf_boolean_t i_start_cleanup = _gf_false;
+ 
+-  local = frame->local;
++    priv = this->private;
+ 
+-  if (local->fop == GF_FOP_TRUNCATE)
+-    SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+-                       &local->prebuf, &local->postbuf, NULL);
+-  else
+-    SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno,
+-                       &local->prebuf, &local->postbuf, NULL);
+-  return 0;
+-}
++    if (op_ret < 0)
++        goto unwind;
+ 
+-int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie,
+-                                  xlator_t *this, int32_t op_ret,
+-                                  int32_t op_errno, struct iatt *prebuf,
+-                                  struct iatt *postbuf, dict_t *xdata) {
+-  inode_t *inode = NULL;
+-  int64_t delta_blocks = 0;
+-  shard_local_t *local = NULL;
++    if (IA_ISDIR(buf->ia_type))
++        goto unwind;
+ 
+-  local = frame->local;
++    /* Also, if the file is sharded, get the file size and block cnt xattr,
++     * and store them in the stbuf appropriately.
++     */
+ 
+-  SHARD_UNSET_ROOT_FS_ID(frame, local);
++    if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) &&
++        frame->root->pid != GF_CLIENT_PID_GSYNCD)
++        shard_modify_size_and_block_count(buf, xdata);
++
++    /* If this was a fresh lookup, there are two possibilities:
++     * 1) If the file is sharded (indicated by the presence of block size
++     *    xattr), store this block size, along with rdev and mode in its
++     *    inode ctx.
++     * 2) If the file is not sharded, store size along with rdev and mode
++     *    (which are anyway don't cares) in inode ctx. Since @ctx_tmp is
++     *    already initialised to all zeroes, nothing more needs to be done.
++     */
+ 
+-  inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode;
+-  if (op_ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno,
+-           SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last"
+-                                                 " shard failed : %s",
+-           uuid_utoa(inode->gfid));
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto err;
+-  }
+-
+-  local->postbuf.ia_size = local->offset;
+-  /* Let the delta be negative. We want xattrop to do subtraction */
+-  local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+-  delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
+-                               postbuf->ia_blocks - prebuf->ia_blocks);
+-  GF_ASSERT(delta_blocks <= 0);
+-  local->postbuf.ia_blocks += delta_blocks;
+-  local->hole_size = 0;
+-
+-  shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
+-  shard_update_file_size(frame, this, NULL, &local->loc,
+-                         shard_post_update_size_truncate_handler);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                              local->op_errno);
+-  return 0;
+-}
+-
+-int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this,
+-                              inode_t *inode) {
+-  size_t last_shard_size_after = 0;
+-  loc_t loc = {
+-      0,
+-  };
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  /* A NULL inode could be due to the fact that the last shard which
+-   * needs to be truncated does not exist due to it lying in a hole
+-   * region. So the only thing left to do in that case would be an
+-   * update to file size xattr.
+-   */
+-  if (!inode) {
+-    gf_msg_debug(this->name, 0,
+-                 "Last shard to be truncated absent in backend:%" PRIu64
+-                 " of gfid: %s. Directly proceeding to update file size",
+-                 local->first_block, uuid_utoa(local->loc.inode->gfid));
+-    shard_update_file_size(frame, this, NULL, &local->loc,
+-                           shard_post_update_size_truncate_handler);
+-    return 0;
+-  }
++    (void)shard_inode_ctx_update(inode, this, xdata, buf);
+ 
+-  SHARD_SET_ROOT_FS_ID(frame, local);
++    LOCK(&priv->lock);
++    {
++        if (priv->first_lookup_done == _gf_false) {
++            priv->first_lookup_done = _gf_true;
++            i_start_cleanup = _gf_true;
++        }
++    }
++    UNLOCK(&priv->lock);
+ 
+-  loc.inode = inode_ref(inode);
+-  gf_uuid_copy(loc.gfid, inode->gfid);
++    if (!i_start_cleanup)
++        goto unwind;
+ 
+-  last_shard_size_after = (local->offset % local->block_size);
++    ret = shard_start_background_deletion(this);
++    if (ret < 0) {
++        LOCK(&priv->lock);
++        {
++            priv->first_lookup_done = _gf_false;
++        }
++        UNLOCK(&priv->lock);
++    }
+ 
+-  STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after,
+-             NULL);
+-  loc_wipe(&loc);
+-  return 0;
++unwind:
++    SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
++                       postparent);
++    return 0;
+ }
+ 
+-void shard_unlink_block_inode(shard_local_t *local, int shard_block_num);
++int
++shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
++{
++    int ret = -1;
++    int32_t op_errno = ENOMEM;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
+ 
+-int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                            int32_t op_ret, int32_t op_errno,
+-                            struct iatt *preparent, struct iatt *postparent,
+-                            dict_t *xdata) {
+-  int ret = 0;
+-  int call_count = 0;
+-  int shard_block_num = (long)cookie;
+-  uint64_t block_count = 0;
+-  shard_local_t *local = NULL;
++    this->itable = loc->inode->table;
++    if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) &&
++        (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) {
++        SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
++    }
+ 
+-  local = frame->local;
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
+ 
+-  if (op_ret < 0) {
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto done;
+-  }
+-  ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
+-  if (!ret) {
+-    GF_ATOMIC_SUB(local->delta_blocks, block_count);
+-  } else {
+-    /* dict_get failed possibly due to a heterogeneous cluster? */
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to get key %s from dict during truncate of gfid %s",
+-           GF_GET_FILE_BLOCK_COUNT,
+-           uuid_utoa(local->resolver_base_inode->gfid));
+-  }
+-
+-  shard_unlink_block_inode(local, shard_block_num);
+-done:
+-  call_count = shard_call_count_return(frame);
+-  if (call_count == 0) {
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
+-    shard_truncate_last_shard(frame, this, local->inode_list[0]);
+-  }
+-  return 0;
+-}
+-
+-int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) {
+-  int i = 1;
+-  int ret = -1;
+-  int call_count = 0;
+-  uint32_t cur_block = 0;
+-  uint32_t last_block = 0;
+-  char path[PATH_MAX] = {
+-      0,
+-  };
+-  char *bname = NULL;
+-  loc_t loc = {
+-      0,
+-  };
+-  gf_boolean_t wind_failed = _gf_false;
+-  shard_local_t *local = NULL;
+-  shard_priv_t *priv = NULL;
+-  dict_t *xdata_req = NULL;
+-
+-  local = frame->local;
+-  priv = this->private;
+-
+-  cur_block = local->first_block + 1;
+-  last_block = local->last_block;
+-
+-  /* Determine call count */
+-  for (i = 1; i < local->num_blocks; i++) {
+-    if (!local->inode_list[i])
+-      continue;
+-    call_count++;
+-  }
+-
+-  if (!call_count) {
+-    /* Call count = 0 implies that all of the shards that need to be
+-     * unlinked do not exist. So shard xlator would now proceed to
+-     * do the final truncate + size updates.
+-     */
+-    gf_msg_debug(this->name, 0, "Shards to be unlinked as part of "
+-                                "truncate absent in backend: %s. Directly "
+-                                "proceeding to update file size",
+-                 uuid_utoa(inode->gfid));
+-    local->postbuf.ia_size = local->offset;
+-    local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+-    local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+-    GF_ATOMIC_INIT(local->delta_blocks, 0);
+-    local->hole_size = 0;
+-    shard_update_file_size(frame, this, local->fd, &local->loc,
+-                           shard_post_update_size_truncate_handler);
+-    return 0;
+-  }
++    frame->local = local;
+ 
+-  local->call_count = call_count;
+-  i = 1;
+-  xdata_req = dict_new();
+-  if (!xdata_req) {
+-    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-    return 0;
+-  }
+-  ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to set key %s into dict during truncate of %s",
+-           GF_GET_FILE_BLOCK_COUNT,
+-           uuid_utoa(local->resolver_base_inode->gfid));
+-    dict_unref(xdata_req);
+-    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-    return 0;
+-  }
++    loc_copy(&local->loc, loc);
+ 
+-  SHARD_SET_ROOT_FS_ID(frame, local);
+-  while (cur_block <= last_block) {
+-    if (!local->inode_list[i]) {
+-      cur_block++;
+-      i++;
+-      continue;
+-    }
+-    if (wind_failed) {
+-      shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM,
+-                              NULL, NULL, NULL);
+-      goto next;
+-    }
++    local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
++    if (!local->xattr_req)
++        goto err;
+ 
+-    shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
+-    bname = strrchr(path, '/') + 1;
+-    loc.parent = inode_ref(priv->dot_shard_inode);
+-    ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-    if (ret < 0) {
+-      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-             "Inode path failed"
+-             " on %s. Base file gfid = %s",
+-             bname, uuid_utoa(inode->gfid));
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      loc_wipe(&loc);
+-      wind_failed = _gf_true;
+-      shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM,
+-                              NULL, NULL, NULL);
+-      goto next;
++    if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) {
++        ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++                   "Failed to set dict"
++                   " value: key:%s for path %s",
++                   GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
++            goto err;
++        }
+     }
+-    loc.name = strrchr(loc.path, '/');
+-    if (loc.name)
+-      loc.name++;
+-    loc.inode = inode_ref(local->inode_list[i]);
+ 
+-    STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc,
+-                      0, xdata_req);
+-    loc_wipe(&loc);
+-  next:
+-    i++;
+-    cur_block++;
+-    if (!--call_count)
+-      break;
+-  }
+-  dict_unref(xdata_req);
+-  return 0;
+-}
+-
+-int shard_truncate_do(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
++    if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++        ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE,
++                              8 * 4);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++                   "Failed to set dict value: key:%s for path %s.",
++                   GF_XATTR_SHARD_FILE_SIZE, loc->path);
++            goto err;
++        }
++    }
+ 
+-  local = frame->local;
++    if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY)))
++        dict_del(xattr_req, GF_CONTENT_KEY);
+ 
+-  if (local->num_blocks == 1) {
+-    /* This means that there are no shards to be unlinked.
+-     * The fop boils down to truncating the last shard, updating
+-     * the size and unwinding.
+-     */
+-    shard_truncate_last_shard(frame, this, local->inode_list[0]);
++    STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req);
++    return 0;
++err:
++    shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno);
+     return 0;
+-  } else {
+-    shard_truncate_htol(frame, this, local->loc.inode);
+-  }
+-  return 0;
+ }
+ 
+-int shard_post_lookup_shards_truncate_handler(call_frame_t *frame,
+-                                              xlator_t *this) {
+-  shard_local_t *local = NULL;
++int
++shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno, inode_t *inode,
++                           struct iatt *buf, dict_t *xdata,
++                           struct iatt *postparent)
++{
++    int ret = -1;
++    int32_t mask = SHARD_INODE_WRITE_MASK;
++    shard_local_t *local = NULL;
++    shard_inode_ctx_t ctx = {
++        0,
++    };
+ 
+-  local = frame->local;
++    local = frame->local;
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
+-  }
+-
+-  shard_truncate_do(frame, this);
+-  return 0;
+-}
+-
+-void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
+-                            struct iatt *buf) {
+-  int list_index = 0;
+-  char block_bname[256] = {
+-      0,
+-  };
+-  uuid_t gfid = {
+-      0,
+-  };
+-  inode_t *linked_inode = NULL;
+-  xlator_t *this = NULL;
+-  inode_t *fsync_inode = NULL;
+-  shard_priv_t *priv = NULL;
+-  inode_t *base_inode = NULL;
+-
+-  this = THIS;
+-  priv = this->private;
+-  if (local->loc.inode) {
+-    gf_uuid_copy(gfid, local->loc.inode->gfid);
+-    base_inode = local->loc.inode;
+-  } else if (local->resolver_base_inode) {
+-    gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+-    base_inode = local->resolver_base_inode;
+-  } else {
+-    gf_uuid_copy(gfid, local->base_gfid);
+-  }
+-
+-  shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
+-
+-  shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
+-  linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf);
+-  inode_lookup(linked_inode);
+-  list_index = block_num - local->first_block;
+-  local->inode_list[list_index] = linked_inode;
+-
+-  LOCK(&priv->lock);
+-  {
+-    fsync_inode = __shard_update_shards_inode_list(linked_inode, this,
+-                                                   base_inode, block_num, gfid);
+-  }
+-  UNLOCK(&priv->lock);
+-  if (fsync_inode)
+-    shard_initiate_evicted_inode_fsync(this, fsync_inode);
+-}
+-
+-int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie,
+-                                   xlator_t *this, int32_t op_ret,
+-                                   int32_t op_errno, inode_t *inode,
+-                                   struct iatt *buf, dict_t *xdata,
+-                                   struct iatt *postparent) {
+-  int call_count = 0;
+-  int shard_block_num = (long)cookie;
+-  uuid_t gfid = {
+-      0,
+-  };
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-  if (local->resolver_base_inode)
+-    gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+-  else
+-    gf_uuid_copy(gfid, local->base_gfid);
+-
+-  if (op_ret < 0) {
+-    /* Ignore absence of shards in the backend in truncate fop. */
+-    switch (local->fop) {
+-    case GF_FOP_TRUNCATE:
+-    case GF_FOP_FTRUNCATE:
+-    case GF_FOP_RENAME:
+-    case GF_FOP_UNLINK:
+-      if (op_errno == ENOENT)
+-        goto done;
+-      break;
+-    case GF_FOP_WRITE:
+-    case GF_FOP_READ:
+-    case GF_FOP_ZEROFILL:
+-    case GF_FOP_DISCARD:
+-    case GF_FOP_FALLOCATE:
+-      if ((!local->first_lookup_done) && (op_errno == ENOENT)) {
+-        LOCK(&frame->lock);
+-        { local->create_count++; }
+-        UNLOCK(&frame->lock);
+-        goto done;
+-      }
+-      break;
+-    default:
+-      break;
++    if (op_ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, op_errno,
++               SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
++               "Lookup on base file"
++               " failed : %s",
++               loc_gfid_utoa(&(local->loc)));
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto unwind;
+     }
+ 
+-    /* else */
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED,
+-           "Lookup on shard %d "
+-           "failed. Base file gfid = %s",
+-           shard_block_num, uuid_utoa(gfid));
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto done;
+-  }
+-
+-  shard_link_block_inode(local, shard_block_num, inode, buf);
+-
+-done:
+-  if (local->lookup_shards_barriered) {
+-    syncbarrier_wake(&local->barrier);
+-    return 0;
+-  } else {
+-    call_count = shard_call_count_return(frame);
+-    if (call_count == 0) {
+-      if (!local->first_lookup_done)
+-        local->first_lookup_done = _gf_true;
+-      local->pls_fop_handler(frame, this);
++    local->prebuf = *buf;
++    if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++        local->op_ret = -1;
++        local->op_errno = EINVAL;
++        goto unwind;
+     }
+-  }
+-  return 0;
+-}
+ 
+-dict_t *shard_create_gfid_dict(dict_t *dict) {
+-  int ret = 0;
+-  dict_t *new = NULL;
+-  unsigned char *gfid = NULL;
++    if (shard_inode_ctx_get_all(inode, this, &ctx))
++        mask = SHARD_ALL_MASK;
+ 
+-  new = dict_copy_with_ref(dict, NULL);
+-  if (!new)
+-    return NULL;
++    ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0,
++                              (mask | SHARD_MASK_REFRESH_RESET));
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0,
++               "Failed to set inode"
++               " write params into inode ctx for %s",
++               uuid_utoa(buf->ia_gfid));
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto unwind;
++    }
+ 
+-  gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
+-  if (!gfid) {
+-    ret = -1;
+-    goto out;
+-  }
++unwind:
++    local->handler(frame, this);
++    return 0;
++}
+ 
+-  gf_uuid_generate(gfid);
++int
++shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                       shard_post_fop_handler_t handler)
++{
++    int ret = -1;
++    shard_local_t *local = NULL;
++    dict_t *xattr_req = NULL;
++    gf_boolean_t need_refresh = _gf_false;
+ 
+-  ret = dict_set_gfuuid(new, "gfid-req", gfid, false);
++    local = frame->local;
++    local->handler = handler;
+ 
+-out:
+-  if (ret) {
+-    dict_unref(new);
+-    new = NULL;
+-    GF_FREE(gfid);
+-  }
+-
+-  return new;
+-}
+-
+-int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this,
+-                               inode_t *inode,
+-                               shard_post_lookup_shards_fop_handler_t handler) {
+-  int i = 0;
+-  int ret = 0;
+-  int count = 0;
+-  int call_count = 0;
+-  int32_t shard_idx_iter = 0;
+-  int last_block = 0;
+-  char path[PATH_MAX] = {
+-      0,
+-  };
+-  char *bname = NULL;
+-  uuid_t gfid = {
+-      0,
+-  };
+-  loc_t loc = {
+-      0,
+-  };
+-  shard_local_t *local = NULL;
+-  shard_priv_t *priv = NULL;
+-  gf_boolean_t wind_failed = _gf_false;
+-  dict_t *xattr_req = NULL;
+-
+-  priv = this->private;
+-  local = frame->local;
+-  count = call_count = local->call_count;
+-  shard_idx_iter = local->first_block;
+-  last_block = local->last_block;
+-  local->pls_fop_handler = handler;
+-  if (local->lookup_shards_barriered)
+-    local->barrier.waitfor = local->call_count;
+-
+-  if (inode)
+-    gf_uuid_copy(gfid, inode->gfid);
+-  else
+-    gf_uuid_copy(gfid, local->base_gfid);
+-
+-  while (shard_idx_iter <= last_block) {
+-    if (local->inode_list[i]) {
+-      i++;
+-      shard_idx_iter++;
+-      continue;
+-    }
+-
+-    if (wind_failed) {
+-      shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
+-                                     -1, ENOMEM, NULL, NULL, NULL, NULL);
+-      goto next;
+-    }
+-
+-    shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+-
+-    bname = strrchr(path, '/') + 1;
+-    loc.inode = inode_new(this->itable);
+-    loc.parent = inode_ref(priv->dot_shard_inode);
+-    gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
+-    ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-    if (ret < 0 || !(loc.inode)) {
+-      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-             "Inode path failed"
+-             " on %s, base file gfid = %s",
+-             bname, uuid_utoa(gfid));
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      loc_wipe(&loc);
+-      wind_failed = _gf_true;
+-      shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
+-                                     -1, ENOMEM, NULL, NULL, NULL, NULL);
+-      goto next;
++    ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
++                                               &need_refresh);
++    /* By this time, inode ctx should have been created either in create,
++     * mknod, readdirp or lookup. If not it is a bug!
++     */
++    if ((ret == 0) && (need_refresh == _gf_false)) {
++        gf_msg_debug(this->name, 0,
++                     "Skipping lookup on base file: %s"
++                     "Serving prebuf off the inode ctx cache",
++                     uuid_utoa(loc->gfid));
++        goto out;
+     }
+ 
+-    loc.name = strrchr(loc.path, '/');
+-    if (loc.name)
+-      loc.name++;
+-
+-    xattr_req = shard_create_gfid_dict(local->xattr_req);
++    xattr_req = dict_new();
+     if (!xattr_req) {
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      wind_failed = _gf_true;
+-      loc_wipe(&loc);
+-      shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this,
+-                                     -1, ENOMEM, NULL, NULL, NULL, NULL);
+-      goto next;
+-    }
+-
+-    STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk,
+-                      (void *)(long)shard_idx_iter, FIRST_CHILD(this),
+-                      FIRST_CHILD(this)->fops->lookup, &loc, xattr_req);
+-    loc_wipe(&loc);
+-    dict_unref(xattr_req);
+-  next:
+-    shard_idx_iter++;
+-    i++;
+-
+-    if (!--call_count)
+-      break;
+-  }
+-  if (local->lookup_shards_barriered) {
+-    syncbarrier_wait(&local->barrier, count);
+-    local->pls_fop_handler(frame, this);
+-  }
+-  return 0;
+-}
+-
+-int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  if (local->op_ret < 0) {
+-    if (local->op_errno == ENOENT) {
+-      /* If lookup on /.shard fails with ENOENT, it means that
+-       * the file was 0-byte in size but truncated sometime in
+-       * the past to a higher size which is reflected in the
+-       * size xattr, and now being truncated to a lower size.
+-       * In this case, the only thing that needs to be done is
+-       * to update the size xattr of the file and unwind.
+-       */
+-      local->first_block = local->last_block = 0;
+-      local->num_blocks = 1;
+-      local->call_count = 0;
+-      local->op_ret = 0;
+-      local->postbuf.ia_size = local->offset;
+-      shard_update_file_size(frame, this, local->fd, &local->loc,
+-                             shard_post_update_size_truncate_handler);
+-      return 0;
+-    } else {
+-      shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                  local->op_errno);
+-      return 0;
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto out;
+     }
+-  }
+ 
+-  if (!local->call_count)
+-    shard_truncate_do(frame, this);
+-  else
+-    shard_common_lookup_shards(frame, this, local->loc.inode,
+-                               shard_post_lookup_shards_truncate_handler);
+-
+-  return 0;
+-}
+-
+-int shard_truncate_begin(call_frame_t *frame, xlator_t *this) {
+-  int ret = 0;
+-  shard_local_t *local = NULL;
+-  shard_priv_t *priv = NULL;
+-
+-  priv = this->private;
+-  local = frame->local;
+-
+-  /* First participant block here is the lowest numbered block that would
+-   * hold the last byte of the file post successful truncation.
+-   * Last participant block is the block that contains the last byte in
+-   * the current state of the file.
+-   * If (first block == last_block):
+-   *         then that means that the file only needs truncation of the
+-   *         first (or last since both are same) block.
+-   * Else
+-   *         if (new_size % block_size == 0)
+-   *                 then that means there is no truncate to be done with
+-   *                 only shards from first_block + 1 through the last
+-   *                 block needing to be unlinked.
+-   *         else
+-   *                 both truncate of the first block and unlink of the
+-   *                 remaining shards until end of file is required.
+-   */
+-  local->first_block =
+-      (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1,
+-                                                  local->block_size);
+-  local->last_block =
+-      get_highest_block(0, local->prebuf.ia_size, local->block_size);
+-
+-  local->num_blocks = local->last_block - local->first_block + 1;
+-  GF_ASSERT(local->num_blocks > 0);
+-  local->resolver_base_inode =
+-      (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode;
+-
+-  if ((local->first_block == 0) && (local->num_blocks == 1)) {
+-    if (local->fop == GF_FOP_TRUNCATE)
+-      STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+-                 FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset,
+-                 local->xattr_req);
+-    else
+-      STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+-                 FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset,
+-                 local->xattr_req);
+-    return 0;
+-  }
++    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
+ 
+-  local->inode_list =
+-      GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
+-  if (!local->inode_list)
+-    goto err;
++    STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ 
+-  local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+-  if (!local->dot_shard_loc.inode) {
+-    ret =
+-        shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
+-    if (ret)
+-      goto err;
+-    shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler,
+-                              SHARD_INTERNAL_DIR_DOT_SHARD);
+-  } else {
+-    local->post_res_handler = shard_post_resolve_truncate_handler;
+-    shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+-  }
+-  return 0;
++    dict_unref(xattr_req);
++    return 0;
+ 
+-err:
+-  shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-  return 0;
++out:
++    if (xattr_req)
++        dict_unref(xattr_req);
++    handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
+-  struct iatt tmp_stbuf = {
+-      0,
+-  };
+-
+-  local = frame->local;
++int
++shard_post_fstat_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
+-  }
++    local = frame->local;
+ 
+-  local->postbuf = tmp_stbuf = local->prebuf;
++    if (local->op_ret >= 0)
++        shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0,
++                            SHARD_LOOKUP_MASK);
+ 
+-  if (local->prebuf.ia_size == local->offset) {
+-    /* If the file size is same as requested size, unwind the call
+-     * immediately.
+-     */
+-    if (local->fop == GF_FOP_TRUNCATE)
+-      SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf,
+-                         NULL);
+-    else
+-      SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf,
+-                         &local->postbuf, NULL);
+-  } else if (local->offset > local->prebuf.ia_size) {
+-    /* If the truncate is from a lower to a higher size, set the
+-     * new size xattr and unwind.
+-     */
+-    local->hole_size = local->offset - local->prebuf.ia_size;
+-    local->delta_size = 0;
+-    GF_ATOMIC_INIT(local->delta_blocks, 0);
+-    local->postbuf.ia_size = local->offset;
+-    tmp_stbuf.ia_size = local->offset;
+-    shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+-                        SHARD_INODE_WRITE_MASK);
+-    shard_update_file_size(frame, this, NULL, &local->loc,
+-                           shard_post_update_size_truncate_handler);
+-  } else {
+-    /* ... else
+-     * i.   unlink all shards that need to be unlinked.
+-     * ii.  truncate the last of the shards.
+-     * iii. update the new size using setxattr.
+-     * and unwind the fop.
+-     */
+-    local->hole_size = 0;
+-    local->delta_size = (local->offset - local->prebuf.ia_size);
+-    GF_ATOMIC_INIT(local->delta_blocks, 0);
+-    tmp_stbuf.ia_size = local->offset;
+-    shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+-                        SHARD_INODE_WRITE_MASK);
+-    shard_truncate_begin(frame, this);
+-  }
+-  return 0;
++    SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
++                       &local->prebuf, local->xattr_rsp);
++    return 0;
+ }
+ 
+-/* TO-DO:
+- * Fix updates to size and block count with racing write(s) and truncate(s).
+- */
++int
++shard_post_stat_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                   off_t offset, dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
++    local = frame->local;
+ 
+-  ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size from inode ctx of %s",
+-           uuid_utoa(loc->inode->gfid));
+-    goto err;
+-  }
++    if (local->op_ret >= 0)
++        shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0,
++                            SHARD_LOOKUP_MASK);
+ 
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
++    SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
++                       &local->prebuf, local->xattr_rsp);
+     return 0;
+-  }
+-
+-  if (!this->itable)
+-    this->itable = loc->inode->table;
++}
+ 
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
+-
+-  frame->local = local;
+-
+-  ret = syncbarrier_init(&local->barrier);
+-  if (ret)
+-    goto err;
+-  loc_copy(&local->loc, loc);
+-  local->offset = offset;
+-  local->block_size = block_size;
+-  local->fop = GF_FOP_TRUNCATE;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
+-  local->resolver_base_inode = loc->inode;
+-  GF_ATOMIC_INIT(local->delta_blocks, 0);
+-
+-  shard_lookup_base_file(frame, this, &local->loc,
+-                         shard_post_lookup_truncate_handler);
+-  return 0;
++int
++shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                      int32_t op_ret, int32_t op_errno, struct iatt *buf,
++                      dict_t *xdata)
++{
++    inode_t *inode = NULL;
++    shard_local_t *local = NULL;
+ 
+-err:
+-  shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM);
+-  return 0;
+-}
+-
+-int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+-                    dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
+-
+-  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size from inode ctx of %s",
+-           uuid_utoa(fd->inode->gfid));
+-    goto err;
+-  }
+-
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+-    return 0;
+-  }
+-
+-  if (!this->itable)
+-    this->itable = fd->inode->table;
+-
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
+-
+-  frame->local = local;
+-  ret = syncbarrier_init(&local->barrier);
+-  if (ret)
+-    goto err;
+-  local->fd = fd_ref(fd);
+-  local->offset = offset;
+-  local->block_size = block_size;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
+-  local->fop = GF_FOP_FTRUNCATE;
+-
+-  local->loc.inode = inode_ref(fd->inode);
+-  gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+-  local->resolver_base_inode = fd->inode;
+-  GF_ATOMIC_INIT(local->delta_blocks, 0);
+-
+-  shard_lookup_base_file(frame, this, &local->loc,
+-                         shard_post_lookup_truncate_handler);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
+-  return 0;
+-}
++    local = frame->local;
+ 
+-int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                    struct iatt *buf, struct iatt *preparent,
+-                    struct iatt *postparent, dict_t *xdata) {
+-  int ret = -1;
+-  shard_local_t *local = NULL;
++    if (op_ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED,
++               "stat failed: %s",
++               local->fd ? uuid_utoa(local->fd->inode->gfid)
++                         : uuid_utoa((local->loc.inode)->gfid));
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto unwind;
++    }
+ 
+-  local = frame->local;
++    local->prebuf = *buf;
++    if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++        local->op_ret = -1;
++        local->op_errno = EINVAL;
++        goto unwind;
++    }
++    local->xattr_rsp = dict_ref(xdata);
+ 
+-  if (op_ret == -1)
+-    goto unwind;
++    if (local->loc.inode)
++        inode = local->loc.inode;
++    else
++        inode = local->fd->inode;
+ 
+-  ret =
+-      shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK);
+-  if (ret)
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
+-           "Failed to set inode "
+-           "ctx for %s",
+-           uuid_utoa(inode->gfid));
++    shard_inode_ctx_invalidate(inode, this, &local->prebuf);
+ 
+ unwind:
+-  SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+-                     postparent, xdata);
+-
+-  return 0;
++    local->handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+-                dev_t rdev, mode_t umask, dict_t *xdata) {
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
++int
++shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
++
++    if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
++        STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->stat, loc, xdata);
++        return 0;
++    }
+ 
+-  priv = this->private;
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++    ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size from inode ctx of %s",
++               uuid_utoa(loc->inode->gfid));
++        goto err;
++    }
+ 
+-  frame->local = local;
+-  local->block_size = priv->block_size;
+-  if (!__is_gsyncd_on_shard_dir(frame, loc)) {
+-    SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
+-  }
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->stat, loc, xdata);
++        return 0;
++    }
+ 
+-  STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM);
+-  return 0;
+-}
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
+ 
+-int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                       int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                       struct iatt *buf, struct iatt *preparent,
+-                       struct iatt *postparent, dict_t *xdata) {
+-  shard_local_t *local = NULL;
++    frame->local = local;
+ 
+-  local = frame->local;
+-  if (op_ret < 0)
+-    goto err;
++    local->handler = shard_post_stat_handler;
++    loc_copy(&local->loc, loc);
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    if (!local->xattr_req)
++        goto err;
+ 
+-  shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES);
+-  buf->ia_size = local->prebuf.ia_size;
+-  buf->ia_blocks = local->prebuf.ia_blocks;
++    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
++                                    local, err);
+ 
+-  SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+-                     postparent, xdata);
+-  return 0;
++    STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
++    return 0;
+ err:
+-  shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM);
++    return 0;
+ }
+ 
+-int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  if (local->op_ret < 0) {
+-    SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL,
+-                       NULL, NULL, NULL);
+-    return 0;
+-  }
++int
++shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
+ 
+-  STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2,
+-             local->xattr_req);
+-  return 0;
+-}
++    if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
++        STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->fstat, fd, xdata);
++        return 0;
++    }
+ 
+-int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+-                   loc_t *newloc, dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
++    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size from inode ctx of %s",
++               uuid_utoa(fd->inode->gfid));
++        goto err;
++    }
+ 
+-  ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size from inode ctx of %s",
+-           uuid_utoa(oldloc->inode->gfid));
+-    goto err;
+-  }
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->fstat, fd, xdata);
++        return 0;
++    }
+ 
+-  if (!block_size) {
+-    STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+-                    oldloc, newloc, xdata);
+-    return 0;
+-  }
++    if (!this->itable)
++        this->itable = fd->inode->table;
+ 
+-  if (!this->itable)
+-    this->itable = oldloc->inode->table;
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
+ 
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++    frame->local = local;
+ 
+-  frame->local = local;
++    local->handler = shard_post_fstat_handler;
++    local->fd = fd_ref(fd);
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    if (!local->xattr_req)
++        goto err;
+ 
+-  loc_copy(&local->loc, oldloc);
+-  loc_copy(&local->loc2, newloc);
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
++    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++                                    local, err);
+ 
+-  shard_lookup_base_file(frame, this, &local->loc,
+-                         shard_post_lookup_link_handler);
+-  return 0;
++    STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
++    return 0;
+ err:
+-  shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM);
++    return 0;
+ }
+ 
+-int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+-
+-int shard_post_lookup_shards_unlink_handler(call_frame_t *frame,
+-                                            xlator_t *this) {
+-  shard_local_t *local = NULL;
+-    uuid_t gfid = {
+-        0,
+-    };
++int
++shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  local = frame->local;
++    local = frame->local;
+ 
+-    if (local->resolver_base_inode)
+-        gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++    if (local->fop == GF_FOP_TRUNCATE)
++        SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
++                           &local->prebuf, &local->postbuf, NULL);
+     else
+-        gf_uuid_copy(gfid, local->base_gfid);
+-
+-  if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+-    gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+-           "failed to delete shards of %s", uuid_utoa(gfid));
++        SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno,
++                           &local->prebuf, &local->postbuf, NULL);
+     return 0;
+-  }
+-  local->op_ret = 0;
+-  local->op_errno = 0;
+-
+-  shard_unlink_shards_do(frame, this, local->resolver_base_inode);
+-  return 0;
+ }
+ 
+-int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-  local->lookup_shards_barriered = _gf_true;
+-
+-  if (!local->call_count)
+-    shard_unlink_shards_do(frame, this, local->resolver_base_inode);
+-  else
+-    shard_common_lookup_shards(frame, this, local->resolver_base_inode,
+-                               shard_post_lookup_shards_unlink_handler);
+-  return 0;
+-}
+-
+-void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) {
+-  char block_bname[256] = {
+-      0,
+-  };
+-  uuid_t gfid = {
+-      0,
+-  };
+-  inode_t *inode = NULL;
+-  inode_t *base_inode = NULL;
+-  xlator_t *this = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_inode_ctx_t *ctx = NULL;
+-  shard_inode_ctx_t *base_ictx = NULL;
+-  int unref_base_inode = 0;
+-  int unref_shard_inode = 0;
+-
+-  this = THIS;
+-  priv = this->private;
+-
+-  inode = local->inode_list[shard_block_num - local->first_block];
+-  shard_inode_ctx_get(inode, this, &ctx);
+-  base_inode = ctx->base_inode;
+-  if (base_inode)
+-    gf_uuid_copy(gfid, base_inode->gfid);
+-  else
+-    gf_uuid_copy(gfid, ctx->base_gfid);
+-  shard_make_block_bname(shard_block_num, gfid, block_bname,
+-                         sizeof(block_bname));
+-
+-  LOCK(&priv->lock);
+-  if (base_inode)
+-    LOCK(&base_inode->lock);
+-  LOCK(&inode->lock);
+-  {
+-    __shard_inode_ctx_get(inode, this, &ctx);
+-    if (!list_empty(&ctx->ilist)) {
+-      list_del_init(&ctx->ilist);
+-      priv->inode_count--;
+-      unref_base_inode++;
+-      unref_shard_inode++;
+-      GF_ASSERT(priv->inode_count >= 0);
+-    }
+-    if (ctx->fsync_needed) {
+-      unref_base_inode++;
+-      unref_shard_inode++;
+-      list_del_init(&ctx->to_fsync_list);
+-      if (base_inode) {
+-        __shard_inode_ctx_get(base_inode, this, &base_ictx);
+-        base_ictx->fsync_count--;
+-      }
+-    }
+-  }
+-  UNLOCK(&inode->lock);
+-  if (base_inode)
+-    UNLOCK(&base_inode->lock);
++int
++shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                              int32_t op_ret, int32_t op_errno,
++                              struct iatt *prebuf, struct iatt *postbuf,
++                              dict_t *xdata)
++{
++    inode_t *inode = NULL;
++    int64_t delta_blocks = 0;
++    shard_local_t *local = NULL;
+ 
+-  inode_unlink(inode, priv->dot_shard_inode, block_bname);
+-  inode_ref_reduce_by_n(inode, unref_shard_inode);
+-  inode_forget(inode, 0);
++    local = frame->local;
+ 
+-  if (base_inode && unref_base_inode)
+-    inode_ref_reduce_by_n(base_inode, unref_base_inode);
+-  UNLOCK(&priv->lock);
+-}
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
+ 
+-int shard_rename_cbk(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
++    inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode
++                                            : local->fd->inode;
++    if (op_ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, op_errno,
++               SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED,
++               "truncate on last"
++               " shard failed : %s",
++               uuid_utoa(inode->gfid));
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto err;
++    }
+ 
+-  local = frame->local;
++    local->postbuf.ia_size = local->offset;
++    /* Let the delta be negative. We want xattrop to do subtraction */
++    local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
++    delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
++                                 postbuf->ia_blocks - prebuf->ia_blocks);
++    GF_ASSERT(delta_blocks <= 0);
++    local->postbuf.ia_blocks += delta_blocks;
++    local->hole_size = 0;
+ 
+-  SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
+-                     &local->prebuf, &local->preoldparent,
+-                     &local->postoldparent, &local->prenewparent,
+-                     &local->postnewparent, local->xattr_rsp);
+-  return 0;
++    shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
++    shard_update_file_size(frame, this, NULL, &local->loc,
++                           shard_post_update_size_truncate_handler);
++    return 0;
++err:
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
++    return 0;
+ }
+ 
+-int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = frame->local;
++int
++shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++    size_t last_shard_size_after = 0;
++    loc_t loc = {
++        0,
++    };
++    shard_local_t *local = NULL;
+ 
+-  SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+-                     &local->preoldparent, &local->postoldparent,
+-                     local->xattr_rsp);
+-  return 0;
+-}
++    local = frame->local;
+ 
+-int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie,
+-                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+-                               struct iatt *preparent, struct iatt *postparent,
+-                               dict_t *xdata) {
+-  int shard_block_num = (long)cookie;
+-  shard_local_t *local = NULL;
++    /* A NULL inode could be due to the fact that the last shard which
++     * needs to be truncated does not exist due to it lying in a hole
++     * region. So the only thing left to do in that case would be an
++     * update to file size xattr.
++     */
++    if (!inode) {
++        gf_msg_debug(this->name, 0,
++                     "Last shard to be truncated absent in backend:%" PRIu64
++                     " of gfid: %s. Directly proceeding to update file size",
++                     local->first_block, uuid_utoa(local->loc.inode->gfid));
++        shard_update_file_size(frame, this, NULL, &local->loc,
++                               shard_post_update_size_truncate_handler);
++        return 0;
++    }
+ 
+-  local = frame->local;
++    SHARD_SET_ROOT_FS_ID(frame, local);
+ 
+-  if (op_ret < 0) {
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto done;
+-  }
++    loc.inode = inode_ref(inode);
++    gf_uuid_copy(loc.gfid, inode->gfid);
+ 
+-  shard_unlink_block_inode(local, shard_block_num);
+-done:
+-  syncbarrier_wake(&local->barrier);
+-  return 0;
+-}
+-
+-int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this,
+-                           inode_t *inode) {
+-  int i = 0;
+-  int ret = -1;
+-  int count = 0;
+-  uint32_t cur_block = 0;
+-  uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */
+-  char *bname = NULL;
+-  char path[PATH_MAX] = {
+-      0,
+-  };
+-  uuid_t gfid = {
+-      0,
+-  };
+-  loc_t loc = {
+-      0,
+-  };
+-  gf_boolean_t wind_failed = _gf_false;
+-  shard_local_t *local = NULL;
+-  shard_priv_t *priv = NULL;
+-
+-  priv = this->private;
+-  local = frame->local;
+-
+-  if (inode)
+-    gf_uuid_copy(gfid, inode->gfid);
+-  else
+-    gf_uuid_copy(gfid, local->base_gfid);
+-
+-  for (i = 0; i < local->num_blocks; i++) {
+-    if (!local->inode_list[i])
+-      continue;
+-    count++;
+-  }
+-
+-  if (!count) {
+-    /* callcount = 0 implies that all of the shards that need to be
+-     * unlinked are non-existent (in other words the file is full of
+-     * holes).
+-     */
+-    gf_msg_debug(this->name, 0, "All shards that need to be "
+-                                "unlinked are non-existent: %s",
+-                 uuid_utoa(gfid));
++    last_shard_size_after = (local->offset % local->block_size);
++
++    STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after,
++               NULL);
++    loc_wipe(&loc);
+     return 0;
+-  }
++}
+ 
+-  SHARD_SET_ROOT_FS_ID(frame, local);
+-  local->barrier.waitfor = count;
+-  cur_block = cur_block_idx + local->first_block;
++void
++shard_unlink_block_inode(shard_local_t *local, int shard_block_num);
+ 
+-  while (cur_block_idx < local->num_blocks) {
+-    if (!local->inode_list[cur_block_idx])
+-      goto next;
++int
++shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                        int32_t op_ret, int32_t op_errno,
++                        struct iatt *preparent, struct iatt *postparent,
++                        dict_t *xdata)
++{
++    int ret = 0;
++    int call_count = 0;
++    int shard_block_num = (long)cookie;
++    uint64_t block_count = 0;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
+ 
+-    if (wind_failed) {
+-      shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
+-                                 ENOMEM, NULL, NULL, NULL);
+-      goto next;
++    if (op_ret < 0) {
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto done;
++    }
++    ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
++    if (!ret) {
++        GF_ATOMIC_SUB(local->delta_blocks, block_count);
++    } else {
++        /* dict_get failed possibly due to a heterogeneous cluster? */
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to get key %s from dict during truncate of gfid %s",
++               GF_GET_FILE_BLOCK_COUNT,
++               uuid_utoa(local->resolver_base_inode->gfid));
+     }
+ 
+-    shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
+-    bname = strrchr(path, '/') + 1;
+-    loc.parent = inode_ref(priv->dot_shard_inode);
+-    ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-    if (ret < 0) {
+-      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-             "Inode path failed"
+-             " on %s, base file gfid = %s",
+-             bname, uuid_utoa(gfid));
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      loc_wipe(&loc);
+-      wind_failed = _gf_true;
+-      shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
+-                                 ENOMEM, NULL, NULL, NULL);
+-      goto next;
++    shard_unlink_block_inode(local, shard_block_num);
++done:
++    call_count = shard_call_count_return(frame);
++    if (call_count == 0) {
++        SHARD_UNSET_ROOT_FS_ID(frame, local);
++        shard_truncate_last_shard(frame, this, local->inode_list[0]);
+     }
++    return 0;
++}
+ 
+-    loc.name = strrchr(loc.path, '/');
+-    if (loc.name)
+-      loc.name++;
+-    loc.inode = inode_ref(local->inode_list[cur_block_idx]);
++int
++shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++    int i = 1;
++    int ret = -1;
++    int call_count = 0;
++    uint32_t cur_block = 0;
++    uint32_t last_block = 0;
++    char path[PATH_MAX] = {
++        0,
++    };
++    char *bname = NULL;
++    loc_t loc = {
++        0,
++    };
++    gf_boolean_t wind_failed = _gf_false;
++    shard_local_t *local = NULL;
++    shard_priv_t *priv = NULL;
++    dict_t *xdata_req = NULL;
+ 
+-    STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk,
+-                      (void *)(long)cur_block, FIRST_CHILD(this),
+-                      FIRST_CHILD(this)->fops->unlink, &loc, local->xflag,
+-                      local->xattr_req);
+-    loc_wipe(&loc);
+-  next:
+-    cur_block++;
+-    cur_block_idx++;
+-  }
+-  syncbarrier_wait(&local->barrier, count);
+-  SHARD_UNSET_ROOT_FS_ID(frame, local);
+-  return 0;
+-}
+-
+-int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this,
+-                                    int now, int first_block,
+-                                    gf_dirent_t *entry) {
+-  int i = 0;
+-  int ret = 0;
+-  shard_local_t *local = NULL;
+-  uuid_t gfid = {
+-      0,
+-  };
+-
+-  local = cleanup_frame->local;
+-
+-  local->inode_list = GF_CALLOC(now, sizeof(inode_t *), gf_shard_mt_inode_list);
+-  if (!local->inode_list)
+-    return -ENOMEM;
+-
+-  local->first_block = first_block;
+-  local->last_block = first_block + now - 1;
+-  local->num_blocks = now;
+-  gf_uuid_parse(entry->d_name, gfid);
+-  gf_uuid_copy(local->base_gfid, gfid);
+-  local->resolver_base_inode = inode_find(this->itable, gfid);
+-  local->call_count = 0;
+-  ret = syncbarrier_init(&local->barrier);
+-  if (ret) {
+-    GF_FREE(local->inode_list);
+-    local->inode_list = NULL;
+-    inode_unref(local->resolver_base_inode);
+-    local->resolver_base_inode = NULL;
+-    return -errno;
+-  }
+-  shard_common_resolve_shards(cleanup_frame, this,
+-                              shard_post_resolve_unlink_handler);
+-
+-  for (i = 0; i < local->num_blocks; i++) {
+-    if (local->inode_list[i])
+-      inode_unref(local->inode_list[i]);
+-  }
+-  GF_FREE(local->inode_list);
+-  local->inode_list = NULL;
+-  if (local->op_ret)
+-    ret = -local->op_errno;
+-  syncbarrier_destroy(&local->barrier);
+-  inode_unref(local->resolver_base_inode);
+-  local->resolver_base_inode = NULL;
+-  STACK_RESET(cleanup_frame->root);
+-  return ret;
+-}
+-
+-int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
+-                                   gf_dirent_t *entry, inode_t *inode) {
+-  int ret = 0;
+-  int shard_count = 0;
+-  int first_block = 0;
+-  int now = 0;
+-  uint64_t size = 0;
+-  uint64_t block_size = 0;
+-  uint64_t size_array[4] = {
+-      0,
+-  };
+-  void *bsize = NULL;
+-  void *size_attr = NULL;
+-  dict_t *xattr_rsp = NULL;
+-  loc_t loc = {
+-      0,
+-  };
+-  shard_local_t *local = NULL;
+-  shard_priv_t *priv = NULL;
+-
+-  priv = this->private;
+-  local = cleanup_frame->local;
+-  ret = dict_reset(local->xattr_req);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to reset dict");
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-
+-  ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-
+-  ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-
+-  loc.inode = inode_ref(inode);
+-  loc.parent = inode_ref(priv->dot_shard_rm_inode);
+-  ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+-  if (ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-           "Inode path  failed on %s", entry->d_name);
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-
+-  loc.name = strrchr(loc.path, '/');
+-  if (loc.name)
+-    loc.name++;
+-  ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req,
+-                      &xattr_rsp);
+-  if (ret)
+-    goto err;
+-
+-  ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
+-    goto err;
+-  }
+-  block_size = ntoh64(*((uint64_t *)bsize));
+-
+-  ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
+-    goto err;
+-  }
+-
+-  memcpy(size_array, size_attr, sizeof(size_array));
+-  size = ntoh64(size_array[0]);
+-
+-  shard_count = (size / block_size) - 1;
+-  if (shard_count < 0) {
+-    gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond "
+-                                "its shard-block-size. Nothing to delete. "
+-                                "Returning",
+-                 entry->d_name);
+-    /* File size < shard-block-size, so nothing to delete */
+-    ret = 0;
+-    goto delete_marker;
+-  }
+-  if ((size % block_size) > 0)
+-    shard_count++;
+-
+-  if (shard_count == 0) {
+-    gf_msg_debug(this->name, 0, "Size of %s is exactly equal to "
+-                                "its shard-block-size. Nothing to delete. "
+-                                "Returning",
+-                 entry->d_name);
+-    ret = 0;
+-    goto delete_marker;
+-  }
+-  gf_msg_debug(this->name, 0,
+-               "base file = %s, "
+-               "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", "
+-               "shard_count=%d",
+-               entry->d_name, block_size, size, shard_count);
+-
+-  /* Perform a gfid-based lookup to see if gfid corresponding to marker
+-   * file's base name exists.
+-   */
+-  loc_wipe(&loc);
+-  loc.inode = inode_new(this->itable);
+-  if (!loc.inode) {
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-  gf_uuid_parse(entry->d_name, loc.gfid);
+-  ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+-  if (!ret) {
+-    gf_msg_debug(this->name, 0, "Base shard corresponding to gfid "
+-                                "%s is present. Skipping shard deletion. "
+-                                "Returning",
+-                 entry->d_name);
+-    ret = 0;
+-    goto delete_marker;
+-  }
++    local = frame->local;
++    priv = this->private;
+ 
+-  first_block = 1;
++    cur_block = local->first_block + 1;
++    last_block = local->last_block;
+ 
+-  while (shard_count) {
+-    if (shard_count < local->deletion_rate) {
+-      now = shard_count;
+-      shard_count = 0;
+-    } else {
+-      now = local->deletion_rate;
+-      shard_count -= local->deletion_rate;
++    /* Determine call count */
++    for (i = 1; i < local->num_blocks; i++) {
++        if (!local->inode_list[i])
++            continue;
++        call_count++;
+     }
+ 
+-    gf_msg_debug(this->name, 0, "deleting %d shards starting from "
+-                                "block %d of gfid %s",
+-                 now, first_block, entry->d_name);
+-    ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block,
+-                                          entry);
+-    if (ret)
+-      goto err;
+-    first_block += now;
+-  }
++    if (!call_count) {
++        /* Call count = 0 implies that all of the shards that need to be
++         * unlinked do not exist. So shard xlator would now proceed to
++         * do the final truncate + size updates.
++         */
++        gf_msg_debug(this->name, 0,
++                     "Shards to be unlinked as part of "
++                     "truncate absent in backend: %s. Directly "
++                     "proceeding to update file size",
++                     uuid_utoa(inode->gfid));
++        local->postbuf.ia_size = local->offset;
++        local->postbuf.ia_blocks = local->prebuf.ia_blocks;
++        local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
++        GF_ATOMIC_INIT(local->delta_blocks, 0);
++        local->hole_size = 0;
++        shard_update_file_size(frame, this, local->fd, &local->loc,
++                               shard_post_update_size_truncate_handler);
++        return 0;
++    }
+ 
+-delete_marker:
+-  loc_wipe(&loc);
+-  loc.inode = inode_ref(inode);
+-  loc.parent = inode_ref(priv->dot_shard_rm_inode);
+-  ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+-  if (ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-           "Inode path  failed on %s", entry->d_name);
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-  loc.name = strrchr(loc.path, '/');
+-  if (loc.name)
+-    loc.name++;
+-  ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL);
+-  if (ret)
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED,
+-           "Failed to delete %s "
+-           "from /%s",
+-           entry->d_name, GF_SHARD_REMOVE_ME_DIR);
+-err:
+-  if (xattr_rsp)
+-    dict_unref(xattr_rsp);
+-  loc_wipe(&loc);
+-  return ret;
+-}
+-
+-int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
+-                                 gf_dirent_t *entry, inode_t *inode) {
+-  int ret = -1;
+-  loc_t loc = {
+-      0,
+-  };
+-  shard_priv_t *priv = NULL;
+-
+-  priv = this->private;
+-  loc.inode = inode_ref(priv->dot_shard_rm_inode);
+-
+-  ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
+-                       ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
+-  if (ret < 0) {
+-    if (ret == -EAGAIN) {
+-      ret = 0;
+-    }
+-    goto out;
+-  }
+-  { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); }
+-  syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
+-                 ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL);
+-out:
+-  loc_wipe(&loc);
+-  return ret;
+-}
+-
+-int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) {
+-  SHARD_STACK_DESTROY(frame);
+-  return 0;
+-}
+-
+-int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
+-                               shard_internal_dir_type_t type) {
+-  int ret = 0;
+-  char *bname = NULL;
+-  loc_t *loc = NULL;
+-  shard_priv_t *priv = NULL;
+-  uuid_t gfid = {
+-      0,
+-  };
+-  struct iatt stbuf = {
+-      0,
+-  };
+-
+-  priv = this->private;
+-
+-  switch (type) {
+-  case SHARD_INTERNAL_DIR_DOT_SHARD:
+-    loc = &local->dot_shard_loc;
+-    gf_uuid_copy(gfid, priv->dot_shard_gfid);
+-    bname = GF_SHARD_DIR;
+-    break;
+-  case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+-    loc = &local->dot_shard_rm_loc;
+-    gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
+-    bname = GF_SHARD_REMOVE_ME_DIR;
+-    break;
+-  default:
+-    break;
+-  }
+-
+-  loc->inode = inode_find(this->itable, gfid);
+-  if (!loc->inode) {
+-    ret = shard_init_internal_dir_loc(this, local, type);
+-    if (ret)
+-      goto err;
+-    ret = dict_reset(local->xattr_req);
++    local->call_count = call_count;
++    i = 1;
++    xdata_req = dict_new();
++    if (!xdata_req) {
++        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++        return 0;
++    }
++    ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
+     if (ret) {
+-      gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+-             "Failed to reset "
+-             "dict");
+-      ret = -ENOMEM;
+-      goto err;
+-    }
+-    ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true);
+-    ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req,
+-                        NULL);
+-    if (ret < 0) {
+-      if (ret != -ENOENT)
+-        gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED,
+-               "Lookup on %s failed, exiting", bname);
+-      goto err;
+-    } else {
+-      shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set key %s into dict during truncate of %s",
++               GF_GET_FILE_BLOCK_COUNT,
++               uuid_utoa(local->resolver_base_inode->gfid));
++        dict_unref(xdata_req);
++        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++        return 0;
+     }
+-  }
+-  ret = 0;
+-err:
+-  return ret;
+-}
+-
+-int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local,
+-                              gf_dirent_t *entry) {
+-  int ret = 0;
+-  loc_t loc = {
+-      0,
+-  };
+-
+-  loc.inode = inode_new(this->itable);
+-  if (!loc.inode) {
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-  loc.parent = inode_ref(local->fd->inode);
+-
+-  ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+-  if (ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-           "Inode path failed on %s", entry->d_name);
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-
+-  loc.name = strrchr(loc.path, '/');
+-  if (loc.name)
+-    loc.name++;
+-
+-  ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+-  if (ret < 0) {
+-    goto err;
+-  }
+-  entry->inode = inode_ref(loc.inode);
+-  ret = 0;
+-err:
+-  loc_wipe(&loc);
+-  return ret;
+-}
+-
+-int shard_delete_shards(void *opaque) {
+-  int ret = 0;
+-  off_t offset = 0;
+-  loc_t loc = {
+-      0,
+-  };
+-  inode_t *link_inode = NULL;
+-  xlator_t *this = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
+-  gf_dirent_t entries;
+-  gf_dirent_t *entry = NULL;
+-  call_frame_t *cleanup_frame = NULL;
+-  gf_boolean_t done = _gf_false;
+-
+-  this = THIS;
+-  priv = this->private;
+-  INIT_LIST_HEAD(&entries.list);
+-
+-  cleanup_frame = opaque;
+-
+-  local = mem_get0(this->local_pool);
+-  if (!local) {
+-    gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+-           "Failed to create local to "
+-           "delete shards");
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-  cleanup_frame->local = local;
+-  local->fop = GF_FOP_UNLINK;
+-
+-  local->xattr_req = dict_new();
+-  if (!local->xattr_req) {
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-  local->deletion_rate = priv->deletion_rate;
+-
+-  ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
+-  if (ret == -ENOENT) {
+-    gf_msg_debug(this->name, 0, ".shard absent. Nothing to"
+-                                " delete. Exiting");
+-    ret = 0;
+-    goto err;
+-  } else if (ret < 0) {
+-    goto err;
+-  }
+ 
+-  ret = shard_resolve_internal_dir(this, local,
+-                                   SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+-  if (ret == -ENOENT) {
+-    gf_msg_debug(this->name, 0, ".remove_me absent. "
+-                                "Nothing to delete. Exiting");
+-    ret = 0;
+-    goto err;
+-  } else if (ret < 0) {
+-    goto err;
+-  }
+-
+-  local->fd = fd_anonymous(local->dot_shard_rm_loc.inode);
+-  if (!local->fd) {
+-    ret = -ENOMEM;
+-    goto err;
+-  }
+-
+-  for (;;) {
+-    offset = 0;
++    SHARD_SET_ROOT_FS_ID(frame, local);
++    while (cur_block <= last_block) {
++        if (!local->inode_list[i]) {
++            cur_block++;
++            i++;
++            continue;
++        }
++        if (wind_failed) {
++            shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1,
++                                    ENOMEM, NULL, NULL, NULL);
++            goto next;
++        }
++
++        shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
++        bname = strrchr(path, '/') + 1;
++        loc.parent = inode_ref(priv->dot_shard_inode);
++        ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++        if (ret < 0) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++                   "Inode path failed"
++                   " on %s. Base file gfid = %s",
++                   bname, uuid_utoa(inode->gfid));
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            loc_wipe(&loc);
++            wind_failed = _gf_true;
++            shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1,
++                                    ENOMEM, NULL, NULL, NULL);
++            goto next;
++        }
++        loc.name = strrchr(loc.path, '/');
++        if (loc.name)
++            loc.name++;
++        loc.inode = inode_ref(local->inode_list[i]);
++
++        STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
++                          (void *)(long)cur_block, FIRST_CHILD(this),
++                          FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
++        loc_wipe(&loc);
++    next:
++        i++;
++        cur_block++;
++        if (!--call_count)
++            break;
++    }
++    dict_unref(xdata_req);
++    return 0;
++}
++
++int
++shard_truncate_do(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (local->num_blocks == 1) {
++        /* This means that there are no shards to be unlinked.
++         * The fop boils down to truncating the last shard, updating
++         * the size and unwinding.
++         */
++        shard_truncate_last_shard(frame, this, local->inode_list[0]);
++        return 0;
++    } else {
++        shard_truncate_htol(frame, this, local->loc.inode);
++    }
++    return 0;
++}
++
++int
++shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
++
++    shard_truncate_do(frame, this);
++    return 0;
++}
++
++void
++shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
++                       struct iatt *buf)
++{
++    int list_index = 0;
++    char block_bname[256] = {
++        0,
++    };
++    uuid_t gfid = {
++        0,
++    };
++    inode_t *linked_inode = NULL;
++    xlator_t *this = NULL;
++    inode_t *fsync_inode = NULL;
++    shard_priv_t *priv = NULL;
++    inode_t *base_inode = NULL;
++
++    this = THIS;
++    priv = this->private;
++    if (local->loc.inode) {
++        gf_uuid_copy(gfid, local->loc.inode->gfid);
++        base_inode = local->loc.inode;
++    } else if (local->resolver_base_inode) {
++        gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++        base_inode = local->resolver_base_inode;
++    } else {
++        gf_uuid_copy(gfid, local->base_gfid);
++    }
++
++    shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
++
++    shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
++    linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf);
++    inode_lookup(linked_inode);
++    list_index = block_num - local->first_block;
++    local->inode_list[list_index] = linked_inode;
++
+     LOCK(&priv->lock);
+     {
+-      if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
+-        priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
+-      } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
+-        priv->bg_del_state = SHARD_BG_DELETION_NONE;
+-        done = _gf_true;
+-      }
++        fsync_inode = __shard_update_shards_inode_list(
++            linked_inode, this, base_inode, block_num, gfid);
+     }
+     UNLOCK(&priv->lock);
+-    if (done)
+-      break;
+-    while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
+-                                  &entries, local->xattr_req, NULL))) {
+-      if (ret > 0)
+-        ret = 0;
+-      list_for_each_entry(entry, &entries.list, list) {
+-        offset = entry->d_off;
++    if (fsync_inode)
++        shard_initiate_evicted_inode_fsync(this, fsync_inode);
++}
++
++int
++shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie,
++                               xlator_t *this, int32_t op_ret, int32_t op_errno,
++                               inode_t *inode, struct iatt *buf, dict_t *xdata,
++                               struct iatt *postparent)
++{
++    int call_count = 0;
++    int shard_block_num = (long)cookie;
++    uuid_t gfid = {
++        0,
++    };
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++    if (local->resolver_base_inode)
++        gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++    else
++        gf_uuid_copy(gfid, local->base_gfid);
++
++    if (op_ret < 0) {
++        /* Ignore absence of shards in the backend in truncate fop. */
++        switch (local->fop) {
++            case GF_FOP_TRUNCATE:
++            case GF_FOP_FTRUNCATE:
++            case GF_FOP_RENAME:
++            case GF_FOP_UNLINK:
++                if (op_errno == ENOENT)
++                    goto done;
++                break;
++            case GF_FOP_WRITE:
++            case GF_FOP_READ:
++            case GF_FOP_ZEROFILL:
++            case GF_FOP_DISCARD:
++            case GF_FOP_FALLOCATE:
++                if ((!local->first_lookup_done) && (op_errno == ENOENT)) {
++                    LOCK(&frame->lock);
++                    {
++                        local->create_count++;
++                    }
++                    UNLOCK(&frame->lock);
++                    goto done;
++                }
++                break;
++            default:
++                break;
++        }
++
++        /* else */
++        gf_msg(this->name, GF_LOG_ERROR, op_errno,
++               SHARD_MSG_LOOKUP_SHARD_FAILED,
++               "Lookup on shard %d "
++               "failed. Base file gfid = %s",
++               shard_block_num, uuid_utoa(gfid));
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto done;
++    }
++
++    shard_link_block_inode(local, shard_block_num, inode, buf);
++
++done:
++    if (local->lookup_shards_barriered) {
++        syncbarrier_wake(&local->barrier);
++        return 0;
++    } else {
++        call_count = shard_call_count_return(frame);
++        if (call_count == 0) {
++            if (!local->first_lookup_done)
++                local->first_lookup_done = _gf_true;
++            local->pls_fop_handler(frame, this);
++        }
++    }
++    return 0;
++}
++
++dict_t *
++shard_create_gfid_dict(dict_t *dict)
++{
++    int ret = 0;
++    dict_t *new = NULL;
++    unsigned char *gfid = NULL;
++
++    new = dict_copy_with_ref(dict, NULL);
++    if (!new)
++        return NULL;
++
++    gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
++    if (!gfid) {
++        ret = -1;
++        goto out;
++    }
++
++    gf_uuid_generate(gfid);
++
++    ret = dict_set_gfuuid(new, "gfid-req", gfid, false);
+ 
+-        if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+-          continue;
++out:
++    if (ret) {
++        dict_unref(new);
++        new = NULL;
++        GF_FREE(gfid);
++    }
++
++    return new;
++}
++
++int
++shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
++                           shard_post_lookup_shards_fop_handler_t handler)
++{
++    int i = 0;
++    int ret = 0;
++    int count = 0;
++    int call_count = 0;
++    int32_t shard_idx_iter = 0;
++    int last_block = 0;
++    char path[PATH_MAX] = {
++        0,
++    };
++    char *bname = NULL;
++    uuid_t gfid = {
++        0,
++    };
++    loc_t loc = {
++        0,
++    };
++    shard_local_t *local = NULL;
++    shard_priv_t *priv = NULL;
++    gf_boolean_t wind_failed = _gf_false;
++    dict_t *xattr_req = NULL;
+ 
+-        if (!entry->inode) {
+-          ret = shard_lookup_marker_entry(this, local, entry);
+-          if (ret < 0)
++    priv = this->private;
++    local = frame->local;
++    count = call_count = local->call_count;
++    shard_idx_iter = local->first_block;
++    last_block = local->last_block;
++    local->pls_fop_handler = handler;
++    if (local->lookup_shards_barriered)
++        local->barrier.waitfor = local->call_count;
++
++    if (inode)
++        gf_uuid_copy(gfid, inode->gfid);
++    else
++        gf_uuid_copy(gfid, local->base_gfid);
++
++    while (shard_idx_iter <= last_block) {
++        if (local->inode_list[i]) {
++            i++;
++            shard_idx_iter++;
+             continue;
+         }
+-        link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name,
+-                                &entry->d_stat);
+ 
+-        gf_msg_debug(this->name, 0, "Initiating deletion of "
+-                                    "shards of gfid %s",
+-                     entry->d_name);
+-        ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
+-                                           link_inode);
+-        inode_unlink(link_inode, local->fd->inode, entry->d_name);
+-        inode_unref(link_inode);
+-        if (ret) {
+-          gf_msg(this->name, GF_LOG_ERROR, -ret,
+-                 SHARD_MSG_SHARDS_DELETION_FAILED,
+-                 "Failed to clean up shards of gfid %s", entry->d_name);
+-          continue;
++        if (wind_failed) {
++            shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
++                                           this, -1, ENOMEM, NULL, NULL, NULL,
++                                           NULL);
++            goto next;
+         }
+-        gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED,
+-               "Deleted "
+-               "shards of gfid=%s from backend",
+-               entry->d_name);
+-      }
+-      gf_dirent_free(&entries);
+-      if (ret)
+-        break;
+-    }
+-  }
+-  ret = 0;
+-  loc_wipe(&loc);
+-  return ret;
++
++        shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
++
++        bname = strrchr(path, '/') + 1;
++        loc.inode = inode_new(this->itable);
++        loc.parent = inode_ref(priv->dot_shard_inode);
++        gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
++        ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++        if (ret < 0 || !(loc.inode)) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++                   "Inode path failed"
++                   " on %s, base file gfid = %s",
++                   bname, uuid_utoa(gfid));
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            loc_wipe(&loc);
++            wind_failed = _gf_true;
++            shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
++                                           this, -1, ENOMEM, NULL, NULL, NULL,
++                                           NULL);
++            goto next;
++        }
++
++        loc.name = strrchr(loc.path, '/');
++        if (loc.name)
++            loc.name++;
++
++        xattr_req = shard_create_gfid_dict(local->xattr_req);
++        if (!xattr_req) {
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            wind_failed = _gf_true;
++            loc_wipe(&loc);
++            shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
++                                           this, -1, ENOMEM, NULL, NULL, NULL,
++                                           NULL);
++            goto next;
++        }
++
++        STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk,
++                          (void *)(long)shard_idx_iter, FIRST_CHILD(this),
++                          FIRST_CHILD(this)->fops->lookup, &loc, xattr_req);
++        loc_wipe(&loc);
++        dict_unref(xattr_req);
++    next:
++        shard_idx_iter++;
++        i++;
++
++        if (!--call_count)
++            break;
++    }
++    if (local->lookup_shards_barriered) {
++        syncbarrier_wait(&local->barrier, count);
++        local->pls_fop_handler(frame, this);
++    }
++    return 0;
++}
++
++int
++shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (local->op_ret < 0) {
++        if (local->op_errno == ENOENT) {
++            /* If lookup on /.shard fails with ENOENT, it means that
++             * the file was 0-byte in size but truncated sometime in
++             * the past to a higher size which is reflected in the
++             * size xattr, and now being truncated to a lower size.
++             * In this case, the only thing that needs to be done is
++             * to update the size xattr of the file and unwind.
++             */
++            local->first_block = local->last_block = 0;
++            local->num_blocks = 1;
++            local->call_count = 0;
++            local->op_ret = 0;
++            local->postbuf.ia_size = local->offset;
++            shard_update_file_size(frame, this, local->fd, &local->loc,
++                                   shard_post_update_size_truncate_handler);
++            return 0;
++        } else {
++            shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                        local->op_errno);
++            return 0;
++        }
++    }
++
++    if (!local->call_count)
++        shard_truncate_do(frame, this);
++    else
++        shard_common_lookup_shards(frame, this, local->loc.inode,
++                                   shard_post_lookup_shards_truncate_handler);
++
++    return 0;
++}
++
++int
++shard_truncate_begin(call_frame_t *frame, xlator_t *this)
++{
++    int ret = 0;
++    shard_local_t *local = NULL;
++    shard_priv_t *priv = NULL;
++
++    priv = this->private;
++    local = frame->local;
++
++    /* First participant block here is the lowest numbered block that would
++     * hold the last byte of the file post successful truncation.
++     * Last participant block is the block that contains the last byte in
++     * the current state of the file.
++     * If (first block == last_block):
++     *         then that means that the file only needs truncation of the
++     *         first (or last since both are same) block.
++     * Else
++     *         if (new_size % block_size == 0)
++     *                 then that means there is no truncate to be done with
++     *                 only shards from first_block + 1 through the last
++     *                 block needing to be unlinked.
++     *         else
++     *                 both truncate of the first block and unlink of the
++     *                 remaining shards until end of file is required.
++     */
++    local->first_block = (local->offset == 0)
++                             ? 0
++                             : get_lowest_block(local->offset - 1,
++                                                local->block_size);
++    local->last_block = get_highest_block(0, local->prebuf.ia_size,
++                                          local->block_size);
++
++    local->num_blocks = local->last_block - local->first_block + 1;
++    GF_ASSERT(local->num_blocks > 0);
++    local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE)
++                                     ? local->loc.inode
++                                     : local->fd->inode;
++
++    if ((local->first_block == 0) && (local->num_blocks == 1)) {
++        if (local->fop == GF_FOP_TRUNCATE)
++            STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++                       FIRST_CHILD(this)->fops->truncate, &local->loc,
++                       local->offset, local->xattr_req);
++        else
++            STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
++                       FIRST_CHILD(this)->fops->ftruncate, local->fd,
++                       local->offset, local->xattr_req);
++        return 0;
++    }
++
++    local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
++                                  gf_shard_mt_inode_list);
++    if (!local->inode_list)
++        goto err;
++
++    local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++    if (!local->dot_shard_loc.inode) {
++        ret = shard_init_internal_dir_loc(this, local,
++                                          SHARD_INTERNAL_DIR_DOT_SHARD);
++        if (ret)
++            goto err;
++        shard_lookup_internal_dir(frame, this,
++                                  shard_post_resolve_truncate_handler,
++                                  SHARD_INTERNAL_DIR_DOT_SHARD);
++    } else {
++        local->post_res_handler = shard_post_resolve_truncate_handler;
++        shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++    }
++    return 0;
+ 
+ err:
+-  LOCK(&priv->lock);
+-  { priv->bg_del_state = SHARD_BG_DELETION_NONE; }
+-  UNLOCK(&priv->lock);
+-  loc_wipe(&loc);
+-  return ret;
+-}
+-
+-int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                             int32_t op_ret, int32_t op_errno, dict_t *xdata) {
+-  if (op_ret)
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-           "Unlock failed. Please check brick logs for "
+-           "more details");
+-  SHARD_STACK_DESTROY(frame);
+-  return 0;
+-}
+-
+-int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) {
+-  loc_t *loc = NULL;
+-  call_frame_t *lk_frame = NULL;
+-  shard_local_t *local = NULL;
+-  shard_local_t *lk_local = NULL;
+-  shard_inodelk_t *lock = NULL;
+-
+-  local = frame->local;
+-  lk_frame = local->inodelk_frame;
+-  lk_local = lk_frame->local;
+-  local->inodelk_frame = NULL;
+-  loc = &local->int_inodelk.loc;
+-  lock = &lk_local->int_inodelk;
+-  lock->flock.l_type = F_UNLCK;
+-
+-  STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK,
+-             &lock->flock, NULL);
+-  local->int_inodelk.acquired_lock = _gf_false;
+-  return 0;
+-}
+-
+-int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                         int32_t op_ret, int32_t op_errno, struct iatt *buf,
+-                         struct iatt *preoldparent, struct iatt *postoldparent,
+-                         struct iatt *prenewparent, struct iatt *postnewparent,
+-                         dict_t *xdata);
+-int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) {
+-  int ret = 0;
+-  loc_t *dst_loc = NULL;
+-  loc_t tmp_loc = {
+-      0,
+-  };
+-  shard_local_t *local = frame->local;
+-
+-  if (local->dst_block_size) {
+-    tmp_loc.parent = inode_ref(local->loc2.parent);
+-    ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path);
+-    if (ret < 0) {
+-      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-             "Inode path failed"
+-             " on pargfid=%s bname=%s",
+-             uuid_utoa(tmp_loc.parent->gfid), local->loc2.name);
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      goto err;
+-    }
+-
+-    tmp_loc.name = strrchr(tmp_loc.path, '/');
+-    if (tmp_loc.name)
+-      tmp_loc.name++;
+-    dst_loc = &tmp_loc;
+-  } else {
+-    dst_loc = &local->loc2;
+-  }
+-
+-  /* To-Do: Request open-fd count on dst base file */
+-  STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc,
+-             local->xattr_req);
+-  loc_wipe(&tmp_loc);
+-  return 0;
+-err:
+-  loc_wipe(&tmp_loc);
+-  shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                              local->op_errno);
+-  return 0;
+-}
+-
+-int shard_unlink_base_file(call_frame_t *frame, xlator_t *this);
+-
+-int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie,
+-                                            xlator_t *this, int32_t op_ret,
+-                                            int32_t op_errno, dict_t *dict,
+-                                            dict_t *xdata) {
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
+-
+-  priv = this->private;
+-  local = frame->local;
+-  if (op_ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-           "Xattrop on marker file failed "
+-           "while performing %s; entry gfid=%s",
+-           gf_fop_string(local->fop), local->newloc.name);
+-    goto err;
+-  }
+-
+-  inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode,
+-               local->newloc.name);
+-
+-  if (local->fop == GF_FOP_UNLINK)
+-    shard_unlink_base_file(frame, this);
+-  else if (local->fop == GF_FOP_RENAME)
+-    shard_rename_src_base_file(frame, this);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
+-  return 0;
+-}
+-
+-int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) {
+-  int op_errno = ENOMEM;
+-  uint64_t bs = 0;
+-  dict_t *xdata = NULL;
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-  xdata = dict_new();
+-  if (!xdata)
+-    goto err;
+-
+-  if (local->fop == GF_FOP_UNLINK)
+-    bs = local->block_size;
+-  else if (local->fop == GF_FOP_RENAME)
+-    bs = local->dst_block_size;
+-  SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc,
+-                          local->prebuf.ia_size, 0, err);
+-  STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->xattrop, &local->newloc,
+-             GF_XATTROP_GET_AND_SET, xdata, NULL);
+-  dict_unref(xdata);
+-  return 0;
+-err:
+-  if (xdata)
+-    dict_unref(xdata);
+-  shard_common_failure_unwind(local->fop, frame, -1, op_errno);
+-  return 0;
++    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++    return 0;
+ }
+ 
+-int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie,
+-                                 xlator_t *this, int32_t op_ret,
+-                                 int32_t op_errno, inode_t *inode,
+-                                 struct iatt *buf, dict_t *xdata,
+-                                 struct iatt *postparent) {
+-  inode_t *linked_inode = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-  priv = this->private;
+-
+-  if (op_ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-           "Lookup on marker file failed "
+-           "while performing %s; entry gfid=%s",
+-           gf_fop_string(local->fop), local->newloc.name);
+-    goto err;
+-  }
+-
+-  linked_inode =
+-      inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf);
+-  inode_unref(local->newloc.inode);
+-  local->newloc.inode = linked_inode;
+-  shard_set_size_attrs_on_marker_file(frame, this);
+-  return 0;
++int /* Post-lookup step shared by truncate and ftruncate: compares the requested offset with the looked-up size and picks no-op, grow or shrink; always returns 0. */
++shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++    struct iatt tmp_stbuf = {
++        0,
++    };
++
++    local = frame->local;
++
++    if (local->op_ret < 0) { /* an earlier step already recorded a failure in local; report it to the caller */
++        shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
++
++    local->postbuf = tmp_stbuf = local->prebuf; /* both copies start from the looked-up attributes; ia_size is overridden below where needed */
++
++    if (local->prebuf.ia_size == local->offset) {
++        /* If the file size is same as requested size, unwind the call
++         * immediately.
++         */
++        if (local->fop == GF_FOP_TRUNCATE)
++            SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf,
++                               &local->postbuf, NULL);
++        else /* any fop other than GF_FOP_TRUNCATE is unwound as ftruncate */
++            SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf,
++                               &local->postbuf, NULL);
++    } else if (local->offset > local->prebuf.ia_size) {
++        /* If the truncate is from a lower to a higher size, set the
++         * new size xattr and unwind.
++         */
++        local->hole_size = local->offset - local->prebuf.ia_size; /* number of bytes by which the file grows */
++        local->delta_size = 0;
++        GF_ATOMIC_INIT(local->delta_blocks, 0);
++        local->postbuf.ia_size = local->offset;
++        tmp_stbuf.ia_size = local->offset;
++        shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, /* return value is not checked here */
++                            SHARD_INODE_WRITE_MASK);
++        shard_update_file_size(frame, this, NULL, &local->loc,
++                               shard_post_update_size_truncate_handler);
++    } else {
++        /* ... else
++         * i.   unlink all shards that need to be unlinked.
++         * ii.  truncate the last of the shards.
++         * iii. update the new size using setxattr.
++         * and unwind the fop.
++         */
++        local->hole_size = 0;
++        local->delta_size = (local->offset - local->prebuf.ia_size); /* offset is below ia_size in this branch (the two other cases were handled above) */
++        GF_ATOMIC_INIT(local->delta_blocks, 0);
++        tmp_stbuf.ia_size = local->offset;
++        shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, /* return value is not checked here */
++                            SHARD_INODE_WRITE_MASK);
++        shard_truncate_begin(frame, this);
++    }
++    return 0;
++}
++
++/* TO-DO:
++ * Fix updates to size and block count with racing write(s) and truncate(s).
++ */
++
++int /* truncate fop entry point: passes the call straight to the child for non-sharded inodes and gsyncd, otherwise prepares local state and looks up the base file first; always returns 0. */
++shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
++               dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
++
++    ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size from inode ctx of %s",
++               uuid_utoa(loc->inode->gfid));
++        goto err;
++    }
++
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { /* zero block size or a gsyncd caller: no shard handling, wind unchanged */
++        STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
++        return 0;
++    }
++
++    if (!this->itable) /* remember the inode table the first time one is seen */
++        this->itable = loc->inode->table;
++
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
++
++    frame->local = local; /* attached before the remaining checks, so later goto err paths leave local on the frame */
++
++    ret = syncbarrier_init(&local->barrier);
++    if (ret)
++        goto err;
++    loc_copy(&local->loc, loc);
++    local->offset = offset;
++    local->block_size = block_size;
++    local->fop = GF_FOP_TRUNCATE;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); /* take a ref on the caller's dict if given, else allocate a new one */
++    if (!local->xattr_req)
++        goto err;
++    local->resolver_base_inode = loc->inode; /* plain pointer copy; no inode_ref is taken on this line */
++    GF_ATOMIC_INIT(local->delta_blocks, 0);
++
++    shard_lookup_base_file(frame, this, &local->loc,
++                           shard_post_lookup_truncate_handler);
++    return 0;
++
+ err:
+-  shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); /* every failure path above, including the inode-ctx lookup failure, is reported as ENOMEM */
++    return 0;
+ }
+ 
+-int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) {
+-  int op_errno = ENOMEM;
+-  dict_t *xattr_req = NULL;
+-  shard_local_t *local = NULL;
++int /* ftruncate fop entry point: same flow as shard_truncate but keyed on an fd; always returns 0. */
++shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++                dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
++
++    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size from inode ctx of %s",
++               uuid_utoa(fd->inode->gfid));
++        goto err;
++    }
++
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { /* zero block size or a gsyncd caller: no shard handling, wind unchanged */
++        STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
++        return 0;
++    }
++
++    if (!this->itable) /* remember the inode table the first time one is seen */
++        this->itable = fd->inode->table;
++
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
++
++    frame->local = local; /* attached before the remaining checks, so later goto err paths leave local on the frame */
++    ret = syncbarrier_init(&local->barrier);
++    if (ret)
++        goto err;
++    local->fd = fd_ref(fd);
++    local->offset = offset;
++    local->block_size = block_size;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); /* take a ref on the caller's dict if given, else allocate a new one */
++    if (!local->xattr_req)
++        goto err;
++    local->fop = GF_FOP_FTRUNCATE;
++
++    local->loc.inode = inode_ref(fd->inode); /* build a gfid-only loc from the fd: just inode and gfid are filled in */
++    gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++    local->resolver_base_inode = fd->inode; /* plain pointer copy; no inode_ref is taken on this line */
++    GF_ATOMIC_INIT(local->delta_blocks, 0);
++
++    shard_lookup_base_file(frame, this, &local->loc,
++                           shard_post_lookup_truncate_handler);
++    return 0;
++err:
++    shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); /* every failure path above, including the inode-ctx lookup failure, is reported as ENOMEM */
++    return 0;
++}
++
++int /* mknod callback: on success records buf and local->block_size in the new inode's ctx, then unwinds mknod with the child's results; always returns 0. */
++shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                int32_t op_ret, int32_t op_errno, inode_t *inode,
++                struct iatt *buf, struct iatt *preparent,
++                struct iatt *postparent, dict_t *xdata)
++{
++    int ret = -1;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (op_ret == -1) /* child failed: skip the ctx update and pass the error through */
++        goto unwind;
++
++    ret = shard_inode_ctx_set(inode, this, buf, local->block_size,
++                              SHARD_ALL_MASK);
++    if (ret) /* a ctx-set failure is only logged; the fop is still unwound with the child's op_ret */
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
++               "Failed to set inode "
++               "ctx for %s",
++               uuid_utoa(inode->gfid));
++
++unwind:
++    SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
++                       postparent, xdata);
++
++    return 0;
++}
++
++int /* mknod fop entry point: allocates local, records the configured block size and winds mknod to the child; always returns 0. */
++shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
++            dev_t rdev, mode_t umask, dict_t *xdata)
++{
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++
++    priv = this->private;
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
++
++    frame->local = local;
++    local->block_size = priv->block_size; /* new files use the block size from the translator's private config */
++    if (!__is_gsyncd_on_shard_dir(frame, loc)) { /* the create-init macro is skipped when this helper returns true */
++        SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); /* macro defined elsewhere; presumably jumps to the err label passed as last argument on failure — confirm */
++    }
++
++    STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
++    return 0;
++err:
++    shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM);
++    return 0;
++}
++
++int32_t /* link callback: on success updates the inode ctx, overrides size/blocks in buf from local->prebuf and unwinds link; always returns 0. */
++shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++               int32_t op_ret, int32_t op_errno, inode_t *inode,
++               struct iatt *buf, struct iatt *preparent,
++               struct iatt *postparent, dict_t *xdata)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++    if (op_ret < 0)
++        goto err;
++
++    shard_inode_ctx_set(inode, this, buf, 0, /* only the nlink and times masks are requested; return value is not checked */
++                        SHARD_MASK_NLINK | SHARD_MASK_TIMES);
++    buf->ia_size = local->prebuf.ia_size; /* report the size and block count held in local->prebuf instead of the child's values */
++    buf->ia_blocks = local->prebuf.ia_blocks;
++
++    SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
++                       postparent, xdata);
++    return 0;
++err:
++    shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno);
++    return 0;
++}
++
++int /* Post-lookup step of link: unwinds with the recorded error if the lookup failed, otherwise winds link to the child; always returns 0. */
++shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (local->op_ret < 0) { /* failure recorded in local: unwind link with all result pointers NULL */
++        SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL,
++                           NULL, NULL, NULL, NULL);
++        return 0;
++    }
++
++    STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, /* loc is the old path and loc2 the new one, as copied in shard_link */
++               local->xattr_req);
++    return 0;
++}
++
++int32_t /* link fop entry point: tail-winds to the child for non-sharded inodes, otherwise copies both locs into local and looks up the base file first; always returns 0. */
++shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
++           dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
++
++    ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size from inode ctx of %s",
++               uuid_utoa(oldloc->inode->gfid));
++        goto err;
++    }
++
++    if (!block_size) { /* zero block size: no shard handling, hand the call to the child as-is */
++        STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
++                        oldloc, newloc, xdata);
++        return 0;
++    }
++
++    if (!this->itable) /* remember the inode table the first time one is seen */
++        this->itable = oldloc->inode->table;
++
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
++
++    frame->local = local;
++
++    loc_copy(&local->loc, oldloc); /* return values of both loc_copy calls are not checked */
++    loc_copy(&local->loc2, newloc);
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); /* take a ref on the caller's dict if given, else allocate a new one */
++    if (!local->xattr_req)
++        goto err;
++
++    shard_lookup_base_file(frame, this, &local->loc,
++                           shard_post_lookup_link_handler);
++    return 0;
++err:
++    shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); /* every failure path above, including the inode-ctx lookup failure, is reported as ENOMEM */
++    return 0;
++}
++
++int
++shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); /* forward declaration: used by the handlers that follow, defined later in this file */
++
++int /* Runs after the shard lookups for an unlink: tolerates ENOENT, then proceeds to unlink the shards; always returns 0. */
++shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++    uuid_t gfid = {
++        0,
++    };
++
++    local = frame->local;
++
++    if (local->resolver_base_inode) /* gfid is only used for the log message below */
++        gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++    else
++        gf_uuid_copy(gfid, local->base_gfid);
++
++    if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { /* any failure other than ENOENT: log and stop, leaving op_ret/op_errno in local as they are */
++        gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
++               "failed to delete shards of %s", uuid_utoa(gfid));
++        return 0;
++    }
++    local->op_ret = 0; /* success, or an ENOENT failure that is deliberately cleared here */
++    local->op_errno = 0;
++
++    shard_unlink_shards_do(frame, this, local->resolver_base_inode);
++    return 0;
++}
++
++int /* Runs after shard resolution for an unlink: unlinks directly when call_count is 0, otherwise looks the shards up first; always returns 0. */
++shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++    local->lookup_shards_barriered = _gf_true; /* flag consumed outside this function; presumably makes the shard lookups wait on the barrier — confirm */
++
++    if (!local->call_count) /* call_count == 0: skip the lookup step entirely */
++        shard_unlink_shards_do(frame, this, local->resolver_base_inode);
++    else
++        shard_common_lookup_shards(frame, this, local->resolver_base_inode,
++                                   shard_post_lookup_shards_unlink_handler);
++    return 0;
++}
++
++void /* Drops the in-memory bookkeeping for one shard inode: list membership, fsync tracking, the dentry under .shard and the refs held for them. */
++shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
++{
++    char block_bname[256] = {
++        0,
++    };
++    uuid_t gfid = {
++        0,
++    };
++    inode_t *inode = NULL;
++    inode_t *base_inode = NULL;
++    xlator_t *this = NULL;
++    shard_priv_t *priv = NULL;
++    shard_inode_ctx_t *ctx = NULL;
++    shard_inode_ctx_t *base_ictx = NULL;
++    int unref_base_inode = 0; /* number of refs to drop on the base inode after the locked section */
++    int unref_shard_inode = 0; /* number of refs to drop on the shard inode after the locked section */
++
++    this = THIS;
++    priv = this->private;
++
++    inode = local->inode_list[shard_block_num - local->first_block]; /* inode_list is indexed relative to first_block */
++    shard_inode_ctx_get(inode, this, &ctx); /* NOTE(review): return value unchecked; ctx is dereferenced on the next line */
++    base_inode = ctx->base_inode;
++    if (base_inode)
++        gf_uuid_copy(gfid, base_inode->gfid);
++    else /* no base inode pointer: fall back to the gfid stored in the shard's ctx */
++        gf_uuid_copy(gfid, ctx->base_gfid);
++    shard_make_block_bname(shard_block_num, gfid, block_bname,
++                           sizeof(block_bname));
++
++    LOCK(&priv->lock); /* lock order: priv, then base inode (if any), then shard inode */
++    if (base_inode)
++        LOCK(&base_inode->lock);
++    LOCK(&inode->lock);
++    {
++        __shard_inode_ctx_get(inode, this, &ctx); /* re-read the ctx now that the locks are held */
++        if (!list_empty(&ctx->ilist)) { /* shard is on the ilist: remove it and account for one ref on each inode */
++            list_del_init(&ctx->ilist);
++            priv->inode_count--;
++            unref_base_inode++;
++            unref_shard_inode++;
++            GF_ASSERT(priv->inode_count >= 0);
++        }
++        if (ctx->fsync_needed) { /* shard is queued for fsync: dequeue it and account for one more ref on each inode */
++            unref_base_inode++;
++            unref_shard_inode++;
++            list_del_init(&ctx->to_fsync_list);
++            if (base_inode) {
++                __shard_inode_ctx_get(base_inode, this, &base_ictx);
++                base_ictx->fsync_count--;
++            }
++        }
++    }
++    UNLOCK(&inode->lock);
++    if (base_inode)
++        UNLOCK(&base_inode->lock);
++
++    inode_unlink(inode, priv->dot_shard_inode, block_bname); /* still under priv->lock, which is released last */
++    inode_ref_reduce_by_n(inode, unref_shard_inode);
++    inode_forget(inode, 0);
++
++    if (base_inode && unref_base_inode)
++        inode_ref_reduce_by_n(base_inode, unref_base_inode);
++    UNLOCK(&priv->lock);
++}
++
++int /* Unwinds a rename using the results accumulated in frame->local; takes no fop result arguments despite the _cbk name; always returns 0. */
++shard_rename_cbk(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
++                       &local->prebuf, &local->preoldparent,
++                       &local->postoldparent, &local->prenewparent,
++                       &local->postnewparent, local->xattr_rsp);
++    return 0;
++}
++
++int32_t /* Unwinds an unlink using the results accumulated in frame->local; takes no fop result arguments despite the _cbk name; always returns 0. */
++shard_unlink_cbk(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = frame->local;
++
++    SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
++                       &local->preoldparent, &local->postoldparent,
++                       local->xattr_rsp);
++    return 0;
++}
++
++int /* Per-shard unlink callback: records a failure in local or drops the shard's in-memory state, then wakes the barrier; always returns 0. */
++shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno,
++                           struct iatt *preparent, struct iatt *postparent,
++                           dict_t *xdata)
++{
++    int shard_block_num = (long)cookie; /* the cookie carries the shard block number passed by shard_unlink_shards_do */
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (op_ret < 0) { /* a failure overwrites whatever op_ret/op_errno local held before */
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto done;
++    }
++
++    shard_unlink_block_inode(local, shard_block_num);
++done:
++    syncbarrier_wake(&local->barrier); /* reached on both success and failure */
++    return 0;
++}
++
++int /* Winds one unlink per resolved shard in local->inode_list and waits on local->barrier for all of them; always returns 0 (failures are left in local->op_ret/op_errno). */
++shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++    int i = 0;
++    int ret = -1;
++    int count = 0;
++    uint32_t cur_block = 0;
++    uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */
++    char *bname = NULL;
++    char path[PATH_MAX] = {
++        0,
++    };
++    uuid_t gfid = {
++        0,
++    };
++    loc_t loc = {
++        0,
++    };
++    gf_boolean_t wind_failed = _gf_false;
++    shard_local_t *local = NULL;
++    shard_priv_t *priv = NULL;
++
++    priv = this->private;
++    local = frame->local;
++
++    if (inode) /* base inode may be NULL; then the gfid saved in local is used */
++        gf_uuid_copy(gfid, inode->gfid);
++    else
++        gf_uuid_copy(gfid, local->base_gfid);
++
++    for (i = 0; i < local->num_blocks; i++) { /* count the shards that were actually resolved (non-NULL entries) */
++        if (!local->inode_list[i])
++            continue;
++        count++;
++    }
++
++    if (!count) {
++        /* callcount = 0 implies that all of the shards that need to be
++         * unlinked are non-existent (in other words the file is full of
++         * holes).
++         */
++        gf_msg_debug(this->name, 0,
++                     "All shards that need to be "
++                     "unlinked are non-existent: %s",
++                     uuid_utoa(gfid));
++        return 0;
++    }
++
++    SHARD_SET_ROOT_FS_ID(frame, local);
++    local->barrier.waitfor = count;
++    cur_block = cur_block_idx + local->first_block; /* cur_block_idx is 0 here, so this starts at first_block */
++
++    while (cur_block_idx < local->num_blocks) {
++        if (!local->inode_list[cur_block_idx]) /* unresolved shard: nothing to unlink and it was not counted */
++            goto next;
++
++        if (wind_failed) { /* after an earlier path failure no more unlinks are wound; the cbk is called directly so the barrier still gets one wake per counted shard */
++            shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
++                                       ENOMEM, NULL, NULL, NULL);
++            goto next;
++        }
++
++        shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
++        bname = strrchr(path, '/') + 1; /* assumes the generated path contains a '/' — confirm against shard_make_block_abspath */
++        loc.parent = inode_ref(priv->dot_shard_inode);
++        ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++        if (ret < 0) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++                   "Inode path failed"
++                   " on %s, base file gfid = %s",
++                   bname, uuid_utoa(gfid));
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            loc_wipe(&loc);
++            wind_failed = _gf_true;
++            shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
++                                       ENOMEM, NULL, NULL, NULL);
++            goto next;
++        }
++
++        loc.name = strrchr(loc.path, '/');
++        if (loc.name)
++            loc.name++;
++        loc.inode = inode_ref(local->inode_list[cur_block_idx]);
++
++        STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk,
++                          (void *)(long)cur_block, FIRST_CHILD(this), /* block number travels to the cbk as the cookie */
++                          FIRST_CHILD(this)->fops->unlink, &loc, local->xflag,
++                          local->xattr_req);
++        loc_wipe(&loc); /* loc is reused on the next iteration */
++    next:
++        cur_block++;
++        cur_block_idx++;
++    }
++    syncbarrier_wait(&local->barrier, count); /* one cbk, and hence one syncbarrier_wake, is issued per counted shard */
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
++    return 0;
++}
++
++int /* Deletes one batch of `now` shards starting at first_block for the base gfid named by entry->d_name; returns 0 or a negative errno. */
++shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this,
++                                int now, int first_block, gf_dirent_t *entry)
++{
++    int i = 0;
++    int ret = 0;
++    shard_local_t *local = NULL;
++    uuid_t gfid = {
++        0,
++    };
++
++    local = cleanup_frame->local;
++
++    local->inode_list = GF_CALLOC(now, sizeof(inode_t *), /* one slot per shard in this batch */
++                                  gf_shard_mt_inode_list);
++    if (!local->inode_list)
++        return -ENOMEM;
++
++    local->first_block = first_block;
++    local->last_block = first_block + now - 1;
++    local->num_blocks = now;
++    gf_uuid_parse(entry->d_name, gfid); /* the entry name is parsed as the base file's gfid; return value is not checked */
++    gf_uuid_copy(local->base_gfid, gfid);
++    local->resolver_base_inode = inode_find(this->itable, gfid); /* result is not checked here; callees in this chunk fall back to base_gfid when it is NULL */
++    local->call_count = 0;
++    ret = syncbarrier_init(&local->barrier);
++    if (ret) {
++        GF_FREE(local->inode_list);
++        local->inode_list = NULL;
++        inode_unref(local->resolver_base_inode);
++        local->resolver_base_inode = NULL;
++        return -errno; /* NOTE(review): relies on syncbarrier_init having set errno — confirm */
++    }
++    shard_common_resolve_shards(cleanup_frame, this, /* results are consumed right below, so this call is presumably synchronous in this context — confirm */
++                                shard_post_resolve_unlink_handler);
++
++    for (i = 0; i < local->num_blocks; i++) { /* drop the refs still held in inode_list */
++        if (local->inode_list[i])
++            inode_unref(local->inode_list[i]);
++    }
++    GF_FREE(local->inode_list);
++    local->inode_list = NULL;
++    if (local->op_ret) /* a failure recorded in local becomes the negative-errno return value */
++        ret = -local->op_errno;
++    syncbarrier_destroy(&local->barrier);
++    inode_unref(local->resolver_base_inode);
++    local->resolver_base_inode = NULL;
++    STACK_RESET(cleanup_frame->root);
++    return ret;
++}
++
++int /* Deletes the shards belonging to one marker entry in batches, then unlinks the marker itself; returns 0 or a non-zero error value from the failing step. */
++__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
++                               gf_dirent_t *entry, inode_t *inode)
++{
++    int ret = 0;
++    int shard_count = 0;
++    int first_block = 0;
++    int now = 0;
++    uint64_t size = 0;
++    uint64_t block_size = 0;
++    uint64_t size_array[4] = {
++        0,
++    };
++    void *bsize = NULL;
++    void *size_attr = NULL;
++    dict_t *xattr_rsp = NULL;
++    loc_t loc = {
++        0,
++    };
++    shard_local_t *local = NULL;
++    shard_priv_t *priv = NULL;
++
++    priv = this->private;
++    local = cleanup_frame->local;
++    ret = dict_reset(local->xattr_req); /* the request dict is reused across entries, so clear it first */
++    if (ret) {
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to reset dict");
++        ret = -ENOMEM;
++        goto err;
++    }
++
++    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); /* ask the lookup below to return the block-size xattr */
++    if (ret) {
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
++        ret = -ENOMEM;
++        goto err;
++    }
++
++    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); /* 8 * 4 = 32, the same byte count as size_array (4 x uint64_t) */
++    if (ret) {
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
++        ret = -ENOMEM;
++        goto err;
++    }
++
++    loc.inode = inode_ref(inode); /* loc for the marker file: entry->d_name under the .remove_me inode */
++    loc.parent = inode_ref(priv->dot_shard_rm_inode);
++    ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++               "Inode path  failed on %s", entry->d_name);
++        ret = -ENOMEM;
++        goto err;
++    }
++
++    loc.name = strrchr(loc.path, '/');
++    if (loc.name)
++        loc.name++;
++    ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req,
++                        &xattr_rsp);
++    if (ret) /* lookup failure is returned to the caller unchanged */
++        goto err;
++
++    ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
++        goto err;
++    }
++    block_size = ntoh64(*((uint64_t *)bsize)); /* xattr value is converted with ntoh64 before use */
++
++    ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
++        goto err;
++    }
++
++    memcpy(size_array, size_attr, sizeof(size_array));
++    size = ntoh64(size_array[0]); /* only element 0 of the size xattr is used here, as the file size */
++
++    shard_count = (size / block_size) - 1; /* NOTE(review): no guard for block_size == 0 before this division */
++    if (shard_count < 0) {
++        gf_msg_debug(this->name, 0,
++                     "Size of %s hasn't grown beyond "
++                     "its shard-block-size. Nothing to delete. "
++                     "Returning",
++                     entry->d_name);
++        /* File size < shard-block-size, so nothing to delete */
++        ret = 0;
++        goto delete_marker;
++    }
++    if ((size % block_size) > 0) /* a partial trailing block counts as one more shard */
++        shard_count++;
++
++    if (shard_count == 0) {
++        gf_msg_debug(this->name, 0,
++                     "Size of %s is exactly equal to "
++                     "its shard-block-size. Nothing to delete. "
++                     "Returning",
++                     entry->d_name);
++        ret = 0;
++        goto delete_marker;
++    }
++    gf_msg_debug(this->name, 0,
++                 "base file = %s, "
++                 "shard-block-size=%" PRIu64 ", file-size=%" PRIu64
++                 ", "
++                 "shard_count=%d",
++                 entry->d_name, block_size, size, shard_count);
++
++    /* Perform a gfid-based lookup to see if gfid corresponding to marker
++     * file's base name exists.
++     */
++    loc_wipe(&loc);
++    loc.inode = inode_new(this->itable);
++    if (!loc.inode) {
++        ret = -ENOMEM;
++        goto err;
++    }
++    gf_uuid_parse(entry->d_name, loc.gfid); /* the marker's name is parsed as the base file's gfid; return value is not checked */
++    ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
++    if (!ret) { /* lookup succeeded, i.e. the base file still exists: skip shard deletion and only remove the marker */
++        gf_msg_debug(this->name, 0,
++                     "Base shard corresponding to gfid "
++                     "%s is present. Skipping shard deletion. "
++                     "Returning",
++                     entry->d_name);
++        ret = 0;
++        goto delete_marker;
++    }
++
++    first_block = 1; /* batches start at block 1; block 0 is not part of the range handled by this loop */
++
++    while (shard_count) { /* delete in batches of at most local->deletion_rate shards */
++        if (shard_count < local->deletion_rate) {
++            now = shard_count;
++            shard_count = 0;
++        } else {
++            now = local->deletion_rate;
++            shard_count -= local->deletion_rate;
++        }
++
++        gf_msg_debug(this->name, 0,
++                     "deleting %d shards starting from "
++                     "block %d of gfid %s",
++                     now, first_block, entry->d_name);
++        ret = shard_regulated_shards_deletion(cleanup_frame, this, now,
++                                              first_block, entry);
++        if (ret) /* a failed batch aborts the loop and leaves the marker in place */
++            goto err;
++        first_block += now;
++    }
++
++delete_marker:
++    loc_wipe(&loc); /* rebuild the marker loc; the earlier one may have been replaced by the gfid-based loc */
++    loc.inode = inode_ref(inode);
++    loc.parent = inode_ref(priv->dot_shard_rm_inode);
++    ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++               "Inode path  failed on %s", entry->d_name);
++        ret = -ENOMEM;
++        goto err;
++    }
++    loc.name = strrchr(loc.path, '/');
++    if (loc.name)
++        loc.name++;
++    ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL);
++    if (ret) /* the unlink failure is logged and also returned through ret */
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED,
++               "Failed to delete %s "
++               "from /%s",
++               entry->d_name, GF_SHARD_REMOVE_ME_DIR);
++err: /* shared exit: reached on success as well as on failure */
++    if (xattr_rsp)
++        dict_unref(xattr_rsp);
++    loc_wipe(&loc);
++    return ret;
++}
++
++int /* Wraps __shard_delete_shards_of_entry in a non-blocking entry write-lock on entry->d_name under the .remove_me inode; returns 0 or a negative error. */
++shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
++                             gf_dirent_t *entry, inode_t *inode)
++{
++    int ret = -1;
++    loc_t loc = {
++        0,
++    };
++    shard_priv_t *priv = NULL;
++
++    priv = this->private;
++    loc.inode = inode_ref(priv->dot_shard_rm_inode);
++
++    ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
++                         ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
++    if (ret < 0) {
++        if (ret == -EAGAIN) { /* lock not granted without blocking: skip this entry and report success */
++            ret = 0;
++        }
++        goto out;
++    }
++    {
++        ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode);
++    }
++    syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, /* unlock result is ignored; ret keeps the deletion outcome */
++                   ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL);
++out:
++    loc_wipe(&loc);
++    return ret;
++}
++
++int /* Completion callback for the shard-deletion task: destroys the frame and returns 0; ret and data are unused. */
++shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data)
++{
++    SHARD_STACK_DESTROY(frame);
++    return 0;
++}
++
++int /* Resolves the inode of an internal directory (.shard or .remove_me) into the matching loc in local, looking it up on the child if it is not in the inode table; returns 0 or a negative error. */
++shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
++                           shard_internal_dir_type_t type)
++{
++    int ret = 0;
++    char *bname = NULL;
++    loc_t *loc = NULL;
++    shard_priv_t *priv = NULL;
++    uuid_t gfid = {
++        0,
++    };
++    struct iatt stbuf = {
++        0,
++    };
++
++    priv = this->private;
++
++    switch (type) { /* pick the loc, gfid and name for the requested directory */
++        case SHARD_INTERNAL_DIR_DOT_SHARD:
++            loc = &local->dot_shard_loc;
++            gf_uuid_copy(gfid, priv->dot_shard_gfid);
++            bname = GF_SHARD_DIR;
++            break;
++        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++            loc = &local->dot_shard_rm_loc;
++            gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
++            bname = GF_SHARD_REMOVE_ME_DIR;
++            break;
++        default: /* NOTE(review): loc stays NULL for an unknown type and is dereferenced right after the switch */
++            break;
++    }
++
++    loc->inode = inode_find(this->itable, gfid);
++    if (!loc->inode) { /* not in the inode table yet: initialise the loc and look the directory up on the child */
++        ret = shard_init_internal_dir_loc(this, local, type);
++        if (ret)
++            goto err;
++        ret = dict_reset(local->xattr_req);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++                   "Failed to reset "
++                   "dict");
++            ret = -ENOMEM;
++            goto err;
++        }
++        ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); /* NOTE(review): this result is overwritten by the lookup on the next line without being checked */
++        ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL,
++                            local->xattr_req, NULL);
++        if (ret < 0) {
++            if (ret != -ENOENT) /* -ENOENT is returned silently; other errors are logged with the positive errno */
++                gf_msg(this->name, GF_LOG_ERROR, -ret,
++                       SHARD_MSG_SHARDS_DELETION_FAILED,
++                       "Lookup on %s failed, exiting", bname);
++            goto err;
++        } else {
++            shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
++        }
++    }
++    ret = 0;
++err:
++    return ret;
++}
++
++int /* Looks up one marker entry by name under local->fd's inode and, on success, stores a referenced inode in entry->inode; returns 0 or a negative error. */
++shard_lookup_marker_entry(xlator_t *this, shard_local_t *local,
++                          gf_dirent_t *entry)
++{
++    int ret = 0;
++    loc_t loc = {
++        0,
++    };
++
++    loc.inode = inode_new(this->itable);
++    if (!loc.inode) {
++        ret = -ENOMEM;
++        goto err;
++    }
++    loc.parent = inode_ref(local->fd->inode); /* the parent is the inode behind the fd held in local */
++
++    ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++               "Inode path failed on %s", entry->d_name);
++        ret = -ENOMEM;
++        goto err;
++    }
++
++    loc.name = strrchr(loc.path, '/');
++    if (loc.name)
++        loc.name++;
++
++    ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
++    if (ret < 0) { /* lookup failure is returned unchanged; entry->inode is not set */
++        goto err;
++    }
++    entry->inode = inode_ref(loc.inode); /* entry takes its own ref; loc_wipe below presumably releases the refs held by loc — confirm */
++    ret = 0;
++err: /* shared exit: reached on success as well as on failure */
++    loc_wipe(&loc);
++    return ret;
++}
++
++int
++shard_delete_shards(void *opaque)
++{
++    int ret = 0;
++    off_t offset = 0;
++    loc_t loc = {
++        0,
++    };
++    inode_t *link_inode = NULL;
++    xlator_t *this = NULL;
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++    gf_dirent_t entries;
++    gf_dirent_t *entry = NULL;
++    call_frame_t *cleanup_frame = NULL;
++    gf_boolean_t done = _gf_false;
++
++    this = THIS;
++    priv = this->private;
++    INIT_LIST_HEAD(&entries.list);
++
++    cleanup_frame = opaque;
++
++    local = mem_get0(this->local_pool);
++    if (!local) {
++        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++               "Failed to create local to "
++               "delete shards");
++        ret = -ENOMEM;
++        goto err;
++    }
++    cleanup_frame->local = local;
++    local->fop = GF_FOP_UNLINK;
++
++    local->xattr_req = dict_new();
++    if (!local->xattr_req) {
++        ret = -ENOMEM;
++        goto err;
++    }
++    local->deletion_rate = priv->deletion_rate;
++
++    ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
++    if (ret == -ENOENT) {
++        gf_msg_debug(this->name, 0,
++                     ".shard absent. Nothing to"
++                     " delete. Exiting");
++        ret = 0;
++        goto err;
++    } else if (ret < 0) {
++        goto err;
++    }
++
++    ret = shard_resolve_internal_dir(this, local,
++                                     SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++    if (ret == -ENOENT) {
++        gf_msg_debug(this->name, 0,
++                     ".remove_me absent. "
++                     "Nothing to delete. Exiting");
++        ret = 0;
++        goto err;
++    } else if (ret < 0) {
++        goto err;
++    }
++
++    local->fd = fd_anonymous(local->dot_shard_rm_loc.inode);
++    if (!local->fd) {
++        ret = -ENOMEM;
++        goto err;
++    }
++
++    for (;;) {
++        offset = 0;
++        LOCK(&priv->lock);
++        {
++            if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
++                priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
++            } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
++                priv->bg_del_state = SHARD_BG_DELETION_NONE;
++                done = _gf_true;
++            }
++        }
++        UNLOCK(&priv->lock);
++        if (done)
++            break;
++        while (
++            (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
++                                   &entries, local->xattr_req, NULL))) {
++            if (ret > 0)
++                ret = 0;
++            list_for_each_entry(entry, &entries.list, list)
++            {
++                offset = entry->d_off;
++
++                if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
++                    continue;
++
++                if (!entry->inode) {
++                    ret = shard_lookup_marker_entry(this, local, entry);
++                    if (ret < 0)
++                        continue;
++                }
++                link_inode = inode_link(entry->inode, local->fd->inode,
++                                        entry->d_name, &entry->d_stat);
++
++                gf_msg_debug(this->name, 0,
++                             "Initiating deletion of "
++                             "shards of gfid %s",
++                             entry->d_name);
++                ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
++                                                   link_inode);
++                inode_unlink(link_inode, local->fd->inode, entry->d_name);
++                inode_unref(link_inode);
++                if (ret) {
++                    gf_msg(this->name, GF_LOG_ERROR, -ret,
++                           SHARD_MSG_SHARDS_DELETION_FAILED,
++                           "Failed to clean up shards of gfid %s",
++                           entry->d_name);
++                    continue;
++                }
++                gf_msg(this->name, GF_LOG_INFO, 0,
++                       SHARD_MSG_SHARD_DELETION_COMPLETED,
++                       "Deleted "
++                       "shards of gfid=%s from backend",
++                       entry->d_name);
++            }
++            gf_dirent_free(&entries);
++            if (ret)
++                break;
++        }
++    }
++    ret = 0;
++    loc_wipe(&loc);
++    return ret;
++
++err:
++    LOCK(&priv->lock);
++    {
++        priv->bg_del_state = SHARD_BG_DELETION_NONE;
++    }
++    UNLOCK(&priv->lock);
++    loc_wipe(&loc);
++    return ret;
++}
++
++int
++shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++    if (op_ret)
++        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++               "Unlock failed. Please check brick logs for "
++               "more details");
++    SHARD_STACK_DESTROY(frame);
++    return 0;
++}
++
++int
++shard_unlock_inodelk(call_frame_t *frame, xlator_t *this)
++{
++    loc_t *loc = NULL;
++    call_frame_t *lk_frame = NULL;
++    shard_local_t *local = NULL;
++    shard_local_t *lk_local = NULL;
++    shard_inodelk_t *lock = NULL;
++    /* Wind an F_UNLCK inodelk on the lock frame saved in frame->local. */
++    local = frame->local;
++    lk_frame = local->inodelk_frame;
++    lk_local = lk_frame->local;
++    local->inodelk_frame = NULL;
++    loc = &local->int_inodelk.loc;
++    lock = &lk_local->int_inodelk;
++    lock->flock.l_type = F_UNLCK;
++    /* loc comes from the main local; domain and flock from the lock frame. */
++    STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK,
++               &lock->flock, NULL);
++    local->int_inodelk.acquired_lock = _gf_false;
++    return 0; /* always 0; the unlock result reaches only the cbk */
++}
++
++int
++shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                     int32_t op_ret, int32_t op_errno, struct iatt *buf,
++                     struct iatt *preoldparent, struct iatt *postoldparent,
++                     struct iatt *prenewparent, struct iatt *postnewparent,
++                     dict_t *xdata);
++int
++shard_rename_src_base_file(call_frame_t *frame, xlator_t *this)
++{
++    int ret = 0;
++    loc_t *dst_loc = NULL;
++    loc_t tmp_loc = {
++        0,
++    };
++    shard_local_t *local = frame->local;
++
++    if (local->dst_block_size) {
++        tmp_loc.parent = inode_ref(local->loc2.parent);
++        ret = inode_path(tmp_loc.parent, local->loc2.name,
++                         (char **)&tmp_loc.path);
++        if (ret < 0) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++                   "Inode path failed"
++                   " on pargfid=%s bname=%s",
++                   uuid_utoa(tmp_loc.parent->gfid), local->loc2.name);
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            goto err;
++        }
++
++        tmp_loc.name = strrchr(tmp_loc.path, '/');
++        if (tmp_loc.name)
++            tmp_loc.name++;
++        dst_loc = &tmp_loc;
++    } else {
++        dst_loc = &local->loc2;
++    }
++
++    /* To-Do: Request open-fd count on dst base file */
++    STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc,
++               local->xattr_req);
++    loc_wipe(&tmp_loc);
++    return 0;
++err:
++    loc_wipe(&tmp_loc);
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
++    return 0;
++}
++
++int
++shard_unlink_base_file(call_frame_t *frame, xlator_t *this);
++
++int
++shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie,
++                                        xlator_t *this, int32_t op_ret,
++                                        int32_t op_errno, dict_t *dict,
++                                        dict_t *xdata)
++{
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++
++    priv = this->private;
++    local = frame->local;
++    if (op_ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++               "Xattrop on marker file failed "
++               "while performing %s; entry gfid=%s",
++               gf_fop_string(local->fop), local->newloc.name);
++        goto err;
++    }
++
++    inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode,
++                 local->newloc.name);
++
++    if (local->fop == GF_FOP_UNLINK)
++        shard_unlink_base_file(frame, this);
++    else if (local->fop == GF_FOP_RENAME)
++        shard_rename_src_base_file(frame, this);
++    return 0;
++err:
++    shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
++    return 0;
++}
++
++int
++shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this)
++{
++    int op_errno = ENOMEM;
++    uint64_t bs = 0;
++    dict_t *xdata = NULL;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++    xdata = dict_new();
++    if (!xdata)
++        goto err;
++
++    if (local->fop == GF_FOP_UNLINK)
++        bs = local->block_size;
++    else if (local->fop == GF_FOP_RENAME)
++        bs = local->dst_block_size;
++    SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc,
++                            local->prebuf.ia_size, 0, err);
++    STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk,
++               FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop,
++               &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL);
++    dict_unref(xdata);
++    return 0;
++err:
++    if (xdata)
++        dict_unref(xdata);
++    shard_common_failure_unwind(local->fop, frame, -1, op_errno);
++    return 0;
++}
++
++int
++shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                             int32_t op_ret, int32_t op_errno, inode_t *inode,
++                             struct iatt *buf, dict_t *xdata,
++                             struct iatt *postparent)
++{
++    inode_t *linked_inode = NULL;
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++    priv = this->private;
++
++    if (op_ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++               "Lookup on marker file failed "
++               "while performing %s; entry gfid=%s",
++               gf_fop_string(local->fop), local->newloc.name);
++        goto err;
++    }
++
++    linked_inode = inode_link(inode, priv->dot_shard_rm_inode,
++                              local->newloc.name, buf);
++    inode_unref(local->newloc.inode);
++    local->newloc.inode = linked_inode;
++    shard_set_size_attrs_on_marker_file(frame, this);
++    return 0;
++err:
++    shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
++    return 0;
++}
++
++int
++shard_lookup_marker_file(call_frame_t *frame, xlator_t *this)
++{
++    int op_errno = ENOMEM;
++    dict_t *xattr_req = NULL;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    xattr_req = shard_create_gfid_dict(local->xattr_req);
++    if (!xattr_req)
++        goto err;
++
++    STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req);
++    dict_unref(xattr_req);
++    return 0;
++err:
++    shard_common_failure_unwind(local->fop, frame, -1, op_errno);
++    return 0;
++}
++
++int
++shard_create_marker_file_under_remove_me_cbk(
++    call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
++    int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent,
++    struct iatt *postparent, dict_t *xdata)
++{
++    inode_t *linked_inode = NULL;
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++    priv = this->private;
++
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
++    if (op_ret < 0) {
++        if ((op_errno != EEXIST) && (op_errno != ENODATA)) {
++            local->op_ret = op_ret;
++            local->op_errno = op_errno;
++            gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++                   "Marker file creation "
++                   "failed while performing %s; entry gfid=%s",
++                   gf_fop_string(local->fop), local->newloc.name);
++            goto err;
++        } else {
++            shard_lookup_marker_file(frame, this);
++            return 0;
++        }
++    }
++
++    linked_inode = inode_link(inode, priv->dot_shard_rm_inode,
++                              local->newloc.name, buf);
++    inode_unref(local->newloc.inode);
++    local->newloc.inode = linked_inode;
++
++    if (local->fop == GF_FOP_UNLINK)
++        shard_unlink_base_file(frame, this);
++    else if (local->fop == GF_FOP_RENAME)
++        shard_rename_src_base_file(frame, this);
++    return 0;
++err:
++    shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
++    return 0;
++}
++
++int
++shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this,
++                                         loc_t *loc)
++{
++    int ret = 0;
++    int op_errno = ENOMEM;
++    uint64_t bs = 0;
++    char g1[64] = {
++        0,
++    };
++    char g2[64] = {
++        0,
++    };
++    dict_t *xattr_req = NULL;
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++
++    priv = this->private;
++    local = frame->local;
++
++    SHARD_SET_ROOT_FS_ID(frame, local);
++
++    xattr_req = shard_create_gfid_dict(local->xattr_req);
++    if (!xattr_req)
++        goto err;
++
++    local->newloc.inode = inode_new(this->itable);
++    local->newloc.parent = inode_ref(priv->dot_shard_rm_inode);
++    ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid),
++                     (char **)&local->newloc.path);
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++               "Inode path failed on "
++               "pargfid=%s bname=%s",
++               uuid_utoa_r(priv->dot_shard_rm_gfid, g1),
++               uuid_utoa_r(loc->inode->gfid, g2));
++        goto err;
++    }
++    local->newloc.name = strrchr(local->newloc.path, '/');
++    if (local->newloc.name)
++        local->newloc.name++;
++
++    if (local->fop == GF_FOP_UNLINK)
++        bs = local->block_size;
++    else if (local->fop == GF_FOP_RENAME)
++        bs = local->dst_block_size;
++
++    SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
++                            local->prebuf.ia_size, 0, err);
++
++    STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
++               FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
++               &local->newloc, 0, 0, 0644, xattr_req);
++    dict_unref(xattr_req);
++    return 0;
++
++err:
++    if (xattr_req)
++        dict_unref(xattr_req);
++    shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno,
++                                                 NULL, NULL, NULL, NULL, NULL);
++    return 0;
++}
++
++int
++shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
++
++int
++shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno,
++                           struct iatt *preparent, struct iatt *postparent,
++                           dict_t *xdata)
++{
++    int ret = 0;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (op_ret < 0) {
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++    } else {
++        shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
++        local->preoldparent = *preparent;
++        local->postoldparent = *postparent;
++        if (xdata)
++            local->xattr_rsp = dict_ref(xdata);
++        if (local->cleanup_required)
++            shard_start_background_deletion(this);
++    }
++
++    if (local->entrylk_frame) {
++        ret = shard_unlock_entrylk(frame, this);
++        if (ret < 0) {
++            local->op_ret = -1;
++            local->op_errno = -ret;
++        }
++    }
++
++    ret = shard_unlock_inodelk(frame, this);
++    if (ret < 0) {
++        local->op_ret = -1;
++        local->op_errno = -ret;
++    }
++
++    shard_unlink_cbk(frame, this);
++    return 0;
++}
++
++int
++shard_unlink_base_file(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = frame->local;
++
++    /* To-Do: Request open-fd count on base file */
++    STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
++               local->xattr_req);
++    return 0;
++}
++
++int
++shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++    if (op_ret)
++        gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
++               "Unlock failed. Please check brick logs for "
++               "more details");
++    SHARD_STACK_DESTROY(frame);
++    return 0;
++}
++
++int
++shard_unlock_entrylk(call_frame_t *frame, xlator_t *this)
++{
++    loc_t *loc = NULL;
++    call_frame_t *lk_frame = NULL;
++    shard_local_t *local = NULL;
++    shard_local_t *lk_local = NULL;
++    shard_entrylk_t *lock = NULL;
++    /* Wind ENTRYLK_UNLOCK on the lock frame saved in frame->local. */
++    local = frame->local;
++    lk_frame = local->entrylk_frame;
++    lk_local = lk_frame->local;
++    local->entrylk_frame = NULL;
++    lock = &lk_local->int_entrylk;
++    loc = &lock->loc;
++    /* loc and basename are the ones stored in the lock frame's local. */
++    STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->entrylk, this->name, loc,
++               lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK,
++               NULL);
++    local->int_entrylk.acquired_lock = _gf_false;
++    return 0; /* always 0; the unlock result reaches only the cbk */
++}
++
++int
++shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    switch (local->fop) {
++        case GF_FOP_UNLINK:
++        case GF_FOP_RENAME:
++            shard_create_marker_file_under_remove_me(frame, this,
++                                                     &local->int_inodelk.loc);
++            break;
++        default:
++            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++                   "post-entrylk handler not defined. This case should not"
++                   " be hit");
++            break;
++    }
++    return 0;
++}
++
++int
++shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++    call_frame_t *main_frame = NULL;
++    shard_local_t *local = NULL;
++    shard_local_t *main_local = NULL;
++    /* Entrylk reply on the lock frame: fail the main fop or continue it. */
++    local = frame->local;
++    main_frame = local->main_frame;
++    main_local = main_frame->local;
++    /* Check the reply itself; op_ret is not stored in this frame's local. */
++    if (op_ret < 0) {
++        shard_common_failure_unwind(main_local->fop, main_frame, op_ret,
++                                    op_errno);
++        return 0;
++    }
++    main_local->int_entrylk.acquired_lock = _gf_true;
++    shard_post_entrylk_fop_handler(main_frame, this);
++    return 0;
++}
++
++int
++shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
++                      uuid_t gfid)
++{
++    char gfid_str[GF_UUID_BUF_SIZE] = {
++        0,
++    };
++    shard_local_t *local = NULL;
++    shard_local_t *entrylk_local = NULL;
++    shard_entrylk_t *int_entrylk = NULL;
++    call_frame_t *entrylk_frame = NULL;
++
++    local = frame->local;
++    entrylk_frame = create_frame(this, this->ctx->pool);
++    if (!entrylk_frame) {
++        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++               "Failed to create new frame "
++               "to lock marker file");
++        goto err;
++    }
++
++    entrylk_local = mem_get0(this->local_pool);
++    if (!entrylk_local) {
++        STACK_DESTROY(entrylk_frame->root);
++        goto err;
++    }
++
++    entrylk_frame->local = entrylk_local;
++    entrylk_local->main_frame = frame;
++    int_entrylk = &entrylk_local->int_entrylk;
++
++    int_entrylk->loc.inode = inode_ref(inode);
++    set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root);
++    local->entrylk_frame = entrylk_frame;
++    gf_uuid_unparse(gfid, gfid_str);
++    int_entrylk->basename = gf_strdup(gfid_str);
++
++    STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc,
++               int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
++    return 0;
++err:
++    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++    return 0;
++}
++
++int
++shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++    shard_priv_t *priv = NULL;
++
++    priv = this->private;
++    local = frame->local;
++
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
++        return 0;
++    }
++
++    if (local->prebuf.ia_nlink > 1) {
++        gf_msg_debug(this->name, 0,
++                     "link count on %s > 1:%d, "
++                     "performing rename()/unlink()",
++                     local->int_inodelk.loc.path, local->prebuf.ia_nlink);
++        if (local->fop == GF_FOP_RENAME)
++            shard_rename_src_base_file(frame, this);
++        else if (local->fop == GF_FOP_UNLINK)
++            shard_unlink_base_file(frame, this);
++    } else {
++        gf_msg_debug(this->name, 0,
++                     "link count on %s = 1, creating "
++                     "file under .remove_me",
++                     local->int_inodelk.loc.path);
++        local->cleanup_required = _gf_true;
++        shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode,
++                              local->prebuf.ia_gfid);
++    }
++    return 0;
++}
++
++int
++shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    switch (local->fop) {
++        case GF_FOP_UNLINK:
++        case GF_FOP_RENAME:
++            shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
++                                   shard_post_lookup_base_shard_rm_handler);
++            break;
++        default:
++            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++                   "post-inodelk handler not defined. This case should not"
++                   " be hit");
++            break;
++    }
++    return 0;
++}
++
++int
++shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                          int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++    call_frame_t *main_frame = NULL;
++    shard_local_t *local = NULL;
++    shard_local_t *main_local = NULL;
++    /* Inodelk reply on the lock frame: fail the main fop or continue it. */
++    local = frame->local;
++    main_frame = local->main_frame;
++    main_local = main_frame->local;
++    /* Check the reply itself; op_ret is not stored in this frame's local. */
++    if (op_ret < 0) {
++        shard_common_failure_unwind(main_local->fop, main_frame, op_ret,
++                                    op_errno);
++        return 0;
++    }
++    main_local->int_inodelk.acquired_lock = _gf_true;
++    shard_post_inodelk_fop_handler(main_frame, this);
++    return 0;
++}
++
++int
++shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc)
++{
++    call_frame_t *lk_frame = NULL;
++    shard_local_t *local = NULL;
++    shard_local_t *lk_local = NULL;
++    shard_inodelk_t *int_inodelk = NULL;
++
++    local = frame->local;
++    lk_frame = create_frame(this, this->ctx->pool);
++    if (!lk_frame) {
++        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++               "Failed to create new frame "
++               "to lock base shard");
++        goto err;
++    }
++    lk_local = mem_get0(this->local_pool);
++    if (!lk_local) {
++        STACK_DESTROY(lk_frame->root);
++        goto err;
++    }
++
++    lk_frame->local = lk_local;
++    lk_local->main_frame = frame;
++    int_inodelk = &lk_local->int_inodelk;
++
++    int_inodelk->flock.l_len = 0;
++    int_inodelk->flock.l_start = 0;
++    int_inodelk->domain = this->name;
++    int_inodelk->flock.l_type = F_WRLCK;
++    loc_copy(&local->int_inodelk.loc, loc);
++    set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root);
++    local->inodelk_frame = lk_frame;
++
++    STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain,
++               &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL);
++    return 0;
++err:
++    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++    return 0;
++}
++
++int
++shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this)
++{
++    loc_t *loc = NULL;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
++        return 0;
++    }
++    if (local->fop == GF_FOP_UNLINK)
++        loc = &local->loc;
++    else if (local->fop == GF_FOP_RENAME)
++        loc = &local->loc2;
++    shard_acquire_inodelk(frame, this, loc);
++    return 0;
++}
++
++int
++shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
++                         shard_post_resolve_fop_handler_t handler,
++                         shard_internal_dir_type_t type);
++int
++shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
++        return 0;
++    }
++    shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler,
++                             SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++    return 0;
++}
++
++void
++shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this)
++{
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++    /* Resolve the .remove_me dir, going via .shard when it is not in itable. */
++    priv = this->private;
++    local = frame->local;
++
++    local->dot_shard_rm_loc.inode = inode_find(this->itable,
++                                               priv->dot_shard_rm_gfid);
++    if (!local->dot_shard_rm_loc.inode) { /* .remove_me not in itable */
++        local->dot_shard_loc.inode = inode_find(this->itable,
++                                                priv->dot_shard_gfid);
++        if (!local->dot_shard_loc.inode) { /* .shard not in itable either */
++            shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler,
++                                     SHARD_INTERNAL_DIR_DOT_SHARD);
++        } else {
++            local->post_res_handler = shard_pre_mkdir_rm_handler;
++            shard_refresh_internal_dir(frame, this,
++                                       SHARD_INTERNAL_DIR_DOT_SHARD);
++        }
++    } else {
++        local->post_res_handler = shard_post_mkdir_rm_handler;
++        shard_refresh_internal_dir(frame, this,
++                                   SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
++    }
++}
++
++int
++shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
++             dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
++
++    ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++    if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size from inode ctx of %s",
++               uuid_utoa(loc->inode->gfid));
++        goto err;
++    }
++
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
++        return 0;
++    }
++
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
++
++    frame->local = local;
++
++    loc_copy(&local->loc, loc);
++    local->xflag = xflag;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    local->block_size = block_size;
++    local->resolver_base_inode = loc->inode;
++    local->fop = GF_FOP_UNLINK;
++    if (!this->itable)
++        this->itable = (local->loc.inode)->table;
++
++    local->resolve_not = _gf_true;
++    shard_begin_rm_resolution(frame, this);
++    return 0;
++err:
++    shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM);
++    return 0;
++}
++
++int
++shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_rename_cbk(frame, this);
++    return 0;
++}
++
++int
++shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                     int32_t op_ret, int32_t op_errno, struct iatt *buf,
++                     struct iatt *preoldparent, struct iatt *postoldparent,
++                     struct iatt *prenewparent, struct iatt *postnewparent,
++                     dict_t *xdata)
++{
++    int ret = 0;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (op_ret < 0) {
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto err;
++    }
++    /* Set ctx->refresh to TRUE to force a lookup on disk when
++     * shard_lookup_base_file() is called next to refresh the hard link
++     * count in ctx. Note that this is applicable only to the case where
++     * the rename dst is already existent and sharded.
++     */
++    if ((local->dst_block_size) && (!local->cleanup_required))
++        shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
++
++    local->prebuf = *buf;
++    local->preoldparent = *preoldparent;
++    local->postoldparent = *postoldparent;
++    local->prenewparent = *prenewparent;
++    local->postnewparent = *postnewparent;
++    if (xdata)
++        local->xattr_rsp = dict_ref(xdata);
++
++    if (local->dst_block_size) {
++        if (local->entrylk_frame) {
++            ret = shard_unlock_entrylk(frame, this);
++            if (ret < 0) {
++                local->op_ret = -1;
++                local->op_errno = -ret;
++            }
++        }
++
++        ret = shard_unlock_inodelk(frame, this);
++        if (ret < 0) {
++            local->op_ret = -1;
++            local->op_errno = -ret;
++            goto err;
++        }
++        if (local->cleanup_required)
++            shard_start_background_deletion(this);
++    }
++
++    /* Now the base file of src, if sharded, is looked up to gather ia_size
++     * and ia_blocks.*/
++    if (local->block_size) {
++        local->tmp_loc.inode = inode_new(this->itable);
++        gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
++        shard_lookup_base_file(frame, this, &local->tmp_loc,
++                               shard_post_rename_lookup_handler);
++    } else {
++        shard_rename_cbk(frame, this);
++    }
++    return 0;
++err:
++    shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                local->op_errno);
++    return 0;
++}
++
++int
++shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
++
++    /* Save dst base file attributes into postbuf so the information is not
++     * lost when it is overwritten after lookup on base file of src in
++     * shard_lookup_base_file_cbk().
++     */
++    local->postbuf = local->prebuf;
++    shard_rename_src_base_file(frame, this);
++    return 0;
++}
++
++int
++shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
++             dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    uint64_t dst_block_size = 0;
++    shard_local_t *local = NULL;
++
++    if (IA_ISDIR(oldloc->inode->ia_type)) {
++        STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
++        return 0;
++    }
++
++    ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
++    if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size from inode ctx of %s",
++               uuid_utoa(oldloc->inode->gfid));
++        goto err;
++    }
++
++    if (newloc->inode)
++        ret = shard_inode_ctx_get_block_size(newloc->inode, this,
++                                             &dst_block_size);
++
++    /* The following stack_wind covers the case where:
++     * a. the src file is not sharded and dst doesn't exist, OR
++     * b. the src and dst both exist but are not sharded.
++     */
++    if (((!block_size) && (!dst_block_size)) ||
++        frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
++        return 0;
++    }
++
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
++
++    frame->local = local;
++    loc_copy(&local->loc, oldloc);
++    loc_copy(&local->loc2, newloc);
++    local->resolver_base_inode = newloc->inode;
++    local->fop = GF_FOP_RENAME;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    if (!local->xattr_req)
++        goto err;
++
++    local->block_size = block_size;
++    local->dst_block_size = dst_block_size;
++    if (!this->itable)
++        this->itable = (local->loc.inode)->table;
++    local->resolve_not = _gf_true;
++
++    /* The following if-block covers the case where the dst file exists
++     * and is sharded.
++     */
++    if (local->dst_block_size) {
++        shard_begin_rm_resolution(frame, this);
++    } else {
++        /* The following block covers the case where the dst either doesn't
++         * exist or is NOT sharded but the src is sharded. In this case, shard
++         * xlator would go ahead and rename src to dst. Once done, it would also
++         * lookup the base shard of src to get the ia_size and ia_blocks xattr
++         * values.
++         */
++        shard_rename_src_base_file(frame, this);
++    }
++    return 0;
++
++err:
++    shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM);
++    return 0;
++}
++
++int
++shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                 int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
++                 struct iatt *stbuf, struct iatt *preparent,
++                 struct iatt *postparent, dict_t *xdata)
++{
++    int ret = -1;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (op_ret == -1)
++        goto unwind;
++
++    ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size,
++                              SHARD_ALL_MASK);
++    if (ret)
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
++               "Failed to set inode "
++               "ctx for %s",
++               uuid_utoa(inode->gfid));
++
++unwind:
++    SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
++                       preparent, postparent, xdata);
++    return 0;
++}
++
++int
++shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++             mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
++{
++    shard_priv_t *priv = NULL;
++    shard_local_t *local = NULL;
++
++    priv = this->private;
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
++
++    frame->local = local;
++    local->block_size = priv->block_size;
++
++    if (!__is_gsyncd_on_shard_dir(frame, loc)) {
++        SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
++    }
++
++    STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
++               xdata);
++    return 0;
++err:
++    shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM);
++    return 0;
++}
++
++int
++shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++               int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
++{
++    /* To-Do: Handle open with O_TRUNC under locks */
++    SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
++    return 0;
++}
++
++int
++shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
++           fd_t *fd, dict_t *xdata)
++{
++    STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
++    return 0;
++}
++
++int
++shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                   int32_t op_ret, int32_t op_errno, struct iovec *vector,
++                   int32_t count, struct iatt *stbuf, struct iobref *iobref,
++                   dict_t *xdata)
++{
++    int i = 0;
++    int call_count = 0;
++    void *address = NULL;
++    uint64_t block_num = 0;
++    off_t off = 0;
++    struct iovec vec = {
++        0,
++    };
++    shard_local_t *local = NULL;
++    fd_t *anon_fd = cookie;
++    shard_inode_ctx_t *ctx = NULL;
++
++    local = frame->local;
++
++    /* If shard has already seen a failure here before, there is no point
++     * in aggregating subsequent reads, so just go to out.
++     */
++    if (local->op_ret < 0)
++        goto out;
++
++    if (op_ret < 0) {
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto out;
++    }
++
++    if (local->op_ret >= 0)
++        local->op_ret += op_ret;
++
++    shard_inode_ctx_get(anon_fd->inode, this, &ctx);
++    block_num = ctx->block_num;
++
++    if (block_num == local->first_block) {
++        address = local->iobuf->ptr;
++    } else {
++        /* else
++         * address to start writing to = beginning of buffer +
++         *                    number of bytes until end of first block +
++         *                    + block_size times number of blocks
++         *                    between the current block and the first
++         */
++        address = (char *)local->iobuf->ptr +
++                  (local->block_size - (local->offset % local->block_size)) +
++                  ((block_num - local->first_block - 1) * local->block_size);
++    }
++
++    for (i = 0; i < count; i++) {
++        address = (char *)address + off;
++        memcpy(address, vector[i].iov_base, vector[i].iov_len);
++        off += vector[i].iov_len;
++    }
++
++out:
++    if (anon_fd)
++        fd_unref(anon_fd);
++    call_count = shard_call_count_return(frame);
++    if (call_count == 0) {
++        SHARD_UNSET_ROOT_FS_ID(frame, local);
++        if (local->op_ret < 0) {
++            shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                        local->op_errno);
++        } else {
++            if (xdata)
++                local->xattr_rsp = dict_ref(xdata);
++            vec.iov_base = local->iobuf->ptr;
++            if (local->offset + local->req_size > local->prebuf.ia_size)
++                local->total_size = local->prebuf.ia_size - local->offset;
++            vec.iov_len = local->total_size;
++            local->op_ret = local->total_size;
++            SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno,
++                               &vec, 1, &local->prebuf, local->iobref,
++                               local->xattr_rsp);
++            return 0;
++        }
++    }
++
++    return 0;
++}
++
++int
++shard_readv_do(call_frame_t *frame, xlator_t *this)
++{
++    int i = 0;
++    int call_count = 0;
++    int last_block = 0;
++    int cur_block = 0;
++    off_t orig_offset = 0;
++    off_t shard_offset = 0;
++    size_t read_size = 0;
++    size_t remaining_size = 0;
++    fd_t *fd = NULL;
++    fd_t *anon_fd = NULL;
++    shard_local_t *local = NULL;
++    gf_boolean_t wind_failed = _gf_false;
++
++    local = frame->local;
++    fd = local->fd;
++
++    orig_offset = local->offset;
++    cur_block = local->first_block;
++    last_block = local->last_block;
++    remaining_size = local->total_size;
++    local->call_count = call_count = local->num_blocks;
++
++    SHARD_SET_ROOT_FS_ID(frame, local);
++
++    if (fd->flags & O_DIRECT)
++        local->flags = O_DIRECT;
++
++    while (cur_block <= last_block) {
++        if (wind_failed) {
++            shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL,
++                               0, NULL, NULL, NULL);
++            goto next;
++        }
+ 
+-  local = frame->local;
++        shard_offset = orig_offset % local->block_size;
++        read_size = local->block_size - shard_offset;
++        if (read_size > remaining_size)
++            read_size = remaining_size;
++
++        remaining_size -= read_size;
++
++        if (cur_block == 0) {
++            anon_fd = fd_ref(fd);
++        } else {
++            anon_fd = fd_anonymous(local->inode_list[i]);
++            if (!anon_fd) {
++                local->op_ret = -1;
++                local->op_errno = ENOMEM;
++                wind_failed = _gf_true;
++                shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1,
++                                   ENOMEM, NULL, 0, NULL, NULL, NULL);
++                goto next;
++            }
++        }
+ 
+-  xattr_req = shard_create_gfid_dict(local->xattr_req);
+-  if (!xattr_req)
+-    goto err;
++        STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this),
++                          FIRST_CHILD(this)->fops->readv, anon_fd, read_size,
++                          shard_offset, local->flags, local->xattr_req);
+ 
+-  STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req);
+-  dict_unref(xattr_req);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(local->fop, frame, -1, op_errno);
+-  return 0;
++        orig_offset += read_size;
++    next:
++        cur_block++;
++        i++;
++        call_count--;
++    }
++    return 0;
+ }
+ 
+-int shard_create_marker_file_under_remove_me_cbk(
+-    call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+-    int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent,
+-    struct iatt *postparent, dict_t *xdata) {
+-  inode_t *linked_inode = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-  priv = this->private;
+-
+-  SHARD_UNSET_ROOT_FS_ID(frame, local);
+-  if (op_ret < 0) {
+-    if ((op_errno != EEXIST) && (op_errno != ENODATA)) {
+-      local->op_ret = op_ret;
+-      local->op_errno = op_errno;
+-      gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-             "Marker file creation "
+-             "failed while performing %s; entry gfid=%s",
+-             gf_fop_string(local->fop), local->newloc.name);
+-      goto err;
+-    } else {
+-      shard_lookup_marker_file(frame, this);
+-      return 0;
++int
++shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                       int32_t op_ret, int32_t op_errno, inode_t *inode,
++                       struct iatt *buf, struct iatt *preparent,
++                       struct iatt *postparent, dict_t *xdata)
++{
++    int shard_block_num = (long)cookie;
++    int call_count = 0;
++    shard_local_t *local = NULL;
++
++    local = frame->local;
++
++    if (op_ret < 0) {
++        if (op_errno == EEXIST) {
++            LOCK(&frame->lock);
++            {
++                local->eexist_count++;
++            }
++            UNLOCK(&frame->lock);
++        } else {
++            local->op_ret = op_ret;
++            local->op_errno = op_errno;
++        }
++        gf_msg_debug(this->name, 0,
++                     "mknod of shard %d "
++                     "failed: %s",
++                     shard_block_num, strerror(op_errno));
++        goto done;
+     }
+-  }
+ 
+-  linked_inode =
+-      inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf);
+-  inode_unref(local->newloc.inode);
+-  local->newloc.inode = linked_inode;
++    shard_link_block_inode(local, shard_block_num, inode, buf);
+ 
+-  if (local->fop == GF_FOP_UNLINK)
+-    shard_unlink_base_file(frame, this);
+-  else if (local->fop == GF_FOP_RENAME)
+-    shard_rename_src_base_file(frame, this);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+-  return 0;
+-}
+-
+-int shard_create_marker_file_under_remove_me(call_frame_t *frame,
+-                                             xlator_t *this, loc_t *loc) {
+-  int ret = 0;
+-  int op_errno = ENOMEM;
+-  uint64_t bs = 0;
+-  char g1[64] = {
+-      0,
+-  };
+-  char g2[64] = {
+-      0,
+-  };
+-  dict_t *xattr_req = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
+-
+-  priv = this->private;
+-  local = frame->local;
+-
+-  SHARD_SET_ROOT_FS_ID(frame, local);
+-
+-  xattr_req = shard_create_gfid_dict(local->xattr_req);
+-  if (!xattr_req)
+-    goto err;
+-
+-  local->newloc.inode = inode_new(this->itable);
+-  local->newloc.parent = inode_ref(priv->dot_shard_rm_inode);
+-  ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid),
+-                   (char **)&local->newloc.path);
+-  if (ret < 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-           "Inode path failed on "
+-           "pargfid=%s bname=%s",
+-           uuid_utoa_r(priv->dot_shard_rm_gfid, g1),
+-           uuid_utoa_r(loc->inode->gfid, g2));
+-    goto err;
+-  }
+-  local->newloc.name = strrchr(local->newloc.path, '/');
+-  if (local->newloc.name)
+-    local->newloc.name++;
+-
+-  if (local->fop == GF_FOP_UNLINK)
+-    bs = local->block_size;
+-  else if (local->fop == GF_FOP_RENAME)
+-    bs = local->dst_block_size;
+-
+-  SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
+-                          local->prebuf.ia_size, 0, err);
+-
+-  STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
+-             FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc,
+-             0, 0, 0644, xattr_req);
+-  dict_unref(xattr_req);
+-  return 0;
++done:
++    call_count = shard_call_count_return(frame);
++    if (call_count == 0) {
++        SHARD_UNSET_ROOT_FS_ID(frame, local);
++        local->create_count = 0;
++        local->post_mknod_handler(frame, this);
++    }
+ 
+-err:
+-  if (xattr_req)
+-    dict_unref(xattr_req);
+-  shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno,
+-                                               NULL, NULL, NULL, NULL, NULL);
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+-
+-int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie,
+-                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+-                               struct iatt *preparent, struct iatt *postparent,
+-                               dict_t *xdata) {
+-  int ret = 0;
+-  shard_local_t *local = NULL;
++int
++shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
++                          shard_post_mknod_fop_handler_t post_mknod_handler)
++{
++    int i = 0;
++    int shard_idx_iter = 0;
++    int last_block = 0;
++    int ret = 0;
++    int call_count = 0;
++    char path[PATH_MAX] = {
++        0,
++    };
++    mode_t mode = 0;
++    char *bname = NULL;
++    shard_priv_t *priv = NULL;
++    shard_inode_ctx_t ctx_tmp = {
++        0,
++    };
++    shard_local_t *local = NULL;
++    gf_boolean_t wind_failed = _gf_false;
++    fd_t *fd = NULL;
++    loc_t loc = {
++        0,
++    };
++    dict_t *xattr_req = NULL;
+ 
+-  local = frame->local;
++    local = frame->local;
++    priv = this->private;
++    fd = local->fd;
++    shard_idx_iter = local->first_block;
++    last_block = local->last_block;
++    call_count = local->call_count = local->create_count;
++    local->post_mknod_handler = post_mknod_handler;
+ 
+-  if (op_ret < 0) {
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-  } else {
+-    shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
+-    local->preoldparent = *preparent;
+-    local->postoldparent = *postparent;
+-    if (xdata)
+-      local->xattr_rsp = dict_ref(xdata);
+-    if (local->cleanup_required)
+-      shard_start_background_deletion(this);
+-  }
++    SHARD_SET_ROOT_FS_ID(frame, local);
+ 
+-  if (local->entrylk_frame) {
+-    ret = shard_unlock_entrylk(frame, this);
+-    if (ret < 0) {
+-      local->op_ret = -1;
+-      local->op_errno = -ret;
++    ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get inode "
++               "ctx for %s",
++               uuid_utoa(fd->inode->gfid));
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        goto err;
+     }
+-  }
++    mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
+ 
+-  ret = shard_unlock_inodelk(frame, this);
+-  if (ret < 0) {
+-    local->op_ret = -1;
+-    local->op_errno = -ret;
+-  }
+-
+-  shard_unlink_cbk(frame, this);
+-  return 0;
+-}
+-
+-int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = frame->local;
+-
+-  /* To-Do: Request open-fd count on base file */
+-  STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
+-             local->xattr_req);
+-  return 0;
+-}
+-
+-int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                             int32_t op_ret, int32_t op_errno, dict_t *xdata) {
+-  if (op_ret)
+-    gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+-           "Unlock failed. Please check brick logs for "
+-           "more details");
+-  SHARD_STACK_DESTROY(frame);
+-  return 0;
+-}
+-
+-int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) {
+-  loc_t *loc = NULL;
+-  call_frame_t *lk_frame = NULL;
+-  shard_local_t *local = NULL;
+-  shard_local_t *lk_local = NULL;
+-  shard_entrylk_t *lock = NULL;
+-
+-  local = frame->local;
+-  lk_frame = local->entrylk_frame;
+-  lk_local = lk_frame->local;
+-  local->entrylk_frame = NULL;
+-  lock = &lk_local->int_entrylk;
+-  loc = &lock->loc;
+-
+-  STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->entrylk, this->name, loc,
+-             lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK,
+-             NULL);
+-  local->int_entrylk.acquired_lock = _gf_false;
+-  return 0;
+-}
+-
+-int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  switch (local->fop) {
+-  case GF_FOP_UNLINK:
+-  case GF_FOP_RENAME:
+-    shard_create_marker_file_under_remove_me(frame, this,
+-                                             &local->int_inodelk.loc);
+-    break;
+-  default:
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-           "post-entrylk handler not defined. This case should not"
+-           " be hit");
+-    break;
+-  }
+-  return 0;
+-}
+-
+-int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                              int32_t op_ret, int32_t op_errno, dict_t *xdata) {
+-  call_frame_t *main_frame = NULL;
+-  shard_local_t *local = NULL;
+-  shard_local_t *main_local = NULL;
+-
+-  local = frame->local;
+-  main_frame = local->main_frame;
+-  main_local = main_frame->local;
+-
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno);
+-    return 0;
+-  }
+-  main_local->int_entrylk.acquired_lock = _gf_true;
+-  shard_post_entrylk_fop_handler(main_frame, this);
+-  return 0;
+-}
+-
+-int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+-                          uuid_t gfid) {
+-  char gfid_str[GF_UUID_BUF_SIZE] = {
+-      0,
+-  };
+-  shard_local_t *local = NULL;
+-  shard_local_t *entrylk_local = NULL;
+-  shard_entrylk_t *int_entrylk = NULL;
+-  call_frame_t *entrylk_frame = NULL;
+-
+-  local = frame->local;
+-  entrylk_frame = create_frame(this, this->ctx->pool);
+-  if (!entrylk_frame) {
+-    gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+-           "Failed to create new frame "
+-           "to lock marker file");
+-    goto err;
+-  }
+-
+-  entrylk_local = mem_get0(this->local_pool);
+-  if (!entrylk_local) {
+-    STACK_DESTROY(entrylk_frame->root);
+-    goto err;
+-  }
+-
+-  entrylk_frame->local = entrylk_local;
+-  entrylk_local->main_frame = frame;
+-  int_entrylk = &entrylk_local->int_entrylk;
+-
+-  int_entrylk->loc.inode = inode_ref(inode);
+-  set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root);
+-  local->entrylk_frame = entrylk_frame;
+-  gf_uuid_unparse(gfid, gfid_str);
+-  int_entrylk->basename = gf_strdup(gfid_str);
+-
+-  STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc,
+-             int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-  return 0;
+-}
++    while (shard_idx_iter <= last_block) {
++        if (local->inode_list[i]) {
++            shard_idx_iter++;
++            i++;
++            continue;
++        }
+ 
+-int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame,
+-                                            xlator_t *this) {
+-  shard_local_t *local = NULL;
+-  shard_priv_t *priv = NULL;
++        if (wind_failed) {
++            shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
++                                   -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
++            goto next;
++        }
+ 
+-  priv = this->private;
+-  local = frame->local;
++        shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
++                                 sizeof(path));
++
++        xattr_req = shard_create_gfid_dict(local->xattr_req);
++        if (!xattr_req) {
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            wind_failed = _gf_true;
++            shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
++                                   -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
++            goto next;
++        }
++
++        bname = strrchr(path, '/') + 1;
++        loc.inode = inode_new(this->itable);
++        loc.parent = inode_ref(priv->dot_shard_inode);
++        ret = inode_path(loc.parent, bname, (char **)&(loc.path));
++        if (ret < 0 || !(loc.inode)) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
++                   "Inode path failed"
++                   "on %s, base file gfid = %s",
++                   bname, uuid_utoa(fd->inode->gfid));
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            wind_failed = _gf_true;
++            loc_wipe(&loc);
++            dict_unref(xattr_req);
++            shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
++                                   -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
++            goto next;
++        }
++
++        loc.name = strrchr(loc.path, '/');
++        if (loc.name)
++            loc.name++;
++
++        STACK_WIND_COOKIE(frame, shard_common_mknod_cbk,
++                          (void *)(long)shard_idx_iter, FIRST_CHILD(this),
++                          FIRST_CHILD(this)->fops->mknod, &loc, mode,
++                          ctx_tmp.stat.ia_rdev, 0, xattr_req);
++        loc_wipe(&loc);
++        dict_unref(xattr_req);
++
++    next:
++        shard_idx_iter++;
++        i++;
++        if (!--call_count)
++            break;
++    }
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+     return 0;
+-  }
+-
+-  if (local->prebuf.ia_nlink > 1) {
+-    gf_msg_debug(this->name, 0, "link count on %s > 1:%d, "
+-                                "performing rename()/unlink()",
+-                 local->int_inodelk.loc.path, local->prebuf.ia_nlink);
+-    if (local->fop == GF_FOP_RENAME)
+-      shard_rename_src_base_file(frame, this);
+-    else if (local->fop == GF_FOP_UNLINK)
+-      shard_unlink_base_file(frame, this);
+-  } else {
+-    gf_msg_debug(this->name, 0, "link count on %s = 1, creating "
+-                                "file under .remove_me",
+-                 local->int_inodelk.loc.path);
+-    local->cleanup_required = _gf_true;
+-    shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode,
+-                          local->prebuf.ia_gfid);
+-  }
+-  return 0;
+-}
+-
+-int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  switch (local->fop) {
+-  case GF_FOP_UNLINK:
+-  case GF_FOP_RENAME:
+-    shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
+-                           shard_post_lookup_base_shard_rm_handler);
+-    break;
+-  default:
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-           "post-inodelk handler not defined. This case should not"
+-           " be hit");
+-    break;
+-  }
+-  return 0;
+-}
+-
+-int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                              int32_t op_ret, int32_t op_errno, dict_t *xdata) {
+-  call_frame_t *main_frame = NULL;
+-  shard_local_t *local = NULL;
+-  shard_local_t *main_local = NULL;
+-
+-  local = frame->local;
+-  main_frame = local->main_frame;
+-  main_local = main_frame->local;
+-
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno);
+-    return 0;
+-  }
+-  main_local->int_inodelk.acquired_lock = _gf_true;
+-  shard_post_inodelk_fop_handler(main_frame, this);
+-  return 0;
+-}
+-
+-int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) {
+-  call_frame_t *lk_frame = NULL;
+-  shard_local_t *local = NULL;
+-  shard_local_t *lk_local = NULL;
+-  shard_inodelk_t *int_inodelk = NULL;
+-
+-  local = frame->local;
+-  lk_frame = create_frame(this, this->ctx->pool);
+-  if (!lk_frame) {
+-    gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+-           "Failed to create new frame "
+-           "to lock base shard");
+-    goto err;
+-  }
+-  lk_local = mem_get0(this->local_pool);
+-  if (!lk_local) {
+-    STACK_DESTROY(lk_frame->root);
+-    goto err;
+-  }
+-
+-  lk_frame->local = lk_local;
+-  lk_local->main_frame = frame;
+-  int_inodelk = &lk_local->int_inodelk;
+-
+-  int_inodelk->flock.l_len = 0;
+-  int_inodelk->flock.l_start = 0;
+-  int_inodelk->domain = this->name;
+-  int_inodelk->flock.l_type = F_WRLCK;
+-  loc_copy(&local->int_inodelk.loc, loc);
+-  set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root);
+-  local->inodelk_frame = lk_frame;
+-
+-  STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain,
+-             &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL);
+-  return 0;
+ err:
+-  shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-  return 0;
++    /*
++     * This block is for handling failure in shard_inode_ctx_get_all().
++     * Failures in the while-loop are handled within the loop.
++     */
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
++    post_mknod_handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) {
+-  loc_t *loc = NULL;
+-  shard_local_t *local = NULL;
++int
++shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this);
+ 
+-  local = frame->local;
++int
++shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+-    return 0;
+-  }
+-  if (local->fop == GF_FOP_UNLINK)
+-    loc = &local->loc;
+-  else if (local->fop == GF_FOP_RENAME)
+-    loc = &local->loc2;
+-  shard_acquire_inodelk(frame, this, loc);
+-  return 0;
+-}
++    local = frame->local;
+ 
+-int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+-                             shard_post_resolve_fop_handler_t handler,
+-                             shard_internal_dir_type_t type);
+-int shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
+ 
+-  local = frame->local;
++    if (local->create_count) {
++        shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler);
++    } else {
++        shard_readv_do(frame, this);
++    }
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+     return 0;
+-  }
+-  shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler,
+-                           SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+-  return 0;
+ }
+ 
+-void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) {
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
++int
++shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  priv = this->private;
+-  local = frame->local;
++    local = frame->local;
+ 
+-  local->dot_shard_rm_loc.inode =
+-      inode_find(this->itable, priv->dot_shard_rm_gfid);
+-  if (!local->dot_shard_rm_loc.inode) {
+-    local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+-    if (!local->dot_shard_loc.inode) {
+-      shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler,
+-                               SHARD_INTERNAL_DIR_DOT_SHARD);
+-    } else {
+-      local->post_res_handler = shard_pre_mkdir_rm_handler;
+-      shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+-    }
+-  } else {
+-    local->post_res_handler = shard_post_mkdir_rm_handler;
+-    shard_refresh_internal_dir(frame, this,
+-                               SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+-  }
+-}
+-
+-int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+-                 dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
+-
+-  ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+-  if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size from inode ctx of %s",
+-           uuid_utoa(loc->inode->gfid));
+-    goto err;
+-  }
+-
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+-    return 0;
+-  }
+-
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
+-
+-  frame->local = local;
+-
+-  loc_copy(&local->loc, loc);
+-  local->xflag = xflag;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  local->block_size = block_size;
+-  local->resolver_base_inode = loc->inode;
+-  local->fop = GF_FOP_UNLINK;
+-  if (!this->itable)
+-    this->itable = (local->loc.inode)->table;
+-
+-  local->resolve_not = _gf_true;
+-  shard_begin_rm_resolution(frame, this);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM);
+-  return 0;
+-}
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
+ 
+-int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_rename_cbk(frame, this);
+-  return 0;
++    if (!local->eexist_count) {
++        shard_readv_do(frame, this);
++    } else {
++        local->call_count = local->eexist_count;
++        shard_common_lookup_shards(frame, this, local->loc.inode,
++                                   shard_post_lookup_shards_readv_handler);
++    }
++    return 0;
+ }
+ 
+-int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                         int32_t op_ret, int32_t op_errno, struct iatt *buf,
+-                         struct iatt *preoldparent, struct iatt *postoldparent,
+-                         struct iatt *prenewparent, struct iatt *postnewparent,
+-                         dict_t *xdata) {
+-  int ret = 0;
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
++int
++shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  if (op_ret < 0) {
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto err;
+-  }
+-  /* Set ctx->refresh to TRUE to force a lookup on disk when
+-   * shard_lookup_base_file() is called next to refresh the hard link
+-   * count in ctx. Note that this is applicable only to the case where
+-   * the rename dst is already existent and sharded.
+-   */
+-  if ((local->dst_block_size) && (!local->cleanup_required))
+-    shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
+-
+-  local->prebuf = *buf;
+-  local->preoldparent = *preoldparent;
+-  local->postoldparent = *postoldparent;
+-  local->prenewparent = *prenewparent;
+-  local->postnewparent = *postnewparent;
+-  if (xdata)
+-    local->xattr_rsp = dict_ref(xdata);
++    local = frame->local;
+ 
+-  if (local->dst_block_size) {
+-    if (local->entrylk_frame) {
+-      ret = shard_unlock_entrylk(frame, this);
+-      if (ret < 0) {
+-        local->op_ret = -1;
+-        local->op_errno = -ret;
+-      }
++    if (local->op_ret < 0) {
++        if (local->op_errno != ENOENT) {
++            shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                        local->op_errno);
++            return 0;
++        } else {
++            struct iovec vec = {
++                0,
++            };
++
++            vec.iov_base = local->iobuf->ptr;
++            vec.iov_len = local->total_size;
++            local->op_ret = local->total_size;
++            SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1,
++                               &local->prebuf, local->iobref, NULL);
++            return 0;
++        }
+     }
+ 
+-    ret = shard_unlock_inodelk(frame, this);
+-    if (ret < 0) {
+-      local->op_ret = -1;
+-      local->op_errno = -ret;
+-      goto err;
+-    }
+-    if (local->cleanup_required)
+-      shard_start_background_deletion(this);
+-  }
+-
+-  /* Now the base file of src, if sharded, is looked up to gather ia_size
+-   * and ia_blocks.*/
+-  if (local->block_size) {
+-    local->tmp_loc.inode = inode_new(this->itable);
+-    gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
+-    shard_lookup_base_file(frame, this, &local->tmp_loc,
+-                           shard_post_rename_lookup_handler);
+-  } else {
+-    shard_rename_cbk(frame, this);
+-  }
+-  return 0;
+-err:
+-  shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                              local->op_errno);
+-  return 0;
+-}
+-
+-int shard_post_lookup_dst_base_file_handler(call_frame_t *frame,
+-                                            xlator_t *this) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
++    if (local->call_count) {
++        shard_common_lookup_shards(frame, this, local->resolver_base_inode,
++                                   shard_post_lookup_shards_readv_handler);
++    } else {
++        shard_readv_do(frame, this);
++    }
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
+     return 0;
+-  }
+-
+-  /* Save dst base file attributes into postbuf so the information is not
+-   * lost when it is overwritten after lookup on base file of src in
+-   * shard_lookup_base_file_cbk().
+-   */
+-  local->postbuf = local->prebuf;
+-  shard_rename_src_base_file(frame, this);
+-  return 0;
+-}
+-
+-int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+-                 loc_t *newloc, dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  uint64_t dst_block_size = 0;
+-  shard_local_t *local = NULL;
+-
+-  if (IA_ISDIR(oldloc->inode->ia_type)) {
+-    STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+-    return 0;
+-  }
+-
+-  ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
+-  if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size from inode ctx of %s",
+-           uuid_utoa(oldloc->inode->gfid));
+-    goto err;
+-  }
+-
+-  if (newloc->inode)
+-    ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size);
+-
+-  /* The following stack_wind covers the case where:
+-   * a. the src file is not sharded and dst doesn't exist, OR
+-   * b. the src and dst both exist but are not sharded.
+-   */
+-  if (((!block_size) && (!dst_block_size)) ||
+-      frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+-    return 0;
+-  }
+-
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
+-
+-  frame->local = local;
+-  loc_copy(&local->loc, oldloc);
+-  loc_copy(&local->loc2, newloc);
+-  local->resolver_base_inode = newloc->inode;
+-  local->fop = GF_FOP_RENAME;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
+-
+-  local->block_size = block_size;
+-  local->dst_block_size = dst_block_size;
+-  if (!this->itable)
+-    this->itable = (local->loc.inode)->table;
+-  local->resolve_not = _gf_true;
+-
+-  /* The following if-block covers the case where the dst file exists
+-   * and is sharded.
+-   */
+-  if (local->dst_block_size) {
+-    shard_begin_rm_resolution(frame, this);
+-  } else {
+-    /* The following block covers the case where the dst either doesn't
+-     * exist or is NOT sharded but the src is sharded. In this case, shard
+-     * xlator would go ahead and rename src to dst. Once done, it would also
+-     * lookup the base shard of src to get the ia_size and ia_blocks xattr
+-     * values.
+-     */
+-    shard_rename_src_base_file(frame, this);
+-  }
+-  return 0;
+-
+-err:
+-  shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM);
+-  return 0;
+ }
+ 
+-int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                     int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+-                     struct iatt *stbuf, struct iatt *preparent,
+-                     struct iatt *postparent, dict_t *xdata) {
+-  int ret = -1;
+-  shard_local_t *local = NULL;
++int
++shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this)
++{
++    int ret = 0;
++    struct iobuf *iobuf = NULL;
++    shard_local_t *local = NULL;
++    shard_priv_t *priv = NULL;
+ 
+-  local = frame->local;
++    priv = this->private;
++    local = frame->local;
+ 
+-  if (op_ret == -1)
+-    goto unwind;
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
+ 
+-  ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size,
+-                            SHARD_ALL_MASK);
+-  if (ret)
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
+-           "Failed to set inode "
+-           "ctx for %s",
+-           uuid_utoa(inode->gfid));
++    if (local->offset >= local->prebuf.ia_size) {
++        /* If the read is being performed past the end of the file,
++         * unwind the FOP with 0 bytes read as status.
++         */
++        struct iovec vec = {
++            0,
++        };
+ 
+-unwind:
+-  SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+-                     preparent, postparent, xdata);
+-  return 0;
+-}
++        iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size);
++        if (!iobuf)
++            goto err;
+ 
+-int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+-                 mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) {
+-  shard_priv_t *priv = NULL;
+-  shard_local_t *local = NULL;
++        vec.iov_base = iobuf->ptr;
++        vec.iov_len = 0;
++        local->iobref = iobref_new();
++        iobref_add(local->iobref, iobuf);
++        iobuf_unref(iobuf);
+ 
+-  priv = this->private;
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++        SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf,
++                           local->iobref, NULL);
++        return 0;
++    }
+ 
+-  frame->local = local;
+-  local->block_size = priv->block_size;
++    local->first_block = get_lowest_block(local->offset, local->block_size);
+ 
+-  if (!__is_gsyncd_on_shard_dir(frame, loc)) {
+-    SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
+-  }
++    local->total_size = local->req_size;
+ 
+-  STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+-             xdata);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM);
+-  return 0;
+-}
+-
+-int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                   int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) {
+-  /* To-Do: Handle open with O_TRUNC under locks */
+-  SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+-  return 0;
+-}
+-
+-int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+-               fd_t *fd, dict_t *xdata) {
+-  STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+-  return 0;
+-}
+-
+-int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                       int32_t op_ret, int32_t op_errno, struct iovec *vector,
+-                       int32_t count, struct iatt *stbuf, struct iobref *iobref,
+-                       dict_t *xdata) {
+-  int i = 0;
+-  int call_count = 0;
+-  void *address = NULL;
+-  uint64_t block_num = 0;
+-  off_t off = 0;
+-  struct iovec vec = {
+-      0,
+-  };
+-  shard_local_t *local = NULL;
+-  fd_t *anon_fd = cookie;
+-  shard_inode_ctx_t *ctx = NULL;
+-
+-  local = frame->local;
+-
+-  /* If shard has already seen a failure here before, there is no point
+-   * in aggregating subsequent reads, so just go to out.
+-   */
+-  if (local->op_ret < 0)
+-    goto out;
+-
+-  if (op_ret < 0) {
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto out;
+-  }
++    local->last_block = get_highest_block(local->offset, local->total_size,
++                                          local->block_size);
+ 
+-  if (local->op_ret >= 0)
+-    local->op_ret += op_ret;
++    local->num_blocks = local->last_block - local->first_block + 1;
++    GF_ASSERT(local->num_blocks > 0);
++    local->resolver_base_inode = local->loc.inode;
+ 
+-  shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+-  block_num = ctx->block_num;
+-
+-  if (block_num == local->first_block) {
+-    address = local->iobuf->ptr;
+-  } else {
+-    /* else
+-     * address to start writing to = beginning of buffer +
+-     *                    number of bytes until end of first block +
+-     *                    + block_size times number of blocks
+-     *                    between the current block and the first
+-     */
+-    address = (char *)local->iobuf->ptr +
+-              (local->block_size - (local->offset % local->block_size)) +
+-              ((block_num - local->first_block - 1) * local->block_size);
+-  }
++    local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
++                                  gf_shard_mt_inode_list);
++    if (!local->inode_list)
++        goto err;
+ 
+-  for (i = 0; i < count; i++) {
+-    address = (char *)address + off;
+-    memcpy(address, vector[i].iov_base, vector[i].iov_len);
+-    off += vector[i].iov_len;
+-  }
++    iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size);
++    if (!iobuf)
++        goto err;
+ 
+-out:
+-  if (anon_fd)
+-    fd_unref(anon_fd);
+-  call_count = shard_call_count_return(frame);
+-  if (call_count == 0) {
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
+-    if (local->op_ret < 0) {
+-      shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                  local->op_errno);
+-    } else {
+-      if (xdata)
+-        local->xattr_rsp = dict_ref(xdata);
+-      vec.iov_base = local->iobuf->ptr;
+-      if (local->offset + local->req_size > local->prebuf.ia_size)
+-          local->total_size = local->prebuf.ia_size - local->offset;
+-      vec.iov_len = local->total_size;
+-      local->op_ret = local->total_size;
+-      SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1,
+-                         &local->prebuf, local->iobref, local->xattr_rsp);
+-      return 0;
+-    }
+-  }
+-
+-  return 0;
+-}
+-
+-int shard_readv_do(call_frame_t *frame, xlator_t *this) {
+-  int i = 0;
+-  int call_count = 0;
+-  int last_block = 0;
+-  int cur_block = 0;
+-  off_t orig_offset = 0;
+-  off_t shard_offset = 0;
+-  size_t read_size = 0;
+-  size_t remaining_size = 0;
+-  fd_t *fd = NULL;
+-  fd_t *anon_fd = NULL;
+-  shard_local_t *local = NULL;
+-  gf_boolean_t wind_failed = _gf_false;
+-
+-  local = frame->local;
+-  fd = local->fd;
+-
+-  orig_offset = local->offset;
+-  cur_block = local->first_block;
+-  last_block = local->last_block;
+-  remaining_size = local->total_size;
+-  local->call_count = call_count = local->num_blocks;
+-
+-  SHARD_SET_ROOT_FS_ID(frame, local);
+-
+-  if (fd->flags & O_DIRECT)
+-    local->flags = O_DIRECT;
+-
+-  while (cur_block <= last_block) {
+-    if (wind_failed) {
+-      shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0,
+-                         NULL, NULL, NULL);
+-      goto next;
+-    }
+-
+-    shard_offset = orig_offset % local->block_size;
+-    read_size = local->block_size - shard_offset;
+-    if (read_size > remaining_size)
+-      read_size = remaining_size;
+-
+-    remaining_size -= read_size;
+-
+-    if (cur_block == 0) {
+-      anon_fd = fd_ref(fd);
+-    } else {
+-      anon_fd = fd_anonymous(local->inode_list[i]);
+-      if (!anon_fd) {
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        wind_failed = _gf_true;
+-        shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL,
+-                           0, NULL, NULL, NULL);
+-        goto next;
+-      }
++    local->iobref = iobref_new();
++    if (!local->iobref) {
++        iobuf_unref(iobuf);
++        goto err;
+     }
+ 
+-    STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this),
+-                      FIRST_CHILD(this)->fops->readv, anon_fd, read_size,
+-                      shard_offset, local->flags, local->xattr_req);
++    if (iobref_add(local->iobref, iobuf) != 0) {
++        iobuf_unref(iobuf);
++        goto err;
++    }
+ 
+-    orig_offset += read_size;
+-  next:
+-    cur_block++;
+-    i++;
+-    call_count--;
+-  }
+-  return 0;
+-}
++    memset(iobuf->ptr, 0, local->total_size);
++    iobuf_unref(iobuf);
++    local->iobuf = iobuf;
+ 
+-int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                           int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                           struct iatt *buf, struct iatt *preparent,
+-                           struct iatt *postparent, dict_t *xdata) {
+-  int shard_block_num = (long)cookie;
+-  int call_count = 0;
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  if (op_ret < 0) {
+-    if (op_errno == EEXIST) {
+-      LOCK(&frame->lock);
+-      { local->eexist_count++; }
+-      UNLOCK(&frame->lock);
++    local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++    if (!local->dot_shard_loc.inode) {
++        ret = shard_init_internal_dir_loc(this, local,
++                                          SHARD_INTERNAL_DIR_DOT_SHARD);
++        if (ret)
++            goto err;
++        shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler,
++                                  SHARD_INTERNAL_DIR_DOT_SHARD);
+     } else {
+-      local->op_ret = op_ret;
+-      local->op_errno = op_errno;
++        local->post_res_handler = shard_post_resolve_readv_handler;
++        shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+     }
+-    gf_msg_debug(this->name, 0, "mknod of shard %d "
+-                                "failed: %s",
+-                 shard_block_num, strerror(op_errno));
+-    goto done;
+-  }
+-
+-  shard_link_block_inode(local, shard_block_num, inode, buf);
++    return 0;
++err:
++    shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++    return 0;
++}
+ 
+-done:
+-  call_count = shard_call_count_return(frame);
+-  if (call_count == 0) {
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
+-    local->create_count = 0;
+-    local->post_mknod_handler(frame, this);
+-  }
+-
+-  return 0;
+-}
+-
+-int shard_common_resume_mknod(
+-    call_frame_t *frame, xlator_t *this,
+-    shard_post_mknod_fop_handler_t post_mknod_handler) {
+-  int i = 0;
+-  int shard_idx_iter = 0;
+-  int last_block = 0;
+-  int ret = 0;
+-  int call_count = 0;
+-  char path[PATH_MAX] = {
+-      0,
+-  };
+-  mode_t mode = 0;
+-  char *bname = NULL;
+-  shard_priv_t *priv = NULL;
+-  shard_inode_ctx_t ctx_tmp = {
+-      0,
+-  };
+-  shard_local_t *local = NULL;
+-  gf_boolean_t wind_failed = _gf_false;
+-  fd_t *fd = NULL;
+-  loc_t loc = {
+-      0,
+-  };
+-  dict_t *xattr_req = NULL;
+-
+-  local = frame->local;
+-  priv = this->private;
+-  fd = local->fd;
+-  shard_idx_iter = local->first_block;
+-  last_block = local->last_block;
+-  call_count = local->call_count = local->create_count;
+-  local->post_mknod_handler = post_mknod_handler;
+-
+-  SHARD_SET_ROOT_FS_ID(frame, local);
+-
+-  ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get inode "
+-           "ctx for %s",
+-           uuid_utoa(fd->inode->gfid));
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    goto err;
+-  }
+-  mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
++int
++shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++            off_t offset, uint32_t flags, dict_t *xdata)
++{
++    int ret = 0;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
+ 
+-  while (shard_idx_iter <= last_block) {
+-    if (local->inode_list[i]) {
+-      shard_idx_iter++;
+-      i++;
+-      continue;
++    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size for %s from its inode ctx",
++               uuid_utoa(fd->inode->gfid));
++        goto err;
+     }
+ 
+-    if (wind_failed) {
+-      shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
+-                             ENOMEM, NULL, NULL, NULL, NULL, NULL);
+-      goto next;
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        /* block_size = 0 means that the file was created before
++         * sharding was enabled on the volume.
++         */
++        STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
++                   xdata);
++        return 0;
+     }
+ 
+-    shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
+-                             sizeof(path));
+-
+-    xattr_req = shard_create_gfid_dict(local->xattr_req);
+-    if (!xattr_req) {
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      wind_failed = _gf_true;
+-      shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
+-                             ENOMEM, NULL, NULL, NULL, NULL, NULL);
+-      goto next;
+-    }
++    if (!this->itable)
++        this->itable = fd->inode->table;
+ 
+-    bname = strrchr(path, '/') + 1;
+-    loc.inode = inode_new(this->itable);
+-    loc.parent = inode_ref(priv->dot_shard_inode);
+-    ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-    if (ret < 0 || !(loc.inode)) {
+-      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-             "Inode path failed"
+-             "on %s, base file gfid = %s",
+-             bname, uuid_utoa(fd->inode->gfid));
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      wind_failed = _gf_true;
+-      loc_wipe(&loc);
+-      dict_unref(xattr_req);
+-      shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1,
+-                             ENOMEM, NULL, NULL, NULL, NULL, NULL);
+-      goto next;
+-    }
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
+ 
+-    loc.name = strrchr(loc.path, '/');
+-    if (loc.name)
+-      loc.name++;
++    frame->local = local;
+ 
+-    STACK_WIND_COOKIE(frame, shard_common_mknod_cbk,
+-                      (void *)(long)shard_idx_iter, FIRST_CHILD(this),
+-                      FIRST_CHILD(this)->fops->mknod, &loc, mode,
+-                      ctx_tmp.stat.ia_rdev, 0, xattr_req);
+-    loc_wipe(&loc);
+-    dict_unref(xattr_req);
++    ret = syncbarrier_init(&local->barrier);
++    if (ret)
++        goto err;
++    local->fd = fd_ref(fd);
++    local->block_size = block_size;
++    local->offset = offset;
++    local->req_size = size;
++    local->flags = flags;
++    local->fop = GF_FOP_READ;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    if (!local->xattr_req)
++        goto err;
+ 
+-  next:
+-    shard_idx_iter++;
+-    i++;
+-    if (!--call_count)
+-      break;
+-  }
++    local->loc.inode = inode_ref(fd->inode);
++    gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ 
+-  return 0;
++    shard_lookup_base_file(frame, this, &local->loc,
++                           shard_post_lookup_readv_handler);
++    return 0;
+ err:
+-  /*
+-   * This block is for handling failure in shard_inode_ctx_get_all().
+-   * Failures in the while-loop are handled within the loop.
+-   */
+-  SHARD_UNSET_ROOT_FS_ID(frame, local);
+-  post_mknod_handler(frame, this);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++    return 0;
+ }
+ 
+-int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this);
+-
+-int shard_post_lookup_shards_readv_handler(call_frame_t *frame,
+-                                           xlator_t *this) {
+-  shard_local_t *local = NULL;
++int
++shard_common_inode_write_post_update_size_handler(call_frame_t *frame,
++                                                  xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  local = frame->local;
++    local = frame->local;
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                local->op_errno);
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                    local->op_errno);
++    } else {
++        shard_common_inode_write_success_unwind(local->fop, frame,
++                                                local->written_size);
++    }
+     return 0;
+-  }
+-
+-  if (local->create_count) {
+-    shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler);
+-  } else {
+-    shard_readv_do(frame, this);
+-  }
+-
+-  return 0;
+ }
+ 
+-int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
++static gf_boolean_t
++shard_is_appending_write(shard_local_t *local)
++{
++    if (local->fop != GF_FOP_WRITE)
++        return _gf_false;
++    if (local->flags & O_APPEND)
++        return _gf_true;
++    if (local->fd->flags & O_APPEND)
++        return _gf_true;
++    return _gf_false;
++}
+ 
+-  local = frame->local;
++int
++__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
++                                      xlator_t *this)
++{
++    int ret = -1;
++    uint64_t ctx_uint = 0;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
+-  }
++    ret = __inode_ctx_get(inode, this, &ctx_uint);
++    if (ret < 0)
++        return ret;
+ 
+-  if (!local->eexist_count) {
+-    shard_readv_do(frame, this);
+-  } else {
+-    local->call_count = local->eexist_count;
+-    shard_common_lookup_shards(frame, this, local->loc.inode,
+-                               shard_post_lookup_shards_readv_handler);
+-  }
+-  return 0;
+-}
++    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+ 
+-int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
++    if (shard_is_appending_write(local)) {
++        local->delta_size = local->total_size;
++    } else if (local->offset + local->total_size > ctx->stat.ia_size) {
++        local->delta_size = (local->offset + local->total_size) -
++                            ctx->stat.ia_size;
++    } else {
++        local->delta_size = 0;
++    }
++    ctx->stat.ia_size += (local->delta_size);
++    local->postbuf = ctx->stat;
+ 
+-  local = frame->local;
++    return 0;
++}
+ 
+-  if (local->op_ret < 0) {
+-    if (local->op_errno != ENOENT) {
+-      shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                  local->op_errno);
+-      return 0;
+-    } else {
+-      struct iovec vec = {
+-          0,
+-      };
++int
++shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
++                                    xlator_t *this)
++{
++    int ret = -1;
+ 
+-      vec.iov_base = local->iobuf->ptr;
+-      vec.iov_len = local->total_size;
+-      local->op_ret = local->total_size;
+-      SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1,
+-                         &local->prebuf, local->iobref, NULL);
+-      return 0;
++    LOCK(&inode->lock);
++    {
++        ret = __shard_get_delta_size_from_inode_ctx(local, inode, this);
+     }
+-  }
++    UNLOCK(&inode->lock);
+ 
+-  if (local->call_count) {
+-    shard_common_lookup_shards(frame, this, local->resolver_base_inode,
+-                               shard_post_lookup_shards_readv_handler);
+-  } else {
+-    shard_readv_do(frame, this);
+-  }
+-
+-  return 0;
++    return ret;
+ }
+ 
+-int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) {
+-  int ret = 0;
+-  struct iobuf *iobuf = NULL;
+-  shard_local_t *local = NULL;
+-  shard_priv_t *priv = NULL;
+-
+-  priv = this->private;
+-  local = frame->local;
++int
++shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
++                                xlator_t *this, int32_t op_ret,
++                                int32_t op_errno, struct iatt *pre,
++                                struct iatt *post, dict_t *xdata)
++{
++    int call_count = 0;
++    fd_t *anon_fd = cookie;
++    shard_local_t *local = NULL;
++    glusterfs_fop_t fop = 0;
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
+-  }
++    local = frame->local;
++    fop = local->fop;
+ 
+-  if (local->offset >= local->prebuf.ia_size) {
+-    /* If the read is being performed past the end of the file,
+-     * unwind the FOP with 0 bytes read as status.
+-     */
+-    struct iovec vec = {
+-        0,
+-    };
++    LOCK(&frame->lock);
++    {
++        if (op_ret < 0) {
++            local->op_ret = op_ret;
++            local->op_errno = op_errno;
++        } else {
++            local->written_size += op_ret;
++            GF_ATOMIC_ADD(local->delta_blocks,
++                          post->ia_blocks - pre->ia_blocks);
++            local->delta_size += (post->ia_size - pre->ia_size);
++            shard_inode_ctx_set(local->fd->inode, this, post, 0,
++                                SHARD_MASK_TIMES);
++            if (local->fd->inode != anon_fd->inode)
++                shard_inode_ctx_add_to_fsync_list(local->fd->inode, this,
++                                                  anon_fd->inode);
++        }
++    }
++    UNLOCK(&frame->lock);
+ 
+-    iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size);
+-    if (!iobuf)
+-      goto err;
++    if (anon_fd)
++        fd_unref(anon_fd);
+ 
+-    vec.iov_base = iobuf->ptr;
+-    vec.iov_len = 0;
+-    local->iobref = iobref_new();
+-    iobref_add(local->iobref, iobuf);
+-    iobuf_unref(iobuf);
++    call_count = shard_call_count_return(frame);
++    if (call_count == 0) {
++        SHARD_UNSET_ROOT_FS_ID(frame, local);
++        if (local->op_ret < 0) {
++            shard_common_failure_unwind(fop, frame, local->op_ret,
++                                        local->op_errno);
++        } else {
++            shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this);
++            local->hole_size = 0;
++            if (xdata)
++                local->xattr_rsp = dict_ref(xdata);
++            shard_update_file_size(
++                frame, this, local->fd, NULL,
++                shard_common_inode_write_post_update_size_handler);
++        }
++    }
+ 
+-    SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf,
+-                       local->iobref, NULL);
+     return 0;
+-  }
++}
+ 
+-  local->first_block = get_lowest_block(local->offset, local->block_size);
++int
++shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                              struct iovec *vec, int count, off_t shard_offset,
++                              size_t size)
++{
++    shard_local_t *local = NULL;
+ 
+-  local->total_size = local->req_size;
++    local = frame->local;
+ 
+-  local->last_block =
+-      get_highest_block(local->offset, local->total_size, local->block_size);
++    switch (local->fop) {
++        case GF_FOP_WRITE:
++            STACK_WIND_COOKIE(
++                frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
++                FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset,
++                local->flags, local->iobref, local->xattr_req);
++            break;
++        case GF_FOP_FALLOCATE:
++            STACK_WIND_COOKIE(
++                frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
++                FIRST_CHILD(this)->fops->fallocate, fd, local->flags,
++                shard_offset, size, local->xattr_req);
++            break;
++        case GF_FOP_ZEROFILL:
++            STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++                              FIRST_CHILD(this),
++                              FIRST_CHILD(this)->fops->zerofill, fd,
++                              shard_offset, size, local->xattr_req);
++            break;
++        case GF_FOP_DISCARD:
++            STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++                              FIRST_CHILD(this),
++                              FIRST_CHILD(this)->fops->discard, fd,
++                              shard_offset, size, local->xattr_req);
++            break;
++        default:
++            gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++                   "Invalid fop id = %d", local->fop);
++            break;
++    }
++    return 0;
++}
+ 
+-  local->num_blocks = local->last_block - local->first_block + 1;
+-  GF_ASSERT(local->num_blocks > 0);
+-  local->resolver_base_inode = local->loc.inode;
++int
++shard_common_inode_write_do(call_frame_t *frame, xlator_t *this)
++{
++    int i = 0;
++    int count = 0;
++    int call_count = 0;
++    int last_block = 0;
++    uint32_t cur_block = 0;
++    fd_t *fd = NULL;
++    fd_t *anon_fd = NULL;
++    shard_local_t *local = NULL;
++    struct iovec *vec = NULL;
++    gf_boolean_t wind_failed = _gf_false;
++    gf_boolean_t odirect = _gf_false;
++    off_t orig_offset = 0;
++    off_t shard_offset = 0;
++    off_t vec_offset = 0;
++    size_t remaining_size = 0;
++    size_t shard_write_size = 0;
+ 
+-  local->inode_list =
+-      GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
+-  if (!local->inode_list)
+-    goto err;
++    local = frame->local;
++    fd = local->fd;
++
++    orig_offset = local->offset;
++    remaining_size = local->total_size;
++    cur_block = local->first_block;
++    local->call_count = call_count = local->num_blocks;
++    last_block = local->last_block;
++
++    SHARD_SET_ROOT_FS_ID(frame, local);
++
++    if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC
++               " into "
++               "dict: %s",
++               uuid_utoa(fd->inode->gfid));
++        local->op_ret = -1;
++        local->op_errno = ENOMEM;
++        local->call_count = 1;
++        shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
++                                        ENOMEM, NULL, NULL, NULL);
++        return 0;
++    }
+ 
+-  iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size);
+-  if (!iobuf)
+-    goto err;
++    if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
++        odirect = _gf_true;
+ 
+-  local->iobref = iobref_new();
+-  if (!local->iobref) {
+-    iobuf_unref(iobuf);
+-    goto err;
+-  }
++    while (cur_block <= last_block) {
++        if (wind_failed) {
++            shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
++                                            ENOMEM, NULL, NULL, NULL);
++            goto next;
++        }
+ 
+-  if (iobref_add(local->iobref, iobuf) != 0) {
+-    iobuf_unref(iobuf);
+-    goto err;
+-  }
++        shard_offset = orig_offset % local->block_size;
++        shard_write_size = local->block_size - shard_offset;
++        if (shard_write_size > remaining_size)
++            shard_write_size = remaining_size;
++
++        remaining_size -= shard_write_size;
++
++        if (local->fop == GF_FOP_WRITE) {
++            count = iov_subset(local->vector, local->count, vec_offset,
++                               vec_offset + shard_write_size, NULL);
++
++            vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
++            if (!vec) {
++                local->op_ret = -1;
++                local->op_errno = ENOMEM;
++                wind_failed = _gf_true;
++                GF_FREE(vec);
++                shard_common_inode_write_do_cbk(frame, (void *)(long)0, this,
++                                                -1, ENOMEM, NULL, NULL, NULL);
++                goto next;
++            }
++            count = iov_subset(local->vector, local->count, vec_offset,
++                               vec_offset + shard_write_size, vec);
++        }
+ 
+-  memset(iobuf->ptr, 0, local->total_size);
+-  iobuf_unref(iobuf);
+-  local->iobuf = iobuf;
++        if (cur_block == 0) {
++            anon_fd = fd_ref(fd);
++        } else {
++            anon_fd = fd_anonymous(local->inode_list[i]);
++            if (!anon_fd) {
++                local->op_ret = -1;
++                local->op_errno = ENOMEM;
++                wind_failed = _gf_true;
++                GF_FREE(vec);
++                shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd,
++                                                this, -1, ENOMEM, NULL, NULL,
++                                                NULL);
++                goto next;
++            }
++
++            if (local->fop == GF_FOP_WRITE) {
++                if (odirect)
++                    local->flags = O_DIRECT;
++                else
++                    local->flags = GF_ANON_FD_FLAGS;
++            }
++        }
+ 
+-  local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+-  if (!local->dot_shard_loc.inode) {
+-    ret =
+-        shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
+-    if (ret)
+-      goto err;
+-    shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler,
+-                              SHARD_INTERNAL_DIR_DOT_SHARD);
+-  } else {
+-    local->post_res_handler = shard_post_resolve_readv_handler;
+-    shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+-  }
+-  return 0;
+-err:
+-  shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
+-  return 0;
+-}
+-
+-int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+-                off_t offset, uint32_t flags, dict_t *xdata) {
+-  int ret = 0;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
+-
+-  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size for %s from its inode ctx",
+-           uuid_utoa(fd->inode->gfid));
+-    goto err;
+-  }
+-
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    /* block_size = 0 means that the file was created before
+-     * sharding was enabled on the volume.
+-     */
+-    STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+-    return 0;
+-  }
+-
+-  if (!this->itable)
+-    this->itable = fd->inode->table;
+-
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
+-
+-  frame->local = local;
+-
+-  ret = syncbarrier_init(&local->barrier);
+-  if (ret)
+-    goto err;
+-  local->fd = fd_ref(fd);
+-  local->block_size = block_size;
+-  local->offset = offset;
+-  local->req_size = size;
+-  local->flags = flags;
+-  local->fop = GF_FOP_READ;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
+-
+-  local->loc.inode = inode_ref(fd->inode);
+-  gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+-
+-  shard_lookup_base_file(frame, this, &local->loc,
+-                         shard_post_lookup_readv_handler);
+-  return 0;
+-err:
+-  shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
+-  return 0;
++        shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
++                                      shard_offset, shard_write_size);
++        if (vec)
++            vec_offset += shard_write_size;
++        orig_offset += shard_write_size;
++        GF_FREE(vec);
++        vec = NULL;
++    next:
++        cur_block++;
++        i++;
++        call_count--;
++    }
++    return 0;
+ }
+ 
+-int shard_common_inode_write_post_update_size_handler(call_frame_t *frame,
+-                                                      xlator_t *this) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
++int
++shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
++                                            xlator_t *this);
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
+-  } else {
+-    shard_common_inode_write_success_unwind(local->fop, frame,
+-                                            local->written_size);
+-  }
+-  return 0;
+-}
++int
++shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
++                                                    xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-static gf_boolean_t shard_is_appending_write(shard_local_t *local) {
+-  if (local->fop != GF_FOP_WRITE)
+-    return _gf_false;
+-  if (local->flags & O_APPEND)
+-    return _gf_true;
+-  if (local->fd->flags & O_APPEND)
+-    return _gf_true;
+-  return _gf_false;
+-}
++    local = frame->local;
+ 
+-int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
+-                                          xlator_t *this) {
+-  int ret = -1;
+-  uint64_t ctx_uint = 0;
+-  shard_inode_ctx_t *ctx = NULL;
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
+ 
+-  ret = __inode_ctx_get(inode, this, &ctx_uint);
+-  if (ret < 0)
+-    return ret;
++    if (local->create_count) {
++        shard_common_resume_mknod(frame, this,
++                                  shard_common_inode_write_post_mknod_handler);
++    } else {
++        shard_common_inode_write_do(frame, this);
++    }
+ 
+-  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++    return 0;
++}
+ 
+-  if (shard_is_appending_write(local)) {
+-    local->delta_size = local->total_size;
+-  } else if (local->offset + local->total_size > ctx->stat.ia_size) {
+-    local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size;
+-  } else {
+-    local->delta_size = 0;
+-  }
+-  ctx->stat.ia_size += (local->delta_size);
+-  local->postbuf = ctx->stat;
++int
++shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  return 0;
+-}
++    local = frame->local;
+ 
+-int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
+-                                        xlator_t *this) {
+-  int ret = -1;
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); }
+-  UNLOCK(&inode->lock);
++    if (!local->eexist_count) {
++        shard_common_inode_write_do(frame, this);
++    } else {
++        local->call_count = local->eexist_count;
++        shard_common_lookup_shards(
++            frame, this, local->loc.inode,
++            shard_common_inode_write_post_lookup_shards_handler);
++    }
+ 
+-  return ret;
++    return 0;
+ }
+ 
+-int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
+-                                    xlator_t *this, int32_t op_ret,
+-                                    int32_t op_errno, struct iatt *pre,
+-                                    struct iatt *post, dict_t *xdata) {
+-  int call_count = 0;
+-  fd_t *anon_fd = cookie;
+-  shard_local_t *local = NULL;
+-  glusterfs_fop_t fop = 0;
++int
++shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
++                                              xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  local = frame->local;
+-  fop = local->fop;
++    local = frame->local;
+ 
+-  LOCK(&frame->lock);
+-  {
+-    if (op_ret < 0) {
+-      local->op_ret = op_ret;
+-      local->op_errno = op_errno;
+-    } else {
+-      local->written_size += op_ret;
+-      GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks);
+-      local->delta_size += (post->ia_size - pre->ia_size);
+-      shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES);
+-      if (local->fd->inode != anon_fd->inode)
+-        shard_inode_ctx_add_to_fsync_list(local->fd->inode, this,
+-                                          anon_fd->inode);
+-    }
+-  }
+-  UNLOCK(&frame->lock);
+-
+-  if (anon_fd)
+-    fd_unref(anon_fd);
+-
+-  call_count = shard_call_count_return(frame);
+-  if (call_count == 0) {
+-    SHARD_UNSET_ROOT_FS_ID(frame, local);
+     if (local->op_ret < 0) {
+-      shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno);
++        shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
++
++    if (local->call_count) {
++        shard_common_lookup_shards(
++            frame, this, local->resolver_base_inode,
++            shard_common_inode_write_post_lookup_shards_handler);
+     } else {
+-      shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this);
+-      local->hole_size = 0;
+-      if (xdata)
+-        local->xattr_rsp = dict_ref(xdata);
+-      shard_update_file_size(frame, this, local->fd, NULL,
+-                             shard_common_inode_write_post_update_size_handler);
++        shard_common_inode_write_do(frame, this);
+     }
+-  }
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                                  struct iovec *vec, int count,
+-                                  off_t shard_offset, size_t size) {
+-  shard_local_t *local = NULL;
++int
++shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
++                                             xlator_t *this)
++{
++    shard_local_t *local = frame->local;
++    shard_priv_t *priv = this->private;
++
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(local->fop, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
+ 
+-  local = frame->local;
++    local->postbuf = local->prebuf;
++
++    /*Adjust offset to EOF so that correct shard is chosen for append*/
++    if (shard_is_appending_write(local))
++        local->offset = local->prebuf.ia_size;
++
++    local->first_block = get_lowest_block(local->offset, local->block_size);
++    local->last_block = get_highest_block(local->offset, local->total_size,
++                                          local->block_size);
++    local->num_blocks = local->last_block - local->first_block + 1;
++    GF_ASSERT(local->num_blocks > 0);
++    local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
++                                  gf_shard_mt_inode_list);
++    if (!local->inode_list) {
++        shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++        return 0;
++    }
+ 
+-  switch (local->fop) {
+-  case GF_FOP_WRITE:
+-    STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd,
+-                      vec, count, shard_offset, local->flags, local->iobref,
+-                      local->xattr_req);
+-    break;
+-  case GF_FOP_FALLOCATE:
+-    STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd,
+-                      local->flags, shard_offset, size, local->xattr_req);
+-    break;
+-  case GF_FOP_ZEROFILL:
+-    STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd,
+-                      shard_offset, size, local->xattr_req);
+-    break;
+-  case GF_FOP_DISCARD:
+-    STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+-                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd,
+-                      shard_offset, size, local->xattr_req);
+-    break;
+-  default:
+-    gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-           "Invalid fop id = %d", local->fop);
+-    break;
+-  }
+-  return 0;
+-}
+-
+-int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) {
+-  int i = 0;
+-  int count = 0;
+-  int call_count = 0;
+-  int last_block = 0;
+-  uint32_t cur_block = 0;
+-  fd_t *fd = NULL;
+-  fd_t *anon_fd = NULL;
+-  shard_local_t *local = NULL;
+-  struct iovec *vec = NULL;
+-  gf_boolean_t wind_failed = _gf_false;
+-  gf_boolean_t odirect = _gf_false;
+-  off_t orig_offset = 0;
+-  off_t shard_offset = 0;
+-  off_t vec_offset = 0;
+-  size_t remaining_size = 0;
+-  size_t shard_write_size = 0;
+-
+-  local = frame->local;
+-  fd = local->fd;
+-
+-  orig_offset = local->offset;
+-  remaining_size = local->total_size;
+-  cur_block = local->first_block;
+-  local->call_count = call_count = local->num_blocks;
+-  last_block = local->last_block;
+-
+-  SHARD_SET_ROOT_FS_ID(frame, local);
+-
+-  if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into "
+-           "dict: %s",
+-           uuid_utoa(fd->inode->gfid));
+-    local->op_ret = -1;
+-    local->op_errno = ENOMEM;
+-    local->call_count = 1;
+-    shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM,
+-                                    NULL, NULL, NULL);
+-    return 0;
+-  }
++    gf_msg_trace(this->name, 0,
++                 "%s: gfid=%s first_block=%" PRIu64
++                 " "
++                 "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64
++                 " total_size=%zu flags=%" PRId32 "",
++                 gf_fop_list[local->fop],
++                 uuid_utoa(local->resolver_base_inode->gfid),
++                 local->first_block, local->last_block, local->num_blocks,
++                 local->offset, local->total_size, local->flags);
+ 
+-  if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
+-    odirect = _gf_true;
++    local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+ 
+-  while (cur_block <= last_block) {
+-    if (wind_failed) {
+-      shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM,
+-                                      NULL, NULL, NULL);
+-      goto next;
++    if (!local->dot_shard_loc.inode) {
++        /*change handler*/
++        shard_mkdir_internal_dir(frame, this,
++                                 shard_common_inode_write_post_resolve_handler,
++                                 SHARD_INTERNAL_DIR_DOT_SHARD);
++    } else {
++        /*change handler*/
++        local->post_res_handler = shard_common_inode_write_post_resolve_handler;
++        shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+     }
++    return 0;
++}
+ 
+-    shard_offset = orig_offset % local->block_size;
+-    shard_write_size = local->block_size - shard_offset;
+-    if (shard_write_size > remaining_size)
+-      shard_write_size = remaining_size;
++int
++shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                             int32_t op_ret, int32_t op_errno, inode_t *inode,
++                             struct iatt *buf, struct iatt *preparent,
++                             struct iatt *postparent, dict_t *xdata)
++{
++    inode_t *link_inode = NULL;
++    shard_local_t *local = NULL;
++    shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+ 
+-    remaining_size -= shard_write_size;
++    local = frame->local;
+ 
+-    if (local->fop == GF_FOP_WRITE) {
+-      count = iov_subset(local->vector, local->count, vec_offset,
+-                         vec_offset + shard_write_size, NULL);
++    SHARD_UNSET_ROOT_FS_ID(frame, local);
+ 
+-      vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
+-      if (!vec) {
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        wind_failed = _gf_true;
+-        GF_FREE(vec);
+-        shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+-                                        ENOMEM, NULL, NULL, NULL);
+-        goto next;
+-      }
+-      count = iov_subset(local->vector, local->count, vec_offset,
+-                         vec_offset + shard_write_size, vec);
++    if (op_ret == -1) {
++        if (op_errno != EEXIST) {
++            local->op_ret = op_ret;
++            local->op_errno = op_errno;
++            goto unwind;
++        } else {
++            gf_msg_debug(this->name, 0,
++                         "mkdir on %s failed "
++                         "with EEXIST. Attempting lookup now",
++                         shard_internal_dir_string(type));
++            shard_lookup_internal_dir(frame, this, local->post_res_handler,
++                                      type);
++            return 0;
++        }
+     }
+ 
+-    if (cur_block == 0) {
+-      anon_fd = fd_ref(fd);
++    link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++    if (link_inode != inode) {
++        shard_refresh_internal_dir(frame, this, type);
+     } else {
+-      anon_fd = fd_anonymous(local->inode_list[i]);
+-      if (!anon_fd) {
+-        local->op_ret = -1;
+-        local->op_errno = ENOMEM;
+-        wind_failed = _gf_true;
+-        GF_FREE(vec);
+-        shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1,
+-                                        ENOMEM, NULL, NULL, NULL);
+-        goto next;
+-      }
+-
+-      if (local->fop == GF_FOP_WRITE) {
+-        if (odirect)
+-          local->flags = O_DIRECT;
+-        else
+-          local->flags = GF_ANON_FD_FLAGS;
+-      }
+-    }
+-
+-    shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
+-                                  shard_offset, shard_write_size);
+-    if (vec)
+-      vec_offset += shard_write_size;
+-    orig_offset += shard_write_size;
+-    GF_FREE(vec);
+-    vec = NULL;
+-  next:
+-    cur_block++;
+-    i++;
+-    call_count--;
+-  }
+-  return 0;
++        shard_inode_ctx_mark_dir_refreshed(link_inode, this);
++        shard_common_resolve_shards(frame, this, local->post_res_handler);
++    }
++    return 0;
++unwind:
++    shard_common_resolve_shards(frame, this, local->post_res_handler);
++    return 0;
+ }
+ 
+-int shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
+-                                                xlator_t *this);
++int
++shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
++                         shard_post_resolve_fop_handler_t handler,
++                         shard_internal_dir_type_t type)
++{
++    int ret = -1;
++    shard_local_t *local = NULL;
++    shard_priv_t *priv = NULL;
++    dict_t *xattr_req = NULL;
++    uuid_t *gfid = NULL;
++    loc_t *loc = NULL;
++    gf_boolean_t free_gfid = _gf_true;
+ 
+-int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+-                                                        xlator_t *this) {
+-  shard_local_t *local = NULL;
++    local = frame->local;
++    priv = this->private;
+ 
+-  local = frame->local;
++    local->post_res_handler = handler;
++    gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
++    if (!gfid)
++        goto err;
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
+-  }
++    switch (type) {
++        case SHARD_INTERNAL_DIR_DOT_SHARD:
++            gf_uuid_copy(*gfid, priv->dot_shard_gfid);
++            loc = &local->dot_shard_loc;
++            break;
++        case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++            gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
++            loc = &local->dot_shard_rm_loc;
++            break;
++        default:
++            bzero(*gfid, sizeof(uuid_t));
++            break;
++    }
+ 
+-  if (local->create_count) {
+-    shard_common_resume_mknod(frame, this,
+-                              shard_common_inode_write_post_mknod_handler);
+-  } else {
+-    shard_common_inode_write_do(frame, this);
+-  }
++    xattr_req = dict_new();
++    if (!xattr_req)
++        goto err;
+ 
+-  return 0;
+-}
++    ret = shard_init_internal_dir_loc(this, local, type);
++    if (ret)
++        goto err;
+ 
+-int shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
+-                                                xlator_t *this) {
+-  shard_local_t *local = NULL;
++    ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set gfid-req for %s",
++               shard_internal_dir_string(type));
++        goto err;
++    } else {
++        free_gfid = _gf_false;
++    }
+ 
+-  local = frame->local;
++    SHARD_SET_ROOT_FS_ID(frame, local);
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
++    STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
++                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
++                      0755, 0, xattr_req);
++    dict_unref(xattr_req);
+     return 0;
+-  }
+ 
+-  if (!local->eexist_count) {
+-    shard_common_inode_write_do(frame, this);
+-  } else {
+-    local->call_count = local->eexist_count;
+-    shard_common_lookup_shards(
+-        frame, this, local->loc.inode,
+-        shard_common_inode_write_post_lookup_shards_handler);
+-  }
+-
+-  return 0;
++err:
++    if (xattr_req)
++        dict_unref(xattr_req);
++    local->op_ret = -1;
++    local->op_errno = ENOMEM;
++    if (free_gfid)
++        GF_FREE(gfid);
++    handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
+-                                                  xlator_t *this) {
+-  shard_local_t *local = NULL;
+-
+-  local = frame->local;
+-
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
++int
++shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
++    /* To-Do: Wind flush on all shards of the file */
++    SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+     return 0;
+-  }
+-
+-  if (local->call_count) {
+-    shard_common_lookup_shards(
+-        frame, this, local->resolver_base_inode,
+-        shard_common_inode_write_post_lookup_shards_handler);
+-  } else {
+-    shard_common_inode_write_do(frame, this);
+-  }
+-
+-  return 0;
+ }
+ 
+-int shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
+-                                                 xlator_t *this) {
+-  shard_local_t *local = frame->local;
+-  shard_priv_t *priv = this->private;
+-
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(local->fop, frame, local->op_ret,
+-                                local->op_errno);
++int
++shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
++{
++    STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->flush, fd, xdata);
+     return 0;
+-  }
+-
+-  local->postbuf = local->prebuf;
++}
+ 
+-  /*Adjust offset to EOF so that correct shard is chosen for append*/
+-  if (shard_is_appending_write(local))
+-    local->offset = local->prebuf.ia_size;
++int
++__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
++                                      xlator_t *this)
++{
++    int ret = -1;
++    uint64_t ctx_uint = 0;
++    shard_inode_ctx_t *ctx = NULL;
+ 
+-  local->first_block = get_lowest_block(local->offset, local->block_size);
+-  local->last_block =
+-      get_highest_block(local->offset, local->total_size, local->block_size);
+-  local->num_blocks = local->last_block - local->first_block + 1;
+-  GF_ASSERT(local->num_blocks > 0);
+-  local->inode_list =
+-      GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
+-  if (!local->inode_list) {
+-    shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+-    return 0;
+-  }
++    ret = __inode_ctx_get(inode, this, &ctx_uint);
++    if (ret < 0)
++        return ret;
+ 
+-  gf_msg_trace(
+-      this->name, 0, "%s: gfid=%s first_block=%" PRIu64 " "
+-                     "last_block=%" PRIu64 " num_blocks=%" PRIu64
+-                     " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "",
+-      gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid),
+-      local->first_block, local->last_block, local->num_blocks, local->offset,
+-      local->total_size, local->flags);
++    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+ 
+-  local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++    local->postbuf.ia_ctime = ctx->stat.ia_ctime;
++    local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
++    local->postbuf.ia_atime = ctx->stat.ia_atime;
++    local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
++    local->postbuf.ia_mtime = ctx->stat.ia_mtime;
++    local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
+ 
+-  if (!local->dot_shard_loc.inode) {
+-    /*change handler*/
+-    shard_mkdir_internal_dir(frame, this,
+-                             shard_common_inode_write_post_resolve_handler,
+-                             SHARD_INTERNAL_DIR_DOT_SHARD);
+-  } else {
+-    /*change handler*/
+-    local->post_res_handler = shard_common_inode_write_post_resolve_handler;
+-    shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+-  }
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie,
+-                                 xlator_t *this, int32_t op_ret,
+-                                 int32_t op_errno, inode_t *inode,
+-                                 struct iatt *buf, struct iatt *preparent,
+-                                 struct iatt *postparent, dict_t *xdata) {
+-  inode_t *link_inode = NULL;
+-  shard_local_t *local = NULL;
+-  shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+-
+-  local = frame->local;
+-
+-  SHARD_UNSET_ROOT_FS_ID(frame, local);
+-
+-  if (op_ret == -1) {
+-    if (op_errno != EEXIST) {
+-      local->op_ret = op_ret;
+-      local->op_errno = op_errno;
+-      goto unwind;
+-    } else {
+-      gf_msg_debug(this->name, 0, "mkdir on %s failed "
+-                                  "with EEXIST. Attempting lookup now",
+-                   shard_internal_dir_string(type));
+-      shard_lookup_internal_dir(frame, this, local->post_res_handler, type);
+-      return 0;
+-    }
+-  }
+-
+-  link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+-  if (link_inode != inode) {
+-    shard_refresh_internal_dir(frame, this, type);
+-  } else {
+-    shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+-    shard_common_resolve_shards(frame, this, local->post_res_handler);
+-  }
+-  return 0;
+-unwind:
+-  shard_common_resolve_shards(frame, this, local->post_res_handler);
+-  return 0;
+-}
+-
+-int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+-                             shard_post_resolve_fop_handler_t handler,
+-                             shard_internal_dir_type_t type) {
+-  int ret = -1;
+-  shard_local_t *local = NULL;
+-  shard_priv_t *priv = NULL;
+-  dict_t *xattr_req = NULL;
+-  uuid_t *gfid = NULL;
+-  loc_t *loc = NULL;
+-  gf_boolean_t free_gfid = _gf_true;
+-
+-  local = frame->local;
+-  priv = this->private;
+-
+-  local->post_res_handler = handler;
+-  gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+-  if (!gfid)
+-    goto err;
+-
+-  switch (type) {
+-  case SHARD_INTERNAL_DIR_DOT_SHARD:
+-    gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+-    loc = &local->dot_shard_loc;
+-    break;
+-  case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+-    gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+-    loc = &local->dot_shard_rm_loc;
+-    break;
+-  default:
+-    bzero(*gfid, sizeof(uuid_t));
+-    break;
+-  }
+-
+-  xattr_req = dict_new();
+-  if (!xattr_req)
+-    goto err;
+-
+-  ret = shard_init_internal_dir_loc(this, local, type);
+-  if (ret)
+-    goto err;
+-
+-  ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+-           "Failed to set gfid-req for %s", shard_internal_dir_string(type));
+-    goto err;
+-  } else {
+-    free_gfid = _gf_false;
+-  }
+-
+-  SHARD_SET_ROOT_FS_ID(frame, local);
+-
+-  STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
+-                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
+-                    0755, 0, xattr_req);
+-  dict_unref(xattr_req);
+-  return 0;
++int
++shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
++                                    xlator_t *this)
++{
++    int ret = 0;
+ 
+-err:
+-  if (xattr_req)
+-    dict_unref(xattr_req);
+-  local->op_ret = -1;
+-  local->op_errno = ENOMEM;
+-  if (free_gfid)
+-    GF_FREE(gfid);
+-  handler(frame, this);
+-  return 0;
+-}
++    LOCK(&inode->lock);
++    {
++        ret = __shard_get_timestamps_from_inode_ctx(local, inode, this);
++    }
++    UNLOCK(&inode->lock);
+ 
+-int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                    int32_t op_ret, int32_t op_errno, dict_t *xdata) {
+-  /* To-Do: Wind flush on all shards of the file */
+-  SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+-  return 0;
++    return ret;
+ }
+ 
+-int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) {
+-  STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->flush, fd, xdata);
+-  return 0;
+-}
++int
++shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                       int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
++                       struct iatt *postbuf, dict_t *xdata)
++{
++    int call_count = 0;
++    uint64_t fsync_count = 0;
++    fd_t *anon_fd = cookie;
++    shard_local_t *local = NULL;
++    shard_inode_ctx_t *ctx = NULL;
++    shard_inode_ctx_t *base_ictx = NULL;
++    inode_t *base_inode = NULL;
++    gf_boolean_t unref_shard_inode = _gf_false;
++
++    local = frame->local;
++    base_inode = local->fd->inode;
+ 
+-int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+-                                          xlator_t *this) {
+-  int ret = -1;
+-  uint64_t ctx_uint = 0;
+-  shard_inode_ctx_t *ctx = NULL;
++    if (local->op_ret < 0)
++        goto out;
+ 
+-  ret = __inode_ctx_get(inode, this, &ctx_uint);
+-  if (ret < 0)
+-    return ret;
++    LOCK(&frame->lock);
++    {
++        if (op_ret < 0) {
++            local->op_ret = op_ret;
++            local->op_errno = op_errno;
++            UNLOCK(&frame->lock);
++            goto out;
++        }
++        shard_inode_ctx_set(local->fd->inode, this, postbuf, 0,
++                            SHARD_MASK_TIMES);
++    }
++    UNLOCK(&frame->lock);
++    fd_ctx_get(anon_fd, this, &fsync_count);
++out:
++    if (anon_fd && (base_inode != anon_fd->inode)) {
++        LOCK(&base_inode->lock);
++        LOCK(&anon_fd->inode->lock);
++        {
++            __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
++            __shard_inode_ctx_get(base_inode, this, &base_ictx);
++            if (op_ret == 0)
++                ctx->fsync_needed -= fsync_count;
++            GF_ASSERT(ctx->fsync_needed >= 0);
++            if (ctx->fsync_needed != 0) {
++                list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
++                base_ictx->fsync_count++;
++            } else {
++                unref_shard_inode = _gf_true;
++            }
++        }
++        UNLOCK(&anon_fd->inode->lock);
++        UNLOCK(&base_inode->lock);
++    }
+ 
+-  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++    if (unref_shard_inode)
++        inode_unref(anon_fd->inode);
++    if (anon_fd)
++        fd_unref(anon_fd);
+ 
+-  local->postbuf.ia_ctime = ctx->stat.ia_ctime;
+-  local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
+-  local->postbuf.ia_atime = ctx->stat.ia_atime;
+-  local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
+-  local->postbuf.ia_mtime = ctx->stat.ia_mtime;
+-  local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
++    call_count = shard_call_count_return(frame);
++    if (call_count != 0)
++        return 0;
+ 
+-  return 0;
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
++                                    local->op_errno);
++    } else {
++        shard_get_timestamps_from_inode_ctx(local, base_inode, this);
++        SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
++                           &local->prebuf, &local->postbuf, local->xattr_rsp);
++    }
++    return 0;
+ }
+ 
+-int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+-                                        xlator_t *this) {
+-  int ret = 0;
++int
++shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this)
++{
++    int ret = 0;
++    int call_count = 0;
++    int fsync_count = 0;
++    fd_t *anon_fd = NULL;
++    inode_t *base_inode = NULL;
++    shard_local_t *local = NULL;
++    shard_inode_ctx_t *ctx = NULL;
++    shard_inode_ctx_t *iter = NULL;
++    struct list_head copy = {
++        0,
++    };
++    shard_inode_ctx_t *tmp = NULL;
+ 
+-  LOCK(&inode->lock);
+-  { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); }
+-  UNLOCK(&inode->lock);
++    local = frame->local;
++    base_inode = local->fd->inode;
++    local->postbuf = local->prebuf;
++    INIT_LIST_HEAD(&copy);
+ 
+-  return ret;
+-}
++    if (local->op_ret < 0) {
++        shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
++                                    local->op_errno);
++        return 0;
++    }
+ 
+-int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                           int32_t op_ret, int32_t op_errno,
+-                           struct iatt *prebuf, struct iatt *postbuf,
+-                           dict_t *xdata) {
+-  int call_count = 0;
+-  uint64_t fsync_count = 0;
+-  fd_t *anon_fd = cookie;
+-  shard_local_t *local = NULL;
+-  shard_inode_ctx_t *ctx = NULL;
+-  shard_inode_ctx_t *base_ictx = NULL;
+-  inode_t *base_inode = NULL;
+-  gf_boolean_t unref_shard_inode = _gf_false;
+-
+-  local = frame->local;
+-  base_inode = local->fd->inode;
+-
+-  if (local->op_ret < 0)
+-    goto out;
+-
+-  LOCK(&frame->lock);
+-  {
+-    if (op_ret < 0) {
+-      local->op_ret = op_ret;
+-      local->op_errno = op_errno;
+-      UNLOCK(&frame->lock);
+-      goto out;
+-    }
+-    shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES);
+-  }
+-  UNLOCK(&frame->lock);
+-  fd_ctx_get(anon_fd, this, &fsync_count);
+-out:
+-  if (anon_fd && (base_inode != anon_fd->inode)) {
+     LOCK(&base_inode->lock);
+-    LOCK(&anon_fd->inode->lock);
+     {
+-      __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+-      __shard_inode_ctx_get(base_inode, this, &base_ictx);
+-      if (op_ret == 0)
+-        ctx->fsync_needed -= fsync_count;
+-      GF_ASSERT(ctx->fsync_needed >= 0);
+-      if (ctx->fsync_needed != 0) {
+-        list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
+-        base_ictx->fsync_count++;
+-      } else {
+-        unref_shard_inode = _gf_true;
+-      }
+-    }
+-    UNLOCK(&anon_fd->inode->lock);
++        __shard_inode_ctx_get(base_inode, this, &ctx);
++        list_splice_init(&ctx->to_fsync_list, &copy);
++        call_count = ctx->fsync_count;
++        ctx->fsync_count = 0;
++    }
+     UNLOCK(&base_inode->lock);
+-  }
+-
+-  if (unref_shard_inode)
+-    inode_unref(anon_fd->inode);
+-  if (anon_fd)
+-    fd_unref(anon_fd);
+-
+-  call_count = shard_call_count_return(frame);
+-  if (call_count != 0)
+-    return 0;
+ 
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+-                                local->op_errno);
+-  } else {
+-    shard_get_timestamps_from_inode_ctx(local, base_inode, this);
+-    SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
+-                       &local->prebuf, &local->postbuf, local->xattr_rsp);
+-  }
+-  return 0;
+-}
+-
+-int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) {
+-  int ret = 0;
+-  int call_count = 0;
+-  int fsync_count = 0;
+-  fd_t *anon_fd = NULL;
+-  inode_t *base_inode = NULL;
+-  shard_local_t *local = NULL;
+-  shard_inode_ctx_t *ctx = NULL;
+-  shard_inode_ctx_t *iter = NULL;
+-  struct list_head copy = {
+-      0,
+-  };
+-  shard_inode_ctx_t *tmp = NULL;
+-
+-  local = frame->local;
+-  base_inode = local->fd->inode;
+-  local->postbuf = local->prebuf;
+-  INIT_LIST_HEAD(&copy);
+-
+-  if (local->op_ret < 0) {
+-    shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+-                                local->op_errno);
+-    return 0;
+-  }
+-
+-  LOCK(&base_inode->lock);
+-  {
+-    __shard_inode_ctx_get(base_inode, this, &ctx);
+-    list_splice_init(&ctx->to_fsync_list, &copy);
+-    call_count = ctx->fsync_count;
+-    ctx->fsync_count = 0;
+-  }
+-  UNLOCK(&base_inode->lock);
+-
+-  local->call_count = ++call_count;
+-
+-  /* Send fsync() on the base shard first */
+-  anon_fd = fd_ref(local->fd);
+-  STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
+-                    FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
+-                    local->xattr_req);
+-  call_count--;
+-  anon_fd = NULL;
+-
+-  list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list) {
+-    list_del_init(&iter->to_fsync_list);
+-    fsync_count = 0;
+-    shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
+-    GF_ASSERT(fsync_count > 0);
+-    anon_fd = fd_anonymous(iter->inode);
+-    if (!anon_fd) {
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+-             "Failed to create "
+-             "anon fd to fsync shard");
+-      shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM,
+-                             NULL, NULL, NULL);
+-      continue;
+-    }
++    local->call_count = ++call_count;
+ 
+-    ret = fd_ctx_set(anon_fd, this, fsync_count);
+-    if (ret) {
+-      gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
+-             "Failed to set fd "
+-             "ctx for shard inode gfid=%s",
+-             uuid_utoa(iter->inode->gfid));
+-      local->op_ret = -1;
+-      local->op_errno = ENOMEM;
+-      shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM,
+-                             NULL, NULL, NULL);
+-      continue;
+-    }
++    /* Send fsync() on the base shard first */
++    anon_fd = fd_ref(local->fd);
+     STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
+                       FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
+                       local->xattr_req);
+     call_count--;
+-  }
++    anon_fd = NULL;
+ 
+-  return 0;
++    list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list)
++    {
++        list_del_init(&iter->to_fsync_list);
++        fsync_count = 0;
++        shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
++        GF_ASSERT(fsync_count > 0);
++        anon_fd = fd_anonymous(iter->inode);
++        if (!anon_fd) {
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
++                   SHARD_MSG_MEMALLOC_FAILED,
++                   "Failed to create "
++                   "anon fd to fsync shard");
++            shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
++                                   ENOMEM, NULL, NULL, NULL);
++            continue;
++        }
++
++        ret = fd_ctx_set(anon_fd, this, fsync_count);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
++                   "Failed to set fd "
++                   "ctx for shard inode gfid=%s",
++                   uuid_utoa(iter->inode->gfid));
++            local->op_ret = -1;
++            local->op_errno = ENOMEM;
++            shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
++                                   ENOMEM, NULL, NULL, NULL);
++            continue;
++        }
++        STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd,
++                          FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
++                          anon_fd, local->datasync, local->xattr_req);
++        call_count--;
++    }
++
++    return 0;
+ }
+ 
+-int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+-                dict_t *xdata) {
+-  int ret = 0;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
++int
++shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
++            dict_t *xdata)
++{
++    int ret = 0;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
+ 
+-  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size for %s from its inode ctx",
+-           uuid_utoa(fd->inode->gfid));
+-    goto err;
+-  }
++    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size for %s from its inode ctx",
++               uuid_utoa(fd->inode->gfid));
++        goto err;
++    }
+ 
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+-    return 0;
+-  }
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
++        return 0;
++    }
+ 
+-  if (!this->itable)
+-    this->itable = fd->inode->table;
++    if (!this->itable)
++        this->itable = fd->inode->table;
+ 
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
+ 
+-  frame->local = local;
++    frame->local = local;
+ 
+-  local->fd = fd_ref(fd);
+-  local->fop = GF_FOP_FSYNC;
+-  local->datasync = datasync;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
++    local->fd = fd_ref(fd);
++    local->fop = GF_FOP_FSYNC;
++    local->datasync = datasync;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    if (!local->xattr_req)
++        goto err;
+ 
+-  local->loc.inode = inode_ref(fd->inode);
+-  gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++    local->loc.inode = inode_ref(fd->inode);
++    gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ 
+-  shard_lookup_base_file(frame, this, &local->loc,
+-                         shard_post_lookup_fsync_handler);
+-  return 0;
++    shard_lookup_base_file(frame, this, &local->loc,
++                           shard_post_lookup_fsync_handler);
++    return 0;
+ err:
+-  shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
++    return 0;
+ }
+ 
+-int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
+-                                     xlator_t *this, int32_t op_ret,
+-                                     int32_t op_errno,
+-                                     gf_dirent_t *orig_entries, dict_t *xdata) {
+-  gf_dirent_t *entry = NULL;
+-  gf_dirent_t *tmp = NULL;
+-  shard_local_t *local = NULL;
++int
++shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
++                                 xlator_t *this, int32_t op_ret,
++                                 int32_t op_errno, gf_dirent_t *orig_entries,
++                                 dict_t *xdata)
++{
++    gf_dirent_t *entry = NULL;
++    gf_dirent_t *tmp = NULL;
++    shard_local_t *local = NULL;
+ 
+-  local = frame->local;
++    local = frame->local;
+ 
+-  if (op_ret < 0)
+-    goto unwind;
++    if (op_ret < 0)
++        goto unwind;
+ 
+-  list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) {
+-    list_del_init(&entry->list);
+-    list_add_tail(&entry->list, &local->entries_head.list);
++    list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
++    {
++        list_del_init(&entry->list);
++        list_add_tail(&entry->list, &local->entries_head.list);
+ 
+-    if (!entry->dict)
+-      continue;
++        if (!entry->dict)
++            continue;
+ 
+-    if (IA_ISDIR(entry->d_stat.ia_type))
+-      continue;
++        if (IA_ISDIR(entry->d_stat.ia_type))
++            continue;
+ 
+-    if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
+-      shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+-    if (!entry->inode)
+-      continue;
++        if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
++            shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
++        if (!entry->inode)
++            continue;
+ 
+-    shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+-  }
+-  local->op_ret += op_ret;
++        shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
++    }
++    local->op_ret += op_ret;
+ 
+ unwind:
+-  if (local->fop == GF_FOP_READDIR)
+-    SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
+-                       &local->entries_head, xdata);
+-  else
+-    SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head,
+-                       xdata);
+-  return 0;
++    if (local->fop == GF_FOP_READDIR)
++        SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
++                           &local->entries_head, xdata);
++    else
++        SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
++                           &local->entries_head, xdata);
++    return 0;
+ }
+ 
+-int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                          int32_t op_ret, int32_t op_errno,
+-                          gf_dirent_t *orig_entries, dict_t *xdata) {
+-  fd_t *fd = NULL;
+-  gf_dirent_t *entry = NULL;
+-  gf_dirent_t *tmp = NULL;
+-  shard_local_t *local = NULL;
+-  gf_boolean_t last_entry = _gf_false;
++int32_t
++shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                  int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
++                  dict_t *xdata)
++{
++    fd_t *fd = NULL;
++    gf_dirent_t *entry = NULL;
++    gf_dirent_t *tmp = NULL;
++    shard_local_t *local = NULL;
++    gf_boolean_t last_entry = _gf_false;
+ 
+-  local = frame->local;
+-  fd = local->fd;
++    local = frame->local;
++    fd = local->fd;
+ 
+-  if (op_ret < 0)
+-    goto unwind;
++    if (op_ret < 0)
++        goto unwind;
+ 
+-  list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) {
+-    if (last_entry)
+-      last_entry = _gf_false;
++    list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
++    {
++        if (last_entry)
++            last_entry = _gf_false;
++
++        if (__is_root_gfid(fd->inode->gfid) &&
++            !(strcmp(entry->d_name, GF_SHARD_DIR))) {
++            local->offset = entry->d_off;
++            op_ret--;
++            last_entry = _gf_true;
++            continue;
++        }
+ 
+-    if (__is_root_gfid(fd->inode->gfid) &&
+-        !(strcmp(entry->d_name, GF_SHARD_DIR))) {
+-      local->offset = entry->d_off;
+-      op_ret--;
+-      last_entry = _gf_true;
+-      continue;
+-    }
++        list_del_init(&entry->list);
++        list_add_tail(&entry->list, &local->entries_head.list);
+ 
+-    list_del_init(&entry->list);
+-    list_add_tail(&entry->list, &local->entries_head.list);
++        if (!entry->dict)
++            continue;
+ 
+-    if (!entry->dict)
+-      continue;
++        if (IA_ISDIR(entry->d_stat.ia_type))
++            continue;
+ 
+-    if (IA_ISDIR(entry->d_stat.ia_type))
+-      continue;
++        if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
++            frame->root->pid != GF_CLIENT_PID_GSYNCD)
++            shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+ 
+-    if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
+-        frame->root->pid != GF_CLIENT_PID_GSYNCD)
+-      shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
++        if (!entry->inode)
++            continue;
+ 
+-    if (!entry->inode)
+-      continue;
++        shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
++    }
+ 
+-    shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+-  }
++    local->op_ret = op_ret;
+ 
+-  local->op_ret = op_ret;
++    if (last_entry) {
++        if (local->fop == GF_FOP_READDIR)
++            STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
++                       FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
++                       local->fd, local->readdir_size, local->offset,
++                       local->xattr_req);
++        else
++            STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
++                       FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
++                       local->fd, local->readdir_size, local->offset,
++                       local->xattr_req);
++        return 0;
++    }
+ 
+-  if (last_entry) {
++unwind:
+     if (local->fop == GF_FOP_READDIR)
+-      STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this),
+-                 FIRST_CHILD(this)->fops->readdir, local->fd,
+-                 local->readdir_size, local->offset, local->xattr_req);
++        SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno,
++                           &local->entries_head, xdata);
+     else
+-      STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this),
+-                 FIRST_CHILD(this)->fops->readdirp, local->fd,
+-                 local->readdir_size, local->offset, local->xattr_req);
++        SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
++                           &local->entries_head, xdata);
+     return 0;
+-  }
++}
+ 
+-unwind:
+-  if (local->fop == GF_FOP_READDIR)
+-    SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head,
+-                       xdata);
+-  else
+-    SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head,
+-                       xdata);
+-  return 0;
+-}
+-
+-int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+-                     off_t offset, int whichop, dict_t *xdata) {
+-  int ret = 0;
+-  shard_local_t *local = NULL;
+-
+-  local = mem_get0(this->local_pool);
+-  if (!local) {
+-    goto err;
+-  }
+-
+-  frame->local = local;
+-
+-  local->fd = fd_ref(fd);
+-  local->fop = whichop;
+-  local->readdir_size = size;
+-  INIT_LIST_HEAD(&local->entries_head.list);
+-  local->list_inited = _gf_true;
+-
+-  if (whichop == GF_FOP_READDIR) {
+-    STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+-  } else {
+-    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+-                                    local, err);
+-    ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+-    if (ret) {
+-      gf_log(this->name, GF_LOG_WARNING,
+-             "Failed to set "
+-             "dict value: key:%s, directory gfid=%s",
+-             GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
+-      goto err;
++int
++shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++                 off_t offset, int whichop, dict_t *xdata)
++{
++    int ret = 0;
++    shard_local_t *local = NULL;
++
++    local = mem_get0(this->local_pool);
++    if (!local) {
++        goto err;
+     }
+ 
+-    STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
+-               local->xattr_req);
+-  }
++    frame->local = local;
++
++    local->fd = fd_ref(fd);
++    local->fop = whichop;
++    local->readdir_size = size;
++    INIT_LIST_HEAD(&local->entries_head.list);
++    local->list_inited = _gf_true;
++
++    if (whichop == GF_FOP_READDIR) {
++        STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
++    } else {
++        local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++        SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++                                        local, err);
++        ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
++        if (ret) {
++            gf_log(this->name, GF_LOG_WARNING,
++                   "Failed to set "
++                   "dict value: key:%s, directory gfid=%s",
++                   GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
++            goto err;
++        }
++
++        STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
++                   local->xattr_req);
++    }
+ 
+-  return 0;
++    return 0;
+ 
+ err:
+-  STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+-  return 0;
++    STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
++    return 0;
+ }
+ 
+-int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                      size_t size, off_t offset, dict_t *xdata) {
+-  shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+-  return 0;
++int32_t
++shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++              off_t offset, dict_t *xdata)
++{
++    shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
++    return 0;
+ }
+ 
+-int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                       size_t size, off_t offset, dict_t *xdata) {
+-  shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
+-  return 0;
++int32_t
++shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++               off_t offset, dict_t *xdata)
++{
++    shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
++    return 0;
+ }
+ 
+ int32_t
+@@ -6037,77 +6450,86 @@ shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+     return 0;
+ }
+ 
+-int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                            int32_t op_ret, int32_t op_errno, dict_t *dict,
+-                            dict_t *xdata) {
+-  if (op_ret < 0)
+-    goto unwind;
++int32_t
++shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                    int32_t op_ret, int32_t op_errno, dict_t *dict,
++                    dict_t *xdata)
++{
++    if (op_ret < 0)
++        goto unwind;
+ 
+-  if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+-    dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+-    dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+-  }
++    if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++        dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
++        dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
++    }
+ 
+ unwind:
+-  SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+-  return 0;
++    SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
++    return 0;
+ }
+ 
+-int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                        const char *name, dict_t *xdata) {
+-  int op_errno = EINVAL;
++int32_t
++shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
++                dict_t *xdata)
++{
++    int op_errno = EINVAL;
+ 
+-  if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+-      (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
+-    op_errno = ENODATA;
+-    goto out;
+-  }
++    if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
++        (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
++        op_errno = ENODATA;
++        goto out;
++    }
+ 
+-  STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+-  return 0;
++    STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
++    return 0;
+ out:
+-  shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
++    return 0;
+ }
+ 
+-int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                           int32_t op_ret, int32_t op_errno, dict_t *dict,
+-                           dict_t *xdata) {
+-  if (op_ret < 0)
+-    goto unwind;
++int32_t
++shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                   int32_t op_ret, int32_t op_errno, dict_t *dict,
++                   dict_t *xdata)
++{
++    if (op_ret < 0)
++        goto unwind;
+ 
+-  if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+-    dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+-    dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+-  }
++    if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++        dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
++        dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
++    }
+ 
+ unwind:
+-  SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+-  return 0;
++    SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
++    return 0;
+ }
+ 
+-int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                       const char *name, dict_t *xdata) {
+-  int op_errno = EINVAL;
++int32_t
++shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++               const char *name, dict_t *xdata)
++{
++    int op_errno = EINVAL;
+ 
+-  if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+-      (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
+-    op_errno = ENODATA;
+-    goto out;
+-  }
++    if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
++        (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
++        op_errno = ENODATA;
++        goto out;
++    }
+ 
+-  STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+-  return 0;
++    STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
++    return 0;
+ out:
+-  shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
++    return 0;
+ }
+ 
+-int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie,
+-                                   xlator_t *this, int32_t op_ret,
+-                                   int32_t op_errno, dict_t *xdata) {
++int32_t
++shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno, dict_t *xdata)
++{
+     int ret = -1;
+     shard_local_t *local = NULL;
+ 
+@@ -6141,8 +6563,9 @@ err:
+     return 0;
+ }
+ 
+-int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame,
+-                                            xlator_t *this) {
++int32_t
++shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this)
++{
+     shard_local_t *local = NULL;
+ 
+     local = frame->local;
+@@ -6164,9 +6587,11 @@ int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame,
+     return 0;
+ }
+ 
+-int32_t shard_common_set_xattr(call_frame_t *frame, xlator_t *this,
+-                               glusterfs_fop_t fop, loc_t *loc, fd_t *fd,
+-                               dict_t *dict, int32_t flags, dict_t *xdata) {
++int32_t
++shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
++                       loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags,
++                       dict_t *xdata)
++{
+     int ret = -1;
+     int op_errno = ENOMEM;
+     uint64_t block_size = 0;
+@@ -6249,489 +6674,531 @@ err:
+     return 0;
+ }
+ 
+-int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                        dict_t *dict, int32_t flags, dict_t *xdata) {
++int32_t
++shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
++                int32_t flags, dict_t *xdata)
++{
+     shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags,
+                            xdata);
+     return 0;
+ }
+ 
+-int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                       dict_t *dict, int32_t flags, dict_t *xdata) {
++int32_t
++shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
++               int32_t flags, dict_t *xdata)
++{
+     shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags,
+                            xdata);
+     return 0;
+ }
+ 
+-int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) {
+-  shard_local_t *local = NULL;
++int
++shard_post_setattr_handler(call_frame_t *frame, xlator_t *this)
++{
++    shard_local_t *local = NULL;
+ 
+-  local = frame->local;
++    local = frame->local;
+ 
+-  if (local->fop == GF_FOP_SETATTR) {
+-    if (local->op_ret >= 0)
+-      shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
+-                          SHARD_LOOKUP_MASK);
+-    SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+-                       &local->prebuf, &local->postbuf, local->xattr_rsp);
+-  } else if (local->fop == GF_FOP_FSETATTR) {
+-    if (local->op_ret >= 0)
+-      shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
+-                          SHARD_LOOKUP_MASK);
+-    SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
+-                       &local->prebuf, &local->postbuf, local->xattr_rsp);
+-  }
++    if (local->fop == GF_FOP_SETATTR) {
++        if (local->op_ret >= 0)
++            shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
++                                SHARD_LOOKUP_MASK);
++        SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
++                           &local->prebuf, &local->postbuf, local->xattr_rsp);
++    } else if (local->fop == GF_FOP_FSETATTR) {
++        if (local->op_ret >= 0)
++            shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
++                                SHARD_LOOKUP_MASK);
++        SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
++                           &local->prebuf, &local->postbuf, local->xattr_rsp);
++    }
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                             int32_t op_ret, int32_t op_errno,
+-                             struct iatt *prebuf, struct iatt *postbuf,
+-                             dict_t *xdata) {
+-  shard_local_t *local = NULL;
++int
++shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                         int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
++                         struct iatt *postbuf, dict_t *xdata)
++{
++    shard_local_t *local = NULL;
+ 
+-  local = frame->local;
++    local = frame->local;
+ 
+-  if (op_ret < 0) {
+-    local->op_ret = op_ret;
+-    local->op_errno = op_errno;
+-    goto unwind;
+-  }
++    if (op_ret < 0) {
++        local->op_ret = op_ret;
++        local->op_errno = op_errno;
++        goto unwind;
++    }
+ 
+-  local->prebuf = *prebuf;
+-  if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+-    local->op_ret = -1;
+-    local->op_errno = EINVAL;
+-    goto unwind;
+-  }
+-  if (xdata)
+-    local->xattr_rsp = dict_ref(xdata);
+-  local->postbuf = *postbuf;
+-  local->postbuf.ia_size = local->prebuf.ia_size;
+-  local->postbuf.ia_blocks = local->prebuf.ia_blocks;
++    local->prebuf = *prebuf;
++    if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++        local->op_ret = -1;
++        local->op_errno = EINVAL;
++        goto unwind;
++    }
++    if (xdata)
++        local->xattr_rsp = dict_ref(xdata);
++    local->postbuf = *postbuf;
++    local->postbuf.ia_size = local->prebuf.ia_size;
++    local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+ 
+ unwind:
+-  local->handler(frame, this);
+-  return 0;
++    local->handler(frame, this);
++    return 0;
+ }
+ 
+-int shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                  struct iatt *stbuf, int32_t valid, dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
++int
++shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++              struct iatt *stbuf, int32_t valid, dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
+ 
+-  if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+-    STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+-    return 0;
+-  }
++    if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
++        STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++        return 0;
++    }
+ 
+-  ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block size from inode ctx of %s",
+-           uuid_utoa(loc->inode->gfid));
+-    goto err;
+-  }
++    ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block size from inode ctx of %s",
++               uuid_utoa(loc->inode->gfid));
++        goto err;
++    }
+ 
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+-    return 0;
+-  }
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++        return 0;
++    }
+ 
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
+ 
+-  frame->local = local;
++    frame->local = local;
+ 
+-  local->handler = shard_post_setattr_handler;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
+-  local->fop = GF_FOP_SETATTR;
+-  loc_copy(&local->loc, loc);
++    local->handler = shard_post_setattr_handler;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    if (!local->xattr_req)
++        goto err;
++    local->fop = GF_FOP_SETATTR;
++    loc_copy(&local->loc, loc);
+ 
+-  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+-                                  local, err);
++    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
++                                    local, err);
+ 
+-  STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+-             local->xattr_req);
+-  return 0;
++    STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
++               local->xattr_req);
++    return 0;
+ err:
+-  shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
++    return 0;
+ }
+ 
+-int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                   struct iatt *stbuf, int32_t valid, dict_t *xdata) {
+-  int ret = -1;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
++int
++shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++               struct iatt *stbuf, int32_t valid, dict_t *xdata)
++{
++    int ret = -1;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
+ 
+-  if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+-    STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+-    return 0;
+-  }
++    if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
++        STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++        return 0;
++    }
+ 
+-  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block size from inode ctx of %s",
+-           uuid_utoa(fd->inode->gfid));
+-    goto err;
+-  }
++    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block size from inode ctx of %s",
++               uuid_utoa(fd->inode->gfid));
++        goto err;
++    }
+ 
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+-    return 0;
+-  }
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++        return 0;
++    }
+ 
+-  if (!this->itable)
+-    this->itable = fd->inode->table;
++    if (!this->itable)
++        this->itable = fd->inode->table;
+ 
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto err;
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto err;
+ 
+-  frame->local = local;
++    frame->local = local;
+ 
+-  local->handler = shard_post_setattr_handler;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto err;
+-  local->fop = GF_FOP_FSETATTR;
+-  local->fd = fd_ref(fd);
++    local->handler = shard_post_setattr_handler;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    if (!local->xattr_req)
++        goto err;
++    local->fop = GF_FOP_FSETATTR;
++    local->fd = fd_ref(fd);
+ 
+-  SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+-                                  local, err);
++    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++                                    local, err);
+ 
+-  STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+-             FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
+-             local->xattr_req);
+-  return 0;
++    STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
++               local->xattr_req);
++    return 0;
+ err:
+-  shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
+-  return 0;
+-}
+-
+-int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
+-                                   glusterfs_fop_t fop, fd_t *fd,
+-                                   struct iovec *vector, int32_t count,
+-                                   off_t offset, uint32_t flags, size_t len,
+-                                   struct iobref *iobref, dict_t *xdata) {
+-  int ret = 0;
+-  int i = 0;
+-  uint64_t block_size = 0;
+-  shard_local_t *local = NULL;
+-
+-  ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+-  if (ret) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+-           "Failed to get block "
+-           "size for %s from its inode ctx",
+-           uuid_utoa(fd->inode->gfid));
+-    goto out;
+-  }
+-
+-  if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+-    /* block_size = 0 means that the file was created before
+-     * sharding was enabled on the volume.
+-     */
+-    switch (fop) {
+-    case GF_FOP_WRITE:
+-      STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+-                      fd, vector, count, offset, flags, iobref, xdata);
+-      break;
+-    case GF_FOP_FALLOCATE:
+-      STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                      FIRST_CHILD(this)->fops->fallocate, fd, flags, offset,
+-                      len, xdata);
+-      break;
+-    case GF_FOP_ZEROFILL:
+-      STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                      FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
+-                      xdata);
+-      break;
+-    case GF_FOP_DISCARD:
+-      STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+-                      FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+-      break;
+-    default:
+-      gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+-             "Invalid fop id = %d", fop);
+-      break;
+-    }
+-    return 0;
+-  }
+-
+-  if (!this->itable)
+-    this->itable = fd->inode->table;
+-
+-  local = mem_get0(this->local_pool);
+-  if (!local)
+-    goto out;
+-
+-  frame->local = local;
+-
+-  ret = syncbarrier_init(&local->barrier);
+-  if (ret)
+-    goto out;
+-  local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+-  if (!local->xattr_req)
+-    goto out;
+-
+-  if (vector) {
+-    local->vector = iov_dup(vector, count);
+-    if (!local->vector)
+-      goto out;
+-    for (i = 0; i < count; i++)
+-      local->total_size += vector[i].iov_len;
+-    local->count = count;
+-  } else {
+-    local->total_size = len;
+-  }
+-
+-  local->fop = fop;
+-  local->offset = offset;
+-  local->flags = flags;
+-  if (iobref)
+-    local->iobref = iobref_ref(iobref);
+-  local->fd = fd_ref(fd);
+-  local->block_size = block_size;
+-  local->resolver_base_inode = local->fd->inode;
+-  GF_ATOMIC_INIT(local->delta_blocks, 0);
+-
+-  local->loc.inode = inode_ref(fd->inode);
+-  gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+-
+-  shard_lookup_base_file(frame, this, &local->loc,
+-                         shard_common_inode_write_post_lookup_handler);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
++    return 0;
++}
++
++int
++shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
++                               glusterfs_fop_t fop, fd_t *fd,
++                               struct iovec *vector, int32_t count,
++                               off_t offset, uint32_t flags, size_t len,
++                               struct iobref *iobref, dict_t *xdata)
++{
++    int ret = 0;
++    int i = 0;
++    uint64_t block_size = 0;
++    shard_local_t *local = NULL;
++
++    ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++               "Failed to get block "
++               "size for %s from its inode ctx",
++               uuid_utoa(fd->inode->gfid));
++        goto out;
++    }
++
++    if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++        /* block_size = 0 means that the file was created before
++         * sharding was enabled on the volume.
++         */
++        switch (fop) {
++            case GF_FOP_WRITE:
++                STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++                                FIRST_CHILD(this)->fops->writev, fd, vector,
++                                count, offset, flags, iobref, xdata);
++                break;
++            case GF_FOP_FALLOCATE:
++                STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++                                FIRST_CHILD(this)->fops->fallocate, fd, flags,
++                                offset, len, xdata);
++                break;
++            case GF_FOP_ZEROFILL:
++                STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++                                FIRST_CHILD(this)->fops->zerofill, fd, offset,
++                                len, xdata);
++                break;
++            case GF_FOP_DISCARD:
++                STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++                                FIRST_CHILD(this)->fops->discard, fd, offset,
++                                len, xdata);
++                break;
++            default:
++                gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++                       "Invalid fop id = %d", fop);
++                break;
++        }
++        return 0;
++    }
++
++    if (!this->itable)
++        this->itable = fd->inode->table;
++
++    local = mem_get0(this->local_pool);
++    if (!local)
++        goto out;
++
++    frame->local = local;
++
++    ret = syncbarrier_init(&local->barrier);
++    if (ret)
++        goto out;
++    local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++    if (!local->xattr_req)
++        goto out;
++
++    if (vector) {
++        local->vector = iov_dup(vector, count);
++        if (!local->vector)
++            goto out;
++        for (i = 0; i < count; i++)
++            local->total_size += vector[i].iov_len;
++        local->count = count;
++    } else {
++        local->total_size = len;
++    }
++
++    local->fop = fop;
++    local->offset = offset;
++    local->flags = flags;
++    if (iobref)
++        local->iobref = iobref_ref(iobref);
++    local->fd = fd_ref(fd);
++    local->block_size = block_size;
++    local->resolver_base_inode = local->fd->inode;
++    GF_ATOMIC_INIT(local->delta_blocks, 0);
++
++    local->loc.inode = inode_ref(fd->inode);
++    gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++
++    shard_lookup_base_file(frame, this, &local->loc,
++                           shard_common_inode_write_post_lookup_handler);
++    return 0;
+ out:
+-  shard_common_failure_unwind(fop, frame, -1, ENOMEM);
+-  return 0;
++    shard_common_failure_unwind(fop, frame, -1, ENOMEM);
++    return 0;
+ }
+ 
+-int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                 struct iovec *vector, int32_t count, off_t offset,
+-                 uint32_t flags, struct iobref *iobref, dict_t *xdata) {
+-  shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count,
+-                                 offset, flags, 0, iobref, xdata);
+-  return 0;
++int
++shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
++             struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
++             struct iobref *iobref, dict_t *xdata)
++{
++    shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count,
++                                   offset, flags, 0, iobref, xdata);
++    return 0;
+ }
+ 
+-int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+-                    int32_t keep_size, off_t offset, size_t len,
+-                    dict_t *xdata) {
+-  if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) &&
+-      (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
+-    goto out;
++int
++shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
++                int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
++{
++    if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) &&
++        (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
++        goto out;
+ 
+-  shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0,
+-                                 offset, keep_size, len, NULL, xdata);
+-  return 0;
++    shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0,
++                                   offset, keep_size, len, NULL, xdata);
++    return 0;
+ out:
+-  shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP);
+-  return 0;
++    shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP);
++    return 0;
+ }
+ 
+-int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+-                   off_t len, dict_t *xdata) {
+-  shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0,
+-                                 offset, 0, len, NULL, xdata);
+-  return 0;
++int
++shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++               off_t len, dict_t *xdata)
++{
++    shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0,
++                                   offset, 0, len, NULL, xdata);
++    return 0;
+ }
+ 
+-int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+-                  size_t len, dict_t *xdata) {
+-  shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0,
+-                                 offset, 0, len, NULL, xdata);
+-  return 0;
++int
++shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++              size_t len, dict_t *xdata)
++{
++    shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0,
++                                   offset, 0, len, NULL, xdata);
++    return 0;
+ }
+ 
+-int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+-                   gf_seek_what_t what, dict_t *xdata) {
+-  /* TBD */
+-  gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED,
+-         "seek called on %s.", uuid_utoa(fd->inode->gfid));
+-  shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP);
+-  return 0;
++int32_t
++shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++           gf_seek_what_t what, dict_t *xdata)
++{
++    /* TBD */
++    gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED,
++           "seek called on %s.", uuid_utoa(fd->inode->gfid));
++    shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP);
++    return 0;
+ }
+ 
+-int32_t mem_acct_init(xlator_t *this) {
+-  int ret = -1;
++int32_t
++mem_acct_init(xlator_t *this)
++{
++    int ret = -1;
+ 
+-  if (!this)
+-    return ret;
++    if (!this)
++        return ret;
+ 
+-  ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1);
++    ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1);
+ 
+-  if (ret != 0) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED,
+-           "Memory accounting init"
+-           "failed");
+-    return ret;
+-  }
++    if (ret != 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED,
++               "Memory accounting init"
++               "failed");
++        return ret;
++    }
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-int init(xlator_t *this) {
+-  int ret = -1;
+-  shard_priv_t *priv = NULL;
++int
++init(xlator_t *this)
++{
++    int ret = -1;
++    shard_priv_t *priv = NULL;
++
++    if (!this) {
++        gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS,
++               "this is NULL. init() failed");
++        return -1;
++    }
+ 
+-  if (!this) {
+-    gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS,
+-           "this is NULL. init() failed");
+-    return -1;
+-  }
+-
+-  if (!this->parents) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+-           "Dangling volume. Check volfile");
+-    goto out;
+-  }
+-
+-  if (!this->children || this->children->next) {
+-    gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+-           "shard not configured with exactly one sub-volume. "
+-           "Check volfile");
+-    goto out;
+-  }
+-
+-  priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t);
+-  if (!priv)
+-    goto out;
+-
+-  GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out);
+-
+-  GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out);
+-
+-  GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out);
+-
+-  this->local_pool = mem_pool_new(shard_local_t, 128);
+-  if (!this->local_pool) {
+-    ret = -1;
+-    goto out;
+-  }
+-  gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid);
+-  gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid);
+-
+-  this->private = priv;
+-  LOCK_INIT(&priv->lock);
+-  INIT_LIST_HEAD(&priv->ilist_head);
+-  ret = 0;
++    if (!this->parents) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
++               "Dangling volume. Check volfile");
++        goto out;
++    }
++
++    if (!this->children || this->children->next) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
++               "shard not configured with exactly one sub-volume. "
++               "Check volfile");
++        goto out;
++    }
++
++    priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t);
++    if (!priv)
++        goto out;
++
++    GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out);
++
++    GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out);
++
++    GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out);
++
++    this->local_pool = mem_pool_new(shard_local_t, 128);
++    if (!this->local_pool) {
++        ret = -1;
++        goto out;
++    }
++    gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid);
++    gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid);
++
++    this->private = priv;
++    LOCK_INIT(&priv->lock);
++    INIT_LIST_HEAD(&priv->ilist_head);
++    ret = 0;
+ out:
+-  if (ret) {
+-    GF_FREE(priv);
+-    mem_pool_destroy(this->local_pool);
+-  }
++    if (ret) {
++        GF_FREE(priv);
++        mem_pool_destroy(this->local_pool);
++    }
+ 
+-  return ret;
++    return ret;
+ }
+ 
+-void fini(xlator_t *this) {
+-  shard_priv_t *priv = NULL;
++void
++fini(xlator_t *this)
++{
++    shard_priv_t *priv = NULL;
+ 
+-  GF_VALIDATE_OR_GOTO("shard", this, out);
++    GF_VALIDATE_OR_GOTO("shard", this, out);
+ 
+-  mem_pool_destroy(this->local_pool);
+-  this->local_pool = NULL;
++    mem_pool_destroy(this->local_pool);
++    this->local_pool = NULL;
+ 
+-  priv = this->private;
+-  if (!priv)
+-    goto out;
++    priv = this->private;
++    if (!priv)
++        goto out;
+ 
+-  this->private = NULL;
+-  LOCK_DESTROY(&priv->lock);
+-  GF_FREE(priv);
++    this->private = NULL;
++    LOCK_DESTROY(&priv->lock);
++    GF_FREE(priv);
+ 
+ out:
+-  return;
++    return;
+ }
+ 
+-int reconfigure(xlator_t *this, dict_t *options) {
+-  int ret = -1;
+-  shard_priv_t *priv = NULL;
++int
++reconfigure(xlator_t *this, dict_t *options)
++{
++    int ret = -1;
++    shard_priv_t *priv = NULL;
+ 
+-  priv = this->private;
++    priv = this->private;
+ 
+-  GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out);
++    GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out);
+ 
+-  GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32,
+-                   out);
+-  ret = 0;
++    GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options,
++                     uint32, out);
++    ret = 0;
+ 
+ out:
+-  return ret;
++    return ret;
+ }
+ 
+-int shard_forget(xlator_t *this, inode_t *inode) {
+-  uint64_t ctx_uint = 0;
+-  shard_inode_ctx_t *ctx = NULL;
+-  shard_priv_t *priv = NULL;
++int
++shard_forget(xlator_t *this, inode_t *inode)
++{
++    uint64_t ctx_uint = 0;
++    shard_inode_ctx_t *ctx = NULL;
++    shard_priv_t *priv = NULL;
+ 
+-  priv = this->private;
+-  if (!priv)
+-    return 0;
++    priv = this->private;
++    if (!priv)
++        return 0;
+ 
+-  inode_ctx_del(inode, this, &ctx_uint);
+-  if (!ctx_uint)
+-    return 0;
++    inode_ctx_del(inode, this, &ctx_uint);
++    if (!ctx_uint)
++        return 0;
+ 
+-  ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++    ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+ 
+-  /* When LRU limit reaches inode will be forcefully removed from the
+-   * table, inode needs to be removed from LRU of shard as well.
+-   */
+-  if (!list_empty(&ctx->ilist)) {
+-    LOCK(&priv->lock);
+-    {
+-      list_del_init(&ctx->ilist);
+-      priv->inode_count--;
++    /* When LRU limit reaches inode will be forcefully removed from the
++     * table, inode needs to be removed from LRU of shard as well.
++     */
++    if (!list_empty(&ctx->ilist)) {
++        LOCK(&priv->lock);
++        {
++            list_del_init(&ctx->ilist);
++            priv->inode_count--;
++        }
++        UNLOCK(&priv->lock);
+     }
+-    UNLOCK(&priv->lock);
+-  }
+-  GF_FREE(ctx);
++    GF_FREE(ctx);
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_release(xlator_t *this, fd_t *fd) {
+-  /* TBD */
+-  return 0;
++int
++shard_release(xlator_t *this, fd_t *fd)
++{
++    /* TBD */
++    return 0;
+ }
+ 
+-int shard_priv_dump(xlator_t *this) {
+-  shard_priv_t *priv = NULL;
+-  char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+-      0,
+-  };
+-  char *str = NULL;
++int
++shard_priv_dump(xlator_t *this)
++{
++    shard_priv_t *priv = NULL;
++    char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
++        0,
++    };
++    char *str = NULL;
+ 
+-  priv = this->private;
++    priv = this->private;
+ 
+-  snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+-  gf_proc_dump_add_section("%s", key_prefix);
+-  str = gf_uint64_2human_readable(priv->block_size);
+-  gf_proc_dump_write("shard-block-size", "%s", str);
+-  gf_proc_dump_write("inode-count", "%d", priv->inode_count);
+-  gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head);
+-  gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit);
++    snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
++    gf_proc_dump_add_section("%s", key_prefix);
++    str = gf_uint64_2human_readable(priv->block_size);
++    gf_proc_dump_write("shard-block-size", "%s", str);
++    gf_proc_dump_write("inode-count", "%d", priv->inode_count);
++    gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head);
++    gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit);
+ 
+-  GF_FREE(str);
++    GF_FREE(str);
+ 
+-  return 0;
++    return 0;
+ }
+ 
+-int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; }
++int
++shard_releasedir(xlator_t *this, fd_t *fd)
++{
++    return 0;
++}
+ 
+ struct xlator_fops fops = {
+     .lookup = shard_lookup,
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0563-features-shard-Use-fd-lookup-post-file-open.patch b/SOURCES/0563-features-shard-Use-fd-lookup-post-file-open.patch
new file mode 100644
index 0000000..c680f92
--- /dev/null
+++ b/SOURCES/0563-features-shard-Use-fd-lookup-post-file-open.patch
@@ -0,0 +1,318 @@
+From a19fa252942938a308ffa655fca3814d0660c6e2 Mon Sep 17 00:00:00 2001
+From: Vinayakswami Hariharmath <vharihar@redhat.com>
+Date: Wed, 3 Jun 2020 18:58:56 +0530
+Subject: [PATCH 563/584] features/shard: Use fd lookup post file open
+
+Issue:
+When a process holds an open fd on a file and that file is
+unlinked in the middle of its operations, a file-based
+lookup fails with ENOENT or a stale file error.
+
+Solution:
+When the file is already open and an fd is available, use fstat
+to get the file attributes.
+
+Backport of:
+> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24528/
+> Change-Id: I0e83aee9f11b616dcfe13769ebfcda6742e4e0f4
+> Fixes: #1281
+> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+
+BUG: 1925425
+Change-Id: I0e83aee9f11b616dcfe13769ebfcda6742e4e0f4
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244957
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/shard/issue-1281.t      |  34 +++++++++++
+ xlators/features/shard/src/shard.c | 119 +++++++++++++++++++++++--------------
+ 2 files changed, 110 insertions(+), 43 deletions(-)
+ create mode 100644 tests/bugs/shard/issue-1281.t
+
+diff --git a/tests/bugs/shard/issue-1281.t b/tests/bugs/shard/issue-1281.t
+new file mode 100644
+index 0000000..9704caa
+--- /dev/null
++++ b/tests/bugs/shard/issue-1281.t
+@@ -0,0 +1,34 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++
++cleanup;
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
++TEST $CLI volume set $V0 features.shard on
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++#Open a file and store descriptor in fd = 5
++exec 5>$M0/foo
++
++#Unlink the same file which is opened in prev step
++TEST unlink $M0/foo
++
++#Write something on the file using the open fd = 5
++echo "issue-1281" >&5
++
++#Write on the descriptor should be successful
++EXPECT 0 echo $?
++
++#Close the fd = 5
++exec 5>&-
++
++cleanup
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index c5cc224..2ba4528 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -1653,26 +1653,24 @@ err:
+ }
+ 
+ int
+-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+-                           int32_t op_ret, int32_t op_errno, inode_t *inode,
+-                           struct iatt *buf, dict_t *xdata,
+-                           struct iatt *postparent)
++shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this,
++                                    inode_t *inode, int32_t op_ret,
++                                    int32_t op_errno, struct iatt *buf,
++                                    dict_t *xdata)
+ {
+     int ret = -1;
+     int32_t mask = SHARD_INODE_WRITE_MASK;
+-    shard_local_t *local = NULL;
++    shard_local_t *local = frame->local;
+     shard_inode_ctx_t ctx = {
+         0,
+     };
+ 
+-    local = frame->local;
+-
+     if (op_ret < 0) {
+         gf_msg(this->name, GF_LOG_ERROR, op_errno,
+                SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
+                "Lookup on base file"
+                " failed : %s",
+-               loc_gfid_utoa(&(local->loc)));
++               uuid_utoa(inode->gfid));
+         local->op_ret = op_ret;
+         local->op_errno = op_errno;
+         goto unwind;
+@@ -1706,18 +1704,57 @@ unwind:
+ }
+ 
+ int
+-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+-                       shard_post_fop_handler_t handler)
++shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                          int32_t op_ret, int32_t op_errno, struct iatt *buf,
++                          dict_t *xdata)
++{
++    shard_local_t *local = frame->local;
++
++    shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret,
++                                        op_errno, buf, xdata);
++    return 0;
++}
++
++int
++shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                           int32_t op_ret, int32_t op_errno, inode_t *inode,
++                           struct iatt *buf, dict_t *xdata,
++                           struct iatt *postparent)
++{
++    /* In case of op_ret < 0, inode passed to this function will be NULL
++       ex: in case of op_errno = ENOENT. So refer prefilled inode data
++       which is part of local.
++       Note: Reassigning/overriding the inode passed to this cbk with inode
++       which is part of *struct shard_local_t* won't cause any issue as
++       both inodes have same reference/address as of the inode passed */
++    inode = ((shard_local_t *)frame->local)->loc.inode;
++
++    shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno,
++                                        buf, xdata);
++    return 0;
++}
++
++/* This function decides whether to make file based lookup or
++ * fd based lookup (fstat) depending on the 3rd and 4th arg.
++ * If fd != NULL and loc == NULL then call is for fstat
++ * If fd == NULL and loc != NULL then call is for file based
++ * lookup. Please pass args based on the requirement.
++ */
++int
++shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
++                        fd_t *fd, shard_post_fop_handler_t handler)
+ {
+     int ret = -1;
++    inode_t *inode = NULL;
+     shard_local_t *local = NULL;
+     dict_t *xattr_req = NULL;
+     gf_boolean_t need_refresh = _gf_false;
+ 
+     local = frame->local;
+     local->handler = handler;
++    inode = fd ? fd->inode : loc->inode;
+ 
+-    ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
++    ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf,
+                                                &need_refresh);
+     /* By this time, inode ctx should have been created either in create,
+      * mknod, readdirp or lookup. If not it is a bug!
+@@ -1726,7 +1763,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+         gf_msg_debug(this->name, 0,
+                      "Skipping lookup on base file: %s"
+                      "Serving prebuf off the inode ctx cache",
+-                     uuid_utoa(loc->gfid));
++                     uuid_utoa(inode->gfid));
+         goto out;
+     }
+ 
+@@ -1737,10 +1774,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+         goto out;
+     }
+ 
+-    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
++    SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out);
+ 
+-    STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+-               FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
++    if (fd)
++        STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->fstat, fd, xattr_req);
++    else
++        STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
++                   FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ 
+     dict_unref(xattr_req);
+     return 0;
+@@ -2718,8 +2759,8 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+     local->resolver_base_inode = loc->inode;
+     GF_ATOMIC_INIT(local->delta_blocks, 0);
+ 
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_truncate_handler);
++    shard_refresh_base_file(frame, this, &local->loc, NULL,
++                            shard_post_lookup_truncate_handler);
+     return 0;
+ 
+ err:
+@@ -2774,8 +2815,8 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+     local->resolver_base_inode = fd->inode;
+     GF_ATOMIC_INIT(local->delta_blocks, 0);
+ 
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_truncate_handler);
++    shard_refresh_base_file(frame, this, NULL, fd,
++                            shard_post_lookup_truncate_handler);
+     return 0;
+ err:
+     shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
+@@ -2919,8 +2960,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+     if (!local->xattr_req)
+         goto err;
+ 
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_link_handler);
++    shard_refresh_base_file(frame, this, &local->loc, NULL,
++                            shard_post_lookup_link_handler);
+     return 0;
+ err:
+     shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
+@@ -4249,8 +4290,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
+     switch (local->fop) {
+         case GF_FOP_UNLINK:
+         case GF_FOP_RENAME:
+-            shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
+-                                   shard_post_lookup_base_shard_rm_handler);
++            shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL,
++                                    shard_post_lookup_base_shard_rm_handler);
+             break;
+         default:
+             gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+@@ -4505,8 +4546,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     if (local->block_size) {
+         local->tmp_loc.inode = inode_new(this->itable);
+         gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
+-        shard_lookup_base_file(frame, this, &local->tmp_loc,
+-                               shard_post_rename_lookup_handler);
++        shard_refresh_base_file(frame, this, &local->tmp_loc, NULL,
++                                shard_post_rename_lookup_handler);
+     } else {
+         shard_rename_cbk(frame, this);
+     }
+@@ -5242,8 +5283,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+     local->loc.inode = inode_ref(fd->inode);
+     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ 
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_readv_handler);
++    shard_refresh_base_file(frame, this, NULL, fd,
++                            shard_post_lookup_readv_handler);
+     return 0;
+ err:
+     shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
+@@ -6046,8 +6087,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+     local->loc.inode = inode_ref(fd->inode);
+     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ 
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_fsync_handler);
++    shard_refresh_base_file(frame, this, NULL, fd,
++                            shard_post_lookup_fsync_handler);
+     return 0;
+ err:
+     shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
+@@ -6420,12 +6461,8 @@ shard_common_remove_xattr(call_frame_t *frame, xlator_t *this,
+     if (xdata)
+         local->xattr_req = dict_ref(xdata);
+ 
+-    /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is
+-     * on an fd. This comes under a generic class of bugs in shard tracked by
+-     * bz #1782428.
+-     */
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_remove_xattr_handler);
++    shard_refresh_base_file(frame, this, loc, fd,
++                            shard_post_lookup_remove_xattr_handler);
+     return 0;
+ err:
+     shard_common_failure_unwind(fop, frame, -1, op_errno);
+@@ -6662,12 +6699,8 @@ shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+     if (xdata)
+         local->xattr_rsp = dict_ref(xdata);
+ 
+-    /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is
+-     * on an fd. This comes under a generic class of bugs in shard tracked by
+-     * bz #1782428.
+-     */
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_post_lookup_set_xattr_handler);
++    shard_refresh_base_file(frame, this, loc, fd,
++                            shard_post_lookup_set_xattr_handler);
+     return 0;
+ err:
+     shard_common_failure_unwind(fop, frame, -1, op_errno);
+@@ -6951,8 +6984,8 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
+     local->loc.inode = inode_ref(fd->inode);
+     gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ 
+-    shard_lookup_base_file(frame, this, &local->loc,
+-                           shard_common_inode_write_post_lookup_handler);
++    shard_refresh_base_file(frame, this, NULL, fd,
++                            shard_common_inode_write_post_lookup_handler);
+     return 0;
+ out:
+     shard_common_failure_unwind(fop, frame, -1, ENOMEM);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0564-store.c-glusterd-store.c-remove-sys_stat-calls.patch b/SOURCES/0564-store.c-glusterd-store.c-remove-sys_stat-calls.patch
new file mode 100644
index 0000000..35cda2e
--- /dev/null
+++ b/SOURCES/0564-store.c-glusterd-store.c-remove-sys_stat-calls.patch
@@ -0,0 +1,215 @@
+From a7a56c079df2eb0253efdd53e1538656c0ce9095 Mon Sep 17 00:00:00 2001
+From: Yaniv Kaul <ykaul@redhat.com>
+Date: Mon, 25 Nov 2019 15:37:46 +0200
+Subject: [PATCH 564/584] store.c/glusterd-store.c: remove sys_stat calls
+
+Instead of querying for the file size and allocating a char array
+according to its size, let's just use a fixed size.
+Those calls are not really needed, and are either expensive or
+cached anyway. Since we do dynamic allocation/free, let's just use
+a fixed array instead.
+
+I'll see if there are other sys_stat() calls that are not really
+useful and try to eliminate them in separate patches.
+
+Backport of:
+> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/23752/
+> Change-Id: I76b40e78a52ab38f613fc0cdef4be60e6253bf20
+> updates: bz#1193929
+> Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+
+BUG: 1925425
+Change-Id: I76b40e78a52ab38f613fc0cdef4be60e6253bf20
+Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244958
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/store.h         |  4 +-
+ libglusterfs/src/store.c                   | 71 ++++--------------------------
+ xlators/mgmt/glusterd/src/glusterd-store.c |  5 +--
+ 3 files changed, 12 insertions(+), 68 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
+index 3b3a24c..f63bd05 100644
+--- a/libglusterfs/src/glusterfs/store.h
++++ b/libglusterfs/src/glusterfs/store.h
+@@ -59,8 +59,8 @@ int32_t
+ gf_store_unlink_tmppath(gf_store_handle_t *shandle);
+ 
+ int
+-gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
+-                           char **iter_val, gf_store_op_errno_t *store_errno);
++gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
++                           gf_store_op_errno_t *store_errno);
+ 
+ int32_t
+ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value);
+diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
+index cdf0aea..fa3649b 100644
+--- a/libglusterfs/src/store.c
++++ b/libglusterfs/src/store.c
+@@ -184,8 +184,8 @@ out:
+ }
+ 
+ int
+-gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
+-                           char **iter_val, gf_store_op_errno_t *store_errno)
++gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
++                           gf_store_op_errno_t *store_errno)
+ {
+     int32_t ret = -1;
+     char *savetok = NULL;
+@@ -193,15 +193,15 @@ gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
+     char *value = NULL;
+     char *temp = NULL;
+     size_t str_len = 0;
++    char str[8192];
+ 
+     GF_ASSERT(file);
+-    GF_ASSERT(str);
+     GF_ASSERT(iter_key);
+     GF_ASSERT(iter_val);
+     GF_ASSERT(store_errno);
+ 
+ retry:
+-    temp = fgets(str, size, file);
++    temp = fgets(str, 8192, file);
+     if (temp == NULL || feof(file)) {
+         ret = -1;
+         *store_errno = GD_STORE_EOF;
+@@ -241,13 +241,8 @@ int32_t
+ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
+ {
+     int32_t ret = -1;
+-    char *scan_str = NULL;
+     char *iter_key = NULL;
+     char *iter_val = NULL;
+-    char *free_str = NULL;
+-    struct stat st = {
+-        0,
+-    };
+     gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+ 
+     GF_ASSERT(handle);
+@@ -279,32 +274,9 @@ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
+     } else {
+         fseek(handle->read, 0, SEEK_SET);
+     }
+-    ret = sys_fstat(handle->fd, &st);
+-    if (ret < 0) {
+-        gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+-               "stat on file %s failed", handle->path);
+-        ret = -1;
+-        store_errno = GD_STORE_STAT_FAILED;
+-        goto out;
+-    }
+-
+-    /* "st.st_size + 1" is used as we are fetching each
+-     * line of a file using fgets, fgets will append "\0"
+-     * to the end of the string
+-     */
+-    scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char);
+-
+-    if (scan_str == NULL) {
+-        ret = -1;
+-        store_errno = GD_STORE_ENOMEM;
+-        goto out;
+-    }
+-
+-    free_str = scan_str;
+-
+     do {
+-        ret = gf_store_read_and_tokenize(handle->read, scan_str, st.st_size + 1,
+-                                         &iter_key, &iter_val, &store_errno);
++        ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val,
++                                         &store_errno);
+         if (ret < 0) {
+             gf_msg_trace("", 0,
+                          "error while reading key '%s': "
+@@ -334,8 +306,6 @@ out:
+         sys_close(handle->fd);
+     }
+ 
+-    GF_FREE(free_str);
+-
+     return ret;
+ }
+ 
+@@ -561,40 +531,16 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
+                        gf_store_op_errno_t *op_errno)
+ {
+     int32_t ret = -1;
+-    char *scan_str = NULL;
+     char *iter_key = NULL;
+     char *iter_val = NULL;
+-    struct stat st = {
+-        0,
+-    };
+     gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+ 
+     GF_ASSERT(iter);
+     GF_ASSERT(key);
+     GF_ASSERT(value);
+ 
+-    ret = sys_stat(iter->filepath, &st);
+-    if (ret < 0) {
+-        gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+-               "stat on file failed");
+-        ret = -1;
+-        store_errno = GD_STORE_STAT_FAILED;
+-        goto out;
+-    }
+-
+-    /* "st.st_size + 1" is used as we are fetching each
+-     * line of a file using fgets, fgets will append "\0"
+-     * to the end of the string
+-     */
+-    scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char);
+-    if (!scan_str) {
+-        ret = -1;
+-        store_errno = GD_STORE_ENOMEM;
+-        goto out;
+-    }
+-
+-    ret = gf_store_read_and_tokenize(iter->file, scan_str, st.st_size + 1,
+-                                     &iter_key, &iter_val, &store_errno);
++    ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val,
++                                     &store_errno);
+     if (ret < 0) {
+         goto out;
+     }
+@@ -619,7 +565,6 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
+     ret = 0;
+ 
+ out:
+-    GF_FREE(scan_str);
+     if (ret) {
+         GF_FREE(*key);
+         GF_FREE(*value);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 4fa8116..da63c03 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -4092,7 +4092,6 @@ out:
+ int32_t
+ glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
+ {
+-    char buf[PATH_MAX] = "";
+     char path[PATH_MAX] = "";
+     char *snap_vol_id = NULL;
+     char *missed_node_info = NULL;
+@@ -4129,8 +4128,8 @@ glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
+     }
+ 
+     do {
+-        ret = gf_store_read_and_tokenize(
+-            fp, buf, sizeof(buf), &missed_node_info, &value, &store_errno);
++        ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value,
++                                         &store_errno);
+         if (ret) {
+             if (store_errno == GD_STORE_EOF) {
+                 gf_msg_debug(this->name, 0, "EOF for missed_snap_list");
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0565-libglusterfs-coverity-pointer-to-local-outside-the-s.patch b/SOURCES/0565-libglusterfs-coverity-pointer-to-local-outside-the-s.patch
new file mode 100644
index 0000000..5e91703
--- /dev/null
+++ b/SOURCES/0565-libglusterfs-coverity-pointer-to-local-outside-the-s.patch
@@ -0,0 +1,124 @@
+From d491843640658e91a77f15647cefd1c00422c731 Mon Sep 17 00:00:00 2001
+From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
+Date: Wed, 21 Oct 2020 16:14:29 +0530
+Subject: [PATCH 565/584] libglusterfs/coverity: pointer to local outside the
+ scope
+
+issue: gf_store_read_and_tokenize() returns the address
+of a string buffer that is local to the function.
+
+fix: pass a caller-owned buf to gf_store_read_and_tokenize()
+and use it for tokenizing.
+
+CID: 1430143
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/1675
+> Updates: #1060
+> Change-Id: Ifc346540c263f58f4014ba2ba8c1d491c20ac609
+> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+
+BUG: 1925425
+Change-Id: Ifc346540c263f58f4014ba2ba8c1d491c20ac609
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244959
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/store.h         |  3 ++-
+ libglusterfs/src/store.c                   | 13 ++++++++-----
+ xlators/mgmt/glusterd/src/glusterd-store.c |  3 ++-
+ 3 files changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
+index f63bd05..68a20ad 100644
+--- a/libglusterfs/src/glusterfs/store.h
++++ b/libglusterfs/src/glusterfs/store.h
+@@ -60,7 +60,8 @@ gf_store_unlink_tmppath(gf_store_handle_t *shandle);
+ 
+ int
+ gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+-                           gf_store_op_errno_t *store_errno);
++                           gf_store_op_errno_t *store_errno, char *str,
++                           size_t buf_size);
+ 
+ int32_t
+ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value);
+diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
+index fa3649b..3af627a 100644
+--- a/libglusterfs/src/store.c
++++ b/libglusterfs/src/store.c
+@@ -185,7 +185,8 @@ out:
+ 
+ int
+ gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+-                           gf_store_op_errno_t *store_errno)
++                           gf_store_op_errno_t *store_errno, char *str,
++                           size_t buf_size)
+ {
+     int32_t ret = -1;
+     char *savetok = NULL;
+@@ -193,7 +194,6 @@ gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+     char *value = NULL;
+     char *temp = NULL;
+     size_t str_len = 0;
+-    char str[8192];
+ 
+     GF_ASSERT(file);
+     GF_ASSERT(iter_key);
+@@ -201,7 +201,7 @@ gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+     GF_ASSERT(store_errno);
+ 
+ retry:
+-    temp = fgets(str, 8192, file);
++    temp = fgets(str, buf_size, file);
+     if (temp == NULL || feof(file)) {
+         ret = -1;
+         *store_errno = GD_STORE_EOF;
+@@ -275,8 +275,9 @@ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
+         fseek(handle->read, 0, SEEK_SET);
+     }
+     do {
++        char buf[8192];
+         ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val,
+-                                         &store_errno);
++                                         &store_errno, buf, 8192);
+         if (ret < 0) {
+             gf_msg_trace("", 0,
+                          "error while reading key '%s': "
+@@ -533,6 +534,8 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
+     int32_t ret = -1;
+     char *iter_key = NULL;
+     char *iter_val = NULL;
++    char buf[8192];
++
+     gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+ 
+     GF_ASSERT(iter);
+@@ -540,7 +543,7 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
+     GF_ASSERT(value);
+ 
+     ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val,
+-                                     &store_errno);
++                                     &store_errno, buf, 8192);
+     if (ret < 0) {
+         goto out;
+     }
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index da63c03..a8651d8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -4128,8 +4128,9 @@ glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
+     }
+ 
+     do {
++        char buf[8192];
+         ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value,
+-                                         &store_errno);
++                                         &store_errno, buf, 8192);
+         if (ret) {
+             if (store_errno == GD_STORE_EOF) {
+                 gf_msg_debug(this->name, 0, "EOF for missed_snap_list");
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0566-enahancement-debug-Option-to-generate-core-dump-with.patch b/SOURCES/0566-enahancement-debug-Option-to-generate-core-dump-with.patch
new file mode 100644
index 0000000..548271e
--- /dev/null
+++ b/SOURCES/0566-enahancement-debug-Option-to-generate-core-dump-with.patch
@@ -0,0 +1,236 @@
+From e66ab728426e147bf4fc594109137ebfb1f2dda6 Mon Sep 17 00:00:00 2001
+From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
+Date: Mon, 23 Nov 2020 08:09:44 +0530
+Subject: [PATCH 566/584] enahancement/debug: Option to generate core dump
+ without killing the process
+
+Comments and idea proposed by: Xavi Hernandez(jahernan@redhat.com):
+
+On production systems sometimes we see a log message saying that an assertion
+has failed. But it's hard to track why it failed without additional information
+(on debug builds, a GF_ASSERT() generates a core dump and kills the process,
+so it can be used to debug the issue, but many times we are only able to
+reproduce assertion failures on production systems, where GF_ASSERT() only logs
+a message and continues).
+
+In other cases we may have a core dump caused by a bug, but the core dump doesn't
+necessarily happen when the bug has happened. Sometimes the crash happens so much
+later that the causes that triggered the bug are lost. In these cases we can add
+more assertions to the places that touch the potential candidates to cause the bug,
+but the only thing we'll get is a log message, which may not be enough.
+
+One solution would be to always generate a core dump in case of assertion failure,
+but this was already discussed and it was decided that it was too drastic. If a
+core dump was really needed, a new macro was created to do so: GF_ABORT(),
+but GF_ASSERT() would continue to not kill the process on production systems.
+
+I'm proposing to modify GF_ASSERT() on production builds so that it conditionally
+triggers a signal when a debugger is attached. When this happens, the debugger
+will generate a core dump and continue the process as if nothing had happened.
+If there's no debugger attached, GF_ASSERT() will behave as always.
+
+The idea I have is to use SIGCONT to do that. This signal is harmless, so we can
+unmask it (we currently mask all unneeded signals) and raise it inside a GF_ASSERT()
+when some global variable is set to true.
+
+To produce the core dump, run the script extras/debug/gfcore.py in another
+terminal. gdb breaks and produces a core dump when a GF_ASSERT is hit.
+
+The script is copied from #1810, which was written by Xavi Hernandez (jahernan@redhat.com).
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/1814
+> Fixes: #1810
+> Change-Id: I6566ca2cae15501d8835c36f56be4c6950cb2a53
+> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+
+BUG: 1927640
+Change-Id: I6566ca2cae15501d8835c36f56be4c6950cb2a53
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244960
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/debug/gfcore.py                    | 77 +++++++++++++++++++++++++++++++
+ libglusterfs/src/common-utils.c           | 11 +++++
+ libglusterfs/src/glusterfs/common-utils.h | 10 +++-
+ libglusterfs/src/libglusterfs.sym         | 16 +++++++
+ 4 files changed, 112 insertions(+), 2 deletions(-)
+ create mode 100755 extras/debug/gfcore.py
+
+diff --git a/extras/debug/gfcore.py b/extras/debug/gfcore.py
+new file mode 100755
+index 0000000..9f097f0
+--- /dev/null
++++ b/extras/debug/gfcore.py
+@@ -0,0 +1,77 @@
++#!/usr/bin/env python3
++
++def launch():
++    if len(sys.argv) < 3:
++        sys.stderr.write("Syntax: {} <pid> <count> [<dir>]\n".format(os.path.basename(sys.argv[0])))
++        sys.exit(1)
++
++    pid = int(sys.argv[1])
++    count = int(sys.argv[2])
++    base = os.getcwd()
++    if len(sys.argv) > 3:
++        base = sys.argv[3]
++    base = os.path.realpath(base)
++
++    subprocess.run([
++        "gdb", "-batch",
++        "-p", str(pid),
++        "-ex", "py arg_count = {}".format(count),
++        "-ex", "py arg_dir = '{}'".format(base),
++        "-x", __file__
++    ])
++
++class GFCore(object):
++    def __init__(self, count, base):
++        self.count = count
++        self.base = base
++        gdb.execute('set pagination off')
++        gdb.execute('set gf_signal_on_assert = 1')
++        gdb.events.stop.connect(self.gf_stop)
++
++        self.cont()
++
++    def cont(self, quit = False):
++        if not(quit) and (self.count > 0):
++            gdb.execute('continue')
++        else:
++            gdb.execute('set gf_signal_on_assert = 0')
++            gdb.execute('quit')
++
++    def gf_stop(self, event):
++        quit = False
++
++        if isinstance(event, gdb.SignalEvent):
++            if event.stop_signal == 'SIGCONT':
++                now = datetime.utcnow().isoformat()
++                pid = gdb.selected_inferior().pid
++                name = "{}/gfcore.{}.{}".format(self.base, pid, now)
++                print("Generating coredump '{}'".format(name))
++                gdb.execute('gcore {}'.format(name))
++                self.count -= 1
++
++            elif event.stop_signal == 'SIGINT':
++                print("SIGINT received. Exiting")
++                quit = True
++
++            else:
++                print("Ignoring signal {}".format(event.stop_signal))
++        else:
++            print("Unexpected event {}".format(type(event)))
++
++        self.cont(quit)
++
++# Module 'gdb' is not available when running outside gdb.
++try:
++    import gdb
++    from datetime import datetime
++
++    GFCore(arg_count, arg_dir)
++except ModuleNotFoundError:
++    import sys
++    import os
++    import subprocess
++
++    try:
++        launch()
++    except KeyboardInterrupt:
++        pass
+diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
+index 70d5d21..d351b93 100644
+--- a/libglusterfs/src/common-utils.c
++++ b/libglusterfs/src/common-utils.c
+@@ -77,9 +77,19 @@ char *vol_type_str[] = {
+     "Distributed-Disperse",
+ };
+ 
++gf_boolean_t gf_signal_on_assert = false;
++
+ typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size);
+ typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size);
+ 
++void gf_assert(void)
++{
++    if (gf_signal_on_assert) {
++        raise(SIGCONT);
++    }
++
++}
++
+ void
+ gf_xxh64_wrapper(const unsigned char *data, size_t const len,
+                  unsigned long long const seed, char *xxh64)
+@@ -4021,6 +4031,7 @@ gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr,
+     sigdelset(&set, SIGSYS);
+     sigdelset(&set, SIGFPE);
+     sigdelset(&set, SIGABRT);
++    sigdelset(&set, SIGCONT);
+ 
+     pthread_sigmask(SIG_BLOCK, &set, &old);
+ 
+diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
+index f0a0a41..604afd0 100644
+--- a/libglusterfs/src/glusterfs/common-utils.h
++++ b/libglusterfs/src/glusterfs/common-utils.h
+@@ -25,6 +25,7 @@
+ #include <limits.h>
+ #include <fnmatch.h>
+ #include <uuid/uuid.h>
++#include <urcu/compiler.h>
+ 
+ #ifndef ffsll
+ #define ffsll(x) __builtin_ffsll(x)
+@@ -431,14 +432,19 @@ BIT_VALUE(unsigned char *array, unsigned int index)
+ #define GF_FILE_CONTENT_REQUESTED(_xattr_req, _content_limit)                  \
+     (dict_get_uint64(_xattr_req, "glusterfs.content", _content_limit) == 0)
+ 
++void gf_assert(void);
++
+ #ifdef DEBUG
+ #define GF_ASSERT(x) assert(x);
+ #else
+ #define GF_ASSERT(x)                                                           \
+     do {                                                                       \
+-        if (!(x)) {                                                            \
++        if (caa_unlikely(!(x))) {                                              \
++            gf_assert();                                                       \
+             gf_msg_callingfn("", GF_LOG_ERROR, 0, LG_MSG_ASSERTION_FAILED,     \
+-                             "Assertion failed: " #x);                         \
++                             "Assertion failed: To attach gdb and coredump,"   \
++                             " Run the script under "                          \
++                             "\"glusterfs/extras/debug/gfcore.py\"");          \
+         }                                                                      \
+     } while (0)
+ #endif
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index 0a0862e..9072afa 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1167,3 +1167,19 @@ gf_changelog_register_generic
+ gf_gfid_generate_from_xxh64
+ find_xlator_option_in_cmd_args_t
+ gf_d_type_from_ia_type
++glusterfs_graph_fini
++glusterfs_process_svc_attach_volfp
++glusterfs_mux_volfile_reconfigure
++glusterfs_process_svc_detach
++mgmt_is_multiplexed_daemon
++xlator_is_cleanup_starting
++gf_nanosleep
++gf_syncfs
++graph_total_client_xlator
++get_xattrs_to_heal
++gf_latency_statedump_and_reset
++gf_latency_new
++gf_latency_reset
++gf_latency_update
++gf_frame_latency_update
++gf_assert
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0567-inode-create-inode-outside-locked-region.patch b/SOURCES/0567-inode-create-inode-outside-locked-region.patch
new file mode 100644
index 0000000..23d51c4
--- /dev/null
+++ b/SOURCES/0567-inode-create-inode-outside-locked-region.patch
@@ -0,0 +1,86 @@
+From 5c81d813c8b1f494d31d54c1ab09a3f0153ebfd4 Mon Sep 17 00:00:00 2001
+From: Amar Tumballi <amarts@redhat.com>
+Date: Sat, 9 Feb 2019 13:13:47 +0530
+Subject: [PATCH 567/584] inode: create inode outside locked region
+
+Only linking the inode to the table and inserting it in
+a list need to be in the locked region.
+
+Backport of:
+> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/22183/
+> Updates: bz#1670031
+> Change-Id: I6ea7e956b80cf2765c2233d761909c4bf9c7253c
+> Signed-off-by: Amar Tumballi <amarts@redhat.com>
+
+BUG: 1927640
+Change-Id: I6ea7e956b80cf2765c2233d761909c4bf9c7253c
+Signed-off-by: Amar Tumballi <amarts@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244961
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/inode.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 98f8ea6..46db04f 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -620,7 +620,7 @@ out:
+ }
+ 
+ static inode_t *
+-__inode_create(inode_table_t *table)
++inode_create(inode_table_t *table)
+ {
+     inode_t *newi = NULL;
+ 
+@@ -647,11 +647,7 @@ __inode_create(inode_table_t *table)
+         goto out;
+     }
+ 
+-    list_add(&newi->list, &table->lru);
+-    table->lru_size++;
+-
+ out:
+-
+     return newi;
+ }
+ 
+@@ -668,14 +664,16 @@ inode_new(inode_table_t *table)
+         return NULL;
+     }
+ 
+-    pthread_mutex_lock(&table->lock);
+-    {
+-        inode = __inode_create(table);
+-        if (inode != NULL) {
++    inode = inode_create(table);
++    if (inode) {
++        pthread_mutex_lock(&table->lock);
++        {
++            list_add(&inode->list, &table->lru);
++            table->lru_size++;
+             __inode_ref(inode, false);
+         }
++        pthread_mutex_unlock(&table->lock);
+     }
+-    pthread_mutex_unlock(&table->lock);
+ 
+     return inode;
+ }
+@@ -1613,7 +1611,10 @@ __inode_table_init_root(inode_table_t *table)
+     if (!table)
+         return;
+ 
+-    root = __inode_create(table);
++    root = inode_create(table);
++
++    list_add(&root->list, &table->lru);
++    table->lru_size++;
+ 
+     iatt.ia_gfid[15] = 1;
+     iatt.ia_ino = 1;
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0568-core-tcmu-runner-process-continuous-growing-logs-lru.patch b/SOURCES/0568-core-tcmu-runner-process-continuous-growing-logs-lru.patch
new file mode 100644
index 0000000..22c6790
--- /dev/null
+++ b/SOURCES/0568-core-tcmu-runner-process-continuous-growing-logs-lru.patch
@@ -0,0 +1,131 @@
+From 2640ee56201d320b838909f95608abe07e3ff9b0 Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Tue, 24 Nov 2020 15:29:58 +0530
+Subject: [PATCH 568/584] core: tcmu-runner process continuous growing logs
+ lru_size showing -1
+
+* core: tcmu-runner process continuous growing logs lru_size showing -1
+
+When inode_table_prune is called, it checks whether the current lru_size
+is greater than lru_limit; if so but lru_list is empty, it logs the message
+"Empty inode lru list found but with (%d) lru_size". From code reading,
+it seems lru_size is out of sync with the actual number of inodes in
+lru_list. Because the error is logged continuously, the entire disk
+fills up and the user has to restart the tcmu-runner process to use
+the volumes. The log message was introduced by the patch
+https://review.gluster.org/#/c/glusterfs/+/15087/.
+
+Solution: Introduce a flag in_lru_list to track whether an inode is
+          part of lru_list or not.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/1776
+> Fixes: #1775
+> Change-Id: I4b836bebf4b5db65fbf88ff41c6c88f4a7ac55c1
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+
+BUG: 1927640
+Change-Id: I4b836bebf4b5db65fbf88ff41c6c88f4a7ac55c1
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244962
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/inode.h |  1 +
+ libglusterfs/src/inode.c           | 14 ++++++++++++++
+ 2 files changed, 15 insertions(+)
+
+diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
+index 62c093d..17d0340 100644
+--- a/libglusterfs/src/glusterfs/inode.h
++++ b/libglusterfs/src/glusterfs/inode.h
+@@ -110,6 +110,7 @@ struct _inode {
+     struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
+     bool in_invalidate_list; /* Set if inode is in table invalidate list */
+     bool invalidate_sent;    /* Set it if invalidator_fn is called for inode */
++    bool in_lru_list;        /* Set if inode is in table lru list */
+ };
+ 
+ #define UUID0_STR "00000000-0000-0000-0000-000000000000"
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 46db04f..8e91197 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -417,8 +417,10 @@ __inode_passivate(inode_t *inode)
+     dentry_t *dentry = NULL;
+     dentry_t *t = NULL;
+ 
++    GF_ASSERT(!inode->in_lru_list);
+     list_move_tail(&inode->list, &inode->table->lru);
+     inode->table->lru_size++;
++    inode->in_lru_list = _gf_true;
+ 
+     list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
+     {
+@@ -531,7 +533,10 @@ __inode_ref(inode_t *inode, bool is_invalidate)
+             inode->in_invalidate_list = false;
+             inode->table->invalidate_size--;
+         } else {
++            GF_ASSERT(inode->table->lru_size > 0);
++            GF_ASSERT(inode->in_lru_list);
+             inode->table->lru_size--;
++            inode->in_lru_list = _gf_false;
+         }
+         if (is_invalidate) {
+             inode->in_invalidate_list = true;
+@@ -670,6 +675,8 @@ inode_new(inode_table_t *table)
+         {
+             list_add(&inode->list, &table->lru);
+             table->lru_size++;
++            GF_ASSERT(!inode->in_lru_list);
++            inode->in_lru_list = _gf_true;
+             __inode_ref(inode, false);
+         }
+         pthread_mutex_unlock(&table->lock);
+@@ -1533,6 +1540,7 @@ inode_table_prune(inode_table_t *table)
+         lru_size = table->lru_size;
+         while (lru_size > (table->lru_limit)) {
+             if (list_empty(&table->lru)) {
++                GF_ASSERT(0);
+                 gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+                                  LG_MSG_INVALID_INODE_LIST,
+                                  "Empty inode lru list found"
+@@ -1543,6 +1551,7 @@ inode_table_prune(inode_table_t *table)
+ 
+             lru_size--;
+             entry = list_entry(table->lru.next, inode_t, list);
++            GF_ASSERT(entry->in_lru_list);
+             /* The logic of invalidation is required only if invalidator_fn
+                is present */
+             if (table->invalidator_fn) {
+@@ -1560,6 +1569,7 @@ inode_table_prune(inode_table_t *table)
+             }
+ 
+             table->lru_size--;
++            entry->in_lru_list = _gf_false;
+             __inode_retire(entry);
+             ret++;
+         }
+@@ -1615,6 +1625,7 @@ __inode_table_init_root(inode_table_t *table)
+ 
+     list_add(&root->list, &table->lru);
+     table->lru_size++;
++    root->in_lru_list = _gf_true;
+ 
+     iatt.ia_gfid[15] = 1;
+     iatt.ia_ino = 1;
+@@ -1873,8 +1884,11 @@ inode_table_destroy(inode_table_t *inode_table)
+         while (!list_empty(&inode_table->lru)) {
+             trav = list_first_entry(&inode_table->lru, inode_t, list);
+             inode_forget_atomic(trav, 0);
++            GF_ASSERT(inode_table->lru_size > 0);
++            GF_ASSERT(trav->in_lru_list);
+             __inode_retire(trav);
+             inode_table->lru_size--;
++            trav->in_lru_list = _gf_false;
+         }
+ 
+         /* Same logic for invalidate list */
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0569-features-shard-optimization-over-shard-lookup-in-cas.patch b/SOURCES/0569-features-shard-optimization-over-shard-lookup-in-cas.patch
new file mode 100644
index 0000000..fff8223
--- /dev/null
+++ b/SOURCES/0569-features-shard-optimization-over-shard-lookup-in-cas.patch
@@ -0,0 +1,200 @@
+From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001
+From: Vinayakswami Hariharmath <vharihar@redhat.com>
+Date: Thu, 6 Aug 2020 14:39:59 +0530
+Subject: [PATCH 569/584] features/shard: optimization over shard lookup in
+ case of prealloc
+
+Assume that we are preallocating a VM of size 1TB with a shard
+block size of 64MB then there will be ~16k shards.
+
+This creation happens in 2 steps in the shard_fallocate() path, i.e.
+
+1. lookup of the shards, if any are already present, and
+2. mknod for those shards that do not exist.
+
+But in case of fresh creation, we don't have to look up all the
+shards which are not present, as the file size will be 0.
+Through this, we can save the lookup on all shards which are not
+present. This optimization is quite useful in the case of
+preallocating a big VM.
+
+Also, if the file is already present and the call is to
+extend it to a bigger size, then we need not look up non-
+existent shards. Just look up the preexisting shards, populate
+the inodes and issue mknod for the extended size.
+
+Backport of:
+> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/
+> Fixes: #1425
+> Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
+> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+
+BUG: 1925425
+Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/shard/issue-1425.t      | 45 +++++++++++++++++++++++++++++++++++++
+ xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------
+ 2 files changed, 84 insertions(+), 7 deletions(-)
+ create mode 100644 tests/bugs/shard/issue-1425.t
+
+diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
+new file mode 100644
+index 0000000..bbe82c0
+--- /dev/null
++++ b/tests/bugs/shard/issue-1425.t
+@@ -0,0 +1,45 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup;
++
++FILE_COUNT_TIME=5
++
++function get_file_count {
++    ls $1* | wc -l
++}
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 $H0:$B0/${V0}0
++TEST $CLI volume set $V0 features.shard on
++TEST $CLI volume set $V0 features.shard-block-size 4MB
++TEST $CLI volume start $V0
++TEST $CLI volume profile $V0 start
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++TEST fallocate -l 20M $M0/foo
++gfid_new=$(get_gfid_string $M0/foo)
++
++# Check for the base shard
++TEST stat $M0/foo
++TEST stat $B0/${V0}0/foo
++
++# There should be 4 associated shards
++EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
++
++# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
++EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
++
++# Delete the base shard and check shards get cleaned up
++TEST unlink $M0/foo
++
++TEST ! stat $M0/foo
++TEST ! stat $B0/${V0}0/foo
++
++# There should be no shards now
++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
++cleanup
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 2ba4528..a6ad1b8 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
+ }
+ 
+ int
++shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
++                                                    xlator_t *this);
++
++int
+ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
+                             shard_post_resolve_fop_handler_t post_res_handler)
+ {
+@@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
+     inode_t *fsync_inode = NULL;
+     shard_priv_t *priv = NULL;
+     shard_local_t *local = NULL;
++    uint64_t resolve_count = 0;
+ 
+     priv = this->private;
+     local = frame->local;
+     local->call_count = 0;
+     shard_idx_iter = local->first_block;
+     res_inode = local->resolver_base_inode;
++
++    if ((local->op_ret < 0) || (local->resolve_not))
++        goto out;
++
++    /* If this prealloc FOP is for fresh file creation, then the size of the
++     * file will be 0. Then there will be no shards associated with this file.
++     * So we can skip the lookup process for the shards which do not exists
++     * and directly issue mknod to crete shards.
++     *
++     * In case the prealloc fop is to extend the preallocated file to bigger
++     * size then just lookup and populate inodes of existing shards and
++     * update the create count
++     */
++    if (local->fop == GF_FOP_FALLOCATE) {
++        if (!local->prebuf.ia_size) {
++            local->inode_list[0] = inode_ref(res_inode);
++            local->create_count = local->last_block;
++            shard_common_inode_write_post_lookup_shards_handler(frame, this);
++            return 0;
++        }
++        if (local->prebuf.ia_size < local->total_size)
++            local->create_count = local->last_block -
++                                  ((local->prebuf.ia_size - 1) /
++                                   local->block_size);
++    }
++
++    resolve_count = local->last_block - local->create_count;
++
+     if (res_inode)
+         gf_uuid_copy(gfid, res_inode->gfid);
+     else
+         gf_uuid_copy(gfid, local->base_gfid);
+ 
+-    if ((local->op_ret < 0) || (local->resolve_not))
+-        goto out;
+-
+-    while (shard_idx_iter <= local->last_block) {
++    while (shard_idx_iter <= resolve_count) {
+         i++;
+         if (shard_idx_iter == 0) {
+             local->inode_list[i] = inode_ref(res_inode);
+@@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+     int count = 0;
+     int call_count = 0;
+     int32_t shard_idx_iter = 0;
+-    int last_block = 0;
++    int lookup_count = 0;
+     char path[PATH_MAX] = {
+         0,
+     };
+@@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+     local = frame->local;
+     count = call_count = local->call_count;
+     shard_idx_iter = local->first_block;
+-    last_block = local->last_block;
++    lookup_count = local->last_block - local->create_count;
+     local->pls_fop_handler = handler;
+     if (local->lookup_shards_barriered)
+         local->barrier.waitfor = local->call_count;
+@@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+     else
+         gf_uuid_copy(gfid, local->base_gfid);
+ 
+-    while (shard_idx_iter <= last_block) {
++    while (shard_idx_iter <= lookup_count) {
+         if (local->inode_list[i]) {
+             i++;
+             shard_idx_iter++;
+@@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
+         shard_common_lookup_shards(
+             frame, this, local->resolver_base_inode,
+             shard_common_inode_write_post_lookup_shards_handler);
++    } else if (local->create_count) {
++        shard_common_inode_write_post_lookup_shards_handler(frame, this);
+     } else {
+         shard_common_inode_write_do(frame, this);
+     }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0570-features-shard-avoid-repeatative-calls-to-gf_uuid_un.patch b/SOURCES/0570-features-shard-avoid-repeatative-calls-to-gf_uuid_un.patch
new file mode 100644
index 0000000..4d87bcb
--- /dev/null
+++ b/SOURCES/0570-features-shard-avoid-repeatative-calls-to-gf_uuid_un.patch
@@ -0,0 +1,340 @@
+From 1a8b001a121ada4d3d338b52b312896f1790f2bb Mon Sep 17 00:00:00 2001
+From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
+Date: Mon, 11 Jan 2021 12:34:55 +0530
+Subject: [PATCH 570/584] features/shard: avoid repeatative calls to
+ gf_uuid_unparse()
+
+The issue is that shard_make_block_abspath() calls gf_uuid_unparse()
+every time it constructs a shard path. The gfid can be unparsed
+once, saved, and reused while constructing the paths. Thus
+we can avoid calling gf_uuid_unparse() repeatedly.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/1689
+> Fixes: #1423
+> Change-Id: Ia26fbd5f09e812bbad9e5715242f14143c013c9c
+> Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com
+
+BUG: 1925425
+Change-Id: Ia26fbd5f09e812bbad9e5715242f14143c013c9c
+Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244964
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/shard/issue-1425.t      |   9 ++-
+ xlators/features/shard/src/shard.c | 119 ++++++++++++++++++-------------------
+ 2 files changed, 65 insertions(+), 63 deletions(-)
+
+diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
+index bbe82c0..8b77705 100644
+--- a/tests/bugs/shard/issue-1425.t
++++ b/tests/bugs/shard/issue-1425.t
+@@ -21,7 +21,13 @@ TEST $CLI volume profile $V0 start
+ 
+ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+ 
++$CLI volume profile $V0 info clear
++
+ TEST fallocate -l 20M $M0/foo
++
++# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
++EXPECT "5" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
++
+ gfid_new=$(get_gfid_string $M0/foo)
+ 
+ # Check for the base shard
+@@ -31,9 +37,6 @@ TEST stat $B0/${V0}0/foo
+ # There should be 4 associated shards
+ EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
+ 
+-# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
+-EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+-
+ # Delete the base shard and check shards get cleaned up
+ TEST unlink $M0/foo
+ 
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index a6ad1b8..d1d7d7a 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -16,6 +16,8 @@
+ #include <glusterfs/defaults.h>
+ #include <glusterfs/statedump.h>
+ 
++#define SHARD_PATH_MAX (sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE + 16)
++
+ static gf_boolean_t
+ __is_shard_dir(uuid_t gfid)
+ {
+@@ -49,15 +51,19 @@ shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len)
+     snprintf(buf, len, "%s.%d", gfid_str, block_num);
+ }
+ 
+-void
+-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len)
++static int
++shard_make_base_path(char *path, uuid_t gfid)
+ {
+-    char gfid_str[GF_UUID_BUF_SIZE] = {
+-        0,
+-    };
++    strcpy(path, "/" GF_SHARD_DIR "/");
++    uuid_utoa_r(gfid, path + sizeof(GF_SHARD_DIR) + 1);
++    return (sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE);
++}
+ 
+-    gf_uuid_unparse(gfid, gfid_str);
+-    snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
++static inline void
++shard_append_index(char *path, int path_size, int prefix_len,
++                   int shard_idx_iter)
++{
++    snprintf(path + prefix_len, path_size - prefix_len, ".%d", shard_idx_iter);
+ }
+ 
+ int
+@@ -1004,9 +1010,8 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
+ {
+     int i = -1;
+     uint32_t shard_idx_iter = 0;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
++    int prefix_len = 0;
++    char path[SHARD_PATH_MAX];
+     uuid_t gfid = {
+         0,
+     };
+@@ -1055,6 +1060,9 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
+     else
+         gf_uuid_copy(gfid, local->base_gfid);
+ 
++    /* Build base shard path before appending index of the shard */
++    prefix_len = shard_make_base_path(path, gfid);
++
+     while (shard_idx_iter <= resolve_count) {
+         i++;
+         if (shard_idx_iter == 0) {
+@@ -1062,16 +1070,13 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
+             shard_idx_iter++;
+             continue;
+         }
+-
+-        shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+-
++        shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter);
+         inode = NULL;
+         inode = inode_resolve(this->itable, path);
+         if (inode) {
+             gf_msg_debug(this->name, 0,
+-                         "Shard %d already "
+-                         "present. gfid=%s. Saving inode for future.",
+-                         shard_idx_iter, uuid_utoa(inode->gfid));
++                         "Shard %s already present. Saving inode for future.",
++                         path);
+             local->inode_list[i] = inode;
+             /* Let the ref on the inodes that are already present
+              * in inode table still be held so that they don't get
+@@ -2153,9 +2158,8 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+     int call_count = 0;
+     uint32_t cur_block = 0;
+     uint32_t last_block = 0;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
++    int prefix_len = 0;
++    char path[SHARD_PATH_MAX];
+     char *bname = NULL;
+     loc_t loc = {
+         0,
+@@ -2216,6 +2220,10 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+         return 0;
+     }
+ 
++    /* Build base shard path before appending index of the shard */
++    prefix_len = shard_make_base_path(path, inode->gfid);
++    bname = path + sizeof(GF_SHARD_DIR) + 1;
++
+     SHARD_SET_ROOT_FS_ID(frame, local);
+     while (cur_block <= last_block) {
+         if (!local->inode_list[i]) {
+@@ -2229,15 +2237,12 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+             goto next;
+         }
+ 
+-        shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
+-        bname = strrchr(path, '/') + 1;
++        shard_append_index(path, SHARD_PATH_MAX, prefix_len, cur_block);
+         loc.parent = inode_ref(priv->dot_shard_inode);
+         ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+         if (ret < 0) {
+             gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-                   "Inode path failed"
+-                   " on %s. Base file gfid = %s",
+-                   bname, uuid_utoa(inode->gfid));
++                   "Inode path failed on %s.", bname);
+             local->op_ret = -1;
+             local->op_errno = ENOMEM;
+             loc_wipe(&loc);
+@@ -2465,13 +2470,8 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+     int call_count = 0;
+     int32_t shard_idx_iter = 0;
+     int lookup_count = 0;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
++    char path[SHARD_PATH_MAX];
+     char *bname = NULL;
+-    uuid_t gfid = {
+-        0,
+-    };
+     loc_t loc = {
+         0,
+     };
+@@ -2489,10 +2489,16 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+     if (local->lookup_shards_barriered)
+         local->barrier.waitfor = local->call_count;
+ 
++    /* Build base shard path before appending index of the shard */
++    strcpy(path, "/" GF_SHARD_DIR "/");
++
+     if (inode)
+-        gf_uuid_copy(gfid, inode->gfid);
++        uuid_utoa_r(inode->gfid, path + sizeof(GF_SHARD_DIR) + 1);
+     else
+-        gf_uuid_copy(gfid, local->base_gfid);
++        uuid_utoa_r(local->base_gfid, path + sizeof(GF_SHARD_DIR) + 1);
++
++    int prefix_len = sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE;
++    bname = path + sizeof(GF_SHARD_DIR) + 1;
+ 
+     while (shard_idx_iter <= lookup_count) {
+         if (local->inode_list[i]) {
+@@ -2508,18 +2514,14 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+             goto next;
+         }
+ 
+-        shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+-
+-        bname = strrchr(path, '/') + 1;
++        shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter);
+         loc.inode = inode_new(this->itable);
+         loc.parent = inode_ref(priv->dot_shard_inode);
+         gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
+         ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+         if (ret < 0 || !(loc.inode)) {
+             gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-                   "Inode path failed"
+-                   " on %s, base file gfid = %s",
+-                   bname, uuid_utoa(gfid));
++                   "Inode path failed on %s", bname);
+             local->op_ret = -1;
+             local->op_errno = ENOMEM;
+             loc_wipe(&loc);
+@@ -3168,12 +3170,7 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
+     uint32_t cur_block = 0;
+     uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */
+     char *bname = NULL;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
+-    uuid_t gfid = {
+-        0,
+-    };
++    char path[SHARD_PATH_MAX];
+     loc_t loc = {
+         0,
+     };
+@@ -3184,10 +3181,16 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
+     priv = this->private;
+     local = frame->local;
+ 
++    /* Build base shard path before appending index of the shard */
++    strcpy(path, "/" GF_SHARD_DIR "/");
++
+     if (inode)
+-        gf_uuid_copy(gfid, inode->gfid);
++        uuid_utoa_r(inode->gfid, path + sizeof(GF_SHARD_DIR) + 1);
+     else
+-        gf_uuid_copy(gfid, local->base_gfid);
++        uuid_utoa_r(local->base_gfid, path + sizeof(GF_SHARD_DIR) + 1);
++
++    int prefix_len = sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE;
++    bname = path + sizeof(GF_SHARD_DIR) + 1;
+ 
+     for (i = 0; i < local->num_blocks; i++) {
+         if (!local->inode_list[i])
+@@ -3203,7 +3206,7 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
+         gf_msg_debug(this->name, 0,
+                      "All shards that need to be "
+                      "unlinked are non-existent: %s",
+-                     uuid_utoa(gfid));
++                     path);
+         return 0;
+     }
+ 
+@@ -3221,15 +3224,12 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
+             goto next;
+         }
+ 
+-        shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
+-        bname = strrchr(path, '/') + 1;
++        shard_append_index(path, SHARD_PATH_MAX, prefix_len, cur_block);
+         loc.parent = inode_ref(priv->dot_shard_inode);
+         ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+         if (ret < 0) {
+             gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+-                   "Inode path failed"
+-                   " on %s, base file gfid = %s",
+-                   bname, uuid_utoa(gfid));
++                   "Inode path failed on %s", bname);
+             local->op_ret = -1;
+             local->op_errno = ENOMEM;
+             loc_wipe(&loc);
+@@ -4971,9 +4971,8 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+     int last_block = 0;
+     int ret = 0;
+     int call_count = 0;
+-    char path[PATH_MAX] = {
+-        0,
+-    };
++    int prefix_len = 0;
++    char path[SHARD_PATH_MAX];
+     mode_t mode = 0;
+     char *bname = NULL;
+     shard_priv_t *priv = NULL;
+@@ -4996,6 +4995,10 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+     call_count = local->call_count = local->create_count;
+     local->post_mknod_handler = post_mknod_handler;
+ 
++    /* Build base shard path before appending index of the shard */
++    prefix_len = shard_make_base_path(path, fd->inode->gfid);
++    bname = path + sizeof(GF_SHARD_DIR) + 1;
++
+     SHARD_SET_ROOT_FS_ID(frame, local);
+ 
+     ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
+@@ -5022,10 +5025,7 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+                                    -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+             goto next;
+         }
+-
+-        shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
+-                                 sizeof(path));
+-
++        shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter);
+         xattr_req = shard_create_gfid_dict(local->xattr_req);
+         if (!xattr_req) {
+             local->op_ret = -1;
+@@ -5036,7 +5036,6 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+             goto next;
+         }
+ 
+-        bname = strrchr(path, '/') + 1;
+         loc.inode = inode_new(this->itable);
+         loc.parent = inode_ref(priv->dot_shard_inode);
+         ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0571-NetBSD-build-fixes.patch b/SOURCES/0571-NetBSD-build-fixes.patch
new file mode 100644
index 0000000..8a6d4a4
--- /dev/null
+++ b/SOURCES/0571-NetBSD-build-fixes.patch
@@ -0,0 +1,98 @@
+From 2c0d11bb406e50fb515abf0c5a4006e1b362ac8e Mon Sep 17 00:00:00 2001
+From: Emmanuel Dreyfus <manu@netbsd.org>
+Date: Tue, 30 Jun 2020 16:42:36 +0200
+Subject: [PATCH 571/584] NetBSD build fixes
+
+- Make sure -largp is used at link time
+- PTHREAD_MUTEX_ADAPTIVE_NP is not available, use PTHREAD_MUTEX_DEFAULT instead
+- Avoid non POSIX [[ ]] in scripts
+- Do not check if lock.spinlock is NULL since it is not a pointer
+  (it is not a pointer on Linux either)
+
+Backport of:
+> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24648/
+> Change-Id: I5e04a7c552d24f8a473c2b837828d1bddfa7e128
+> Fixes: #1347
+> Type: Bug
+> Signed-off-by: Emmanuel Dreyfus <manu@netbsd.org>
+
+BUG: 1925425
+Change-Id: I5e04a7c552d24f8a473c2b837828d1bddfa7e128
+Signed-off-by: Emmanuel Dreyfus <manu@netbsd.org>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245040
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac                                        | 3 +++
+ rpc/rpc-lib/src/rpcsvc.c                            | 4 ++++
+ tools/gfind_missing_files/gfind_missing_files.sh    | 2 +-
+ xlators/performance/write-behind/src/write-behind.c | 4 ++--
+ 4 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index 327733e..6138a59 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -998,6 +998,9 @@ case $host_os in
+     CFLAGS="${CFLAGS} -isystem /usr/local/include"
+     ARGP_LDADD=-largp
+     ;;
++    *netbsd*)
++    ARGP_LDADD=-largp
++    ;;
+ esac
+ dnl argp-standalone does not provide a pkg-config file
+ AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
+diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
+index 3f184bf..b031d93 100644
+--- a/rpc/rpc-lib/src/rpcsvc.c
++++ b/rpc/rpc-lib/src/rpcsvc.c
+@@ -46,6 +46,10 @@
+ #include "xdr-rpcclnt.h"
+ #include <glusterfs/glusterfs-acl.h>
+ 
++#ifndef PTHREAD_MUTEX_ADAPTIVE_NP
++#define PTHREAD_MUTEX_ADAPTIVE_NP PTHREAD_MUTEX_DEFAULT
++#endif
++
+ struct rpcsvc_program gluster_dump_prog;
+ 
+ #define rpcsvc_alloc_request(svc, request)                                     \
+diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
+index f42fe7b..e7aaa0b 100644
+--- a/tools/gfind_missing_files/gfind_missing_files.sh
++++ b/tools/gfind_missing_files/gfind_missing_files.sh
+@@ -61,7 +61,7 @@ mount_slave()
+ 
+ parse_cli()
+ {
+-    if [[ $# -ne 4 ]]; then
++    if [ "$#" -ne 4 ]; then
+         echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>"
+         exit 1
+     else
+diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
+index 31ab723..76d257f 100644
+--- a/xlators/performance/write-behind/src/write-behind.c
++++ b/xlators/performance/write-behind/src/write-behind.c
+@@ -2490,7 +2490,7 @@ wb_mark_readdirp_start(xlator_t *this, inode_t *directory)
+ 
+     wb_directory_inode = wb_inode_create(this, directory);
+ 
+-    if (!wb_directory_inode || !wb_directory_inode->lock.spinlock)
++    if (!wb_directory_inode)
+         return;
+ 
+     LOCK(&wb_directory_inode->lock);
+@@ -2510,7 +2510,7 @@ wb_mark_readdirp_end(xlator_t *this, inode_t *directory)
+ 
+     wb_directory_inode = wb_inode_ctx_get(this, directory);
+ 
+-    if (!wb_directory_inode || !wb_directory_inode->lock.spinlock)
++    if (!wb_directory_inode)
+         return;
+ 
+     LOCK(&wb_directory_inode->lock);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0572-locks-remove-unused-conditional-switch-to-spin_lock-.patch b/SOURCES/0572-locks-remove-unused-conditional-switch-to-spin_lock-.patch
new file mode 100644
index 0000000..1447916
--- /dev/null
+++ b/SOURCES/0572-locks-remove-unused-conditional-switch-to-spin_lock-.patch
@@ -0,0 +1,183 @@
+From 1491b33007e84be0a0a74354e89deca8a21ed198 Mon Sep 17 00:00:00 2001
+From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
+Date: Tue, 19 Jan 2021 15:39:35 +0530
+Subject: [PATCH 572/584] locks: remove unused conditional switch to spin_lock
+ code
+
+The use of spin_locks depends on the variable use_spinlocks,
+but its assignment is commented out in the current code base by
+https://review.gluster.org/#/c/glusterfs/+/14763/. So there is
+no use in conditionally switching between spin_lock and
+mutex. Remove the dead code as part of this patch.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2007
+> Fixes: #1996
+> Change-Id: Ib005dd86969ce33d3409164ef3e1011bb3169129
+> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+
+BUG: 1925425
+Change-Id: Ib005dd86969ce33d3409164ef3e1011bb3169129
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244965
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac                         |  7 -----
+ libglusterfs/src/Makefile.am         |  2 +-
+ libglusterfs/src/common-utils.c      |  5 ----
+ libglusterfs/src/glusterfs/locking.h | 51 ------------------------------------
+ libglusterfs/src/locking.c           | 27 -------------------
+ 5 files changed, 1 insertion(+), 91 deletions(-)
+ delete mode 100644 libglusterfs/src/locking.c
+
+diff --git a/configure.ac b/configure.ac
+index 6138a59..3d99f6a 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -585,13 +585,6 @@ AC_CHECK_HEADERS([linux/falloc.h])
+ 
+ AC_CHECK_HEADERS([linux/oom.h], AC_DEFINE(HAVE_LINUX_OOM_H, 1, [have linux/oom.h]))
+ 
+-dnl Mac OS X does not have spinlocks
+-AC_CHECK_FUNC([pthread_spin_init], [have_spinlock=yes])
+-if test "x${have_spinlock}" = "xyes"; then
+-   AC_DEFINE(HAVE_SPINLOCK, 1, [define if found spinlock])
+-fi
+-AC_SUBST(HAVE_SPINLOCK)
+-
+ dnl some os may not have GNU defined strnlen function
+ AC_CHECK_FUNC([strnlen], [have_strnlen=yes])
+ if test "x${have_strnlen}" = "xyes"; then
+diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
+index 970f4b7..830a0c3 100644
+--- a/libglusterfs/src/Makefile.am
++++ b/libglusterfs/src/Makefile.am
+@@ -35,7 +35,7 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
+ 	strfd.c parse-utils.c $(CONTRIBDIR)/mount/mntent.c \
+ 	$(CONTRIBDIR)/libexecinfo/execinfo.c quota-common-utils.c rot-buffs.c \
+ 	$(CONTRIBDIR)/timer-wheel/timer-wheel.c \
+-	$(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \
++	$(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c \
+ 	$(CONTRIBDIR)/xxhash/xxhash.c \
+ 	compound-fop-utils.c throttle-tbf.c monitoring.c
+ 
+diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
+index d351b93..c2dfe28 100644
+--- a/libglusterfs/src/common-utils.c
++++ b/libglusterfs/src/common-utils.c
+@@ -860,11 +860,6 @@ gf_dump_config_flags()
+     gf_msg_plain_nomem(GF_LOG_ALERT, "setfsid 1");
+ #endif
+ 
+-/* define if found spinlock */
+-#ifdef HAVE_SPINLOCK
+-    gf_msg_plain_nomem(GF_LOG_ALERT, "spinlock 1");
+-#endif
+-
+ /* Define to 1 if you have the <sys/epoll.h> header file. */
+ #ifdef HAVE_SYS_EPOLL_H
+     gf_msg_plain_nomem(GF_LOG_ALERT, "epoll.h 1");
+diff --git a/libglusterfs/src/glusterfs/locking.h b/libglusterfs/src/glusterfs/locking.h
+index 43cc877..63097bb 100644
+--- a/libglusterfs/src/glusterfs/locking.h
++++ b/libglusterfs/src/glusterfs/locking.h
+@@ -22,55 +22,6 @@
+ #define pthread_spin_init(l, v) (*l = v)
+ #endif
+ 
+-#if defined(HAVE_SPINLOCK)
+-
+-typedef union {
+-    pthread_spinlock_t spinlock;
+-    pthread_mutex_t mutex;
+-} gf_lock_t;
+-
+-#if !defined(LOCKING_IMPL)
+-extern int use_spinlocks;
+-
+-/*
+- * Using a dispatch table would be unpleasant because we're dealing with two
+- * different types.  If the dispatch contains direct pointers to pthread_xx
+- * or mutex_xxx then we have to hope that every possible union alternative
+- * starts at the same address as the union itself.  I'm old enough to remember
+- * compilers where this was not the case (for alignment reasons) so I'm a bit
+- * paranoid about that.  Also, I don't like casting arguments through "void *"
+- * which we'd also have to do to avoid type errors.  The other alternative would
+- * be to define actual functions which pick out the right union member, and put
+- * those in the dispatch tables.  Now we have a pointer dereference through the
+- * dispatch table plus a function call, which is likely to be worse than the
+- * branching here from the ?: construct.  If it were a clear win it might be
+- * worth the extra complexity, but for now this way seems preferable.
+- */
+-
+-#define LOCK_INIT(x)                                                           \
+-    (use_spinlocks ? pthread_spin_init(&((x)->spinlock), 0)                    \
+-                   : pthread_mutex_init(&((x)->mutex), 0))
+-
+-#define LOCK(x)                                                                \
+-    (use_spinlocks ? pthread_spin_lock(&((x)->spinlock))                       \
+-                   : pthread_mutex_lock(&((x)->mutex)))
+-
+-#define TRY_LOCK(x)                                                            \
+-    (use_spinlocks ? pthread_spin_trylock(&((x)->spinlock))                    \
+-                   : pthread_mutex_trylock(&((x)->mutex)))
+-
+-#define UNLOCK(x)                                                              \
+-    (use_spinlocks ? pthread_spin_unlock(&((x)->spinlock))                     \
+-                   : pthread_mutex_unlock(&((x)->mutex)))
+-
+-#define LOCK_DESTROY(x)                                                        \
+-    (use_spinlocks ? pthread_spin_destroy(&((x)->spinlock))                    \
+-                   : pthread_mutex_destroy(&((x)->mutex)))
+-
+-#endif
+-
+-#else
+-
+ typedef pthread_mutex_t gf_lock_t;
+ 
+ #define LOCK_INIT(x) pthread_mutex_init(x, 0)
+@@ -79,6 +30,4 @@ typedef pthread_mutex_t gf_lock_t;
+ #define UNLOCK(x) pthread_mutex_unlock(x)
+ #define LOCK_DESTROY(x) pthread_mutex_destroy(x)
+ 
+-#endif /* HAVE_SPINLOCK */
+-
+ #endif /* _LOCKING_H */
+diff --git a/libglusterfs/src/locking.c b/libglusterfs/src/locking.c
+deleted file mode 100644
+index 7577054..0000000
+--- a/libglusterfs/src/locking.c
++++ /dev/null
+@@ -1,27 +0,0 @@
+-/*
+-  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+-  This file is part of GlusterFS.
+-
+-  This file is licensed to you under your choice of the GNU Lesser
+-  General Public License, version 3 or any later version (LGPLv3 or
+-  later), or the GNU General Public License, version 2 (GPLv2), in all
+-  cases as published by the Free Software Foundation.
+-*/
+-
+-#if defined(HAVE_SPINLOCK)
+-/* None of this matters otherwise. */
+-
+-#include <pthread.h>
+-#include <unistd.h>
+-
+-#define LOCKING_IMPL
+-#include "glusterfs/locking.h"
+-
+-int use_spinlocks = 0;
+-
+-static void __attribute__((constructor)) gf_lock_setup(void)
+-{
+-    // use_spinlocks = (sysconf(_SC_NPROCESSORS_ONLN) > 1);
+-}
+-
+-#endif
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0573-features-shard-unlink-fails-due-to-nospace-to-mknod-.patch b/SOURCES/0573-features-shard-unlink-fails-due-to-nospace-to-mknod-.patch
new file mode 100644
index 0000000..3033727
--- /dev/null
+++ b/SOURCES/0573-features-shard-unlink-fails-due-to-nospace-to-mknod-.patch
@@ -0,0 +1,148 @@
+From 0e453ede1f248a004965d0d368e2c4beb83f2ce1 Mon Sep 17 00:00:00 2001
+From: Vinayakswami Hariharmath <vharihar@redhat.com>
+Date: Mon, 25 Jan 2021 17:32:14 +0530
+Subject: [PATCH 573/584] features/shard: unlink fails due to nospace to mknod
+ marker file
+
+When we hit the max capacity of the storage space, shard_unlink()
+starts failing if there is no space left on the brick to create a
+marker file.
+
+shard_unlink() happens in below steps:
+
+1. create a marker file in the name of gfid of the base file under
+BRICK_PATH/.shard/.remove_me
+2. unlink the base file
+3. shard_delete_shards() deletes the shards in background by
+picking the entries in BRICK_PATH/.shard/.remove_me
+
+If a marker file creation fails then we can't really delete the
+shards, which eventually becomes a problem for a user who is trying
+to make space by deleting unwanted data.
+
+Solution:
+Create the marker file by marking xdata = GLUSTERFS_INTERNAL_FOP_KEY
+which is considered to be internal op and allowed to create under
+reserved space.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/2057
+> Fixes: #2038
+> Change-Id: I7facebab940f9aeee81d489df429e00ef4fb7c5d
+> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+
+BUG: 1891403
+Change-Id: I7facebab940f9aeee81d489df429e00ef4fb7c5d
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244966
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/shard/issue-2038.t      | 56 ++++++++++++++++++++++++++++++++++++++
+ xlators/features/shard/src/shard.c | 20 ++++++++++++++
+ 2 files changed, 76 insertions(+)
+ create mode 100644 tests/bugs/shard/issue-2038.t
+
+diff --git a/tests/bugs/shard/issue-2038.t b/tests/bugs/shard/issue-2038.t
+new file mode 100644
+index 0000000..fc3e7f9
+--- /dev/null
++++ b/tests/bugs/shard/issue-2038.t
+@@ -0,0 +1,56 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../snapshot.rc
++
++cleanup
++
++FILE_COUNT_TIME=5
++
++function get_file_count {
++    ls $1* | wc -l
++}
++
++TEST verify_lvm_version
++TEST glusterd
++TEST pidof glusterd
++TEST init_n_bricks 1
++TEST setup_lvm 1
++
++TEST $CLI volume create $V0 $H0:$L1
++TEST $CLI volume start $V0
++
++$CLI volume info
++
++TEST $CLI volume set $V0 features.shard on
++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++#Setting the size in percentage
++TEST $CLI volume set $V0 storage.reserve 40
++
++#wait 5s to reset disk_space_full flag
++sleep 5
++
++TEST touch $M0/test
++TEST unlink $M0/test
++
++TEST dd if=/dev/zero of=$M0/a bs=80M count=1
++TEST dd if=/dev/zero of=$M0/b bs=10M count=1
++
++gfid_new=$(get_gfid_string $M0/a)
++
++# Wait 5s to update disk_space_full flag because thread check disk space
++# after every 5s
++
++sleep 5
++# setup_lvm create lvm partition of 150M and 40M are reserve so after
++# consuming more than 110M next unlink should not fail
++# Delete the base shard and check shards get cleaned up
++TEST unlink $M0/a
++TEST ! stat $M0/a
++
++TEST $CLI volume stop $V0
++TEST $CLI volume delete $V0
++
++cleanup
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index d1d7d7a..8d4a970 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -4078,6 +4078,16 @@ shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this,
+     SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
+                             local->prebuf.ia_size, 0, err);
+ 
++    /* Mark this as an internal operation, so that in case of disk full,
++     * the marker file will be created as part of reserve space */
++    ret = dict_set_int32_sizen(xattr_req, GLUSTERFS_INTERNAL_FOP_KEY, 1);
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set key: %s on path %s", GLUSTERFS_INTERNAL_FOP_KEY,
++               local->newloc.path);
++        goto err;
++    }
++
+     STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
+                FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+                &local->newloc, 0, 0, 0644, xattr_req);
+@@ -5843,6 +5853,16 @@ shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+ 
+     SHARD_SET_ROOT_FS_ID(frame, local);
+ 
++    /* Mark this as an internal operation, so that in case of disk full
++     * the internal dir will be created as part of reserve space */
++    ret = dict_set_int32_sizen(xattr_req, GLUSTERFS_INTERNAL_FOP_KEY, 1);
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
++               "Failed to set key: %s on path %s", GLUSTERFS_INTERNAL_FOP_KEY,
++               loc->path);
++        goto err;
++    }
++
+     STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
+                       FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
+                       0755, 0, xattr_req);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0574-features-shard-delay-unlink-of-a-file-that-has-fd_co.patch b/SOURCES/0574-features-shard-delay-unlink-of-a-file-that-has-fd_co.patch
new file mode 100644
index 0000000..810abd4
--- /dev/null
+++ b/SOURCES/0574-features-shard-delay-unlink-of-a-file-that-has-fd_co.patch
@@ -0,0 +1,712 @@
+From cb0d240004e6d40f8d7f30d177d5970ebc8e25fb Mon Sep 17 00:00:00 2001
+From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
+Date: Wed, 3 Feb 2021 17:04:25 +0530
+Subject: [PATCH 574/584] features/shard: delay unlink of a file that has
+ fd_count > 0
+
+When there are multiple processes working on a file and if any
+process unlinks that file then unlink operation shouldn't harm
+other processes working on it. This is POSIX-compliant
+behavior and this should be supported when shard feature is
+enabled also.
+
+Problem description:
+Let's consider 2 clients C1 and C2 working on a file F1 with 5
+shards on gluster mount and gluster server has 4 bricks
+B1, B2, B3, B4.
+
+Assume that base file/shard is present on B1, 1st, 2nd shards
+on B2, 3rd and 4th shards on B3 and 5th shard falls on B4. C1
+has opened the F1 in append mode and is writing to it. The
+write FOP goes to 5th shard in this case. So the
+inode->fd_count = 1 on B1(base file) and B4 (5th shard).
+
+C2 at the same time issued unlink to F1. On the server, the
+base file has fd_count = 1 (since C1 has opened the file),
+the base file is renamed under .glusterfs/unlink and
+returned to C2. Then unlink will be sent to shards on all
+bricks and shards on B2 and B3 will be deleted which have
+no open reference yet. C1 starts getting errors while
+accessing the remaining shards though it has open references
+for the file.
+
+This is one such undefined behavior. Likewise we will
+encounter many such undefined behaviors as we don't have one
+global lock to access all shards as one. Of course, having such
+a global lock will lead to a performance hit as it reduces the window
+for parallel access of shards.
+
+Solution:
+The above undefined behavior can be addressed by delaying the
+unlink of a file when there are open references on it.
+File unlink happens in 2 steps.
+step 1: client creates marker file under .shard/.remove_me and
+sends unlink on base file to the server
+step 2: on return from the server, the associated shards will
+be cleaned up and finally marker file will be removed.
+
+In step 2, the background deletion process does a nameless
+lookup using the marker file name (the marker file is named after
+the gfid of the base file) in the .glusterfs/unlink dir. If the
+nameless lookup is successful then the gfid still has open
+fds and deletion of shards has to be delayed. If the nameless
+lookup fails then the gfid is unlinked and there are no
+open fds on that file (the gfid path is unlinked during final
+close on the file). The shards on which deletion is delayed
+are unlinked once all the open fds are closed and this is
+done through a thread which wakes up every 10 mins.
+
+Also removed active_fd_count from inode structure and
+referring fd_count wherever active_fd_count was used.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/1563
+> Fixes: #1358
+> Change-Id: I8985093386e26215e0b0dce294c534a66f6ca11c
+> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+
+BUG: 1782428
+Change-Id: I8985093386e26215e0b0dce294c534a66f6ca11c
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244967
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/glusterfs.h         |   1 +
+ tests/bugs/shard/issue-1358.t                  | 100 +++++++++++++
+ tests/bugs/shard/unlinks-and-renames.t         |   5 +
+ xlators/features/shard/src/shard.c             | 199 ++++++++++++++++++++++++-
+ xlators/features/shard/src/shard.h             |  11 ++
+ xlators/storage/posix/src/posix-entry-ops.c    |  36 +++++
+ xlators/storage/posix/src/posix-inode-fd-ops.c |  64 +++++---
+ 7 files changed, 391 insertions(+), 25 deletions(-)
+ create mode 100644 tests/bugs/shard/issue-1358.t
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index d3400bf..4401cf6 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -261,6 +261,7 @@ enum gf_internal_fop_indicator {
+ #define GF_XATTROP_PURGE_INDEX "glusterfs.xattrop-purge-index"
+ 
+ #define GF_GFIDLESS_LOOKUP "gfidless-lookup"
++#define GF_UNLINKED_LOOKUP "unlinked-lookup"
+ /* replace-brick and pump related internal xattrs */
+ #define RB_PUMP_CMD_START "glusterfs.pump.start"
+ #define RB_PUMP_CMD_PAUSE "glusterfs.pump.pause"
+diff --git a/tests/bugs/shard/issue-1358.t b/tests/bugs/shard/issue-1358.t
+new file mode 100644
+index 0000000..1838e06
+--- /dev/null
++++ b/tests/bugs/shard/issue-1358.t
+@@ -0,0 +1,100 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++cleanup;
++
++FILE_COUNT_TIME=5
++
++function get_file_count {
++    ls $1* | wc -l
++}
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
++TEST $CLI volume set $V0 features.shard on
++TEST $CLI volume set $V0 features.shard-block-size 4MB
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++
++TEST mkdir $M0/dir
++TEST dd if=/dev/urandom of=$M0/dir/foo bs=4M count=5
++gfid_new=$(get_gfid_string $M0/dir/foo)
++
++# Ensure its shards dir is created now.
++TEST stat $B0/${V0}0/.shard/$gfid_new.1
++TEST stat $B0/${V0}1/.shard/$gfid_new.1
++TEST stat $B0/${V0}0/.shard/$gfid_new.2
++TEST stat $B0/${V0}1/.shard/$gfid_new.2
++
++# Open a file and store descriptor in fd = 5
++exec 5>$M0/dir/foo
++
++# Write something on the file using the open fd = 5
++echo "issue-1358" >&5
++
++# Write on the descriptor should be successful
++EXPECT 0 echo $?
++
++# Unlink the same file which is opened in prev step
++TEST unlink $M0/dir/foo
++
++# Check the base file
++TEST ! stat $M0/dir/foo
++TEST ! stat $B0/${V0}0/foo
++TEST ! stat $B0/${V0}1/foo
++
++# Write something on the file using the open fd = 5
++echo "issue-1281" >&5
++
++# Write on the descriptor should be successful
++EXPECT 0 echo $?
++
++# Check ".shard/.remove_me"
++EXPECT_WITHIN $FILE_COUNT_TIME 1 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new
++EXPECT_WITHIN $FILE_COUNT_TIME 1 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new
++
++# Close the fd = 5
++exec 5>&-
++
++###### To see the shards deleted, wait for 10 mins or repeat the same steps i.e open a file #####
++###### write something to it, unlink it and close it. This will wake up the thread that is ######
++###### responsible to delete the shards
++
++TEST touch $M0/dir/new
++exec 6>$M0/dir/new
++echo "issue-1358" >&6
++EXPECT 0 echo $?
++TEST unlink $M0/dir/new
++exec 6>&-
++
++# Now check the ".shard/.remove_me" and the gfid will not be there
++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new
++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new
++
++# check for the absence of shards
++TEST ! stat $B0/${V0}0/.shard/$gfid_new.1
++TEST ! stat $B0/${V0}1/.shard/$gfid_new.1
++TEST ! stat $B0/${V0}0/.shard/$gfid_new.2
++TEST ! stat $B0/${V0}1/.shard/$gfid_new.2
++
++#### Create the file with same name and check creation and deletion works fine ######
++TEST dd if=/dev/urandom of=$M0/dir/foo bs=4M count=5
++gfid_new=$(get_gfid_string $M0/dir/foo)
++
++# Ensure its shards dir is created now.
++TEST stat $B0/${V0}0/.shard/$gfid_new.1
++TEST stat $B0/${V0}1/.shard/$gfid_new.1
++TEST stat $B0/${V0}0/.shard/$gfid_new.2
++TEST stat $B0/${V0}1/.shard/$gfid_new.2
++
++TEST unlink $M0/dir/foo
++cleanup
++
+diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t
+index 990ca69..3280fcb 100644
+--- a/tests/bugs/shard/unlinks-and-renames.t
++++ b/tests/bugs/shard/unlinks-and-renames.t
+@@ -24,6 +24,11 @@ TEST pidof glusterd
+ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+ TEST $CLI volume set $V0 features.shard on
+ TEST $CLI volume set $V0 features.shard-block-size 4MB
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.write-behind off
++
+ TEST $CLI volume start $V0
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+ 
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 8d4a970..b828ff9 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -1242,7 +1242,8 @@ out:
+ 
+ static inode_t *
+ shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode,
+-                              struct iatt *buf, shard_internal_dir_type_t type)
++                              xlator_t *this, struct iatt *buf,
++                              shard_internal_dir_type_t type)
+ {
+     inode_t *linked_inode = NULL;
+     shard_priv_t *priv = NULL;
+@@ -1250,7 +1251,7 @@ shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode,
+     inode_t **priv_inode = NULL;
+     inode_t *parent = NULL;
+ 
+-    priv = THIS->private;
++    priv = this->private;
+ 
+     switch (type) {
+         case SHARD_INTERNAL_DIR_DOT_SHARD:
+@@ -1294,7 +1295,7 @@ shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
+     /* To-Do: Fix refcount increment per call to
+      * shard_link_internal_dir_inode().
+      */
+-    linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++    linked_inode = shard_link_internal_dir_inode(local, inode, this, buf, type);
+     shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
+ out:
+     shard_common_resolve_shards(frame, this, local->post_res_handler);
+@@ -1383,7 +1384,7 @@ shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+         goto unwind;
+     }
+ 
+-    link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++    link_inode = shard_link_internal_dir_inode(local, inode, this, buf, type);
+     if (link_inode != inode) {
+         shard_refresh_internal_dir(frame, this, type);
+     } else {
+@@ -3586,7 +3587,8 @@ shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
+                        "Lookup on %s failed, exiting", bname);
+             goto err;
+         } else {
+-            shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
++            shard_link_internal_dir_inode(local, loc->inode, this, &stbuf,
++                                          type);
+         }
+     }
+     ret = 0;
+@@ -3633,6 +3635,45 @@ err:
+     return ret;
+ }
+ 
++static int
++shard_nameless_lookup_base_file(xlator_t *this, char *gfid)
++{
++    int ret = 0;
++    loc_t loc = {
++        0,
++    };
++    dict_t *xattr_req = dict_new();
++    if (!xattr_req) {
++        ret = -1;
++        goto out;
++    }
++
++    loc.inode = inode_new(this->itable);
++    if (loc.inode == NULL) {
++        ret = -1;
++        goto out;
++    }
++
++    ret = gf_uuid_parse(gfid, loc.gfid);
++    if (ret < 0)
++        goto out;
++
++    ret = dict_set_uint32(xattr_req, GF_UNLINKED_LOOKUP, 1);
++    if (ret < 0)
++        goto out;
++
++    ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, xattr_req, NULL);
++    if (ret < 0)
++        goto out;
++
++out:
++    if (xattr_req)
++        dict_unref(xattr_req);
++    loc_wipe(&loc);
++
++    return ret;
++}
++
+ int
+ shard_delete_shards(void *opaque)
+ {
+@@ -3734,6 +3775,11 @@ shard_delete_shards(void *opaque)
+                     if (ret < 0)
+                         continue;
+                 }
++
++                ret = shard_nameless_lookup_base_file(this, entry->d_name);
++                if (!ret)
++                    continue;
++
+                 link_inode = inode_link(entry->inode, local->fd->inode,
+                                         entry->d_name, &entry->d_stat);
+ 
+@@ -4105,6 +4151,9 @@ err:
+ int
+ shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+ 
++static int
++shard_unlink_handler_spawn(xlator_t *this);
++
+ int
+ shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno,
+@@ -4126,7 +4175,7 @@ shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+         if (xdata)
+             local->xattr_rsp = dict_ref(xdata);
+         if (local->cleanup_required)
+-            shard_start_background_deletion(this);
++            shard_unlink_handler_spawn(this);
+     }
+ 
+     if (local->entrylk_frame) {
+@@ -5785,7 +5834,7 @@ shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+         }
+     }
+ 
+-    link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++    link_inode = shard_link_internal_dir_inode(local, inode, this, buf, type);
+     if (link_inode != inode) {
+         shard_refresh_internal_dir(frame, this, type);
+     } else {
+@@ -7098,6 +7147,132 @@ shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+     return 0;
+ }
+ 
++static void
++shard_unlink_wait(shard_unlink_thread_t *ti)
++{
++    struct timespec wait_till = {
++        0,
++    };
++
++    pthread_mutex_lock(&ti->mutex);
++    {
++        /* shard_unlink_handler() runs every 10 mins of interval */
++        wait_till.tv_sec = time(NULL) + 600;
++
++        while (!ti->rerun) {
++            if (pthread_cond_timedwait(&ti->cond, &ti->mutex, &wait_till) ==
++                ETIMEDOUT)
++                break;
++        }
++        ti->rerun = _gf_false;
++    }
++    pthread_mutex_unlock(&ti->mutex);
++}
++
++static void *
++shard_unlink_handler(void *data)
++{
++    shard_unlink_thread_t *ti = data;
++    xlator_t *this = ti->this;
++
++    THIS = this;
++
++    while (!ti->stop) {
++        shard_start_background_deletion(this);
++        shard_unlink_wait(ti);
++    }
++    return NULL;
++}
++
++static int
++shard_unlink_handler_spawn(xlator_t *this)
++{
++    int ret = 0;
++    shard_priv_t *priv = this->private;
++    shard_unlink_thread_t *ti = &priv->thread_info;
++
++    ti->this = this;
++
++    pthread_mutex_lock(&ti->mutex);
++    {
++        if (ti->running) {
++            pthread_cond_signal(&ti->cond);
++        } else {
++            ret = gf_thread_create(&ti->thread, NULL, shard_unlink_handler, ti,
++                                   "shard_unlink");
++            if (ret < 0) {
++                gf_log(this->name, GF_LOG_ERROR,
++                       "Failed to create \"shard_unlink\" thread");
++                goto unlock;
++            }
++            ti->running = _gf_true;
++        }
++
++        ti->rerun = _gf_true;
++    }
++unlock:
++    pthread_mutex_unlock(&ti->mutex);
++    return ret;
++}
++
++static int
++shard_unlink_handler_init(shard_unlink_thread_t *ti)
++{
++    int ret = 0;
++    xlator_t *this = THIS;
++
++    ret = pthread_mutex_init(&ti->mutex, NULL);
++    if (ret) {
++        gf_log(this->name, GF_LOG_ERROR,
++               "Failed to init mutex for \"shard_unlink\" thread");
++        goto out;
++    }
++
++    ret = pthread_cond_init(&ti->cond, NULL);
++    if (ret) {
++        gf_log(this->name, GF_LOG_ERROR,
++               "Failed to init cond var for \"shard_unlink\" thread");
++        pthread_mutex_destroy(&ti->mutex);
++        goto out;
++    }
++
++    ti->running = _gf_false;
++    ti->rerun = _gf_false;
++    ti->stop = _gf_false;
++
++out:
++    return -ret;
++}
++
++static void
++shard_unlink_handler_fini(shard_unlink_thread_t *ti)
++{
++    int ret = 0;
++    xlator_t *this = THIS;
++    if (!ti)
++        return;
++
++    pthread_mutex_lock(&ti->mutex);
++    if (ti->running) {
++        ti->rerun = _gf_true;
++        ti->stop = _gf_true;
++        pthread_cond_signal(&ti->cond);
++    }
++    pthread_mutex_unlock(&ti->mutex);
++
++    if (ti->running) {
++        ret = pthread_join(ti->thread, NULL);
++        if (ret)
++            gf_msg(this->name, GF_LOG_WARNING, 0, 0,
++                   "Failed to clean up shard unlink thread.");
++        ti->running = _gf_false;
++    }
++    ti->thread = 0;
++
++    pthread_cond_destroy(&ti->cond);
++    pthread_mutex_destroy(&ti->mutex);
++}
++
+ int32_t
+ mem_acct_init(xlator_t *this)
+ {
+@@ -7164,6 +7339,14 @@ init(xlator_t *this)
+     this->private = priv;
+     LOCK_INIT(&priv->lock);
+     INIT_LIST_HEAD(&priv->ilist_head);
++
++    ret = shard_unlink_handler_init(&priv->thread_info);
++    if (ret) {
++        gf_log(this->name, GF_LOG_ERROR,
++               "Failed to initialize resources for \"shard_unlink\" thread");
++        goto out;
++    }
++
+     ret = 0;
+ out:
+     if (ret) {
+@@ -7188,6 +7371,8 @@ fini(xlator_t *this)
+     if (!priv)
+         goto out;
+ 
++    shard_unlink_handler_fini(&priv->thread_info);
++
+     this->private = NULL;
+     LOCK_DESTROY(&priv->lock);
+     GF_FREE(priv);
+diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
+index 4fe181b..3dcb112 100644
+--- a/xlators/features/shard/src/shard.h
++++ b/xlators/features/shard/src/shard.h
+@@ -207,6 +207,16 @@ typedef enum {
+ 
+ /* rm = "remove me" */
+ 
++typedef struct shard_unlink_thread {
++    pthread_mutex_t mutex;
++    pthread_cond_t cond;
++    pthread_t thread;
++    gf_boolean_t running;
++    gf_boolean_t rerun;
++    gf_boolean_t stop;
++    xlator_t *this;
++} shard_unlink_thread_t;
++
+ typedef struct shard_priv {
+     uint64_t block_size;
+     uuid_t dot_shard_gfid;
+@@ -220,6 +230,7 @@ typedef struct shard_priv {
+     shard_bg_deletion_state_t bg_del_state;
+     gf_boolean_t first_lookup_done;
+     uint64_t lru_limit;
++    shard_unlink_thread_t thread_info;
+ } shard_priv_t;
+ 
+ typedef struct {
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index b3a5381..1511e68 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -183,6 +183,11 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+     struct posix_private *priv = NULL;
+     posix_inode_ctx_t *ctx = NULL;
+     int ret = 0;
++    uint32_t lookup_unlink_dir = 0;
++    char *unlink_path = NULL;
++    struct stat lstatbuf = {
++        0,
++    };
+ 
+     VALIDATE_OR_GOTO(frame, out);
+     VALIDATE_OR_GOTO(this, out);
+@@ -208,7 +213,36 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+     op_ret = -1;
+     if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) {
+         /* nameless lookup */
++        op_ret = op_errno = errno = 0;
+         MAKE_INODE_HANDLE(real_path, this, loc, &buf);
++
++        /* The gfid will be renamed to ".glusterfs/unlink" in case
++         * there are any open fds on the file in posix_unlink path.
++         * So client can request server to do nameless lookup with
++         * xdata = GF_UNLINKED_LOOKUP in ".glusterfs/unlink"
++         * dir if a client wants to know the status of the all open fds
++         * on the unlinked file. If the file still present in the
++         * ".glusterfs/unlink" dir then it indicates there still
++         * open fds present on the file and the file is still under
++         * unlink process */
++        if (op_ret < 0 && errno == ENOENT) {
++            ret = dict_get_uint32(xdata, GF_UNLINKED_LOOKUP,
++                                  &lookup_unlink_dir);
++            if (!ret && lookup_unlink_dir) {
++                op_ret = op_errno = errno = 0;
++                POSIX_GET_FILE_UNLINK_PATH(priv->base_path, loc->gfid,
++                                           unlink_path);
++                ret = sys_lstat(unlink_path, &lstatbuf);
++                if (ret) {
++                    op_ret = -1;
++                    op_errno = errno;
++                } else {
++                    iatt_from_stat(&buf, &lstatbuf);
++                    buf.ia_nlink = 0;
++                }
++                goto nameless_lookup_unlink_dir_out;
++            }
++        }
+     } else {
+         MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &buf);
+         if (!real_path || !par_path) {
+@@ -328,6 +362,8 @@ out:
+ 
+     if (op_ret == 0)
+         op_errno = 0;
++
++nameless_lookup_unlink_dir_out:
+     STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno,
+                         (loc) ? loc->inode : NULL, &buf, xattr, &postparent);
+ 
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 761e018..4c2983a 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -2504,6 +2504,39 @@ out:
+     return 0;
+ }
+ 
++static int
++posix_unlink_renamed_file(xlator_t *this, inode_t *inode)
++{
++    int ret = 0;
++    char *unlink_path = NULL;
++    uint64_t ctx_uint = 0;
++    posix_inode_ctx_t *ctx = NULL;
++    struct posix_private *priv = this->private;
++
++    ret = inode_ctx_get(inode, this, &ctx_uint);
++
++    if (ret < 0)
++        goto out;
++
++    ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint;
++
++    if (ctx->unlink_flag == GF_UNLINK_TRUE) {
++        POSIX_GET_FILE_UNLINK_PATH(priv->base_path, inode->gfid, unlink_path);
++        if (!unlink_path) {
++            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED,
++                   "Failed to remove gfid :%s", uuid_utoa(inode->gfid));
++            ret = -1;
++        } else {
++            ret = sys_unlink(unlink_path);
++            if (!ret)
++                ctx->unlink_flag = GF_UNLINK_FALSE;
++        }
++    }
++
++out:
++    return ret;
++}
++
+ int32_t
+ posix_release(xlator_t *this, fd_t *fd)
+ {
+@@ -2514,6 +2547,9 @@ posix_release(xlator_t *this, fd_t *fd)
+     VALIDATE_OR_GOTO(this, out);
+     VALIDATE_OR_GOTO(fd, out);
+ 
++    if (fd->inode->active_fd_count == 0)
++        posix_unlink_renamed_file(this, fd->inode);
++
+     ret = fd_ctx_del(fd, this, &tmp_pfd);
+     if (ret < 0) {
+         gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
+@@ -5881,41 +5917,33 @@ posix_forget(xlator_t *this, inode_t *inode)
+     uint64_t ctx_uint1 = 0;
+     uint64_t ctx_uint2 = 0;
+     posix_inode_ctx_t *ctx = NULL;
+-    posix_mdata_t *mdata = NULL;
+-    struct posix_private *priv_posix = NULL;
+-
+-    priv_posix = (struct posix_private *)this->private;
+-    if (!priv_posix)
+-        return 0;
++    struct posix_private *priv = this->private;
+ 
+     ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2);
++
++    if (ctx_uint2)
++        GF_FREE((posix_mdata_t *)(uintptr_t)ctx_uint2);
++
+     if (!ctx_uint1)
+-        goto check_ctx2;
++        return 0;
+ 
+     ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1;
+ 
+     if (ctx->unlink_flag == GF_UNLINK_TRUE) {
+-        POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid,
+-                                   unlink_path);
++        POSIX_GET_FILE_UNLINK_PATH(priv->base_path, inode->gfid, unlink_path);
+         if (!unlink_path) {
+             gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED,
+                    "Failed to remove gfid :%s", uuid_utoa(inode->gfid));
+             ret = -1;
+-            goto ctx_free;
++        } else {
++            ret = sys_unlink(unlink_path);
+         }
+-        ret = sys_unlink(unlink_path);
+     }
+-ctx_free:
++
+     pthread_mutex_destroy(&ctx->xattrop_lock);
+     pthread_mutex_destroy(&ctx->write_atomic_lock);
+     pthread_mutex_destroy(&ctx->pgfid_lock);
+     GF_FREE(ctx);
+ 
+-check_ctx2:
+-    if (ctx_uint2) {
+-        mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2;
+-    }
+-
+-    GF_FREE(mdata);
+     return ret;
+ }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0575-libglusterfs-add-functions-to-calculate-time-differe.patch b/SOURCES/0575-libglusterfs-add-functions-to-calculate-time-differe.patch
new file mode 100644
index 0000000..98ffc3c
--- /dev/null
+++ b/SOURCES/0575-libglusterfs-add-functions-to-calculate-time-differe.patch
@@ -0,0 +1,160 @@
+From 59e69ae1c7ccda74a8cbf8c9b2ae37bc74cbf612 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Fri, 4 Jun 2021 10:55:37 +0530
+Subject: [PATCH 575/584] libglusterfs: add functions to calculate time
+ difference
+
+Add gf_tvdiff() and gf_tsdiff() to calculate the difference
+between 'struct timeval' and 'struct timespec' values, use
+them where appropriate.
+
+Upstream patch details:
+> https://github.com/gluster/glusterfs/commit/ba7f24b1cedf2549394c21b3f0df1661227cefae
+> Change-Id: I172be06ee84e99a1da76847c15e5ea3fbc059338
+> Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
+> Updates: #1002
+
+BUG: 1928676
+Change-Id: I723ab9555b0f8caef108742acc2cb63d6a32eb96
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245294
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfsd/src/glusterfsd-mgmt.c          |  4 ++--
+ libglusterfs/src/glusterfs/common-utils.h | 32 +++++++++++++++++++++++++++++++
+ libglusterfs/src/latency.c                |  3 +--
+ xlators/cluster/dht/src/dht-rebalance.c   |  6 ++----
+ xlators/debug/io-stats/src/io-stats.c     |  8 ++------
+ 5 files changed, 39 insertions(+), 14 deletions(-)
+
+diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
+index 61d1b21..a51dd9e 100644
+--- a/glusterfsd/src/glusterfsd-mgmt.c
++++ b/glusterfsd/src/glusterfsd-mgmt.c
+@@ -534,7 +534,7 @@ glusterfs_volume_top_write_perf(uint32_t blk_size, uint32_t blk_count,
+     }
+ 
+     gettimeofday(&end, NULL);
+-    *time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
++    *time = gf_tvdiff(&begin, &end);
+     *throughput = total_blks / *time;
+     gf_log("glusterd", GF_LOG_INFO,
+            "Throughput %.2f Mbps time %.2f secs "
+@@ -653,7 +653,7 @@ glusterfs_volume_top_read_perf(uint32_t blk_size, uint32_t blk_count,
+     }
+ 
+     gettimeofday(&end, NULL);
+-    *time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
++    *time = gf_tvdiff(&begin, &end);
+     *throughput = total_blks / *time;
+     gf_log("glusterd", GF_LOG_INFO,
+            "Throughput %.2f Mbps time %.2f secs "
+diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
+index 604afd0..bd48b6f 100644
+--- a/libglusterfs/src/glusterfs/common-utils.h
++++ b/libglusterfs/src/glusterfs/common-utils.h
+@@ -1090,4 +1090,36 @@ find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args);
+ int
+ gf_d_type_from_ia_type(ia_type_t type);
+ 
++/* Return delta value in microseconds. */
++
++static inline double
++gf_tvdiff(struct timeval *start, struct timeval *end)
++{
++    struct timeval t;
++
++    if (start->tv_usec > end->tv_usec)
++        t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000;
++    else
++        t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec;
++
++    return (double)(t.tv_sec - start->tv_sec) * 1e6 +
++           (double)(t.tv_usec - start->tv_usec);
++}
++
++/* Return delta value in nanoseconds. */
++
++static inline double
++gf_tsdiff(struct timespec *start, struct timespec *end)
++{
++    struct timespec t;
++
++    if (start->tv_nsec > end->tv_nsec)
++        t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000;
++    else
++        t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec;
++
++    return (double)(t.tv_sec - start->tv_sec) * 1e9 +
++           (double)(t.tv_nsec - start->tv_nsec);
++}
++
+ #endif /* _COMMON_UTILS_H */
+diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
+index e1e6de7..ce61399 100644
+--- a/libglusterfs/src/latency.c
++++ b/libglusterfs/src/latency.c
+@@ -33,8 +33,7 @@ gf_update_latency(call_frame_t *frame)
+     if (!(begin->tv_sec && end->tv_sec))
+         goto out;
+ 
+-    elapsed = (end->tv_sec - begin->tv_sec) * 1e9 +
+-              (end->tv_nsec - begin->tv_nsec);
++    elapsed = gf_tsdiff(begin, end);
+ 
+     if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
+         gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index eab7558..e07dec0 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -2927,8 +2927,7 @@ gf_defrag_migrate_single_file(void *opaque)
+ 
+     if (defrag->stats == _gf_true) {
+         gettimeofday(&end, NULL);
+-        elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
+-                  (end.tv_usec - start.tv_usec);
++        elapsed = gf_tvdiff(&start, &end);
+         gf_log(this->name, GF_LOG_INFO,
+                "Migration of "
+                "file:%s size:%" PRIu64
+@@ -3529,8 +3528,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+     }
+ 
+     gettimeofday(&end, NULL);
+-    elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
+-              (end.tv_usec - dir_start.tv_usec);
++    elapsed = gf_tvdiff(&dir_start, &end);
+     gf_log(this->name, GF_LOG_INFO,
+            "Migration operation on dir %s took "
+            "%.2f secs",
+diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
+index 9b34895..8ad96fb 100644
+--- a/xlators/debug/io-stats/src/io-stats.c
++++ b/xlators/debug/io-stats/src/io-stats.c
+@@ -281,9 +281,7 @@ is_fop_latency_started(call_frame_t *frame)
+         begin = &frame->begin;                                                 \
+         end = &frame->end;                                                     \
+                                                                                \
+-        elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 +                       \
+-                   (end->tv_nsec - begin->tv_nsec)) /                          \
+-                  1000;                                                        \
++        elapsed = gf_tsdiff(begin, end) / 1000.0;                              \
+         throughput = op_ret / elapsed;                                         \
+                                                                                \
+         conf = this->private;                                                  \
+@@ -1774,9 +1772,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame,
+     begin = &frame->begin;
+     end = &frame->end;
+ 
+-    elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 +
+-               (end->tv_nsec - begin->tv_nsec)) /
+-              1000;
++    elapsed = gf_tsdiff(begin, end) / 1000.0;
+ 
+     update_ios_latency_stats(&conf->cumulative, elapsed, op);
+     update_ios_latency_stats(&conf->incremental, elapsed, op);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0576-rpcsvc-Add-latency-tracking-for-rpc-programs.patch b/SOURCES/0576-rpcsvc-Add-latency-tracking-for-rpc-programs.patch
new file mode 100644
index 0000000..6883559
--- /dev/null
+++ b/SOURCES/0576-rpcsvc-Add-latency-tracking-for-rpc-programs.patch
@@ -0,0 +1,573 @@
+From f2b9d3a089cc9ff9910da0075defe306851aca5c Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Fri, 4 Jun 2021 12:27:57 +0530
+Subject: [PATCH 576/584] rpcsvc: Add latency tracking for rpc programs
+
+Added latency tracking of rpc-handling code. With this change we
+should be able to monitor the amount of time rpc-handling code is
+consuming for each of the rpc calls.
+
+Upstream patch details:
+> https://review.gluster.org/#/c/glusterfs/+/24955/
+> fixes: #1466
+> Change-Id: I04fc7f3b12bfa5053c0fc36885f271cb78f581cd
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1928676
+Change-Id: Ibcedddb5db3ff4906607050cf9f7ea3ebb266cc5
+Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245295
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+Reviewed-by: Ashish Pandey <aspandey@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/latency.h   | 22 +++++---
+ libglusterfs/src/glusterfs/mem-types.h |  1 +
+ libglusterfs/src/glusterfs/stack.h     |  7 +--
+ libglusterfs/src/glusterfs/statedump.h |  2 +
+ libglusterfs/src/glusterfs/xlator.h    |  2 +-
+ libglusterfs/src/latency.c             | 93 +++++++++++++++-------------------
+ libglusterfs/src/libglusterfs.sym      |  5 ++
+ libglusterfs/src/monitoring.c          |  8 +--
+ libglusterfs/src/statedump.c           | 38 +++++++++++++-
+ libglusterfs/src/xlator.c              |  5 ++
+ rpc/rpc-lib/src/libgfrpc.sym           |  1 +
+ rpc/rpc-lib/src/rpcsvc.c               | 72 +++++++++++++++++++++++++-
+ rpc/rpc-lib/src/rpcsvc.h               |  5 ++
+ xlators/protocol/server/src/server.c   |  2 +
+ 14 files changed, 193 insertions(+), 70 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/latency.h b/libglusterfs/src/glusterfs/latency.h
+index ed47b1f..4d601bb 100644
+--- a/libglusterfs/src/glusterfs/latency.h
++++ b/libglusterfs/src/glusterfs/latency.h
+@@ -11,13 +11,23 @@
+ #ifndef __LATENCY_H__
+ #define __LATENCY_H__
+ 
+-#include "glusterfs/glusterfs.h"
++#include <inttypes.h>
++#include <time.h>
+ 
+-typedef struct fop_latency {
+-    double min;   /* min time for the call (microseconds) */
+-    double max;   /* max time for the call (microseconds) */
+-    double total; /* total time (microseconds) */
++typedef struct _gf_latency {
++    uint64_t min;   /* min time for the call (nanoseconds) */
++    uint64_t max;   /* max time for the call (nanoseconds) */
++    uint64_t total; /* total time (nanoseconds) */
+     uint64_t count;
+-} fop_latency_t;
++} gf_latency_t;
+ 
++gf_latency_t *
++gf_latency_new(size_t n);
++
++void
++gf_latency_reset(gf_latency_t *lat);
++
++void
++gf_latency_update(gf_latency_t *lat, struct timespec *begin,
++                  struct timespec *end);
+ #endif /* __LATENCY_H__ */
+diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h
+index 92730a9..970b9ff 100644
+--- a/libglusterfs/src/glusterfs/mem-types.h
++++ b/libglusterfs/src/glusterfs/mem-types.h
+@@ -139,6 +139,7 @@ enum gf_common_mem_types_ {
+     gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
+     gf_common_mt_server_cmdline_t,     /* used only in one location */
+     gf_mt_gfdb_query_record_t,
++    gf_common_mt_latency_t,
+     gf_common_mt_end
+ };
+ #endif
+diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h
+index bd466d8..536a330 100644
+--- a/libglusterfs/src/glusterfs/stack.h
++++ b/libglusterfs/src/glusterfs/stack.h
+@@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame,
+                             xlator_t *this, int32_t op_ret, int32_t op_errno,
+                             ...);
+ 
++void
++gf_frame_latency_update(call_frame_t *frame);
++
+ struct call_pool {
+     union {
+         struct list_head all_frames;
+@@ -149,8 +152,6 @@ struct _call_stack {
+     } while (0);
+ 
+ struct xlator_fops;
+-void
+-gf_update_latency(call_frame_t *frame);
+ 
+ static inline void
+ FRAME_DESTROY(call_frame_t *frame)
+@@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame)
+     void *local = NULL;
+ 
+     if (frame->root->ctx->measure_latency)
+-        gf_update_latency(frame);
++        gf_frame_latency_update(frame);
+ 
+     list_del_init(&frame->frames);
+     if (frame->local) {
+diff --git a/libglusterfs/src/glusterfs/statedump.h b/libglusterfs/src/glusterfs/statedump.h
+index 89d04f9..ce08270 100644
+--- a/libglusterfs/src/glusterfs/statedump.h
++++ b/libglusterfs/src/glusterfs/statedump.h
+@@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd);
+ void
+ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd);
+ 
++void
++gf_latency_statedump_and_reset(char *key, gf_latency_t *lat);
+ #endif /* STATEDUMP_H */
+diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
+index 273039a..ecb9fa4 100644
+--- a/libglusterfs/src/glusterfs/xlator.h
++++ b/libglusterfs/src/glusterfs/xlator.h
+@@ -808,7 +808,7 @@ struct _xlator {
+ 
+         struct {
+             /* for latency measurement */
+-            fop_latency_t latencies[GF_FOP_MAXVALUE];
++            gf_latency_t latencies[GF_FOP_MAXVALUE];
+             /* for latency measurement */
+             fop_metrics_t metrics[GF_FOP_MAXVALUE];
+ 
+diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
+index ce61399..ce4b0e8 100644
+--- a/libglusterfs/src/latency.c
++++ b/libglusterfs/src/latency.c
+@@ -14,39 +14,34 @@
+  */
+ 
+ #include "glusterfs/glusterfs.h"
+-#include "glusterfs/xlator.h"
+-#include "glusterfs/common-utils.h"
+ #include "glusterfs/statedump.h"
+-#include "glusterfs/libglusterfs-messages.h"
+ 
+-void
+-gf_update_latency(call_frame_t *frame)
++gf_latency_t *
++gf_latency_new(size_t n)
+ {
+-    double elapsed;
+-    struct timespec *begin, *end;
+-
+-    fop_latency_t *lat;
+-
+-    begin = &frame->begin;
+-    end = &frame->end;
++    int i = 0;
++    gf_latency_t *lat = NULL;
+ 
+-    if (!(begin->tv_sec && end->tv_sec))
+-        goto out;
++    lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t);
++    if (!lat)
++        return NULL;
+ 
+-    elapsed = gf_tsdiff(begin, end);
++    for (i = 0; i < n; i++) {
++        gf_latency_reset(lat + i);
++    }
++    return lat;
++}
+ 
+-    if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
+-        gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
+-               frame->op);
++void
++gf_latency_update(gf_latency_t *lat, struct timespec *begin,
++                  struct timespec *end)
++{
++    if (!(begin->tv_sec && end->tv_sec)) {
++        /*Measure latency might have been enabled/disabled during the op*/
+         return;
+     }
+ 
+-    /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
+-       set it right anyways for those frames */
+-    if (!frame->op)
+-        frame->op = frame->root->op;
+-
+-    lat = &frame->this->stats.interval.latencies[frame->op];
++    double elapsed = gf_tsdiff(begin, end);
+ 
+     if (lat->max < elapsed)
+         lat->max = elapsed;
+@@ -56,40 +51,34 @@ gf_update_latency(call_frame_t *frame)
+ 
+     lat->total += elapsed;
+     lat->count++;
+-out:
+-    return;
+ }
+ 
+ void
+-gf_proc_dump_latency_info(xlator_t *xl)
++gf_latency_reset(gf_latency_t *lat)
+ {
+-    char key_prefix[GF_DUMP_MAX_BUF_LEN];
+-    char key[GF_DUMP_MAX_BUF_LEN];
+-    int i;
+-
+-    snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
+-    gf_proc_dump_add_section("%s", key_prefix);
+-
+-    for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+-        gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
+-
+-        fop_latency_t *lat = &xl->stats.interval.latencies[i];
++    if (!lat)
++        return;
++    memset(lat, 0, sizeof(*lat));
++    lat->min = ULLONG_MAX;
++    /* make sure 'min' is set to high value, so it would be
++       properly set later */
++}
+ 
+-        /* Doesn't make sense to continue if there are no fops
+-           came in the given interval */
+-        if (!lat->count)
+-            continue;
++void
++gf_frame_latency_update(call_frame_t *frame)
++{
++    gf_latency_t *lat;
++    /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
++       set it right anyways for those frames */
++    if (!frame->op)
++        frame->op = frame->root->op;
+ 
+-        gf_proc_dump_write(key, "%.03f,%" PRId64 ",%.03f",
+-                           (lat->total / lat->count), lat->count, lat->total);
++    if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
++        gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
++               frame->op);
++        return;
+     }
+ 
+-    memset(xl->stats.interval.latencies, 0,
+-           sizeof(xl->stats.interval.latencies));
+-
+-    /* make sure 'min' is set to high value, so it would be
+-       properly set later */
+-    for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+-        xl->stats.interval.latencies[i].min = 0xffffffff;
+-    }
++    lat = &frame->this->stats.interval.latencies[frame->op];
++    gf_latency_update(lat, &frame->begin, &frame->end);
+ }
+diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
+index 9072afa..4f968e1 100644
+--- a/libglusterfs/src/libglusterfs.sym
++++ b/libglusterfs/src/libglusterfs.sym
+@@ -1183,3 +1183,8 @@ gf_latency_reset
+ gf_latency_update
+ gf_frame_latency_update
+ gf_assert
++gf_latency_statedump_and_reset
++gf_latency_new
++gf_latency_reset
++gf_latency_update
++gf_frame_latency_update
+diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c
+index 6d9bfb1..20b7f52 100644
+--- a/libglusterfs/src/monitoring.c
++++ b/libglusterfs/src/monitoring.c
+@@ -113,15 +113,15 @@ dump_latency_and_count(xlator_t *xl, int fd)
+             dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,
+                     gf_fop_list[index], cbk);
+         }
+-        if (xl->stats.interval.latencies[index].count != 0.0) {
++        if (xl->stats.interval.latencies[index].count != 0) {
+             dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,
+                     gf_fop_list[index],
+-                    (xl->stats.interval.latencies[index].total /
++                    (((double)xl->stats.interval.latencies[index].total) /
+                      xl->stats.interval.latencies[index].count));
+-            dprintf(fd, "%s.interval.%s.max %lf\n", xl->name,
++            dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name,
+                     gf_fop_list[index],
+                     xl->stats.interval.latencies[index].max);
+-            dprintf(fd, "%s.interval.%s.min %lf\n", xl->name,
++            dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name,
+                     gf_fop_list[index],
+                     xl->stats.interval.latencies[index].min);
+         }
+diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
+index d18b50f..4bf4cc2 100644
+--- a/libglusterfs/src/statedump.c
++++ b/libglusterfs/src/statedump.c
+@@ -201,6 +201,40 @@ gf_proc_dump_write(char *key, char *value, ...)
+     return ret;
+ }
+ 
++void
++gf_latency_statedump_and_reset(char *key, gf_latency_t *lat)
++{
++    /* Doesn't make sense to continue if there are no fops
++       came in the given interval */
++    if (!lat || !lat->count)
++        return;
++    gf_proc_dump_write(key,
++                       "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64
++                       " MAX:%" PRIu64,
++                       (((double)lat->total) / lat->count), lat->count,
++                       lat->total, lat->min, lat->max);
++    gf_latency_reset(lat);
++}
++
++void
++gf_proc_dump_xl_latency_info(xlator_t *xl)
++{
++    char key_prefix[GF_DUMP_MAX_BUF_LEN];
++    char key[GF_DUMP_MAX_BUF_LEN];
++    int i;
++
++    snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
++    gf_proc_dump_add_section("%s", key_prefix);
++
++    for (i = 0; i < GF_FOP_MAXVALUE; i++) {
++        gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
++
++        gf_latency_t *lat = &xl->stats.interval.latencies[i];
++
++        gf_latency_statedump_and_reset(key, lat);
++    }
++}
++
+ static void
+ gf_proc_dump_xlator_mem_info(xlator_t *xl)
+ {
+@@ -487,7 +521,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav)
+         return;
+ 
+     if (ctx->measure_latency)
+-        gf_proc_dump_latency_info(trav);
++        gf_proc_dump_xl_latency_info(trav);
+ 
+     gf_proc_dump_xlator_mem_info(trav);
+ 
+@@ -1024,7 +1058,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd)
+     {
+         gf_dump_strfd = strfd;
+ 
+-        gf_proc_dump_latency_info(this);
++        gf_proc_dump_xl_latency_info(this);
+ 
+         gf_dump_strfd = NULL;
+     }
+diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
+index 36cc32c..b9ad411 100644
+--- a/libglusterfs/src/xlator.c
++++ b/libglusterfs/src/xlator.c
+@@ -246,6 +246,7 @@ xlator_dynload_apis(xlator_t *xl)
+     void *handle = NULL;
+     volume_opt_list_t *vol_opt = NULL;
+     xlator_api_t *xlapi = NULL;
++    int i = 0;
+ 
+     handle = xl->dlhandle;
+ 
+@@ -343,6 +344,10 @@ xlator_dynload_apis(xlator_t *xl)
+     memcpy(xl->op_version, xlapi->op_version,
+            sizeof(uint32_t) * GF_MAX_RELEASES);
+ 
++    for (i = 0; i < GF_FOP_MAXVALUE; i++) {
++        gf_latency_reset(&xl->stats.interval.latencies[i]);
++    }
++
+     ret = 0;
+ out:
+     return ret;
+diff --git a/rpc/rpc-lib/src/libgfrpc.sym b/rpc/rpc-lib/src/libgfrpc.sym
+index f3544e3..a1757cc 100644
+--- a/rpc/rpc-lib/src/libgfrpc.sym
++++ b/rpc/rpc-lib/src/libgfrpc.sym
+@@ -66,3 +66,4 @@ rpc_transport_unix_options_build
+ rpc_transport_unref
+ rpc_clnt_mgmt_pmap_signout
+ rpcsvc_autoscale_threads
++rpcsvc_statedump
+diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
+index b031d93..855b512 100644
+--- a/rpc/rpc-lib/src/rpcsvc.c
++++ b/rpc/rpc-lib/src/rpcsvc.c
+@@ -25,6 +25,7 @@
+ #include <glusterfs/syncop.h>
+ #include "rpc-drc.h"
+ #include "protocol-common.h"
++#include <glusterfs/statedump.h>
+ 
+ #include <errno.h>
+ #include <pthread.h>
+@@ -377,6 +378,10 @@ rpcsvc_program_actor(rpcsvc_request_t *req)
+         goto err;
+     }
+ 
++    if (svc->xl->ctx->measure_latency) {
++        timespec_now(&req->begin);
++    }
++
+     req->ownthread = program->ownthread;
+     req->synctask = program->synctask;
+ 
+@@ -1526,10 +1531,18 @@ rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
+     size_t hdrlen = 0;
+     char new_iobref = 0;
+     rpcsvc_drc_globals_t *drc = NULL;
++    gf_latency_t *lat = NULL;
+ 
+     if ((!req) || (!req->trans))
+         return -1;
+ 
++    if (req->prog && req->begin.tv_sec) {
++        if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) {
++            timespec_now(&req->end);
++            lat = &req->prog->latencies[req->procnum];
++            gf_latency_update(lat, &req->begin, &req->end);
++        }
++    }
+     trans = req->trans;
+ 
+     for (i = 0; i < hdrcount; i++) {
+@@ -1860,6 +1873,15 @@ rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
+                                  iobref);
+ }
+ 
++void
++rpcsvc_program_destroy(rpcsvc_program_t *program)
++{
++    if (program) {
++        GF_FREE(program->latencies);
++        GF_FREE(program);
++    }
++}
++
+ int
+ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
+ {
+@@ -1917,8 +1939,7 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
+ 
+     ret = 0;
+ out:
+-    if (prog)
+-        GF_FREE(prog);
++    rpcsvc_program_destroy(prog);
+ 
+     if (ret == -1) {
+         if (program) {
+@@ -2303,6 +2324,11 @@ rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
+     }
+ 
+     memcpy(newprog, program, sizeof(*program));
++    newprog->latencies = gf_latency_new(program->numactors);
++    if (!newprog->latencies) {
++        rpcsvc_program_destroy(newprog);
++        goto out;
++    }
+ 
+     INIT_LIST_HEAD(&newprog->program);
+     pthread_mutexattr_init(&thr_attr);
+@@ -3240,6 +3266,48 @@ out:
+     return ret;
+ }
+ 
++void
++rpcsvc_program_dump(rpcsvc_program_t *prog)
++{
++    char key_prefix[GF_DUMP_MAX_BUF_LEN];
++    char key[GF_DUMP_MAX_BUF_LEN];
++    int i;
++
++    snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname);
++    gf_proc_dump_add_section("%s", key_prefix);
++
++    gf_proc_dump_build_key(key, key_prefix, "program-number");
++    gf_proc_dump_write(key, "%d", prog->prognum);
++
++    gf_proc_dump_build_key(key, key_prefix, "program-version");
++    gf_proc_dump_write(key, "%d", prog->progver);
++
++    strncat(key_prefix, ".latency",
++            sizeof(key_prefix) - strlen(key_prefix) - 1);
++
++    for (i = 0; i < prog->numactors; i++) {
++        gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname);
++        gf_latency_statedump_and_reset(key, &prog->latencies[i]);
++    }
++}
++
++void
++rpcsvc_statedump(rpcsvc_t *svc)
++{
++    rpcsvc_program_t *prog = NULL;
++    int ret = 0;
++    ret = pthread_rwlock_tryrdlock(&svc->rpclock);
++    if (ret)
++        return;
++    {
++        list_for_each_entry(prog, &svc->programs, program)
++        {
++            rpcsvc_program_dump(prog);
++        }
++    }
++    pthread_rwlock_unlock(&svc->rpclock);
++}
++
+ rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = {
+     [GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, 0, DRC_NA},
+     [GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, 0, DRC_NA},
+diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
+index a51edc7..e336d00 100644
+--- a/rpc/rpc-lib/src/rpcsvc.h
++++ b/rpc/rpc-lib/src/rpcsvc.h
+@@ -275,6 +275,8 @@ struct rpcsvc_request {
+     gf_boolean_t ownthread;
+ 
+     gf_boolean_t synctask;
++    struct timespec begin; /*req handling start time*/
++    struct timespec end;   /*req handling end time*/
+ };
+ 
+ #define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
+@@ -431,6 +433,7 @@ struct rpcsvc_program {
+ 
+     /* Program specific state handed to actors */
+     void *private;
++    gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/
+ 
+     /* This upcall is provided by the program during registration.
+      * It is used to notify the program about events like connection being
+@@ -696,4 +699,6 @@ rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr);
+ 
+ extern int
+ rpcsvc_destroy(rpcsvc_t *svc);
++void
++rpcsvc_statedump(rpcsvc_t *svc);
+ #endif
+diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
+index 54d9c0f..90eb3ff 100644
+--- a/xlators/protocol/server/src/server.c
++++ b/xlators/protocol/server/src/server.c
+@@ -267,6 +267,8 @@ server_priv(xlator_t *this)
+     gf_proc_dump_build_key(key, "server", "total-bytes-write");
+     gf_proc_dump_write(key, "%" PRIu64, total_write);
+ 
++    rpcsvc_statedump(conf->rpc);
++
+     ret = 0;
+ out:
+     if (ret)
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch b/SOURCES/0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch
new file mode 100644
index 0000000..1a5d0ea
--- /dev/null
+++ b/SOURCES/0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch
@@ -0,0 +1,472 @@
+From d7665cf3249310c5faf87368f395b4e25cb86b48 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Thu, 15 Apr 2021 10:29:06 +0530
+Subject: [PATCH 577/584] protocol/client: don't reopen fds on which POSIX
+ locks are held after a reconnect
+
+XXXXXXXXXXXXXXXXXXX
+    IMPORTANT:
+XXXXXXXXXXXXXXXXXXX
+As a best practice, with this patch we are bumping up the op-version
+from GD_OP_VERSION_7_1 to GD_OP_VERSION_7_2 since it introduces a
+new volume option. Enabling the new option will have effect only
+after all the servers and clients are upgraded to this version.
+----------------------------------------------------------------------
+
+Bricks cleanup any granted locks after a client disconnects and
+currently these locks are not healed after a reconnect. This means
+post reconnect a competing process could be granted a lock even though
+the first process which was granted locks has not unlocked. By not
+re-opening fds, subsequent operations on such fds will fail forcing
+the application to close the current fd and reopen a new one. This way
+we prevent any silent corruption.
+
+A new option "client.strict-locks" is introduced to control this
+behaviour. This option is set to "off" by default.
+
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22712/
+> Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e
+> Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
+> updates: bz#1694920
+
+BUG: 1689375
+Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244909
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ libglusterfs/src/glusterfs/globals.h             |  4 +-
+ tests/bugs/bug-1694920.t                         | 63 ++++++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c  | 14 ++++++
+ xlators/protocol/client/src/client-handshake.c   |  3 +-
+ xlators/protocol/client/src/client-helpers.c     |  5 +-
+ xlators/protocol/client/src/client-lk.c          |  2 +-
+ xlators/protocol/client/src/client-rpc-fops.c    | 45 ++++++++++++++++-
+ xlators/protocol/client/src/client-rpc-fops_v2.c | 32 +++++++++++-
+ xlators/protocol/client/src/client.c             | 13 +++++
+ xlators/protocol/client/src/client.h             | 16 ++++++
+ 10 files changed, 190 insertions(+), 7 deletions(-)
+ create mode 100644 tests/bugs/bug-1694920.t
+
+diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
+index 33fb023..ce2d110 100644
+--- a/libglusterfs/src/glusterfs/globals.h
++++ b/libglusterfs/src/glusterfs/globals.h
+@@ -50,7 +50,7 @@
+     1 /* MIN is the fresh start op-version, mostly                             \
+          should not change */
+ #define GD_OP_VERSION_MAX                                                      \
+-    GD_OP_VERSION_7_1 /* MAX VERSION is the maximum                            \
++    GD_OP_VERSION_7_2 /* MAX VERSION is the maximum                            \
+                          count in VME table, should                            \
+                          keep changing with                                    \
+                          introduction of newer                                 \
+@@ -140,6 +140,8 @@
+ 
+ #define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
+ 
++#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */
++
+ #include "glusterfs/xlator.h"
+ #include "glusterfs/options.h"
+ 
+diff --git a/tests/bugs/bug-1694920.t b/tests/bugs/bug-1694920.t
+new file mode 100644
+index 0000000..5bf93c9
+--- /dev/null
++++ b/tests/bugs/bug-1694920.t
+@@ -0,0 +1,63 @@
++#!/bin/bash
++
++SCRIPT_TIMEOUT=300
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++. $(dirname $0)/../fileio.rc
++cleanup;
++
++TEST glusterd;
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 $H0:$B0/${V0};
++TEST $CLI volume set $V0 performance.quick-read off
++TEST $CLI volume set $V0 performance.io-cache off
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 performance.stat-prefetch off
++TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume start $V0
++TEST $GFS -s $H0 --volfile-id=$V0  $M0;
++
++TEST touch $M0/a
++
++#When all bricks are up, lock and unlock should succeed
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'w' $M0/a
++TEST flock -x $fd1
++TEST fd_close $fd1
++
++#When all bricks are down, lock/unlock should fail
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'w' $M0/a
++TEST $CLI volume stop $V0
++TEST ! flock -x $fd1
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
++TEST fd_close $fd1
++
++#When a brick goes down and comes back up operations on fd which had locks on it should succeed by default
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'w' $M0/a
++TEST flock -x $fd1
++TEST $CLI volume stop $V0
++sleep 2
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
++TEST fd_write $fd1 "data"
++TEST fd_close $fd1
++
++#When a brick goes down and comes back up operations on fd which had locks on it should fail when client.strict-locks is on
++TEST $CLI volume set $V0 client.strict-locks on
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'w' $M0/a
++TEST flock -x $fd1
++TEST $CLI volume stop $V0
++sleep 2
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
++TEST ! fd_write $fd1 "data"
++TEST fd_close $fd1
++
++cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index c1ca190..01f3912 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2022,6 +2022,20 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+      .value = "9",
+      .flags = VOLOPT_FLAG_CLIENT_OPT},
+ 
++    {.key = "client.strict-locks",
++     .voltype = "protocol/client",
++     .option = "strict-locks",
++     .value = "off",
++     .op_version = GD_OP_VERSION_7_2,
++     .validate_fn = validate_boolean,
++     .type = GLOBAL_DOC,
++     .description = "When set, doesn't reopen saved fds after reconnect "
++                    "if POSIX locks are held on them. Hence subsequent "
++                    "operations on these fds will fail. This is "
++                    "necessary for stricter lock complaince as bricks "
++                    "cleanup any granted locks when a client "
++                    "disconnects."},
++
+     /* Server xlator options */
+     {.key = "network.tcp-window-size",
+      .voltype = "protocol/server",
+diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
+index 6b20d92..a12472b 100644
+--- a/xlators/protocol/client/src/client-handshake.c
++++ b/xlators/protocol/client/src/client-handshake.c
+@@ -910,7 +910,8 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
+     {
+         list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
+         {
+-            if (fdctx->remote_fd != -1)
++            if (fdctx->remote_fd != -1 ||
++                (!list_empty(&fdctx->lock_list) && conf->strict_locks))
+                 continue;
+ 
+             fdctx->reopen_done = client_child_up_reopen_done;
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index 53b4484..6543100 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -410,6 +410,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+ {
+     clnt_fd_ctx_t *fdctx = NULL;
+     clnt_conf_t *conf = NULL;
++    gf_boolean_t locks_held = _gf_false;
+ 
+     GF_VALIDATE_OR_GOTO(this->name, fd, out);
+     GF_VALIDATE_OR_GOTO(this->name, remote_fd, out);
+@@ -431,11 +432,13 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+                 *remote_fd = -1;
+             else
+                 *remote_fd = fdctx->remote_fd;
++
++            locks_held = !list_empty(&fdctx->lock_list);
+         }
+     }
+     pthread_spin_unlock(&conf->fd_lock);
+ 
+-    if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1))
++    if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held))
+         *remote_fd = GF_ANON_FD_NO;
+ 
+     return 0;
+diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
+index 679e198..c1fb055 100644
+--- a/xlators/protocol/client/src/client-lk.c
++++ b/xlators/protocol/client/src/client-lk.c
+@@ -351,7 +351,7 @@ delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner)
+ 
+     list_for_each_entry_safe(lock, tmp, &fdctx->lock_list, list)
+     {
+-        if (!is_same_lkowner(&lock->owner, owner)) {
++        if (is_same_lkowner(&lock->owner, owner)) {
+             list_del_init(&lock->list);
+             list_add_tail(&lock->list, &delete_list);
+             count++;
+diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
+index 1c8b31b..3110c78 100644
+--- a/xlators/protocol/client/src/client-rpc-fops.c
++++ b/xlators/protocol/client/src/client-rpc-fops.c
+@@ -22,8 +22,18 @@ int32_t
+ client3_getspec(call_frame_t *frame, xlator_t *this, void *data);
+ rpc_clnt_prog_t clnt3_3_fop_prog;
+ 
+-/* CBK */
++int
++client_is_setlk(int32_t cmd)
++{
++    if ((cmd == F_SETLK) || (cmd == F_SETLK64) || (cmd == F_SETLKW) ||
++        (cmd == F_SETLKW64)) {
++        return 1;
++    }
+ 
++    return 0;
++}
++
++/* CBK */
+ int
+ client3_3_symlink_cbk(struct rpc_req *req, struct iovec *iov, int count,
+                       void *myframe)
+@@ -816,7 +826,8 @@ client3_3_flush_cbk(struct rpc_req *req, struct iovec *iov, int count,
+         goto out;
+     }
+ 
+-    if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) {
++    if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) &&
++        !fd_is_anonymous(local->fd)) {
+         /* Delete all saved locks of the owner issuing flush */
+         ret = delete_granted_locks_owner(local->fd, &local->owner);
+         gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d",
+@@ -2388,10 +2399,12 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
+     int ret = 0;
+     xlator_t *this = NULL;
+     dict_t *xdata = NULL;
++    clnt_local_t *local = NULL;
+ 
+     this = THIS;
+ 
+     frame = myframe;
++    local = frame->local;
+ 
+     if (-1 == req->rpc_status) {
+         rsp.op_ret = -1;
+@@ -2412,6 +2425,18 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
+         ret = client_post_lk(this, &rsp, &lock, &xdata);
+         if (ret < 0)
+             goto out;
++
++        /* Save the lock to the client lock cache to be able
++           to recover in the case of server reboot.*/
++
++        if (client_is_setlk(local->cmd)) {
++            ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner,
++                                               local->cmd);
++            if (ret < 0) {
++                rsp.op_ret = -1;
++                rsp.op_errno = -ret;
++            }
++        }
+     }
+ 
+ out:
+@@ -4263,8 +4288,16 @@ client3_3_flush(call_frame_t *frame, xlator_t *this, void *data)
+     ret = client_pre_flush(this, &req, args->fd, args->xdata);
+     if (ret) {
+         op_errno = -ret;
++        if (op_errno == EBADF) {
++            ret = delete_granted_locks_owner(local->fd, &local->owner);
++            gf_msg_trace(this->name, 0,
++                         "deleting locks of owner (%s) returned %d",
++                         lkowner_utoa(&local->owner), ret);
++        }
++
+         goto unwind;
+     }
++
+     ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
+                                 client3_3_flush_cbk, NULL,
+                                 (xdrproc_t)xdr_gfs3_flush_req);
+@@ -5199,8 +5232,16 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
+                         args->xdata);
+     if (ret) {
+         op_errno = -ret;
++
++        if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) &&
++            client_is_setlk(local->cmd)) {
++            client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
++                                         local->cmd);
++        }
++
+         goto unwind;
+     }
++
+     ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
+                                 client3_3_lk_cbk, NULL,
+                                 (xdrproc_t)xdr_gfs3_lk_req);
+diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
+index 613dda8..954fc58 100644
+--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
+@@ -723,7 +723,8 @@ client4_0_flush_cbk(struct rpc_req *req, struct iovec *iov, int count,
+         goto out;
+     }
+ 
+-    if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) {
++    if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) &&
++        !fd_is_anonymous(local->fd)) {
+         /* Delete all saved locks of the owner issuing flush */
+         ret = delete_granted_locks_owner(local->fd, &local->owner);
+         gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d",
+@@ -2193,10 +2194,12 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
+     int ret = 0;
+     xlator_t *this = NULL;
+     dict_t *xdata = NULL;
++    clnt_local_t *local = NULL;
+ 
+     this = THIS;
+ 
+     frame = myframe;
++    local = frame->local;
+ 
+     if (-1 == req->rpc_status) {
+         rsp.op_ret = -1;
+@@ -2217,6 +2220,18 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
+         ret = client_post_lk_v2(this, &rsp, &lock, &xdata);
+         if (ret < 0)
+             goto out;
++
++        /* Save the lock to the client lock cache to be able
++           to recover in the case of server reboot.*/
++
++        if (client_is_setlk(local->cmd)) {
++            ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner,
++                                               local->cmd);
++            if (ret < 0) {
++                rsp.op_ret = -1;
++                rsp.op_errno = -ret;
++            }
++        }
+     }
+ 
+ out:
+@@ -3998,6 +4013,13 @@ client4_0_flush(call_frame_t *frame, xlator_t *this, void *data)
+     ret = client_pre_flush_v2(this, &req, args->fd, args->xdata);
+     if (ret) {
+         op_errno = -ret;
++        if (op_errno == EBADF) {
++            ret = delete_granted_locks_owner(local->fd, &local->owner);
++            gf_msg_trace(this->name, 0,
++                         "deleting locks of owner (%s) returned %d",
++                         lkowner_utoa(&local->owner), ret);
++        }
++
+         goto unwind;
+     }
+     ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
+@@ -4771,8 +4793,16 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
+                            args->xdata);
+     if (ret) {
+         op_errno = -ret;
++
++        if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) &&
++            client_is_setlk(local->cmd)) {
++            client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
++                                         local->cmd);
++        }
++
+         goto unwind;
+     }
++
+     ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
+                                 client4_0_lk_cbk, NULL,
+                                 (xdrproc_t)xdr_gfx_lk_req);
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index ed855ca..63c90ea 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -2491,6 +2491,7 @@ build_client_config(xlator_t *this, clnt_conf_t *conf)
+     GF_OPTION_INIT("filter-O_DIRECT", conf->filter_o_direct, bool, out);
+ 
+     GF_OPTION_INIT("send-gids", conf->send_gids, bool, out);
++    GF_OPTION_INIT("strict-locks", conf->strict_locks, bool, out);
+ 
+     conf->client_id = glusterfs_leaf_position(this);
+ 
+@@ -2676,6 +2677,7 @@ reconfigure(xlator_t *this, dict_t *options)
+                      out);
+ 
+     GF_OPTION_RECONF("send-gids", conf->send_gids, options, bool, out);
++    GF_OPTION_RECONF("strict-locks", conf->strict_locks, options, bool, out);
+ 
+     ret = 0;
+ out:
+@@ -3032,6 +3034,17 @@ struct volume_options options[] = {
+                     " power. Range 1-32 threads.",
+      .op_version = {GD_OP_VERSION_RHS_3_0},
+      .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
++    {.key = {"strict-locks"},
++     .type = GF_OPTION_TYPE_BOOL,
++     .default_value = "off",
++     .op_version = {GD_OP_VERSION_7_2},
++     .flags = OPT_FLAG_SETTABLE,
++     .description = "When set, doesn't reopen saved fds after reconnect "
++                    "if POSIX locks are held on them. Hence subsequent "
++                    "operations on these fds will fail. This is "
++                    "necessary for stricter lock complaince as bricks "
++                    "cleanup any granted locks when a client "
++                    "disconnects."},
+     {.key = {NULL}},
+ };
+ 
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index f12fa61..bde3d1a 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -235,6 +235,15 @@ typedef struct clnt_conf {
+                                       * up, disconnects can be
+                                       * logged
+                                       */
++
++    gf_boolean_t strict_locks; /* When set, doesn't reopen saved fds after
++                                  reconnect if POSIX locks are held on them.
++                                  Hence subsequent operations on these fds will
++                                  fail. This is necessary for stricter lock
++                                  complaince as bricks cleanup any granted
++                                  locks when a client disconnects.
++                               */
++
+ } clnt_conf_t;
+ 
+ typedef struct _client_fd_ctx {
+@@ -513,4 +522,11 @@ compound_request_cleanup_v2(gfx_compound_req *req);
+ void
+ client_compound_rsp_cleanup_v2(gfx_compound_rsp *rsp, int len);
+ 
++int
++client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
++                             gf_lkowner_t *owner, int32_t cmd);
++
++int
++client_is_setlk(int32_t cmd);
++
+ #endif /* !_CLIENT_H */
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0578-protocol-client-fallback-to-anonymous-fd-for-fsync.patch b/SOURCES/0578-protocol-client-fallback-to-anonymous-fd-for-fsync.patch
new file mode 100644
index 0000000..d5df9e2
--- /dev/null
+++ b/SOURCES/0578-protocol-client-fallback-to-anonymous-fd-for-fsync.patch
@@ -0,0 +1,46 @@
+From ffb4085b3e04878e85bf505a541203aa2ee71e9c Mon Sep 17 00:00:00 2001
+From: l17zhou <cynthia.zhou@nokia-sbell.com>
+Date: Fri, 6 Mar 2020 03:54:02 +0200
+Subject: [PATCH 578/584] protocol/client: fallback to anonymous fd for fsync
+
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24203/
+> Change-Id: I32f801206ce7fbd05aa693f44c2f140304f2e275
+> Fixes: bz#1810842
+
+BUG: 1689375
+Change-Id: I32f801206ce7fbd05aa693f44c2f140304f2e275
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245538
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/protocol/client/src/client-common.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c
+index 64db98d..1417a60 100644
+--- a/xlators/protocol/client/src/client-common.c
++++ b/xlators/protocol/client/src/client-common.c
+@@ -449,7 +449,8 @@ client_pre_fsync(xlator_t *this, gfs3_fsync_req *req, fd_t *fd, int32_t flags,
+     int64_t remote_fd = -1;
+     int op_errno = 0;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
++                         out);
+ 
+     req->fd = remote_fd;
+     req->data = flags;
+@@ -2641,7 +2642,8 @@ client_pre_fsync_v2(xlator_t *this, gfx_fsync_req *req, fd_t *fd, int32_t flags,
+     int64_t remote_fd = -1;
+     int op_errno = 0;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
++                         out);
+ 
+     req->fd = remote_fd;
+     req->data = flags;
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0579-cli-changing-rebal-task-ID-to-None-in-case-status-is.patch b/SOURCES/0579-cli-changing-rebal-task-ID-to-None-in-case-status-is.patch
new file mode 100644
index 0000000..d568966
--- /dev/null
+++ b/SOURCES/0579-cli-changing-rebal-task-ID-to-None-in-case-status-is.patch
@@ -0,0 +1,168 @@
+From 96c4c3c47c914aced8864e7d178a4d57f7fced05 Mon Sep 17 00:00:00 2001
+From: Tamar Shacked <tshacked@redhat.com>
+Date: Sun, 6 Jun 2021 14:26:18 +0300
+Subject: [PATCH 579/584] cli: changing rebal task ID to "None" in case status
+ is being reset
+
+Rebalance status is being reset during replace/reset-brick operations.
+This causes 'volume status' to show rebalance as "not started".
+
+Fix:
+change rebalance-status to "reset due to (replace|reset)-brick"
+
+Backport of:
+> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/1869
+> Change-Id: Ia73a8bea3dcd8e51acf4faa6434c3cb0d09856d0
+> Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+> Fixes: #1717
+
+BUG: 1889966
+
+Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+Change-Id: Ia73a8bea3dcd8e51acf4faa6434c3cb0d09856d0
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245402
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-rpc-ops.c                              | 15 ++++++-
+ rpc/xdr/src/cli1-xdr.x                             |  2 +
+ tests/bugs/glusterd/reset-rebalance-state.t        | 46 ++++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-replace-brick.c |  4 +-
+ xlators/mgmt/glusterd/src/glusterd-reset-brick.c   |  3 +-
+ 5 files changed, 65 insertions(+), 5 deletions(-)
+ create mode 100644 tests/bugs/glusterd/reset-rebalance-state.t
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 51b5447..4167c68 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -72,6 +72,8 @@ char *cli_vol_task_status_str[] = {"not started",
+                                    "fix-layout stopped",
+                                    "fix-layout completed",
+                                    "fix-layout failed",
++                                   "reset due to replace-brick",
++                                   "reset due to reset-brick",
+                                    "unknown"};
+ 
+ int32_t
+@@ -8357,12 +8359,21 @@ cli_print_volume_status_tasks(dict_t *dict)
+         ret = dict_get_str(dict, key, &task_id_str);
+         if (ret)
+             return;
+-        cli_out("%-20s : %-20s", "ID", task_id_str);
+ 
+         snprintf(key, sizeof(key), "task%d.status", i);
+         ret = dict_get_int32(dict, key, &status);
+-        if (ret)
++        if (ret) {
++            cli_out("%-20s : %-20s", "ID", task_id_str);
+             return;
++        }
++
++        if (!strcmp(op, "Rebalance") &&
++            (status == GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC ||
++             status == GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC)) {
++            task_id_str = "None";
++        }
++
++        cli_out("%-20s : %-20s", "ID", task_id_str);
+ 
+         snprintf(task, sizeof(task), "task%d", i);
+ 
+diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
+index 777cb00..17d96f1 100644
+--- a/rpc/xdr/src/cli1-xdr.x
++++ b/rpc/xdr/src/cli1-xdr.x
+@@ -45,6 +45,8 @@
+         GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+         GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+         GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
++        GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC,
++        GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC,
+         GF_DEFRAG_STATUS_MAX
+ };
+ 
+diff --git a/tests/bugs/glusterd/reset-rebalance-state.t b/tests/bugs/glusterd/reset-rebalance-state.t
+new file mode 100644
+index 0000000..829d2b1
+--- /dev/null
++++ b/tests/bugs/glusterd/reset-rebalance-state.t
+@@ -0,0 +1,46 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../cluster.rc
++. $(dirname $0)/../../volume.rc
++
++
++get_rebalance_status() {
++    $CLI volume status $V0 | egrep ^"Status   " | awk '{print $3}'
++}
++
++run_rebal_check_status() {
++    TEST $CLI volume rebalance $V0 start
++    EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
++    REBAL_STATE=$(get_rebalance_status)
++    TEST [ $REBAL_STATE == "completed" ]
++}
++
++replace_brick_check_status() {
++    TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_replace commit force
++    REBAL_STATE=$(get_rebalance_status)
++    TEST [ $REBAL_STATE == "reset" ]
++}
++
++reset_brick_check_status() {
++    TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}2 start
++    TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}2 commit force
++    REBAL_STATE=$(get_rebalance_status)
++    TEST [ $REBAL_STATE == "reset" ]
++}
++
++cleanup;
++
++TEST glusterd;
++TEST pidof glusterd;
++
++TEST $CLI volume info;
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..6} force;
++TEST $CLI volume start $V0;
++
++run_rebal_check_status;
++replace_brick_check_status;
++reset_brick_check_status;
++
++cleanup;
++
+diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+index 0615081..80b80e4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
++++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+@@ -548,8 +548,8 @@ glusterd_op_replace_brick(dict_t *dict, dict_t *rsp_dict)
+         (void)glusterd_svcs_manager(volinfo);
+         goto out;
+     }
+-
+-    volinfo->rebal.defrag_status = 0;
++    if (volinfo->rebal.defrag_status != GF_DEFRAG_STATUS_NOT_STARTED)
++        volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC;
+ 
+     ret = glusterd_svcs_manager(volinfo);
+     if (ret) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
+index cf04ce8..19d7549 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
++++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
+@@ -342,7 +342,8 @@ glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict)
+             goto out;
+         }
+ 
+-        volinfo->rebal.defrag_status = 0;
++        if (volinfo->rebal.defrag_status != GF_DEFRAG_STATUS_NOT_STARTED)
++            volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC;
+ 
+         ret = glusterd_svcs_manager(volinfo);
+         if (ret) {
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0580-cluster-dht-suppress-file-migration-error-for-node-n.patch b/SOURCES/0580-cluster-dht-suppress-file-migration-error-for-node-n.patch
new file mode 100644
index 0000000..06befeb
--- /dev/null
+++ b/SOURCES/0580-cluster-dht-suppress-file-migration-error-for-node-n.patch
@@ -0,0 +1,138 @@
+From a5da8bb830e86b6dd77a06cd59d220052e80b21c Mon Sep 17 00:00:00 2001
+From: Tamar Shacked <tshacked@redhat.com>
+Date: Sun, 6 Jun 2021 11:57:06 +0300
+Subject: [PATCH 580/584] cluster/dht: suppress file migration error for node
+ not supposed to migrate file
+
+A rebalance process does a lookup for every file in the dir it is processing
+before checking if it is supposed to migrate the file.
+In this issue there are two rebalance processes running on a replica subvol:
+R1 is migrating the FILE.
+R2 is not supposed to migrate the FILE, but it does a lookup and
+   finds a stale linkfile which is mostly due to a stale layout.
+   Then, it tries to unlink the stale linkfile and gets EBUSY
+   as the linkfile fd is open due to R1 migration.
+   As a result a misleading error msg about FILE migration failure
+   due to EBUSY is logged in R2 logfile.
+
+Fix:
+suppress the error in case it occurred in a node that
+is not supposed to migrate the file.
+
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24712/
+> fixes: #1371
+> Change-Id: I37832b404e2b0cc40ac5caf45f14c32c891e71f3
+> Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+
+BUG: 1815462
+Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+Change-Id: I915ee8e7470d85a849b198bfa7d58d368a246aae
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245401
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 38 ++++++++++++++++++++++-----------
+ 1 file changed, 25 insertions(+), 13 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index e07dec0..cc0f2c9 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -2604,10 +2604,10 @@ out:
+  * all hardlinks.
+  */
+ 
+-int
++gf_boolean_t
+ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
+ {
+-    int ret = 0;
++    gf_boolean_t ret = _gf_false;
+     int i = local_subvol_index;
+     char *str = NULL;
+     uint32_t hashval = 0;
+@@ -2629,12 +2629,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
+     }
+ 
+     str = uuid_utoa_r(gfid, buf);
+-    ret = dht_hash_compute(this, 0, str, &hashval);
+-    if (ret == 0) {
++    if (dht_hash_compute(this, 0, str, &hashval) == 0) {
+         index = (hashval % entry->count);
+         if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+             /* Index matches this node's nodeuuid.*/
+-            ret = 1;
++            ret = _gf_true;
+             goto out;
+         }
+ 
+@@ -2647,12 +2646,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
+                 /* None of the bricks in the subvol are up.
+                  * CHILD_DOWN will kill the process soon */
+ 
+-                return 0;
++                return _gf_false;
+             }
+ 
+             if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+                 /* Index matches this node's nodeuuid.*/
+-                ret = 1;
++                ret = _gf_true;
+                 goto out;
+             }
+         }
+@@ -2701,6 +2700,7 @@ gf_defrag_migrate_single_file(void *opaque)
+     struct iatt *iatt_ptr = NULL;
+     gf_boolean_t update_skippedcount = _gf_true;
+     int i = 0;
++    gf_boolean_t should_i_migrate = 0;
+ 
+     rebal_entry = (struct dht_container *)opaque;
+     if (!rebal_entry) {
+@@ -2754,11 +2754,29 @@ gf_defrag_migrate_single_file(void *opaque)
+         goto out;
+     }
+ 
++    should_i_migrate = gf_defrag_should_i_migrate(
++        this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
++
+     gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
+ 
+     gf_uuid_copy(entry_loc.pargfid, loc->gfid);
+ 
+     ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
++
++    if (!should_i_migrate) {
++        /* this node isn't supposed to migrate the file. suppressing any
++         * potential error from lookup as this file is under migration by
++         * another node */
++        if (ret) {
++            gf_msg_debug(this->name, -ret,
++                         "Ignoring lookup failure: node isn't migrating %s",
++                         entry_loc.path);
++            ret = 0;
++        }
++        gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
++        goto out;
++    }
++
+     if (ret) {
+         gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+                "Migrate file failed: %s lookup failed", entry_loc.path);
+@@ -2779,12 +2797,6 @@ gf_defrag_migrate_single_file(void *opaque)
+         goto out;
+     }
+ 
+-    if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
+-                                    entry->d_stat.ia_gfid)) {
+-        gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+-        goto out;
+-    }
+-
+     iatt_ptr = &iatt;
+ 
+     hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0581-afr-don-t-reopen-fds-on-which-POSIX-locks-are-held.patch b/SOURCES/0581-afr-don-t-reopen-fds-on-which-POSIX-locks-are-held.patch
new file mode 100644
index 0000000..1267608
--- /dev/null
+++ b/SOURCES/0581-afr-don-t-reopen-fds-on-which-POSIX-locks-are-held.patch
@@ -0,0 +1,1431 @@
+From 57c794e31c0333f508ada740227c9afa1889f8ae Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Thu, 15 Apr 2021 11:27:57 +0530
+Subject: [PATCH 581/584] afr: don't reopen fds on which POSIX locks are held
+
+When client.strict-locks is enabled on a volume and POSIX locks are held
+on files, do not re-open such fds after the clients disconnect and
+reconnect, since re-opening them might lead to multiple clients acquiring
+the locks and cause data corruption.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/1980/commits/56bde56c2741c5eac59937a6cf951a14f2878460
+> Change-Id: I8777ffbc2cc8d15ab57b58b72b56eb67521787c5
+> Fixes: #1977
+> Signed-off-by: karthik-us <ksubrahm@redhat.com>
+
+BUG: 1689375
+Change-Id: I8777ffbc2cc8d15ab57b58b72b56eb67521787c5
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245414
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+---
+ rpc/rpc-lib/src/protocol-common.h                |   6 +
+ tests/bugs/replicate/do-not-reopen-fd.t          | 206 +++++++++++++++++
+ xlators/cluster/afr/src/afr-common.c             |  15 +-
+ xlators/cluster/afr/src/afr-open.c               | 280 +++++++++++++++++++----
+ xlators/cluster/afr/src/afr.h                    |   3 +
+ xlators/protocol/client/src/client-common.c      | 148 ++++++++----
+ xlators/protocol/client/src/client-common.h      |   4 +
+ xlators/protocol/client/src/client-helpers.c     |  22 +-
+ xlators/protocol/client/src/client-rpc-fops.c    |  23 +-
+ xlators/protocol/client/src/client-rpc-fops_v2.c |  25 +-
+ xlators/protocol/client/src/client.c             |  21 +-
+ xlators/protocol/client/src/client.h             |   8 +-
+ 12 files changed, 654 insertions(+), 107 deletions(-)
+ create mode 100644 tests/bugs/replicate/do-not-reopen-fd.t
+
+diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
+index 779878f..f56aaaa 100644
+--- a/rpc/rpc-lib/src/protocol-common.h
++++ b/rpc/rpc-lib/src/protocol-common.h
+@@ -312,6 +312,12 @@ enum glusterd_mgmt_v3_procnum {
+     GLUSTERD_MGMT_V3_MAXVALUE,
+ };
+ 
++enum gf_fd_reopen_status { /* "fd-reopen-status" values checked by afr */
++    FD_REOPEN_ALLOWED = 0, /* no conflicting locks held; reopen allowed */
++    FD_REOPEN_NOT_ALLOWED, /* conflicting locks held; reopen not allowed */
++    FD_BAD,                /* fd is not valid */
++};
++
+ typedef struct gf_gsync_detailed_status_ gf_gsync_status_t;
+ 
+ enum gf_get_volume_info_type {
+diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
+new file mode 100644
+index 0000000..76d8e70
+--- /dev/null
++++ b/tests/bugs/replicate/do-not-reopen-fd.t
+@@ -0,0 +1,206 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fileio.rc
++
++cleanup;
++
++TEST glusterd;
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 client.strict-locks on
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume start $V0
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
++
++TEST touch $M0/a
++
++# Kill one brick and take lock on the fd and do a write.
++TEST kill_brick $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'rw' $M0/a
++
++TEST flock -x $fd1
++TEST fd_write $fd1 "data-1"
++
++# Restart the brick and then write. Now fd should not get re-opened but write
++# should still succeed as there were no quorum disconnects.
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
++TEST fd_write $fd1 "data-2"
++EXPECT "" cat $B0/${V0}0/a
++EXPECT "data-2" cat $B0/${V0}1/a
++EXPECT "data-2" cat $B0/${V0}2/a
++
++# Check there is no fd opened on the 1st brick by checking for the gfid inside
++# /proc/pid-of-brick/fd/ directory
++gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
++gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
++
++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
++TEST fd2=`fd_available`
++TEST fd_open $fd2 'rw' $M1/a
++
++# Kill 2nd brick and try writing to the file. The write should fail due to
++# quorum failure.
++TEST kill_brick $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
++TEST ! fd_write $fd1 "data-3"
++TEST ! fd_cat $fd1
++
++# Restart the bricks and try writing to the file. This should fail, as the two
++# bricks which were down previously will return EBADFD now.
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
++TEST ! fd_write $fd1 "data-4"
++TEST ! fd_cat $fd1
++
++# Enable heal and check the files will have same content on all the bricks after
++# the heal is completed.
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++EXPECT "data-4" cat $B0/${V0}0/a
++EXPECT "data-4" cat $B0/${V0}1/a
++EXPECT "data-4" cat $B0/${V0}2/a
++TEST $CLI volume heal $V0 disable
++
++# Try writing to the file again on the same fd, which should fail again, since
++# it is not yet re-opened.
++TEST ! fd_write $fd1 "data-5"
++
++# At this point only one brick will have the lock. Try taking the lock again on
++# the bad fd, which should also fail with EBADFD.
++TEST ! flock -x $fd1
++
++# Kill the only brick that is having lock and try taking lock on another client
++# which should succeed.
++TEST kill_brick $V0 $H0 $B0/${V0}2
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
++TEST flock -x $fd2
++TEST fd_write $fd2 "data-6"
++
++# Bring the brick up and try writing & reading on the old fd, which should still
++# fail and operations on the 2nd fd should succeed.
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
++TEST ! fd_write $fd1 "data-7"
++
++TEST ! fd_cat $fd1
++TEST fd_cat $fd2
++
++# Close both the fds which will release the locks and then re-open and take lock
++# on the old fd. Operations on that fd should succeed afterwards.
++TEST fd_close $fd1
++TEST fd_close $fd2
++
++TEST ! ls /proc/$$/fd/$fd1
++TEST ! ls /proc/$$/fd/$fd2
++EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'rw' $M0/a
++EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
++TEST flock -x $fd1
++TEST fd_write $fd1 "data-8"
++TEST fd_cat $fd1
++
++EXPECT "data-8" head -n 1 $B0/${V0}0/a
++EXPECT "data-8" head -n 1 $B0/${V0}1/a
++EXPECT "data-8" head -n 1 $B0/${V0}2/a
++
++TEST fd_close $fd1
++
++# Heal the volume
++TEST $CLI volume heal $V0 enable
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++
++TEST $CLI volume heal $V0
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++TEST $CLI volume heal $V0 disable
++
++# Kill one brick and open a fd.
++TEST kill_brick $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'rw' $M0/a
++
++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
++# Restart the brick and then write. Now fd should get re-opened and write should
++# succeed on the previously down brick as well since there are no locks held on
++# any of the bricks.
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
++TEST fd_write $fd1 "data-10"
++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++
++EXPECT "data-10" head -n 1 $B0/${V0}0/a
++EXPECT "data-10" head -n 1 $B0/${V0}1/a
++EXPECT "data-10" head -n 1 $B0/${V0}2/a
++TEST fd_close $fd1
++
++# Kill one brick, open and take lock on a fd.
++TEST kill_brick $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'rw' $M0/a
++TEST flock -x $fd1
++
++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
++# Kill & restart another brick so that it will return EBADFD
++TEST kill_brick $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
++
++# Restart the bricks and then write. Now fd should not get re-opened since lock
++# is still held on one brick and write should also fail as there is no quorum.
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
++TEST ! fd_write $fd1 "data-11"
++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
++EXPECT "data-10" head -n 1 $B0/${V0}0/a
++EXPECT "data-10" head -n 1 $B0/${V0}1/a
++EXPECT "data-11" head -n 1 $B0/${V0}2/a
++
++TEST fd_close $fd1
++cleanup
+diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
+index 416012c..bd46e59 100644
+--- a/xlators/cluster/afr/src/afr-common.c
++++ b/xlators/cluster/afr/src/afr-common.c
+@@ -2067,6 +2067,8 @@ afr_local_cleanup(afr_local_t *local, xlator_t *this)
+             dict_unref(local->cont.entrylk.xdata);
+     }
+ 
++    GF_FREE(local->need_open);
++
+     if (local->xdata_req)
+         dict_unref(local->xdata_req);
+ 
+@@ -5689,6 +5691,14 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+     }
+     local->is_new_entry = _gf_false;
+ 
++    local->need_open = GF_CALLOC(priv->child_count, sizeof(*local->need_open),
++                                 gf_afr_mt_char);
++    if (!local->need_open) {
++        if (op_errno)
++            *op_errno = ENOMEM;
++        goto out;
++    }
++
+     INIT_LIST_HEAD(&local->healer);
+     return 0;
+ out:
+@@ -6124,9 +6134,8 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
+     char *substr = NULL;
+     char *status = NULL;
+ 
+-    ret = afr_lockless_inspect(frame, this, loc->gfid, &inode,
+-                               &entry_selfheal, &data_selfheal,
+-                               &metadata_selfheal, &pending);
++    ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, &entry_selfheal,
++                               &data_selfheal, &metadata_selfheal, &pending);
+ 
+     if (ret == -ENOMEM) {
+         ret = -1;
+diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
+index ff72c73..73c1552 100644
+--- a/xlators/cluster/afr/src/afr-open.c
++++ b/xlators/cluster/afr/src/afr-open.c
+@@ -35,6 +35,8 @@
+ #include "afr-dir-read.h"
+ #include "afr-dir-write.h"
+ #include "afr-transaction.h"
++#include "afr-self-heal.h"
++#include "protocol-common.h"
+ 
+ gf_boolean_t
+ afr_is_fd_fixable(fd_t *fd)
+@@ -239,8 +241,32 @@ afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     return 0;
+ }
+ 
++/* For every child that is flagged in need_open and whose fd_ctx state is
++ * still AFR_FD_OPENING, put the state back to AFR_FD_NOT_OPENED and clear
++ * the need_open flag. Runs under fd->lock; returns without doing anything
++ * when afr_fd_ctx_get() yields NULL. */
++static void
++afr_fd_ctx_reset_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
++{
++    afr_private_t *priv = this->private;
++    afr_fd_ctx_t *fd_ctx = afr_fd_ctx_get(fd, this);
++    int child = 0;
++
++    if (!fd_ctx)
++        return;
++
++    LOCK(&fd->lock);
++    for (child = 0; child < priv->child_count; child++) {
++        if (!need_open[child] || fd_ctx->opened_on[child] != AFR_FD_OPENING)
++            continue;
++        fd_ctx->opened_on[child] = AFR_FD_NOT_OPENED;
++        need_open[child] = 0;
++    }
++    UNLOCK(&fd->lock);
++}
++
+ static int
+-afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
++afr_fd_ctx_set_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
+ {
+     afr_fd_ctx_t *fd_ctx = NULL;
+     afr_private_t *priv = NULL;
+@@ -248,7 +274,6 @@ afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
+     int count = 0;
+ 
+     priv = this->private;
+-
+     fd_ctx = afr_fd_ctx_get(fd, this);
+     if (!fd_ctx)
+         return 0;
+@@ -271,21 +296,217 @@ afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
+     return count;
+ }
+ 
++static int
++afr_do_fix_open(call_frame_t *frame, xlator_t *this)
++{
++    afr_local_t *local = frame->local;
++    afr_private_t *priv = NULL;
++    int i = 0;
++    int need_open_count = 0;
++    /* Wind open/opendir to each child flagged in local->need_open. */
++    priv = this->private;
++
++    need_open_count = AFR_COUNT(local->need_open, priv->child_count);
++    if (!need_open_count) {
++        goto out;
++    }
++    gf_msg_debug(this->name, 0, "need open count: %d", need_open_count);
++    local->call_count = need_open_count;
++
++    for (i = 0; i < priv->child_count; i++) {
++        if (!local->need_open[i])
++            continue;
++
++        if (IA_IFDIR == local->fd->inode->ia_type) {
++            gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s",
++                         local->loc.path, priv->children[i]->name);
++            STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
++                              priv->children[i],
++                              priv->children[i]->fops->opendir, &local->loc,
++                              local->fd, NULL);
++        } else {
++            gf_msg_debug(this->name, 0,
++                         "opening fd for file %s on subvolume %s",
++                         local->loc.path, priv->children[i]->name);
++            /* O_CREAT, O_EXCL and O_TRUNC are masked out of the open flags. */
++            STACK_WIND_COOKIE(
++                frame, afr_openfd_fix_open_cbk, (void *)(long)i,
++                priv->children[i], priv->children[i]->fops->open, &local->loc,
++                local->fd_ctx->flags & ~(O_CREAT | O_EXCL | O_TRUNC), local->fd,
++                NULL);
++        }
++        if (!--need_open_count)
++            break;
++    }
++    return 0;
++    /* Reached only when need_open_count was zero: no wind was issued. */
++out:
++    afr_fd_ctx_reset_need_open(local->fd, this, local->need_open);
++    AFR_STACK_DESTROY(frame);
++    return 0;
++}
++
++/* Callback for the F_GETLK requests wound by afr_is_reopen_allowed(). Saves
++ * each brick's reply and, once the last reply has arrived, combines the
++ * per-brick "fd-reopen-status" values to either continue with
++ * afr_do_fix_open() or reset need_open and destroy the frame. Always
++ * returns 0. */
++static int
++afr_is_reopen_allowed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                          int32_t op_ret, int32_t op_errno,
++                          struct gf_flock *lock, dict_t *xdata)
++{
++    afr_local_t *local = frame->local;
++    afr_private_t *priv = NULL;
++    int ret = -1;
++    int call_count = 0;
++    int i = (long)cookie;
++    int32_t fd_reopen_status = -1;
++    int32_t final_reopen_status = -1;
++
++    priv = this->private;
++    local->replies[i].valid = 1;
++    local->replies[i].op_ret = op_ret;
++    local->replies[i].op_errno = op_errno;
++    if (op_ret != 0) {
++        gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_DICT_GET_FAILED,
++               "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
++    }
++
++    if (xdata)
++        local->replies[i].xdata = dict_ref(xdata);
++
++    call_count = afr_frame_return(frame);
++
++    if (call_count)
++        return 0;
++
++    /* The lower layer (protocol/client) reports one of three values in the
++     * getlk reply xdata:
++     *  FD_REOPEN_ALLOWED     : no conflicting locks are held; reopen allowed
++     *  FD_REOPEN_NOT_ALLOWED : conflicting locks are held; reopen not allowed
++     *  FD_BAD                : fd is not valid
++     *
++     * - FD_REOPEN_NOT_ALLOWED from any brick blocks the reopen; it has the
++     *   highest priority, even when other bricks report FD_BAD.
++     * - FD_BAD from every brick: do not reopen, the real status is unknown.
++     * - FD_BAD from some bricks and FD_REOPEN_ALLOWED from one or more
++     *   bricks: allow the reopen.
++     *
++     * final_reopen_status is updated only for values >= FD_REOPEN_ALLOWED and
++     * < FD_BAD; FD_BAD is never stored in it since that can lead to
++     * unexpected behaviour.
++     *
++     * Each brick's status must be read from its own saved reply xdata, not
++     * from the xdata of whichever callback happened to arrive last. If
++     * final_reopen_status is still -1 after the loop, no brick gave a usable
++     * status (or no brick has a valid fd), and the fd is not reopened. */
++
++    for (i = 0; i < priv->child_count; i++) {
++        if (final_reopen_status != FD_REOPEN_NOT_ALLOWED &&
++            local->replies[i].xdata) {
++            ret = dict_get_int32(local->replies[i].xdata, "fd-reopen-status",
++                                 &fd_reopen_status);
++            if (ret) {
++                gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
++                       "Failed to get whether reopen is allowed or not on fd "
++                       "for file %s on subvolume %s.",
++                       local->loc.path, priv->children[i]->name);
++            } else if (fd_reopen_status >= FD_REOPEN_ALLOWED &&
++                       fd_reopen_status < FD_BAD) {
++                final_reopen_status = fd_reopen_status;
++            }
++        }
++
++        if (final_reopen_status == FD_REOPEN_NOT_ALLOWED)
++            break;
++    }
++
++    if (final_reopen_status == FD_REOPEN_NOT_ALLOWED) {
++        gf_log(this->name, GF_LOG_INFO,
++               "Conflicting locks held on file %s. FD reopen is not allowed.",
++               local->loc.path);
++    } else if (final_reopen_status == -1) {
++        gf_log(this->name, GF_LOG_INFO,
++               "Failed to get the lock information "
++               "on file %s. FD reopen is not allowed.",
++               local->loc.path);
++    } else {
++        afr_local_replies_wipe(local, priv);
++        afr_do_fix_open(frame, this);
++        return 0;
++    }
++
++    afr_fd_ctx_reset_need_open(local->fd, this, local->need_open);
++    AFR_STACK_DESTROY(frame);
++    return 0;
++}
++
+ void
+-afr_fix_open(fd_t *fd, xlator_t *this)
++afr_is_reopen_allowed(xlator_t *this, call_frame_t *frame)
+ {
+     afr_private_t *priv = NULL;
++    afr_local_t *local = NULL;
++    dict_t *xdata = NULL;
+     int i = 0;
++    int call_count = 0;
++    struct gf_flock flock = {
++        0,
++    };
++
++    local = frame->local;
++    priv = this->private;
++
++    flock.l_type = F_WRLCK;
++    afr_set_lk_owner(frame, this, frame->root);
++    lk_owner_copy(&flock.l_owner, &frame->root->lk_owner);
++
++    call_count = AFR_COUNT(local->child_up, priv->child_count);
++    if (!call_count)
++        goto out;
++    local->call_count = call_count;
++
++    xdata = dict_new();
++    if (xdata == NULL)
++        goto out;
++
++    if (dict_set_int32(xdata, "fd-reopen-status", -1))
++        goto out;
++
++    for (i = 0; i < priv->child_count; i++) {
++        if (local->child_up[i]) {
++            STACK_WIND_COOKIE(frame, afr_is_reopen_allowed_cbk, (void *)(long)i,
++                              priv->children[i], priv->children[i]->fops->lk,
++                              local->fd, F_GETLK, &flock, xdata);
++        } else {
++            continue;
++        }
++
++        if (!--call_count)
++            break;
++    }
++
++    dict_unref(xdata);
++    return;
++
++out:
++    if (xdata)
++        dict_unref(xdata);
++    afr_fd_ctx_reset_need_open(local->fd, this, local->need_open);
++    AFR_STACK_DESTROY(frame);
++    return;
++}
++
++void
++afr_fix_open(fd_t *fd, xlator_t *this)
++{
+     call_frame_t *frame = NULL;
+     afr_local_t *local = NULL;
+     int ret = -1;
+     int32_t op_errno = 0;
+     afr_fd_ctx_t *fd_ctx = NULL;
+-    unsigned char *need_open = NULL;
+     int call_count = 0;
+ 
+-    priv = this->private;
+-
+     if (!afr_is_fd_fixable(fd))
+         goto out;
+ 
+@@ -293,12 +514,6 @@ afr_fix_open(fd_t *fd, xlator_t *this)
+     if (!fd_ctx)
+         goto out;
+ 
+-    need_open = alloca0(priv->child_count);
+-
+-    call_count = afr_fd_ctx_need_open(fd, this, need_open);
+-    if (!call_count)
+-        goto out;
+-
+     frame = create_frame(this, this->ctx->pool);
+     if (!frame)
+         goto out;
+@@ -307,47 +522,24 @@ afr_fix_open(fd_t *fd, xlator_t *this)
+     if (!local)
+         goto out;
+ 
++    call_count = afr_fd_ctx_set_need_open(fd, this, local->need_open);
++    if (!call_count)
++        goto out;
++
+     local->loc.inode = inode_ref(fd->inode);
+     ret = loc_path(&local->loc, NULL);
+     if (ret < 0)
+         goto out;
+-
+     local->fd = fd_ref(fd);
+     local->fd_ctx = fd_ctx;
+ 
+-    local->call_count = call_count;
+-
+-    gf_msg_debug(this->name, 0, "need open count: %d", call_count);
+-
+-    for (i = 0; i < priv->child_count; i++) {
+-        if (!need_open[i])
+-            continue;
+-
+-        if (IA_IFDIR == fd->inode->ia_type) {
+-            gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s",
+-                         local->loc.path, priv->children[i]->name);
+-
+-            STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+-                              priv->children[i],
+-                              priv->children[i]->fops->opendir, &local->loc,
+-                              local->fd, NULL);
+-        } else {
+-            gf_msg_debug(this->name, 0,
+-                         "opening fd for file %s on subvolume %s",
+-                         local->loc.path, priv->children[i]->name);
+-
+-            STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+-                              priv->children[i], priv->children[i]->fops->open,
+-                              &local->loc, fd_ctx->flags & (~O_TRUNC),
+-                              local->fd, NULL);
+-        }
+-
+-        if (!--call_count)
+-            break;
+-    }
+-
++    afr_is_reopen_allowed(this, frame);
+     return;
++
+ out:
++    if (call_count)
++        afr_fd_ctx_reset_need_open(fd, this, local->need_open);
+     if (frame)
+         AFR_STACK_DESTROY(frame);
++    return;
+ }
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index 6a9a763..ffc7317 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -895,6 +895,9 @@ typedef struct _afr_local {
+     afr_ta_fop_state_t fop_state;
+     int ta_failed_subvol;
+     gf_boolean_t is_new_entry;
++
++    /* For fix_open */
++    unsigned char *need_open;
+ } afr_local_t;
+ 
+ typedef struct afr_spbc_timeout {
+diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c
+index 1417a60..92cda12 100644
+--- a/xlators/protocol/client/src/client-common.c
++++ b/xlators/protocol/client/src/client-common.c
+@@ -343,7 +343,7 @@ client_pre_readv(xlator_t *this, gfs3_read_req *req, fd_t *fd, size_t size,
+     int op_errno = ESTALE;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_READ, out);
+ 
+     req->size = size;
+     req->offset = offset;
+@@ -368,7 +368,7 @@ client_pre_writev(xlator_t *this, gfs3_write_req *req, fd_t *fd, size_t size,
+     int op_errno = ESTALE;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_WRITE, out);
+ 
+     req->size = size;
+     req->offset = offset;
+@@ -429,7 +429,8 @@ client_pre_flush(xlator_t *this, gfs3_flush_req *req, fd_t *fd, dict_t *xdata)
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FLUSH, out);
+ 
+     req->fd = remote_fd;
+     memcpy(req->gfid, fd->inode->gfid, 16);
+@@ -450,7 +451,7 @@ client_pre_fsync(xlator_t *this, gfs3_fsync_req *req, fd_t *fd, int32_t flags,
+     int op_errno = 0;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_FSYNC, out);
+ 
+     req->fd = remote_fd;
+     req->data = flags;
+@@ -591,7 +592,8 @@ client_pre_fsyncdir(xlator_t *this, gfs3_fsyncdir_req *req, fd_t *fd,
+     int32_t op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FSYNCDIR, out);
+ 
+     req->fd = remote_fd;
+     req->data = flags;
+@@ -668,7 +670,8 @@ client_pre_ftruncate(xlator_t *this, gfs3_ftruncate_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = EINVAL;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FTRUNCATE, out);
+ 
+     req->offset = offset;
+     req->fd = remote_fd;
+@@ -687,7 +690,8 @@ client_pre_fstat(xlator_t *this, gfs3_fstat_req *req, fd_t *fd, dict_t *xdata)
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FSTAT, out);
+ 
+     req->fd = remote_fd;
+     memcpy(req->gfid, fd->inode->gfid, 16);
+@@ -710,7 +714,8 @@ client_pre_lk(xlator_t *this, gfs3_lk_req *req, int32_t cmd,
+     int32_t gf_type = 0;
+     int ret = 0;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_LK, out);
+ 
+     ret = client_cmd_to_gf_cmd(cmd, &gf_cmd);
+     if (ret) {
+@@ -787,7 +792,8 @@ client_pre_readdir(xlator_t *this, gfs3_readdir_req *req, fd_t *fd, size_t size,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_READDIR, out);
+ 
+     req->size = size;
+     req->offset = offset;
+@@ -869,7 +875,7 @@ client_pre_finodelk(xlator_t *this, gfs3_finodelk_req *req, fd_t *fd, int cmd,
+     int32_t gf_cmd = 0;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_FINODELK, out);
+ 
+     if (cmd == F_GETLK || cmd == F_GETLK64)
+         gf_cmd = GF_LK_GETLK;
+@@ -952,7 +958,8 @@ client_pre_fentrylk(xlator_t *this, gfs3_fentrylk_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FENTRYLK, out);
+ 
+     req->fd = remote_fd;
+     req->cmd = cmd_entrylk;
+@@ -1013,7 +1020,7 @@ client_pre_fxattrop(xlator_t *this, gfs3_fxattrop_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_FXATTROP, out);
+ 
+     req->fd = remote_fd;
+     req->flags = flags;
+@@ -1039,7 +1046,8 @@ client_pre_fgetxattr(xlator_t *this, gfs3_fgetxattr_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FGETXATTR, out);
+ 
+     req->namelen = 1; /* Use it as a flag */
+     req->fd = remote_fd;
+@@ -1065,7 +1073,8 @@ client_pre_fsetxattr(xlator_t *this, gfs3_fsetxattr_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FSETXATTR, out);
+ 
+     req->fd = remote_fd;
+     req->flags = flags;
+@@ -1091,7 +1100,8 @@ client_pre_rchecksum(xlator_t *this, gfs3_rchecksum_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_RCHECKSUM, out);
+ 
+     req->len = len;
+     req->offset = offset;
+@@ -1141,7 +1151,8 @@ client_pre_fsetattr(xlator_t *this, gfs3_fsetattr_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FSETATTR, out);
+ 
+     req->fd = remote_fd;
+     req->valid = valid;
+@@ -1161,7 +1172,8 @@ client_pre_readdirp(xlator_t *this, gfs3_readdirp_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_READDIRP, out);
+ 
+     req->size = size;
+     req->offset = offset;
+@@ -1187,7 +1199,8 @@ client_pre_fremovexattr(xlator_t *this, gfs3_fremovexattr_req *req, fd_t *fd,
+     if (!(fd && fd->inode))
+         goto out;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FREMOVEXATTR, out);
+ 
+     memcpy(req->gfid, fd->inode->gfid, 16);
+     req->name = (char *)name;
+@@ -1208,7 +1221,8 @@ client_pre_fallocate(xlator_t *this, gfs3_fallocate_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FALLOCATE, out);
+ 
+     req->fd = remote_fd;
+     req->flags = flags;
+@@ -1230,7 +1244,8 @@ client_pre_discard(xlator_t *this, gfs3_discard_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_DISCARD, out);
+ 
+     req->fd = remote_fd;
+     req->offset = offset;
+@@ -1251,7 +1266,8 @@ client_pre_zerofill(xlator_t *this, gfs3_zerofill_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_ZEROFILL, out);
+ 
+     req->fd = remote_fd;
+     req->offset = offset;
+@@ -1286,7 +1302,8 @@ client_pre_seek(xlator_t *this, gfs3_seek_req *req, fd_t *fd, off_t offset,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_SEEK, out);
+ 
+     memcpy(req->gfid, fd->inode->gfid, 16);
+     req->fd = remote_fd;
+@@ -2508,7 +2525,7 @@ client_pre_readv_v2(xlator_t *this, gfx_read_req *req, fd_t *fd, size_t size,
+     int op_errno = ESTALE;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_READ, out);
+ 
+     req->size = size;
+     req->offset = offset;
+@@ -2532,7 +2549,7 @@ client_pre_writev_v2(xlator_t *this, gfx_write_req *req, fd_t *fd, size_t size,
+     int op_errno = ESTALE;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_WRITE, out);
+ 
+     req->size = size;
+     req->offset = offset;
+@@ -2567,10 +2584,10 @@ client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req,
+     int op_errno = ESTALE;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd_in, FALLBACK_TO_ANON_FD, remote_fd_in,
+-                         op_errno, out);
++                         op_errno, GFS3_OP_COPY_FILE_RANGE, out);
+ 
+     CLIENT_GET_REMOTE_FD(this, fd_out, FALLBACK_TO_ANON_FD, remote_fd_out,
+-                         op_errno, out);
++                         op_errno, GFS3_OP_COPY_FILE_RANGE, out);
+     req->size = size;
+     req->off_in = off_in;
+     req->off_out = off_out;
+@@ -2623,7 +2640,8 @@ client_pre_flush_v2(xlator_t *this, gfx_flush_req *req, fd_t *fd, dict_t *xdata)
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FLUSH, out);
+ 
+     req->fd = remote_fd;
+     memcpy(req->gfid, fd->inode->gfid, 16);
+@@ -2643,7 +2661,7 @@ client_pre_fsync_v2(xlator_t *this, gfx_fsync_req *req, fd_t *fd, int32_t flags,
+     int op_errno = 0;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_FSYNC, out);
+ 
+     req->fd = remote_fd;
+     req->data = flags;
+@@ -2778,7 +2796,8 @@ client_pre_fsyncdir_v2(xlator_t *this, gfx_fsyncdir_req *req, fd_t *fd,
+     int32_t op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FSYNCDIR, out);
+ 
+     req->fd = remote_fd;
+     req->data = flags;
+@@ -2852,7 +2871,8 @@ client_pre_ftruncate_v2(xlator_t *this, gfx_ftruncate_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = EINVAL;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FTRUNCATE, out);
+ 
+     req->offset = offset;
+     req->fd = remote_fd;
+@@ -2870,7 +2890,8 @@ client_pre_fstat_v2(xlator_t *this, gfx_fstat_req *req, fd_t *fd, dict_t *xdata)
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FSTAT, out);
+ 
+     req->fd = remote_fd;
+     memcpy(req->gfid, fd->inode->gfid, 16);
+@@ -2892,7 +2913,8 @@ client_pre_lk_v2(xlator_t *this, gfx_lk_req *req, int32_t cmd,
+     int32_t gf_type = 0;
+     int ret = 0;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_LK, out);
+ 
+     ret = client_cmd_to_gf_cmd(cmd, &gf_cmd);
+     if (ret) {
+@@ -2967,7 +2989,8 @@ client_pre_readdir_v2(xlator_t *this, gfx_readdir_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_READDIR, out);
+ 
+     req->size = size;
+     req->offset = offset;
+@@ -3048,7 +3071,7 @@ client_pre_finodelk_v2(xlator_t *this, gfx_finodelk_req *req, fd_t *fd, int cmd,
+     int32_t gf_cmd = 0;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_FINODELK, out);
+ 
+     if (cmd == F_GETLK || cmd == F_GETLK64)
+         gf_cmd = GF_LK_GETLK;
+@@ -3129,7 +3152,8 @@ client_pre_fentrylk_v2(xlator_t *this, gfx_fentrylk_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FENTRYLK, out);
+ 
+     req->fd = remote_fd;
+     req->cmd = cmd_entrylk;
+@@ -3185,7 +3209,7 @@ client_pre_fxattrop_v2(xlator_t *this, gfx_fxattrop_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+ 
+     CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+-                         out);
++                         GFS3_OP_FXATTROP, out);
+ 
+     req->fd = remote_fd;
+     req->flags = flags;
+@@ -3207,7 +3231,8 @@ client_pre_fgetxattr_v2(xlator_t *this, gfx_fgetxattr_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FGETXATTR, out);
+ 
+     req->namelen = 1; /* Use it as a flag */
+     req->fd = remote_fd;
+@@ -3232,7 +3257,8 @@ client_pre_fsetxattr_v2(xlator_t *this, gfx_fsetxattr_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FSETXATTR, out);
+ 
+     req->fd = remote_fd;
+     req->flags = flags;
+@@ -3256,7 +3282,8 @@ client_pre_rchecksum_v2(xlator_t *this, gfx_rchecksum_req *req, fd_t *fd,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_RCHECKSUM, out);
+ 
+     req->len = len;
+     req->offset = offset;
+@@ -3304,7 +3331,8 @@ client_pre_fsetattr_v2(xlator_t *this, gfx_fsetattr_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FSETATTR, out);
+ 
+     memcpy(req->gfid, fd->inode->gfid, 16);
+     req->fd = remote_fd;
+@@ -3324,7 +3352,8 @@ client_pre_readdirp_v2(xlator_t *this, gfx_readdirp_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_READDIRP, out);
+ 
+     req->size = size;
+     req->offset = offset;
+@@ -3349,7 +3378,8 @@ client_pre_fremovexattr_v2(xlator_t *this, gfx_fremovexattr_req *req, fd_t *fd,
+     if (!(fd && fd->inode))
+         goto out;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FREMOVEXATTR, out);
+ 
+     memcpy(req->gfid, fd->inode->gfid, 16);
+     req->name = (char *)name;
+@@ -3369,7 +3399,8 @@ client_pre_fallocate_v2(xlator_t *this, gfx_fallocate_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_FALLOCATE, out);
+ 
+     req->fd = remote_fd;
+     req->flags = flags;
+@@ -3390,7 +3421,8 @@ client_pre_discard_v2(xlator_t *this, gfx_discard_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_DISCARD, out);
+ 
+     req->fd = remote_fd;
+     req->offset = offset;
+@@ -3410,7 +3442,8 @@ client_pre_zerofill_v2(xlator_t *this, gfx_zerofill_req *req, fd_t *fd,
+     int op_errno = ESTALE;
+     int64_t remote_fd = -1;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_ZEROFILL, out);
+ 
+     req->fd = remote_fd;
+     req->offset = offset;
+@@ -3439,7 +3472,8 @@ client_pre_seek_v2(xlator_t *this, gfx_seek_req *req, fd_t *fd, off_t offset,
+     int64_t remote_fd = -1;
+     int op_errno = ESTALE;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
++                         GFS3_OP_SEEK, out);
+ 
+     memcpy(req->gfid, fd->inode->gfid, 16);
+     req->fd = remote_fd;
+@@ -3587,3 +3621,25 @@ client_post_rename_v2(xlator_t *this, gfx_rename_rsp *rsp, struct iatt *stbuf,
+ 
+     return xdr_to_dict(&rsp->xdata, xdata);
+ }
++
++void
++set_fd_reopen_status(xlator_t *this, dict_t *xdata,
++                     enum gf_fd_reopen_status fd_reopen_status)
++{
++    clnt_conf_t *conf = NULL;
++
++    conf = this->private;
++    if (!conf) {
++        gf_msg_debug(this->name, ENOMEM, "Failed to get client conf");
++        return;
++    }
++
++    if (!conf->strict_locks)
++        fd_reopen_status = FD_REOPEN_ALLOWED;
++
++    if (dict_set_int32(xdata, "fd-reopen-status", fd_reopen_status))
++        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, PC_MSG_DICT_SET_FAILED,
++               NULL);
++
++    return;
++}
+diff --git a/xlators/protocol/client/src/client-common.h b/xlators/protocol/client/src/client-common.h
+index a2043d8..16fb167 100644
+--- a/xlators/protocol/client/src/client-common.h
++++ b/xlators/protocol/client/src/client-common.h
+@@ -627,4 +627,8 @@ client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req,
+                               off64_t off_out, size_t size, int32_t flags,
+                               dict_t **xdata);
+ 
++void
++set_fd_reopen_status(xlator_t *this, dict_t *xdata,
++                     enum gf_fd_reopen_status fd_reopen_allowed);
++
+ #endif /* __CLIENT_COMMON_H__ */
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index 6543100..48b6448 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -406,11 +406,12 @@ clnt_readdir_rsp_cleanup_v2(gfx_readdir_rsp *rsp)
+ }
+ 
+ int
+-client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
++client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
++                     enum gf_fop_procnum fop)
+ {
+     clnt_fd_ctx_t *fdctx = NULL;
+     clnt_conf_t *conf = NULL;
+-    gf_boolean_t locks_held = _gf_false;
++    gf_boolean_t locks_involved = _gf_false;
+ 
+     GF_VALIDATE_OR_GOTO(this->name, fd, out);
+     GF_VALIDATE_OR_GOTO(this->name, remote_fd, out);
+@@ -423,23 +424,32 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+             if (fd->anonymous) {
+                 *remote_fd = GF_ANON_FD_NO;
+             } else {
++                if (conf->strict_locks &&
++                    (fop == GFS3_OP_WRITE || fop == GFS3_OP_FTRUNCATE ||
++                     fop == GFS3_OP_FALLOCATE || fop == GFS3_OP_ZEROFILL ||
++                     fop == GFS3_OP_DISCARD)) {
++                    locks_involved = _gf_true;
++                }
+                 *remote_fd = -1;
+                 gf_msg_debug(this->name, EBADF, "not a valid fd for gfid: %s",
+                              uuid_utoa(fd->inode->gfid));
+             }
+         } else {
+-            if (__is_fd_reopen_in_progress(fdctx))
++            if (__is_fd_reopen_in_progress(fdctx)) {
+                 *remote_fd = -1;
+-            else
++            } else {
+                 *remote_fd = fdctx->remote_fd;
++            }
+ 
+-            locks_held = !list_empty(&fdctx->lock_list);
++            locks_involved = !list_empty(&fdctx->lock_list);
+         }
+     }
+     pthread_spin_unlock(&conf->fd_lock);
+ 
+-    if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held))
++    if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) &&
++        (!locks_involved)) {
+         *remote_fd = GF_ANON_FD_NO;
++    }
+ 
+     return 0;
+ out:
+diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
+index 3110c78..46ac544 100644
+--- a/xlators/protocol/client/src/client-rpc-fops.c
++++ b/xlators/protocol/client/src/client-rpc-fops.c
+@@ -2439,6 +2439,13 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
+         }
+     }
+ 
++    if (local->check_reopen) {
++        if (lock.l_type == F_WRLCK)
++            set_fd_reopen_status(this, xdata, FD_REOPEN_NOT_ALLOWED);
++        else
++            set_fd_reopen_status(this, xdata, FD_REOPEN_ALLOWED);
++    }
++
+ out:
+     if ((rsp.op_ret == -1) && (EAGAIN != gf_error_to_errno(rsp.op_errno))) {
+         gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno),
+@@ -5198,6 +5205,7 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
+             0,
+         },
+     };
++    dict_t *xdata = NULL;
+     int32_t gf_cmd = 0;
+     clnt_local_t *local = NULL;
+     clnt_conf_t *conf = NULL;
+@@ -5224,6 +5232,10 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
+         goto unwind;
+     }
+ 
++    ret = dict_get_int32(args->xdata, "fd-reopen-status", &local->check_reopen);
++    if (ret)
++        local->check_reopen = 0;
++
+     local->owner = frame->root->lk_owner;
+     local->cmd = args->cmd;
+     local->fd = fd_ref(args->fd);
+@@ -5237,6 +5249,13 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
+             client_is_setlk(local->cmd)) {
+             client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
+                                          local->cmd);
++        } else if (local->check_reopen) {
++            xdata = dict_new();
++            if (xdata == NULL) {
++                op_errno = ENOMEM;
++                goto unwind;
++            }
++            set_fd_reopen_status(this, xdata, FD_BAD);
+         }
+ 
+         goto unwind;
+@@ -5254,8 +5273,10 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
+ 
+     return 0;
+ unwind:
+-    CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
++    CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, xdata);
+     GF_FREE(req.xdata.xdata_val);
++    if (xdata)
++        dict_unref(xdata);
+ 
+     return 0;
+ }
+diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
+index 954fc58..d0055e9 100644
+--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
+@@ -2234,6 +2234,13 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
+         }
+     }
+ 
++    if (local->check_reopen) {
++        if (lock.l_type == F_WRLCK)
++            set_fd_reopen_status(this, xdata, FD_REOPEN_NOT_ALLOWED);
++        else
++            set_fd_reopen_status(this, xdata, FD_REOPEN_ALLOWED);
++    }
++
+ out:
+     if ((rsp.op_ret == -1) && (EAGAIN != gf_error_to_errno(rsp.op_errno))) {
+         gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno),
+@@ -4759,6 +4766,7 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
+             0,
+         },
+     };
++    dict_t *xdata = NULL;
+     int32_t gf_cmd = 0;
+     clnt_local_t *local = NULL;
+     clnt_conf_t *conf = NULL;
+@@ -4785,6 +4793,10 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
+         goto unwind;
+     }
+ 
++    ret = dict_get_int32(args->xdata, "fd-reopen-status", &local->check_reopen);
++    if (ret)
++        local->check_reopen = 0;
++
+     local->owner = frame->root->lk_owner;
+     local->cmd = args->cmd;
+     local->fd = fd_ref(args->fd);
+@@ -4798,6 +4810,13 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
+             client_is_setlk(local->cmd)) {
+             client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
+                                          local->cmd);
++        } else if (local->check_reopen) {
++            xdata = dict_new();
++            if (xdata == NULL) {
++                op_errno = ENOMEM;
++                goto unwind;
++            }
++            set_fd_reopen_status(this, xdata, FD_BAD);
+         }
+ 
+         goto unwind;
+@@ -4815,8 +4834,10 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
+ 
+     return 0;
+ unwind:
+-    CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
++    CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, xdata);
+     GF_FREE(req.xdata.pairs.pairs_val);
++    if (xdata)
++        dict_unref(xdata);
+ 
+     return 0;
+ }
+@@ -6094,7 +6115,7 @@ client4_0_rchecksum(call_frame_t *frame, xlator_t *this, void *data)
+     conf = this->private;
+ 
+     CLIENT_GET_REMOTE_FD(this, args->fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+-                         unwind);
++                         GFS3_OP_RCHECKSUM, unwind);
+ 
+     req.len = args->len;
+     req.offset = args->offset;
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index 63c90ea..35a5340 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -864,9 +864,11 @@ int32_t
+ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+             fd_t *fd, dict_t *xdata)
+ {
+-    int ret = -1;
++    int ret = 0;
++    int op_errno = ENOTCONN;
+     clnt_conf_t *conf = NULL;
+     rpc_clnt_procedure_t *proc = NULL;
++    clnt_fd_ctx_t *fdctx = NULL;
+     clnt_args_t args = {
+         0,
+     };
+@@ -875,6 +877,21 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+     if (!conf || !conf->fops)
+         goto out;
+ 
++    if (conf->strict_locks) {
++        pthread_spin_lock(&conf->fd_lock);
++        {
++            fdctx = this_fd_get_ctx(fd, this);
++            if (fdctx && !list_empty(&fdctx->lock_list)) {
++                ret = -1;
++                op_errno = EBADFD;
++            }
++        }
++        pthread_spin_unlock(&conf->fd_lock);
++
++        if (ret)
++            goto out;
++    }
++
+     args.loc = loc;
+     args.fd = fd;
+     args.xdata = xdata;
+@@ -888,7 +905,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ 
+ out:
+     if (ret)
+-        STACK_UNWIND_STRICT(open, frame, -1, ENOTCONN, NULL, NULL);
++        STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, NULL);
+ 
+     return 0;
+ }
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index bde3d1a..2a50625 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -98,10 +98,10 @@ typedef enum {
+         free(_this_rsp->xdata.xdata_val);                                      \
+     } while (0)
+ 
+-#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, label)        \
++#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, fop, label)   \
+     do {                                                                       \
+         int _ret = 0;                                                          \
+-        _ret = client_get_remote_fd(xl, fd, flags, &remote_fd);                \
++        _ret = client_get_remote_fd(xl, fd, flags, &remote_fd, fop);           \
+         if (_ret < 0) {                                                        \
+             op_errno = errno;                                                  \
+             goto label;                                                        \
+@@ -286,6 +286,7 @@ typedef struct client_local {
+     client_posix_lock_t *client_lock;
+     gf_lkowner_t owner;
+     int32_t cmd;
++    int32_t check_reopen;
+     struct list_head lock_list;
+     pthread_mutex_t mutex;
+     char *name;
+@@ -435,7 +436,8 @@ client_default_reopen_done(clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this);
+ void
+ client_attempt_reopen(fd_t *fd, xlator_t *this);
+ int
+-client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd);
++client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
++                     enum gf_fop_procnum fop);
+ int
+ client_fd_fop_prepare_local(call_frame_t *frame, fd_t *fd, int64_t remote_fd);
+ gf_boolean_t
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0582-protocol-client-Fix-lock-memory-leak.patch b/SOURCES/0582-protocol-client-Fix-lock-memory-leak.patch
new file mode 100644
index 0000000..3fd1dae
--- /dev/null
+++ b/SOURCES/0582-protocol-client-Fix-lock-memory-leak.patch
@@ -0,0 +1,501 @@
+From adeec3d5d85baad8b50d203f34a47ad5360d7cd7 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Mon, 7 Jun 2021 18:36:11 +0530
+Subject: [PATCH 582/584] protocol/client: Fix lock memory leak
+
+Problem-1:
+When an overlapping lock is issued, the merged lock is not assigned the
+owner. When flush is issued on the fd, this particular lock is not freed,
+leading to a memory leak.
+
+Fix-1:
+Assign the owner while merging the locks.
+
+Problem-2:
+On fd-destroy, lock structs could still be present in fdctx. For some
+reason, running flock -x and then closing the bash fd leads to this code
+path, which leaks the lock structs.
+
+Fix-2:
+When fdctx is being destroyed in the client, make sure to clean up any
+lock structs.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2338/commits/926402f639471d2664bf00c6692221ba297c525f
+> fixes: gluster#2337
+> Change-Id: I298124213ce5a1cf2b1f1756d5e8a9745d9c0a1c
+> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
+
+BUG: 1689375
+Change-Id: I298124213ce5a1cf2b1f1756d5e8a9745d9c0a1c
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245603
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/client/issue-2337-lock-mem-leak.c | 52 ++++++++++++++++++
+ tests/bugs/client/issue-2337-lock-mem-leak.t | 42 ++++++++++++++
+ tests/bugs/replicate/do-not-reopen-fd.t      | 65 ++++++++++++++--------
+ tests/volume.rc                              |  8 +++
+ xlators/protocol/client/src/client-helpers.c | 10 ++++
+ xlators/protocol/client/src/client-lk.c      | 82 ++++++++++++++++++----------
+ xlators/protocol/client/src/client.h         |  8 ++-
+ 7 files changed, 213 insertions(+), 54 deletions(-)
+ create mode 100644 tests/bugs/client/issue-2337-lock-mem-leak.c
+ create mode 100644 tests/bugs/client/issue-2337-lock-mem-leak.t
+
+diff --git a/tests/bugs/client/issue-2337-lock-mem-leak.c b/tests/bugs/client/issue-2337-lock-mem-leak.c
+new file mode 100644
+index 0000000..d4e02a7
+--- /dev/null
++++ b/tests/bugs/client/issue-2337-lock-mem-leak.c
+@@ -0,0 +1,52 @@
++#include <sys/file.h>
++#include <stdio.h>
++#include <string.h>
++#include <errno.h>
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <fcntl.h>
++
++int
++main(int argc, char *argv[])
++{
++    int fd = -1;
++    char *filename = NULL;
++    struct flock lock = {
++        0,
++    };
++    int i = 0;
++    int ret = -1;
++
++    if (argc != 2) {
++        fprintf(stderr, "Usage: %s <filename> ", argv[0]);
++        goto out;
++    }
++
++    filename = argv[1];
++
++    fd = open(filename, O_RDWR | O_CREAT, 0);
++    if (fd < 0) {
++        fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
++        goto out;
++    }
++
++    lock.l_type = F_WRLCK;
++    lock.l_whence = SEEK_SET;
++    lock.l_len = 2;
++
++    while (i < 100) {
++        lock.l_start = i;
++        ret = fcntl(fd, F_SETLK, &lock);
++        if (ret < 0) {
++            fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
++            goto out;
++        }
++
++        i++;
++    }
++
++    ret = 0;
++
++out:
++    return ret;
++}
+diff --git a/tests/bugs/client/issue-2337-lock-mem-leak.t b/tests/bugs/client/issue-2337-lock-mem-leak.t
+new file mode 100644
+index 0000000..64132a2
+--- /dev/null
++++ b/tests/bugs/client/issue-2337-lock-mem-leak.t
+@@ -0,0 +1,42 @@
++#!/bin/bash
++
++#Test that lock fop is not leaking any memory for overlapping regions
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fileio.rc
++
++cleanup;
++
++LOCK_TEST=$(dirname $0)/issue-2337-lock-mem-leak
++build_tester $(dirname $0)/issue-2337-lock-mem-leak.c -o ${LOCK_TEST}
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 $H0:$B0/${V0}1
++#Guard against flush-behind
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++
++TEST touch $M0/a
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'w' $M0/a
++TEST flock -x $fd1
++statedump=$(generate_mount_statedump $V0 $M0)
++EXPECT_NOT "^nostatedump$" echo $statedump
++#Making sure no one changes this mem-tracker name
++TEST grep gf_client_mt_clnt_lock_t $statedump
++TEST fd_close $fd1
++
++statedump=$(generate_mount_statedump $V0 $M0)
++EXPECT_NOT "^nostatedump$" echo $statedump
++TEST ! grep gf_client_mt_clnt_lock_t $statedump
++
++TEST ${LOCK_TEST} $M0/a
++
++statedump=$(generate_mount_statedump $V0 $M0)
++EXPECT_NOT "^nostatedump$" echo $statedump
++TEST ! grep gf_client_mt_clnt_lock_t $statedump
++TEST cleanup_mount_statedump $V0
++TEST rm ${LOCK_TEST}
++cleanup
+diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
+index 76d8e70..13b5218 100644
+--- a/tests/bugs/replicate/do-not-reopen-fd.t
++++ b/tests/bugs/replicate/do-not-reopen-fd.t
+@@ -45,13 +45,17 @@ EXPECT "data-2" cat $B0/${V0}2/a
+ gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
+ gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
+ 
+-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ 
+ TEST fd2=`fd_available`
+ TEST fd_open $fd2 'rw' $M1/a
+ 
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
+ # Kill 2nd brick and try writing to the file. The write should fail due to
+ # quorum failure.
+ TEST kill_brick $V0 $H0 $B0/${V0}1
+@@ -66,6 +70,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+ TEST ! fd_write $fd1 "data-4"
+ TEST ! fd_cat $fd1
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ 
+ # Enable heal and check the files will have same content on all the bricks after
+ # the heal is completed.
+@@ -89,7 +96,9 @@ TEST ! fd_write $fd1 "data-5"
+ 
+ # At this point only one brick will have the lock. Try taking the lock again on
+ # the bad fd, which should also fail with EBADFD.
+-TEST ! flock -x $fd1
++# TODO: Currently a quorum failure in lk unlocks the bricks where the lock
++# succeeded, changing the lock state on the 3rd brick; commented out for now.
++#TEST ! flock -x $fd1
+ 
+ # Kill the only brick that is having lock and try taking lock on another client
+ # which should succeed.
+@@ -97,15 +106,25 @@ TEST kill_brick $V0 $H0 $B0/${V0}2
+ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
+ TEST flock -x $fd2
+ TEST fd_write $fd2 "data-6"
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++
+ 
+ # Bring the brick up and try writing & reading on the old fd, which should still
+ # fail and operations on the 2nd fd should succeed.
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ TEST ! fd_write $fd1 "data-7"
+ 
+ TEST ! fd_cat $fd1
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ TEST fd_cat $fd2
+ 
+ # Close both the fds which will release the locks and then re-open and take lock
+@@ -113,17 +132,15 @@ TEST fd_cat $fd2
+ TEST fd_close $fd1
+ TEST fd_close $fd2
+ 
+-TEST ! ls /proc/$$/fd/$fd1
+-TEST ! ls /proc/$$/fd/$fd2
+-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ 
+ TEST fd1=`fd_available`
+ TEST fd_open $fd1 'rw' $M0/a
+-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ 
+ TEST flock -x $fd1
+ TEST fd_write $fd1 "data-8"
+@@ -134,6 +151,10 @@ EXPECT "data-8" head -n 1 $B0/${V0}1/a
+ EXPECT "data-8" head -n 1 $B0/${V0}2/a
+ 
+ TEST fd_close $fd1
++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
+ 
+ # Heal the volume
+ TEST $CLI volume heal $V0 enable
+@@ -152,9 +173,9 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replica
+ TEST fd1=`fd_available`
+ TEST fd_open $fd1 'rw' $M0/a
+ 
+-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ 
+ # Restart the brick and then write. Now fd should get re-opened and write should
+ # succeed on the previously down brick as well since there are no locks held on
+@@ -163,7 +184,7 @@ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+ TEST fd_write $fd1 "data-10"
+-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+ 
+ EXPECT "data-10" head -n 1 $B0/${V0}0/a
+ EXPECT "data-10" head -n 1 $B0/${V0}1/a
+@@ -177,9 +198,9 @@ TEST fd1=`fd_available`
+ TEST fd_open $fd1 'rw' $M0/a
+ TEST flock -x $fd1
+ 
+-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ 
+ # Kill & restart another brick so that it will return EBADFD
+ TEST kill_brick $V0 $H0 $B0/${V0}1
+@@ -194,9 +215,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+ TEST ! fd_write $fd1 "data-11"
+-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ 
+ EXPECT "data-10" head -n 1 $B0/${V0}0/a
+ EXPECT "data-10" head -n 1 $B0/${V0}1/a
+diff --git a/tests/volume.rc b/tests/volume.rc
+index f5dd0b1..17c3835 100644
+--- a/tests/volume.rc
++++ b/tests/volume.rc
+@@ -407,6 +407,14 @@ function gf_check_file_opened_in_brick {
+         fi
+ }
+ 
++# Print how many "ls -l" entries of the brick's /proc/<pid>/fd end with $4.
++function gf_open_file_count_in_brick {
++        local vol=$1 host=$2 brick=$3 realpath=$4
++        local fd_dir
++        fd_dir="/proc/$(get_brick_pid "$vol" "$host" "$brick")/fd"
++        ls -l "$fd_dir" | grep "${realpath}$" | wc -l
++}
++
+ function gf_get_gfid_backend_file_path {
+         brickpath=$1
+         filepath_in_brick=$2
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index 48b6448..a80f303 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -3156,11 +3156,14 @@ client_fdctx_destroy(xlator_t *this, clnt_fd_ctx_t *fdctx)
+     int32_t ret = -1;
+     char parent_down = 0;
+     fd_lk_ctx_t *lk_ctx = NULL;
++    gf_lkowner_t null_owner = {0};
++    struct list_head deleted_list;
+ 
+     GF_VALIDATE_OR_GOTO("client", this, out);
+     GF_VALIDATE_OR_GOTO(this->name, fdctx, out);
+ 
+     conf = (clnt_conf_t *)this->private;
++    INIT_LIST_HEAD(&deleted_list);
+ 
+     if (fdctx->remote_fd == -1) {
+         gf_msg_debug(this->name, 0, "not a valid fd");
+@@ -3174,6 +3177,13 @@ client_fdctx_destroy(xlator_t *this, clnt_fd_ctx_t *fdctx)
+     pthread_mutex_unlock(&conf->lock);
+     lk_ctx = fdctx->lk_ctx;
+     fdctx->lk_ctx = NULL;
++    pthread_spin_lock(&conf->fd_lock);
++    {
++        __delete_granted_locks_owner_from_fdctx(fdctx, &null_owner,
++                                                &deleted_list);
++    }
++    pthread_spin_unlock(&conf->fd_lock);
++    destroy_client_locks_from_list(&deleted_list);
+ 
+     if (lk_ctx)
+         fd_lk_ctx_unref(lk_ctx);
+diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
+index c1fb055..cb4e894 100644
+--- a/xlators/protocol/client/src/client-lk.c
++++ b/xlators/protocol/client/src/client-lk.c
+@@ -253,6 +253,7 @@ __insert_and_merge(clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+                 sum = add_locks(lock, conf);
+ 
+                 sum->fd = lock->fd;
++                sum->owner = conf->owner;
+ 
+                 __delete_client_lock(conf);
+                 __destroy_client_lock(conf);
+@@ -320,56 +321,77 @@ destroy_client_lock(client_posix_lock_t *lock)
+     GF_FREE(lock);
+ }
+ 
+-int32_t
+-delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner)
++void
++destroy_client_locks_from_list(struct list_head *deleted)
+ {
+-    clnt_fd_ctx_t *fdctx = NULL;
+     client_posix_lock_t *lock = NULL;
+     client_posix_lock_t *tmp = NULL;
+-    xlator_t *this = NULL;
+-    clnt_conf_t *conf = NULL;
+-
+-    struct list_head delete_list;
+-    int ret = 0;
++    xlator_t *this = THIS;
+     int count = 0;
+ 
+-    INIT_LIST_HEAD(&delete_list);
+-    this = THIS;
+-    conf = this->private;
++    list_for_each_entry_safe(lock, tmp, deleted, list)
++    {
++        list_del_init(&lock->list);
++        destroy_client_lock(lock);
++        count++;
++    }
+ 
+-    pthread_spin_lock(&conf->fd_lock);
++    /* FIXME: Need to actually print the locks instead of count */
++    gf_msg_trace(this->name, 0, "Number of locks cleared=%d", count);
++}
+ 
+-    fdctx = this_fd_get_ctx(fd, this);
+-    if (!fdctx) {
+-        pthread_spin_unlock(&conf->fd_lock);
++void
++__delete_granted_locks_owner_from_fdctx(clnt_fd_ctx_t *fdctx,
++                                        gf_lkowner_t *owner,
++                                        struct list_head *deleted)
++{
++    client_posix_lock_t *lock = NULL;
++    client_posix_lock_t *tmp = NULL;
+ 
+-        gf_msg(this->name, GF_LOG_WARNING, EINVAL, PC_MSG_FD_CTX_INVALID,
+-               "fdctx not valid");
+-        ret = -1;
+-        goto out;
++    gf_boolean_t is_null_lkowner = _gf_false;
++
++    if (is_lk_owner_null(owner)) {
++        is_null_lkowner = _gf_true;
+     }
+ 
+     list_for_each_entry_safe(lock, tmp, &fdctx->lock_list, list)
+     {
+-        if (is_same_lkowner(&lock->owner, owner)) {
++        if (is_null_lkowner || is_same_lkowner(&lock->owner, owner)) {
+             list_del_init(&lock->list);
+-            list_add_tail(&lock->list, &delete_list);
+-            count++;
++            list_add_tail(&lock->list, deleted);
+         }
+     }
++}
+ 
+-    pthread_spin_unlock(&conf->fd_lock);
++int32_t
++delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner)
++{
++    clnt_fd_ctx_t *fdctx = NULL;
++    xlator_t *this = NULL;
++    clnt_conf_t *conf = NULL;
++    int ret = 0;
++    struct list_head deleted_locks;
+ 
+-    if (!list_empty(&delete_list)) {
+-        list_for_each_entry_safe(lock, tmp, &delete_list, list)
+-        {
+-            list_del_init(&lock->list);
+-            destroy_client_lock(lock);
++    this = THIS;
++    conf = this->private;
++    INIT_LIST_HEAD(&deleted_locks);
++
++    pthread_spin_lock(&conf->fd_lock);
++    {
++        fdctx = this_fd_get_ctx(fd, this);
++        if (!fdctx) {
++            pthread_spin_unlock(&conf->fd_lock);
++
++            gf_smsg(this->name, GF_LOG_WARNING, EINVAL, PC_MSG_FD_CTX_INVALID,
++                    NULL);
++            ret = -1;
++            goto out;
+         }
++        __delete_granted_locks_owner_from_fdctx(fdctx, owner, &deleted_locks);
+     }
++    pthread_spin_unlock(&conf->fd_lock);
+ 
+-    /* FIXME: Need to actually print the locks instead of count */
+-    gf_msg_trace(this->name, 0, "Number of locks cleared=%d", count);
++    destroy_client_locks_from_list(&deleted_locks);
+ 
+ out:
+     return ret;
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index 2a50625..f952aea 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -406,8 +406,12 @@ int
+ client_attempt_lock_recovery(xlator_t *this, clnt_fd_ctx_t *fdctx);
+ int32_t
+ delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner);
+-int32_t
+-delete_granted_locks_fd(clnt_fd_ctx_t *fdctx);
++void
++__delete_granted_locks_owner_from_fdctx(clnt_fd_ctx_t *fdctx,
++                                        gf_lkowner_t *owner,
++                                        struct list_head *deleted);
++void
++destroy_client_locks_from_list(struct list_head *deleted);
+ int32_t
+ client_cmd_to_gf_cmd(int32_t cmd, int32_t *gf_cmd);
+ void
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch b/SOURCES/0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch
new file mode 100644
index 0000000..1ac1777
--- /dev/null
+++ b/SOURCES/0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch
@@ -0,0 +1,138 @@
+From f114ba25fab57d1ab9a51fc1f101f2b5571f167a Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Mon, 7 Jun 2021 19:24:55 +0530
+Subject: [PATCH 583/584] protocol/client: Initialize list head to prevent NULL
+ de-reference
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2456/commits/00761df0cd14833ff256b69dba7cf8e2b699554c
+> fixes: #2443
+> Change-Id: I86ef0270d41d6fb924db97fde3196d7c98c8b564
+> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
+
+BUG: 1689375
+Change-Id: I86ef0270d41d6fb924db97fde3196d7c98c8b564
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245613
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/locks/issue-2443-crash.c     | 67 +++++++++++++++++++++++++++++++++
+ tests/bugs/locks/issue-2443-crash.t     | 18 +++++++++
+ xlators/protocol/client/src/client-lk.c |  1 +
+ 3 files changed, 86 insertions(+)
+ create mode 100644 tests/bugs/locks/issue-2443-crash.c
+ create mode 100644 tests/bugs/locks/issue-2443-crash.t
+
+diff --git a/tests/bugs/locks/issue-2443-crash.c b/tests/bugs/locks/issue-2443-crash.c
+new file mode 100644
+index 0000000..5f580bf
+--- /dev/null
++++ b/tests/bugs/locks/issue-2443-crash.c
+@@ -0,0 +1,67 @@
++#include <sys/file.h>
++#include <stdio.h>
++#include <string.h>
++#include <errno.h>
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <fcntl.h>
++
++/* Take three fcntl() record locks on argv[1]; 0 if all succeed, else -1. */
++int
++main(int argc, char *argv[])
++{
++    int fd = -1;
++    char *filename = NULL;
++    struct flock lock = {0};
++    int ret = -1;
++
++    if (argc != 2) {
++        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
++        goto out;
++    }
++
++    filename = argv[1];
++
++    fd = open(filename, O_RDWR | O_CREAT, 0644);
++    if (fd < 0) {
++        fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
++        goto out;
++    }
++
++    /* Two adjacent locks: a read lock on [0, 2) and a write lock on [2, 4). */
++    lock.l_start = 0;
++    lock.l_type = F_RDLCK;
++    lock.l_whence = SEEK_SET;
++    lock.l_len = 2;
++
++    ret = fcntl(fd, F_SETLK, &lock);
++    if (ret < 0) {
++        fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
++        goto out;
++    }
++
++    lock.l_start = 2;
++    lock.l_type = F_WRLCK;
++    lock.l_whence = SEEK_SET;
++    lock.l_len = 2;
++
++    ret = fcntl(fd, F_SETLK, &lock);
++    if (ret < 0) {
++        fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
++        goto out;
++    }
++
++    /* A read lock on [0, 4) overlaps both of the locks taken above. */
++    lock.l_start = 0;
++    lock.l_type = F_RDLCK;
++    lock.l_whence = SEEK_SET;
++    lock.l_len = 4;
++
++    ret = fcntl(fd, F_SETLK, &lock);
++    if (ret < 0) {
++        fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
++        goto out;
++    }
++out:
++    return ret;
++}
+diff --git a/tests/bugs/locks/issue-2443-crash.t b/tests/bugs/locks/issue-2443-crash.t
+new file mode 100644
+index 0000000..162a4d7
+--- /dev/null
++++ b/tests/bugs/locks/issue-2443-crash.t
+@@ -0,0 +1,18 @@
++#!/bin/bash
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++cleanup;
++# Start glusterd, create and start a single-brick volume, and mount it on $M0.
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 $H0:$B0/brick0
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
++# Build the reproducer (build_tester presumably compiles the .c - confirm),
++build_tester $(dirname $0)/issue-2443-crash.c
++TEST mv $(dirname $0)/issue-2443-crash $M0
++cd $M0
++TEST ./issue-2443-crash a
++# then return to the previous directory before tearing everything down.
++cd -
++cleanup;
+diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
+index cb4e894..37c1d35 100644
+--- a/xlators/protocol/client/src/client-lk.c
++++ b/xlators/protocol/client/src/client-lk.c
+@@ -101,6 +101,7 @@ add_locks(client_posix_lock_t *l1, client_posix_lock_t *l2)
+     sum = GF_CALLOC(1, sizeof(*sum), gf_client_mt_clnt_lock_t);
+     if (!sum)
+         return NULL;
++    INIT_LIST_HEAD(&sum->list);
+ 
+     sum->fl_start = min(l1->fl_start, l2->fl_start);
+     sum->fl_end = max(l1->fl_end, l2->fl_end);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0584-dht-fixing-xattr-inconsistency.patch b/SOURCES/0584-dht-fixing-xattr-inconsistency.patch
new file mode 100644
index 0000000..bf2c6b9
--- /dev/null
+++ b/SOURCES/0584-dht-fixing-xattr-inconsistency.patch
@@ -0,0 +1,429 @@
+From 2c6c4ad77ba5511a62846af932840deb5bc389ae Mon Sep 17 00:00:00 2001
+From: Tamar Shacked <tshacked@redhat.com>
+Date: Mon, 7 Jun 2021 12:25:57 +0300
+Subject: [PATCH 584/584] dht - fixing xattr inconsistency
+
+The scenario of setting an xattr to a dir, killing one of the bricks,
+removing the xattr, bringing back the brick results in xattr
+inconsistency - The downed brick will still have the xattr, but the rest
+won't.
+This patch adds a mechanism that will remove the extra xattrs during
+lookup.
+
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24687/
+> fixes: #1324
+> Change-Id: Ifec0b7aea6cd40daa8b0319b881191cf83e031d1
+> Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+
+BUG: 1600379
+Change-Id: I588f69b283e5354cd362d74486d6ec6d226ecc96
+Signed-off-by: Tamar Shacked <tshacked@redhat.com>
+Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245560
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/common-utils.c                | 20 +++++++-
+ libglusterfs/src/glusterfs/common-utils.h      |  6 +++
+ tests/bugs/distribute/bug-1600379.t            | 54 ++++++++++++++++++++
+ xlators/cluster/dht/src/dht-common.c           | 14 ++----
+ xlators/cluster/dht/src/dht-common.h           |  4 --
+ xlators/cluster/dht/src/dht-helper.c           |  4 ++
+ xlators/cluster/dht/src/dht-selfheal.c         | 11 ++++
+ xlators/storage/posix/src/posix-helpers.c      | 19 +++++++
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 69 ++++++++++++++++++++++++++
+ xlators/storage/posix/src/posix.h              |  3 ++
+ 10 files changed, 189 insertions(+), 15 deletions(-)
+ create mode 100644 tests/bugs/distribute/bug-1600379.t
+
+diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
+index c2dfe28..d8b7c6e 100644
+--- a/libglusterfs/src/common-utils.c
++++ b/libglusterfs/src/common-utils.c
+@@ -54,6 +54,7 @@
+ #include "xxhash.h"
+ #include <ifaddrs.h>
+ #include "glusterfs/libglusterfs-messages.h"
++#include "glusterfs/glusterfs-acl.h"
+ #include "protocol-common.h"
+ #ifdef __FreeBSD__
+ #include <pthread_np.h>
+@@ -82,12 +83,21 @@ gf_boolean_t gf_signal_on_assert = false;
+ typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size);
+ typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size);
+ 
+-void gf_assert(void)
++char *xattrs_to_heal[] = {"user.",
++                          POSIX_ACL_ACCESS_XATTR,
++                          POSIX_ACL_DEFAULT_XATTR,
++                          QUOTA_LIMIT_KEY,
++                          QUOTA_LIMIT_OBJECTS_KEY,
++                          GF_SELINUX_XATTR_KEY,
++                          GF_XATTR_MDATA_KEY,
++                          NULL};
++
++void
++gf_assert(void)
+ {
+     if (gf_signal_on_assert) {
+         raise(SIGCONT);
+     }
+-
+ }
+ 
+ void
+@@ -5430,3 +5440,9 @@ gf_d_type_from_ia_type(ia_type_t type)
+             return DT_UNKNOWN;
+     }
+ }
++
++char **
++get_xattrs_to_heal()
++{
++    return xattrs_to_heal;
++}
+diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
+index bd48b6f..8439bb6 100644
+--- a/libglusterfs/src/glusterfs/common-utils.h
++++ b/libglusterfs/src/glusterfs/common-utils.h
+@@ -183,6 +183,12 @@ enum _gf_xlator_ipc_targets {
+ typedef enum _gf_special_pid gf_special_pid_t;
+ typedef enum _gf_xlator_ipc_targets _gf_xlator_ipc_targets_t;
+ 
++/* Array to hold custom xattr keys */
++extern char *xattrs_to_heal[];
++
++char **
++get_xattrs_to_heal();
++
+ /* The DHT file rename operation is not a straightforward rename.
+  * It involves creating linkto and linkfiles, and can unlink or rename the
+  * source file depending on the hashed and cached subvols for the source
+diff --git a/tests/bugs/distribute/bug-1600379.t b/tests/bugs/distribute/bug-1600379.t
+new file mode 100644
+index 0000000..8d2f615
+--- /dev/null
++++ b/tests/bugs/distribute/bug-1600379.t
+@@ -0,0 +1,54 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++# Initialize
++#------------------------------------------------------------
++cleanup;
++
++# Start glusterd
++TEST glusterd;
++TEST pidof glusterd;
++TEST $CLI volume info;
++
++# Create a volume
++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
++
++# Verify volume creation
++EXPECT "$V0" volinfo_field $V0 'Volume Name';
++EXPECT 'Created' volinfo_field $V0 'Status';
++
++# Start volume and verify successful start
++TEST $CLI volume start $V0;
++EXPECT 'Started' volinfo_field $V0 'Status';
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
++#------------------------------------------------------------
++
++# Test case - Remove xattr from killed brick on lookup
++#------------------------------------------------------------
++# Create a dir, set a custom xattr and verify its value on the 2nd brick
++TEST mkdir $M0/testdir
++TEST setfattr -n user.attr -v val $M0/testdir
++xattr_val=`getfattr -n user.attr --only-values $B0/${V0}2/testdir 2>/dev/null`;
++TEST [ "x${xattr_val}" = "xval" ];
++
++# Kill 2nd brick process
++TEST kill_brick $V0 $H0 $B0/${V0}2
++EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
++
++# Remove custom xattr
++TEST setfattr -x user.attr $M0/testdir
++
++# Bring up the killed brick process
++TEST $CLI volume start $V0 force
++
++# Perform lookup
++sleep 5
++TEST ls $M0/testdir
++
++# Check brick xattrs: no user xattr may be left on the restarted brick
++xattr_val_2=`getfattr -d $B0/${V0}2/testdir 2>/dev/null`;
++TEST [ "x${xattr_val_2}" = "x" ];
++
++cleanup;
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index ce0fbbf..edfc6e7 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -19,6 +19,7 @@
+ #include <glusterfs/byte-order.h>
+ #include <glusterfs/quota-common-utils.h>
+ #include <glusterfs/upcall-utils.h>
++#include <glusterfs/common-utils.h>
+ 
+ #include <sys/time.h>
+ #include <libgen.h>
+@@ -127,15 +128,6 @@ dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf)
+ int
+ dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
+ 
+-char *xattrs_to_heal[] = {"user.",
+-                          POSIX_ACL_ACCESS_XATTR,
+-                          POSIX_ACL_DEFAULT_XATTR,
+-                          QUOTA_LIMIT_KEY,
+-                          QUOTA_LIMIT_OBJECTS_KEY,
+-                          GF_SELINUX_XATTR_KEY,
+-                          GF_XATTR_MDATA_KEY,
+-                          NULL};
+-
+ char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
+ 
+ /* Return true if key exists in array
+@@ -143,6 +135,8 @@ char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
+ static gf_boolean_t
+ dht_match_xattr(const char *key)
+ {
++    char **xattrs_to_heal = get_xattrs_to_heal();
++
+     return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
+ }
+ 
+@@ -5399,11 +5393,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+     int call_cnt = 0;
+     dht_local_t *local = NULL;
+     char gfid_local[GF_UUID_BUF_SIZE] = {0};
++    char **xattrs_to_heal;
+ 
+     conf = this->private;
+     local = frame->local;
+     call_cnt = conf->subvolume_cnt;
+     local->flags = flags;
++    xattrs_to_heal = get_xattrs_to_heal();
+ 
+     if (!gf_uuid_is_null(local->gfid)) {
+         gf_uuid_unparse(local->gfid, gfid_local);
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index 132b3b3..b856c68 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -54,10 +54,6 @@
+ #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
+ #define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
+ 
+-/* Array to hold custom xattr keys
+- */
+-extern char *xattrs_to_heal[];
+-
+ /* Rebalance nodeuuid flags */
+ #define REBAL_NODEUUID_MINE 0x01
+ 
+diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
+index 4f7370d..4c3940a 100644
+--- a/xlators/cluster/dht/src/dht-helper.c
++++ b/xlators/cluster/dht/src/dht-helper.c
+@@ -2289,6 +2289,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+     int luret = -1;
+     int luflag = -1;
+     int i = 0;
++    char **xattrs_to_heal;
+ 
+     if (!src || !dst) {
+         gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
+@@ -2305,6 +2306,9 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+        and set it to dst dict, here index start from 1 because
+        user xattr already checked in previous statement
+     */
++
++    xattrs_to_heal = get_xattrs_to_heal();
++
+     for (i = 1; xattrs_to_heal[i]; i++) {
+         keyval = dict_get(src, xattrs_to_heal[i]);
+         if (keyval) {
+diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
+index f4e17d1..8af7301 100644
+--- a/xlators/cluster/dht/src/dht-selfheal.c
++++ b/xlators/cluster/dht/src/dht-selfheal.c
+@@ -2315,6 +2315,15 @@ dht_dir_heal_xattrs(void *data)
+         if (subvol == mds_subvol)
+             continue;
+         if (uret || uflag) {
++            /* Custom xattr heal is required - let posix handle it */
++            ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
++            if (ret) {
++                gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
++                        "path=%s", local->loc.path, "key=%s",
++                        "sync_backend_xattrs", NULL);
++                goto out;
++            }
++
+             ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
+                                   NULL);
+             if (ret) {
+@@ -2325,6 +2334,8 @@ dht_dir_heal_xattrs(void *data)
+                        "user xattr on path %s on "
+                        "subvol %s, gfid = %s ",
+                        local->loc.path, subvol->name, gfid);
++            } else {
++                dict_del(xdata, "sync_backend_xattrs");
+             }
+         }
+     }
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 16351d8..40a9ee4 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -3656,3 +3656,22 @@ out:
+ 
+     return is_stale;
+ }
++
++/* Callback for dict_foreach_fnmatch(): remove xattr k from the file at the
++ * path in data, then drop k from dict. Returns sys_lremovexattr()'s result. */
++int
++posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
++{
++    int ret;
++    char *real_path = data;
++
++    ret = sys_lremovexattr(real_path, k);
++    if (ret) {
++        gf_msg("posix-helpers", GF_LOG_ERROR, errno, P_MSG_XATTR_NOT_REMOVED,
++               "removexattr failed. key %s path %s", k, real_path);
++    }
++
++    dict_del(dict, k);
++
++    return ret;
++}
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 4c2983a..be22c5e 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -62,6 +62,7 @@
+ #include <glusterfs/events.h>
+ #include "posix-gfid-path.h"
+ #include <glusterfs/compat-uuid.h>
++#include <glusterfs/common-utils.h>
+ 
+ extern char *marker_xattrs[];
+ #define ALIGN_SIZE 4096
+@@ -2733,6 +2734,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+     int32_t ret = 0;
+     ssize_t acl_size = 0;
+     dict_t *xattr = NULL;
++    dict_t *subvol_xattrs = NULL;
+     posix_xattr_filler_t filler = {
+         0,
+     };
+@@ -2748,6 +2750,10 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+     struct mdata_iatt mdata_iatt = {
+         0,
+     };
++    int8_t sync_backend_xattrs = _gf_false;
++    data_pair_t *custom_xattrs;
++    data_t *keyval = NULL;
++    char **xattrs_to_heal = get_xattrs_to_heal();
+ 
+     DECLARE_OLD_FS_ID_VAR;
+     SET_FS_ID(frame->root->uid, frame->root->gid);
+@@ -2930,6 +2936,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+         goto out;
+     }
+ 
++    ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs);
++    if (ret) {
++        gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs");
++    }
++
++    if (sync_backend_xattrs) {
++        /* List all custom xattrs */
++        subvol_xattrs = dict_new();
++        if (!subvol_xattrs)
++            goto out;
++
++        ret = dict_set_int32_sizen(xdata, "list-xattr", 1);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
++                   "Unable to set list-xattr in dict ");
++            goto out;
++        }
++
++        subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
++                                         NULL);
++
++        /* Remove all user xattrs from the file */
++        dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr,
++                             real_path);
++
++        /* Remove all custom xattrs from the file */
++        for (i = 1; xattrs_to_heal[i]; i++) {
++            keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]);
++            if (keyval) {
++                ret = sys_lremovexattr(real_path, xattrs_to_heal[i]);
++                if (ret) {
++                    gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED,
++                           errno, "removexattr failed. key %s path %s",
++                           xattrs_to_heal[i], loc->path);
++                    goto out;
++                }
++
++                dict_del(subvol_xattrs, xattrs_to_heal[i]);
++                keyval = NULL;
++            }
++        }
++
++        /* Set custom xattrs based on info provided by DHT */
++        custom_xattrs = dict->members_list;
++
++        while (custom_xattrs != NULL) {
++            ret = sys_lsetxattr(real_path, custom_xattrs->key,
++                                custom_xattrs->value->data,
++                                custom_xattrs->value->len, flags);
++            if (ret) {
++                op_errno = errno;
++                gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d",
++                       custom_xattrs->key, ret);
++                goto out;
++            }
++
++            custom_xattrs = custom_xattrs->next;
++        }
++    }
++
+     xattr = dict_new();
+     if (!xattr)
+         goto out;
+@@ -3037,6 +3103,9 @@ out:
+     if (xattr)
+         dict_unref(xattr);
+ 
++    if (subvol_xattrs)
++        dict_unref(subvol_xattrs);
++
+     return 0;
+ }
+ 
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 4be979c..b357d34 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -686,4 +686,7 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
+ gf_boolean_t
+ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this);
+ 
++int
++posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data);
++
+ #endif /* _POSIX_H */
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch b/SOURCES/0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch
new file mode 100644
index 0000000..e3fa401
--- /dev/null
+++ b/SOURCES/0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch
@@ -0,0 +1,77 @@
+From ba399a083a56963bb7414535ede6eff6afcd1a0a Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Mon, 14 Jun 2021 12:32:06 -0400
+Subject: [PATCH 585/585] ganesha_ha: ganesha_grace RA fails in start() and/or
+ fails in monitor () (#2523)
+
+shell [[ ]] string compare fails to match returned attr to the
+pattern and subsequently returns status of "not running", resulting
+in dependencies such as the IPaddr (cluster_ip) RA not starting
+
+Change-Id: I2c8d6f5c4cf0480672d52d8aa0d9226950441dc9
+commit 8ec66a43eedd505ec0b40f55c05f13a77fe8074e
+PR: https://github.com/gluster/glusterfs/pull/2523
+issue: https://github.com/gluster/glusterfs/issues/2522
+BUG: 1945143
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/247613
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/ocf/ganesha_grace | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
+index edc6fa2..ca219af 100644
+--- a/extras/ganesha/ocf/ganesha_grace
++++ b/extras/ganesha/ocf/ganesha_grace
+@@ -122,15 +122,18 @@ ganesha_grace_start()
+ 
+ 	# case 1
+ 	if [[ -z "${attr}" ]]; then
++		ocf_log debug "grace start: returning success case 1"
+ 		return ${OCF_SUCCESS}
+ 	fi
+ 
+ 	# case 2
+-	if [[ "${attr}" = *"value=1" ]]; then
++	if [[ "${attr}" = *"host=\"${host}\" value=\"1\"" ]]; then
++		ocf_log debug "grace start: returning success case 2"
+ 		return ${OCF_SUCCESS}
+ 	fi
+ 
+ 	# case 3
++	ocf_log info "grace start returning: not running case 3 (${attr})"
+ 	return ${OCF_NOT_RUNNING}
+ }
+ 
+@@ -162,7 +165,7 @@ ganesha_grace_monitor()
+ {
+ 	local host=$(ocf_local_nodename)
+ 
+-	ocf_log debug "ganesha_grace monitor ${host}"
++	ocf_log debug "ganesha_grace_monitor ${host}"
+ 
+ 	attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+         if [ $? -ne 0 ]; then
+@@ -174,13 +177,16 @@ ganesha_grace_monitor()
+ 	# chance to create it. In which case we'll pretend
+ 	# everything is okay this time around
+ 	if [[ -z "${attr}" ]]; then
++		ocf_log debug "grace monitor: returning success case 1"
+ 		return ${OCF_SUCCESS}
+ 	fi
+ 
+-	if [[ "${attr}" = *"value=1" ]]; then
++	if [[ "${attr}" = *"host=\"${host}\" value=\"1\"" ]]; then
++		ocf_log debug "grace monitor: returning success case 2"
+ 		return ${OCF_SUCCESS}
+ 	fi
+ 
++	ocf_log info "grace monitor: returning not running case 3 (${attr})"
+ 	return ${OCF_NOT_RUNNING}
+ }
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch b/SOURCES/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch
new file mode 100644
index 0000000..62c574d
--- /dev/null
+++ b/SOURCES/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch
@@ -0,0 +1,298 @@
+From e431321f1348b5d51733a6b6c5e046fd8c6e28cc Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Mon, 5 Jul 2021 10:52:10 +0530
+Subject: [PATCH 586/586] protocol/client: Do not reopen fd post handshake if
+ posix lock is held
+
+Problem:
+With client.strict-locks enabled, in some cases where the posix lock is
+taken after a brick gets disconnected, the fd is getting reopened when
+the brick gets reconnected to the client as part of client_post_handshake.
+In such cases the saved fdctx's lock_list may not have the latest
+information.
+
+Fix:
+Check the lock information in the fdctx->lk_ctx as well post handshake
+which will have the latest information on the locks.
+Also check for this field in other places as well to prevent writes
+happening with anonymous fd even without re-opening the fd on the
+restarted brick.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2582
+> Fixes: #2581
+> Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
+> Signed-off-by: karthik-us ksubrahm@redhat.com
+
+BUG: 1689375
+Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/252588
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/replicate/do-not-reopen-fd.t        | 76 ++++++++++++++++++--------
+ xlators/protocol/client/src/client-handshake.c |  2 +-
+ xlators/protocol/client/src/client-helpers.c   | 11 +++-
+ xlators/protocol/client/src/client.c           |  2 +-
+ xlators/protocol/client/src/client.h           |  3 +
+ 5 files changed, 67 insertions(+), 27 deletions(-)
+
+diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
+index 13b5218..f346709 100644
+--- a/tests/bugs/replicate/do-not-reopen-fd.t
++++ b/tests/bugs/replicate/do-not-reopen-fd.t
+@@ -20,10 +20,41 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
+ 
+ TEST touch $M0/a
++gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
++gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
++
++
++# Open fd from a client, check for open fd on all the bricks.
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'rw' $M0/a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
++# Kill a brick and take lock on the fd
++TEST kill_brick $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
++TEST flock -x $fd1
++
++# Restart the brick and check for no open fd on the restarted brick.
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++
++# Write on the fd. It should fail on the restarted brick.
++TEST fd_write $fd1 "data-0"
++EXPECT "" cat $B0/${V0}0/a
++EXPECT "data-0" cat $B0/${V0}1/a
++EXPECT "data-0" cat $B0/${V0}2/a
++
++TEST fd_close $fd1
+ 
+ # Kill one brick and take lock on the fd and do a write.
+ TEST kill_brick $V0 $H0 $B0/${V0}0
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+ TEST fd1=`fd_available`
+ TEST fd_open $fd1 'rw' $M0/a
+ 
+@@ -34,7 +65,7 @@ TEST fd_write $fd1 "data-1"
+ # should still succeed as there were no quorum disconnects.
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+ TEST fd_write $fd1 "data-2"
+ EXPECT "" cat $B0/${V0}0/a
+ EXPECT "data-2" cat $B0/${V0}1/a
+@@ -42,9 +73,6 @@ EXPECT "data-2" cat $B0/${V0}2/a
+ 
+ # Check there is no fd opened on the 1st brick by checking for the gfid inside
+ # /proc/pid-of-brick/fd/ directory
+-gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
+-gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
+-
+ EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+@@ -59,7 +87,7 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ # Kill 2nd brick and try writing to the file. The write should fail due to
+ # quorum failure.
+ TEST kill_brick $V0 $H0 $B0/${V0}1
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 1
+ TEST ! fd_write $fd1 "data-3"
+ TEST ! fd_cat $fd1
+ 
+@@ -67,7 +95,7 @@ TEST ! fd_cat $fd1
+ # which were down previously, will return EBADFD now.
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
+ TEST ! fd_write $fd1 "data-4"
+ TEST ! fd_cat $fd1
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+@@ -79,9 +107,9 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+ TEST $CLI volume heal $V0 enable
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+ 
+ TEST $CLI volume heal $V0
+ EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+@@ -103,7 +131,7 @@ TEST ! fd_write $fd1 "data-5"
+ # Kill the only brick that is having lock and try taking lock on another client
+ # which should succeed.
+ TEST kill_brick $V0 $H0 $B0/${V0}2
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 2
+ TEST flock -x $fd2
+ TEST fd_write $fd2 "data-6"
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+@@ -114,17 +142,17 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+ # fail and operations on the 2nd fd should succeed.
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 2
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M1 $V0-replicate-0 2
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ TEST ! fd_write $fd1 "data-7"
+ 
+ TEST ! fd_cat $fd1
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ TEST fd_cat $fd2
+ 
+ # Close both the fds which will release the locks and then re-open and take lock
+@@ -159,9 +187,9 @@ EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0
+ # Heal the volume
+ TEST $CLI volume heal $V0 enable
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+ 
+ TEST $CLI volume heal $V0
+ EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+@@ -169,7 +197,7 @@ TEST $CLI volume heal $V0 disable
+ 
+ # Kill one brick and open a fd.
+ TEST kill_brick $V0 $H0 $B0/${V0}0
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+ TEST fd1=`fd_available`
+ TEST fd_open $fd1 'rw' $M0/a
+ 
+@@ -182,7 +210,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ # any of the bricks.
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+ TEST fd_write $fd1 "data-10"
+ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+ 
+@@ -193,7 +221,7 @@ TEST fd_close $fd1
+ 
+ # Kill one brick, open and take lock on a fd.
+ TEST kill_brick $V0 $H0 $B0/${V0}0
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+ TEST fd1=`fd_available`
+ TEST fd_open $fd1 'rw' $M0/a
+ TEST flock -x $fd1
+@@ -204,7 +232,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+ 
+ # Kill & restart another brick so that it will return EBADFD
+ TEST kill_brick $V0 $H0 $B0/${V0}1
+-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" brick_up_status $V0 $H0 $B0/${V0}1
+ 
+ # Restart the bricks and then write. Now fd should not get re-opened since lock
+ # is still held on one brick and write should also fail as there is no quorum.
+@@ -212,8 +240,8 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
+ TEST ! fd_write $fd1 "data-11"
+ EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+ EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
+index a12472b..20e03d8 100644
+--- a/xlators/protocol/client/src/client-handshake.c
++++ b/xlators/protocol/client/src/client-handshake.c
+@@ -911,7 +911,7 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
+         list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
+         {
+             if (fdctx->remote_fd != -1 ||
+-                (!list_empty(&fdctx->lock_list) && conf->strict_locks))
++                (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks))
+                 continue;
+ 
+             fdctx->reopen_done = client_child_up_reopen_done;
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index a80f303..b4a7294 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -15,6 +15,15 @@
+ #include <glusterfs/compat-errno.h>
+ #include <glusterfs/common-utils.h>
+ 
++gf_boolean_t
++fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
++{
++    if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
++        return _gf_true;
++
++    return _gf_false;
++}
++
+ int
+ client_fd_lk_list_empty(fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock)
+ {
+@@ -441,7 +450,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
+                 *remote_fd = fdctx->remote_fd;
+             }
+ 
+-            locks_involved = !list_empty(&fdctx->lock_list);
++            locks_involved = !fdctx_lock_lists_empty(fdctx);
+         }
+     }
+     pthread_spin_unlock(&conf->fd_lock);
+diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
+index 35a5340..6df2ed1 100644
+--- a/xlators/protocol/client/src/client.c
++++ b/xlators/protocol/client/src/client.c
+@@ -881,7 +881,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+         pthread_spin_lock(&conf->fd_lock);
+         {
+             fdctx = this_fd_get_ctx(fd, this);
+-            if (fdctx && !list_empty(&fdctx->lock_list)) {
++            if (fdctx && !fdctx_lock_lists_empty(fdctx)) {
+                 ret = -1;
+                 op_errno = EBADFD;
+             }
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index f952aea..799fe6e 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -535,4 +535,7 @@ client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
+ int
+ client_is_setlk(int32_t cmd);
+ 
++gf_boolean_t
++fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx);
++
+ #endif /* !_CLIENT_H */
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0587-Update-rfc.sh-to-rhgs-3.5.6.patch b/SOURCES/0587-Update-rfc.sh-to-rhgs-3.5.6.patch
new file mode 100644
index 0000000..420a4cf
--- /dev/null
+++ b/SOURCES/0587-Update-rfc.sh-to-rhgs-3.5.6.patch
@@ -0,0 +1,26 @@
+From f72780b560ea8efe1508aa9ddc574e6dc066bf9a Mon Sep 17 00:00:00 2001
+From: Csaba Henk <chenk@redhat.com>
+Date: Wed, 29 Sep 2021 10:44:37 +0200
+Subject: [PATCH 587/610] Update rfc.sh to rhgs-3.5.6
+
+Signed-off-by: Csaba Henk <chenk@redhat.com>
+---
+ rfc.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/rfc.sh b/rfc.sh
+index daeff32..67798cb 100755
+--- a/rfc.sh
++++ b/rfc.sh
+@@ -18,7 +18,7 @@ done
+ shift $((OPTIND-1))
+ 
+ 
+-branch="rhgs-3.5.5";
++branch="rhgs-3.5.6";
+ 
+ set_hooks_commit_msg()
+ {
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0588-locks-Fix-null-gfid-in-lock-contention-notifications.patch b/SOURCES/0588-locks-Fix-null-gfid-in-lock-contention-notifications.patch
new file mode 100644
index 0000000..1e6c488
--- /dev/null
+++ b/SOURCES/0588-locks-Fix-null-gfid-in-lock-contention-notifications.patch
@@ -0,0 +1,388 @@
+From e3813685237dbdf8dc7cf28726fff2caf2288706 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Mon, 19 Jul 2021 15:37:02 +0200
+Subject: [PATCH 588/610] locks: Fix null gfid in lock contention notifications
+
+This patch fixes 3 problems:
+
+First problem:
+
+After commit c0bd592e, the pl_inode_t object was also created in the
+cbk of lookup requests. Lookup requests are a bit different than any
+other request because the inode received may not be completely
+initialized. In particular, inode->gfid may be null.
+
+This caused the gfid stored in the pl_inode_t object to be null in
+some cases. This gfid is used mostly for logs, but also to send lock
+contention notifications. This meant that some notifications could be
+sent with a null gfid, making it impossible for the client xlator to
+correctly identify the contending inode, so the lock was not released
+immediately when eager-lock was also enabled.
+
+Second problem:
+
+The feature introduced by c0bd592e needed to track the number of
+hardlinks of each inode to detect when it was deleted. However it
+was done using the 'get-link-count' special xattr on lookup, while
+posix only implements it for unlink and rename.
+
+Also, the number of hardlinks was not incremented for mkdir, mknod,
+rename, ..., so it didn't work correctly for directories.
+
+Third problem:
+
+When the last hardlink of an open file is deleted, all locks will be
+denied with ESTALE error, but that's not correct. Access to the open
+fd must succeed.
+
+The first problem is fixed by avoiding creating pl_inode_t objects
+during lookup. Second and third problems are fixed by completely
+ignoring if the file has been deleted or not. Even if we grant a
+lock on a non-existing file, the next operation done by the client
+inside the lock will return the correct error, which should be enough.
+
+Upstream patch:
+> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2553
+> Fixes: #2551
+> Change-Id: Ic73e82f6b725b838c1600b6a128ea36a75f13253
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1962972
+Change-Id: Ic73e82f6b725b838c1600b6a128ea36a75f13253
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279192
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/locks/issue-2551.t       |  58 ++++++++++++++++++
+ xlators/features/locks/src/common.c |  31 +++-------
+ xlators/features/locks/src/locks.h  |   2 -
+ xlators/features/locks/src/posix.c  | 118 +++---------------------------------
+ 4 files changed, 74 insertions(+), 135 deletions(-)
+ create mode 100644 tests/bugs/locks/issue-2551.t
+
+diff --git a/tests/bugs/locks/issue-2551.t b/tests/bugs/locks/issue-2551.t
+new file mode 100644
+index 0000000..a32af02
+--- /dev/null
++++ b/tests/bugs/locks/issue-2551.t
+@@ -0,0 +1,58 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++
++function check_time() {
++    local max="${1}"
++    local start="$(date +"%s")"
++
++    shift
++
++    if "${@}"; then
++        if [[ $(($(date +"%s") - ${start})) -lt ${max} ]]; then
++            return 0
++        fi
++    fi
++
++    return 1
++}
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/brick{0..2}
++TEST $CLI volume set $V0 disperse.eager-lock on
++TEST $CLI volume set $V0 disperse.eager-lock-timeout 30
++TEST $CLI volume set $V0 features.locks-notify-contention on
++TEST $CLI volume set $V0 performance.write-behind off
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 performance.quick-read off
++
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
++
++TEST mkdir $M0/dir
++TEST dd if=/dev/zero of=$M0/dir/test bs=4k count=1
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++
++TEST $CLI volume stop $V0
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
++
++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M1
++
++TEST dd if=/dev/zero of=$M0/dir/test bs=4k count=1 conv=notrunc
++TEST check_time 5 dd if=/dev/zero of=$M1/dir/test bs=4k count=1 conv=notrunc
+diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
+index cddbfa6..5403086 100644
+--- a/xlators/features/locks/src/common.c
++++ b/xlators/features/locks/src/common.c
+@@ -468,9 +468,7 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
+         pl_inode->check_mlock_info = _gf_true;
+         pl_inode->mlock_enforced = _gf_false;
+ 
+-        /* -2 means never looked up. -1 means something went wrong and link
+-         * tracking is disabled. */
+-        pl_inode->links = -2;
++        pl_inode->remove_running = 0;
+ 
+         ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
+         if (ret) {
+@@ -1403,11 +1401,6 @@ pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ 
+     pthread_mutex_lock(&pl_inode->mutex);
+ 
+-    if (pl_inode->removed) {
+-        error = ESTALE;
+-        goto unlock;
+-    }
+-
+     if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
+         error = -1;
+         /* We skip the unlock here because the caller must create a stub when
+@@ -1420,7 +1413,6 @@ pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+     pl_inode->is_locked = _gf_true;
+     pl_inode->remove_running++;
+ 
+-unlock:
+     pthread_mutex_unlock(&pl_inode->mutex);
+ 
+ done:
+@@ -1490,20 +1482,18 @@ pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
+ 
+     pthread_mutex_lock(&pl_inode->mutex);
+ 
+-    if (error == 0) {
+-        if (pl_inode->links >= 0) {
+-            pl_inode->links--;
+-        }
+-        if (pl_inode->links == 0) {
+-            pl_inode->removed = _gf_true;
+-        }
+-    }
+-
+     pl_inode->remove_running--;
+ 
+     if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
+         pl_inode->is_locked = _gf_false;
+ 
++        /* At this point it's possible that the inode has been deleted, but
++         * there could be open fd's still referencing it, so we can't prevent
++         * pending locks from being granted. If the file has really been
++         * deleted, whatever the client does once the lock is granted will
++         * fail with the appropriate error, so we don't need to worry about
++         * it here. */
++
+         list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+         {
+             __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
+@@ -1555,11 +1545,6 @@ pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
+     pl_dom_list_t *dom;
+     pl_inode_lock_t *ilock;
+ 
+-    /* If the inode has been deleted, we won't allow any lock. */
+-    if (pl_inode->removed) {
+-        return -ESTALE;
+-    }
+-
+     /* We only synchronize with locks made for regular operations coming from
+      * the user. Locks done for internal purposes are hard to control and could
+      * lead to long delays or deadlocks quite easily. */
+diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
+index 6666feb..2406dcd 100644
+--- a/xlators/features/locks/src/locks.h
++++ b/xlators/features/locks/src/locks.h
+@@ -202,10 +202,8 @@ struct __pl_inode {
+     int fop_wind_count;
+     pthread_cond_t check_fop_wind_count;
+ 
+-    int32_t links;           /* Number of hard links the inode has. */
+     uint32_t remove_running; /* Number of remove operations running. */
+     gf_boolean_t is_locked;  /* Regular locks will be blocked. */
+-    gf_boolean_t removed;    /* The inode has been deleted. */
+ };
+ typedef struct __pl_inode pl_inode_t;
+ 
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index 22ef5b8..d5effef 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -2975,104 +2975,24 @@ out:
+     return ret;
+ }
+ 
+-static int32_t
+-pl_request_link_count(dict_t **pxdata)
+-{
+-    dict_t *xdata;
+-
+-    xdata = *pxdata;
+-    if (xdata == NULL) {
+-        xdata = dict_new();
+-        if (xdata == NULL) {
+-            return ENOMEM;
+-        }
+-    } else {
+-        dict_ref(xdata);
+-    }
+-
+-    if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
+-        dict_unref(xdata);
+-        return ENOMEM;
+-    }
+-
+-    *pxdata = xdata;
+-
+-    return 0;
+-}
+-
+-static int32_t
+-pl_check_link_count(dict_t *xdata)
+-{
+-    int32_t count;
+-
+-    /* In case we are unable to read the link count from xdata, we take a
+-     * conservative approach and return -2, which will prevent the inode from
+-     * being considered deleted. In fact it will cause link tracking for this
+-     * inode to be disabled completely to avoid races. */
+-
+-    if (xdata == NULL) {
+-        return -2;
+-    }
+-
+-    if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
+-        return -2;
+-    }
+-
+-    return count;
+-}
+-
+ int32_t
+ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+               int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+               struct iatt *postparent)
+ {
+-    pl_inode_t *pl_inode;
+-
+-    if (op_ret >= 0) {
+-        pl_inode = pl_inode_get(this, inode, NULL);
+-        if (pl_inode == NULL) {
+-            PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
+-                            NULL);
+-            return 0;
+-        }
+-
+-        pthread_mutex_lock(&pl_inode->mutex);
+-
+-        /* We only update the link count if we previously didn't know it.
+-         * Doing it always can lead to races since lookup is not executed
+-         * atomically most of the times. */
+-        if (pl_inode->links == -2) {
+-            pl_inode->links = pl_check_link_count(xdata);
+-            if (buf->ia_type == IA_IFDIR) {
+-                /* Directories have at least 2 links. To avoid special handling
+-                 * for directories, we simply decrement the value here to make
+-                 * them equivalent to regular files. */
+-                pl_inode->links--;
+-            }
+-        }
+-
+-        pthread_mutex_unlock(&pl_inode->mutex);
+-    }
+-
+     PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
+                     postparent);
++
+     return 0;
+ }
+ 
+ int32_t
+ pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+ {
+-    int32_t error;
++    PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
++    STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ 
+-    error = pl_request_link_count(&xdata);
+-    if (error == 0) {
+-        PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+-        STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+-                   FIRST_CHILD(this)->fops->lookup, loc, xdata);
+-        dict_unref(xdata);
+-    } else {
+-        STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
+-    }
+     return 0;
+ }
+ 
+@@ -3881,9 +3801,7 @@ unlock:
+             __dump_posixlks(pl_inode);
+         }
+ 
+-        gf_proc_dump_write("links", "%d", pl_inode->links);
+         gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
+-        gf_proc_dump_write("removed", "%u", pl_inode->removed);
+     }
+     pthread_mutex_unlock(&pl_inode->mutex);
+ 
+@@ -4508,21 +4426,9 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+             int32_t op_errno, inode_t *inode, struct iatt *buf,
+             struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+ {
+-    pl_inode_t *pl_inode = (pl_inode_t *)cookie;
+-
+-    if (op_ret >= 0) {
+-        pthread_mutex_lock(&pl_inode->mutex);
+-
+-        /* TODO: can happen pl_inode->links == 0 ? */
+-        if (pl_inode->links >= 0) {
+-            pl_inode->links++;
+-        }
+-
+-        pthread_mutex_unlock(&pl_inode->mutex);
+-    }
+-
+     PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
+                                preparent, postparent, xdata);
++
+     return 0;
+ }
+ 
+@@ -4530,18 +4436,10 @@ int
+ pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+         dict_t *xdata)
+ {
+-    pl_inode_t *pl_inode;
+-
+-    pl_inode = pl_inode_get(this, oldloc->inode, NULL);
+-    if (pl_inode == NULL) {
+-        STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+-                            NULL);
+-        return 0;
+-    }
+-
+     PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
+-    STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
+-                      FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
++    STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
++               FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
++
+     return 0;
+ }
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0589-extras-fix-for-postscript-failure-on-logrotation-of-.patch b/SOURCES/0589-extras-fix-for-postscript-failure-on-logrotation-of-.patch
new file mode 100644
index 0000000..861791f
--- /dev/null
+++ b/SOURCES/0589-extras-fix-for-postscript-failure-on-logrotation-of-.patch
@@ -0,0 +1,63 @@
+From 0bb71e1492b1ad442758399eb8dcb5f087d77f12 Mon Sep 17 00:00:00 2001
+From: Nikhil Ladha <nladha@redhat.com>
+Date: Wed, 28 Apr 2021 02:14:27 +0530
+Subject: [PATCH 589/610] extras: fix for postscript failure on logrotation of
+ snapd logs (#2310)
+
+Issue:
+On executing the logrotate command, the postscript runs as a separate process,
+and when we do a grep for the snapd process it returns the PID of that
+short-term process as well, and executing a kill on that throws the error.
+A similar error can be reproduced by replacing the killall used for brick
+log rotation with a for loop over PIDs.
+
+Fix:
+Use the killall command on the list of snapd processes instead of
+using the kill command to individually kill them.
+
+>Fixes: #2360
+>Change-Id: I1ad6e3e4d74128706e71900d02e715635294ff72
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream patch: https://github.com/gluster/glusterfs/pull/2310
+BUG: 1668303
+
+Change-Id: I59910fc3660e11e131b1aa813848c2e19cbffefd
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279533
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/glusterfs-logrotate | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate
+index 75f700e..2b9028b 100644
+--- a/extras/glusterfs-logrotate
++++ b/extras/glusterfs-logrotate
+@@ -45,3 +45,22 @@
+   compress
+   delaycompress
+ }
++
++# Rotate snapd log
++/var/log/glusterfs/snaps/*/*.log {
++    sharedscripts
++    weekly
++    maxsize 10M
++    minsize 100k
++
++    # 6 months of logs are good enough
++    rotate 26
++
++    missingok
++    compress
++    delaycompress
++    notifempty
++    postrotate
++    /usr/bin/killall -HUP `pgrep -f "glusterfs.*snapd"` > /dev/null 2>&1 || true
++    endscript
++}
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0590-cluster-afr-Don-t-check-for-stale-entry-index.patch b/SOURCES/0590-cluster-afr-Don-t-check-for-stale-entry-index.patch
new file mode 100644
index 0000000..c7ff40a
--- /dev/null
+++ b/SOURCES/0590-cluster-afr-Don-t-check-for-stale-entry-index.patch
@@ -0,0 +1,128 @@
+From 87138f86b8cb98d1c9d1a4c9a2393e7978d20b1d Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Tue, 5 Oct 2021 12:33:01 +0530
+Subject: [PATCH 590/610] cluster/afr: Don't check for stale entry-index
+
+Problem:
+In every entry index heal there is a check to see if the
+index is stale or not.
+    1. If a file is created when the brick is down this
+will lead to an extra index lookup because the name is not stale.
+    2. If a file is deleted when the brick is down this will also lead to
+      an extra index lookup because the name is not stale.
+    3. If a file is created and deleted when the brick is down then the
+      index is stale and this will save entry-heal i.e. 2 entrylks and 2 lookups
+
+Since 1, 2 happen significantly more than 3, this is a bad tradeoff.
+
+Fix:
+Let stale index be removed as part of normal entry heal detecting 'the
+name is already deleted' code path.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2612
+> fixes: gluster#2611
+> Change-Id: I29bcc07f2480877a83b30dbd7e2e5631a74df8e8
+> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
+
+BUG: 1994593
+Change-Id: I29bcc07f2480877a83b30dbd7e2e5631a74df8e8
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279606
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 46 +++++++--------------------
+ 1 file changed, 11 insertions(+), 35 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index a17dd93..14b7417 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -933,37 +933,8 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
+                                    loc_t *parent, void *data)
+ {
+     int ret = 0;
+-    loc_t loc = {
+-        0,
+-    };
+-    struct iatt iatt = {
+-        0,
+-    };
+     afr_granular_esh_args_t *args = data;
+ 
+-    /* Look up the actual inode associated with entry. If the lookup returns
+-     * ESTALE or ENOENT, then it means we have a stale index. Remove it.
+-     * This is analogous to the check in afr_shd_index_heal() except that
+-     * here it is achieved through LOOKUP and in afr_shd_index_heal() through
+-     * a GETXATTR.
+-     */
+-
+-    loc.inode = inode_new(args->xl->itable);
+-    loc.parent = inode_ref(args->heal_fd->inode);
+-    gf_uuid_copy(loc.pargfid, loc.parent->gfid);
+-    loc.name = entry->d_name;
+-
+-    ret = syncop_lookup(args->xl, &loc, &iatt, NULL, NULL, NULL);
+-    if ((ret == -ENOENT) || (ret == -ESTALE)) {
+-        /* The name indices under the pgfid index dir are guaranteed
+-         * to be regular files. Hence the hardcoding.
+-         */
+-        afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+-        ret = 0;
+-        goto out;
+-    }
+-    /* TBD: afr_shd_zero_xattrop? */
+-
+     ret = afr_selfheal_entry_dirent(args->frame, args->xl, args->heal_fd,
+                                     entry->d_name, parent->inode, subvol,
+                                     _gf_false);
+@@ -974,8 +945,6 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
+     if (ret == -1)
+         args->mismatch = _gf_true;
+ 
+-out:
+-    loc_wipe(&loc);
+     return ret;
+ }
+ 
+@@ -1050,7 +1019,9 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+     local = frame->local;
+ 
+     gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+-           "performing entry selfheal on %s", uuid_utoa(fd->inode->gfid));
++           "performing %s entry selfheal on %s",
++           (local->need_full_crawl ? "full" : "granular"),
++           uuid_utoa(fd->inode->gfid));
+ 
+     for (i = 0; i < priv->child_count; i++) {
+         /* Expunge */
+@@ -1112,6 +1083,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
+     afr_local_t *local = NULL;
+     afr_private_t *priv = NULL;
+     gf_boolean_t did_sh = _gf_true;
++    char *heal_type = "granular entry";
+ 
+     priv = this->private;
+     local = frame->local;
+@@ -1194,11 +1166,15 @@ postop_unlock:
+     afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL,
+                            postop_lock, NULL);
+ out:
+-    if (did_sh)
+-        afr_log_selfheal(fd->inode->gfid, this, ret, "entry", source, sources,
++    if (did_sh) {
++        if (local->need_full_crawl) {
++            heal_type = "full entry";
++        }
++        afr_log_selfheal(fd->inode->gfid, this, ret, heal_type, source, sources,
+                          healed_sinks);
+-    else
++    } else {
+         ret = 1;
++    }
+ 
+     if (locked_replies)
+         afr_replies_wipe(locked_replies, priv->child_count);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0591-afr-check-for-valid-iatt.patch b/SOURCES/0591-afr-check-for-valid-iatt.patch
new file mode 100644
index 0000000..8f1e48e
--- /dev/null
+++ b/SOURCES/0591-afr-check-for-valid-iatt.patch
@@ -0,0 +1,44 @@
+From 19460ebc988795eeabaeb8e25d6eba9a3cf2864b Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Mon, 4 Oct 2021 12:44:21 +0530
+Subject: [PATCH 591/610] afr: check for valid iatt
+
+Problem:
+If the entry being processed by afr_shd_anon_inode_cleaner() is no
+longer present, gfid lookup fails with ENOENT on all bricks and iatt
+will never be assigned, causing a crash due to null dereference.
+
+Fix:
+Add a null-check for iatt.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2660
+> Fixes: gluster#2659
+> Change-Id: I6abfc8063677861ce9388ca4efdf491ec956dc74
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+BUG: 1995029
+Change-Id: I6abfc8063677861ce9388ca4efdf491ec956dc74
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279529
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heald.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
+index 18aed93..bc720cf 100644
+--- a/xlators/cluster/afr/src/afr-self-heald.c
++++ b/xlators/cluster/afr/src/afr-self-heald.c
+@@ -870,7 +870,7 @@ afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+     }
+ 
+     /*Inode is deleted from subvol*/
+-    if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
++    if (count == 1 || (iatt && iatt->ia_type != IA_IFDIR && multiple_links)) {
+         gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+                AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+                priv->anon_inode_name, entry->d_name, subvol->name);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0592-md-cache-fix-integer-signedness-mismatch.patch b/SOURCES/0592-md-cache-fix-integer-signedness-mismatch.patch
new file mode 100644
index 0000000..94cfe88
--- /dev/null
+++ b/SOURCES/0592-md-cache-fix-integer-signedness-mismatch.patch
@@ -0,0 +1,119 @@
+From be3448ed5d9d59752cff4df8325ee67eb7d41531 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Mon, 19 Jul 2021 06:56:18 +0200
+Subject: [PATCH 592/610] md-cache: fix integer signedness mismatch
+
+md-cache uses a mechanism based on a generation number to detect
+modifications made by other clients to the entries and invalidate
+the cached data.
+
+This generation number is a 32 bit integer. When it overflows,
+special management is done to avoid problems. This overflow condition
+is tracked with a single bit.
+
+For many fops, when they are received, the overflow bit and the
+current generation number are recorded in a single 64-bit value
+which is used later in the cbk.
+
+This is the problematic function:
+
+    uint64_t
+    __mdc_get_generation(xlator_t *this, struct md_cache *mdc)
+    {
+        uint64_t gen = 0, rollover;
+        struct mdc_conf *conf = NULL;
+
+        conf = this->private;
+
+        gen = GF_ATOMIC_INC(conf->generation);
+        if (gen == 0) {
+            gf_log("MDC", GF_LOG_NOTICE, "%p Reset 1", mdc);
+            mdc->gen_rollover = !mdc->gen_rollover;
+            gen = GF_ATOMIC_INC(conf->generation);
+            mdc->ia_time = 0;
+            mdc->generation = 0;
+            mdc->invalidation_time = gen - 1;
+        }
+
+        rollover = mdc->gen_rollover;
+        gen |= (rollover << 32);
+        return gen;
+    }
+
+'conf->generation' is declared as an atomic signed 32-bit integer,
+and 'gen' is an unsigned 64-bit value. When 'gen' is assigned from
+a signed int, the sign bit is extended to fill the high 32 bits of
+'gen'. If the counter has overflowed the maximum signed positive
+value, it will become negative (sign bit = 1).
+
+In this case, when 'rollover' is later combined with 'gen', all the
+high bits remain at '1'.
+
+This value is used later in 'mdc_inode_iatt_set_validate' during
+callback processing. The overflow condition and generation numbers
+from when the operation was received are recovered this way:
+
+    rollover = incident_time >> 32;
+    incident_time = (incident_time & 0xffffffff);
+
+('incident_time' is the saved value from '__mdc_get_generation').
+
+So here rollover will be 0xffffffff, when it's expected to be 0
+or 1 only. When this is compared later with the cached overflow
+bit, it doesn't match, which prevents updating the cached info.
+
+This is bad in general, but it's even worse when an entry is not
+cached and 'rollover' is 0xffffffff the first time. When md-cache
+doesn't have cached data it assumes everything is 0. This causes
+a mismatch, which sends an invalidation request to the kernel, but
+since the 'rollover' doesn't match, the cached data is not updated.
+So the next time the cached data is checked, it will also send an
+invalidation to the kernel, indefinitely.
+
+This patch fixes two things:
+
+1. The 'generation' field is made unsigned to avoid sign extension.
+2. Invalidation requests are only sent if we already had valid cached
+   data. Otherwise it doesn't make sense to send an invalidation.
+
+Upstream patch:
+> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2619
+> Fixes: #2617
+> Change-Id: Ie40e68288cf143e1bc1a40f46da98f51bb2d6864
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+BUG: 1904137
+Change-Id: Ie40e68288cf143e1bc1a40f46da98f51bb2d6864
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279188
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/md-cache/src/md-cache.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
+index bbbee3b..e0256d6 100644
+--- a/xlators/performance/md-cache/src/md-cache.c
++++ b/xlators/performance/md-cache/src/md-cache.c
+@@ -79,7 +79,7 @@ struct mdc_conf {
+     gf_boolean_t cache_statfs;
+     struct mdc_statfs_cache statfs_cache;
+     char *mdc_xattr_str;
+-    gf_atomic_int32_t generation;
++    gf_atomic_uint32_t generation;
+ };
+ 
+ struct mdc_local;
+@@ -537,7 +537,7 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
+             (iatt->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+             (iatt->ia_ctime != mdc->md_ctime) ||
+             (iatt->ia_ctime_nsec != mdc->md_ctime_nsec)) {
+-            if (conf->global_invalidation &&
++            if (conf->global_invalidation && mdc->valid &&
+                 (!prebuf || (prebuf->ia_mtime != mdc->md_mtime) ||
+                  (prebuf->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+                  (prebuf->ia_ctime != mdc->md_ctime) ||
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0593-dht-explicit-null-dereference.patch b/SOURCES/0593-dht-explicit-null-dereference.patch
new file mode 100644
index 0000000..4ad9eea
--- /dev/null
+++ b/SOURCES/0593-dht-explicit-null-dereference.patch
@@ -0,0 +1,58 @@
+From 76c9faf5c750428e5eb69462b82ee0c12cbdabc0 Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Fri, 25 Sep 2020 18:39:51 +0530
+Subject: [PATCH 593/610] dht: explicit null dereference
+
+Added a null check for uuid_list_copy, to avoid
+null dereference in strtok_r() in case of strdup()
+failure.
+
+CID: 1325612
+CID: 1274223
+
+>Updates: #1060
+
+>Change-Id: I641a5068cd76d7b2ed92eccf39e7f97d6f7b2480
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/25046
+BUG: 1997447
+
+Change-Id: I576b4ce610948bdb84eb30377a684c54df718bdc
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280063
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 2 ++
+ xlators/cluster/dht/src/dht-shared.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index edfc6e7..e6a16ff 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -4296,6 +4296,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+         index = conf->local_subvols_cnt;
+ 
+         uuid_list_copy = gf_strdup(uuid_list);
++        if (!uuid_list_copy)
++            goto unlock;
+ 
+         for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
+              uuid_str = next_uuid_str) {
+diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
+index 58e3339..cca272a 100644
+--- a/xlators/cluster/dht/src/dht-shared.c
++++ b/xlators/cluster/dht/src/dht-shared.c
+@@ -567,6 +567,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
+     pattern_str = strtok_r(data, ",", &tmp_str);
+     while (pattern_str) {
+         dup_str = gf_strdup(pattern_str);
++        if (!dup_str)
++            goto out;
+         pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
+         if (!pattern_list) {
+             goto out;
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0594-glusterd-resource-leaks.patch b/SOURCES/0594-glusterd-resource-leaks.patch
new file mode 100644
index 0000000..ccc2f3b
--- /dev/null
+++ b/SOURCES/0594-glusterd-resource-leaks.patch
@@ -0,0 +1,52 @@
+From 663df92f9b4b9f35ae10f84487494829987e2f58 Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Fri, 25 Sep 2020 17:56:19 +0530
+Subject: [PATCH 594/610] glusterd: resource leaks
+
+Issue:
+iobref was not freed before exiting the function.
+
+Fix:
+Modified the code to free iobref before exiting.
+
+CID: 1430107
+>Updates: #1060
+
+>Change-Id: I89351b3aa645792eb8dda6292d1e559057b02d8b
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/25042
+BUG: 1997447
+
+Change-Id: Iea56afca015a7c0f15ab32f490ea27f5ea323a07
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280066
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index 6d40be5..c037933 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -6042,7 +6042,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path,
+     GF_ATOMIC_INC(conf->blockers);
+     ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
+                           iobref, frame, NULL, 0, NULL, 0, NULL);
+-    return ret;
+ 
+ free_iobref:
+     iobref_unref(iobref);
+@@ -6051,7 +6050,7 @@ maybe_free_iobuf:
+         iobuf_unref(iobuf);
+     }
+ err:
+-    return -1;
++    return ret;
+ }
+ 
+ extern size_t
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0595-glusterd-use-after-free-coverity-issue.patch b/SOURCES/0595-glusterd-use-after-free-coverity-issue.patch
new file mode 100644
index 0000000..7430838
--- /dev/null
+++ b/SOURCES/0595-glusterd-use-after-free-coverity-issue.patch
@@ -0,0 +1,51 @@
+From 025718f1734655c411475ea338cee1659d96763e Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Thu, 3 Sep 2020 15:42:45 +0530
+Subject: [PATCH 595/610] glusterd: use after free (coverity issue)
+
+Issue:
+dict_unref is called on the same dict again,
+in the out label of the code, which causes the
+use after free issue.
+
+Fix:
+Set the dict to NULL after unref, to avoid
+use after free issue.
+
+CID: 1430127
+
+>Updates: #1060
+
+>Change-Id: Ide9a5cbc5f496705c671e72b0260da6d4c06f16d
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/24946
+BUG: 1997447
+
+Change-Id: Id1e58cd6226b9329ad49bd5b75ee96a3a5ec5ab7
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280067
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+index 386eed2..b0fa490 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+@@ -2039,8 +2039,9 @@ glusterd_update_snaps_synctask(void *opaque)
+                        "Failed to remove snap %s", snap->snapname);
+                 goto out;
+             }
+-            if (dict)
+-                dict_unref(dict);
++
++            dict_unref(dict);
++            dict = NULL;
+         }
+         snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
+         ret = dict_get_int32(peer_data, buf, &val);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0596-locks-null-dereference.patch b/SOURCES/0596-locks-null-dereference.patch
new file mode 100644
index 0000000..4ad016f
--- /dev/null
+++ b/SOURCES/0596-locks-null-dereference.patch
@@ -0,0 +1,43 @@
+From 099fcac6fecef6fc367d8fcae8442195f3f174db Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Fri, 25 Sep 2020 18:19:39 +0530
+Subject: [PATCH 596/610] locks: null dereference
+
+Added a null check before executing the strtok_r()
+to avoid null dereference in case of strdup() failure.
+
+CID: 1407938
+>Updates: #1060
+
+>Change-Id: Iec6e72ae8cb54f6d0a287615c43756325b2026ec
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/25045
+BUG: 1997447
+
+Change-Id: I47e6e2402badaf4103607b4164f19142a99a2f71
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280065
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/locks/src/posix.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index d5effef..03c4907 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -494,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
+     char *save_ptr = NULL;
+ 
+     tmp_key = gf_strdup(key);
++    if (!tmp_key)
++        return -1;
++
+     strtok_r(tmp_key, ":", &save_ptr);
+     if (!*save_ptr) {
+         gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL,
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0597-glusterd-memory-deallocated-twice.patch b/SOURCES/0597-glusterd-memory-deallocated-twice.patch
new file mode 100644
index 0000000..7e2c49f
--- /dev/null
+++ b/SOURCES/0597-glusterd-memory-deallocated-twice.patch
@@ -0,0 +1,163 @@
+From 59c05230c0df58765e30553c66bbcc0c9965d362 Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Tue, 11 Aug 2020 23:12:26 +0530
+Subject: [PATCH 597/610] glusterd: memory deallocated twice
+
+Issue:
+If the pointer tmpiter is destroyed in the function
+body, the out label still checks the same pointer
+and tries to destroy it again.
+
+Fix:
+So, instead of passing the ptr by value, pass it
+by reference; then setting the ptr to NULL in the
+function persists in the calling function, and the
+next time gf_store_iter_destroy() is called it
+won't try to free the ptr again.
+
+CID: 1430122
+
+>Updates: #1060
+
+>Change-Id: I019cea8e301c7cc87be792c03b58722fc96f04ef
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/24855
+BUG: 1997447
+
+Change-Id: Ib403efd08d47a69d25f291ae61c9cbfcaaa05da8
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280076
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/glusterfs/store.h         |  2 +-
+ libglusterfs/src/store.c                   | 12 +++++++-----
+ xlators/mgmt/glusterd/src/glusterd-store.c | 16 ++++++++--------
+ 3 files changed, 16 insertions(+), 14 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
+index 68a20ad..76af2df 100644
+--- a/libglusterfs/src/glusterfs/store.h
++++ b/libglusterfs/src/glusterfs/store.h
+@@ -93,7 +93,7 @@ int32_t
+ gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value);
+ 
+ int32_t
+-gf_store_iter_destroy(gf_store_iter_t *iter);
++gf_store_iter_destroy(gf_store_iter_t **iter);
+ 
+ char *
+ gf_store_strerror(gf_store_op_errno_t op_errno);
+diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
+index 3af627a..e4931bf 100644
+--- a/libglusterfs/src/store.c
++++ b/libglusterfs/src/store.c
+@@ -606,23 +606,25 @@ out:
+ }
+ 
+ int32_t
+-gf_store_iter_destroy(gf_store_iter_t *iter)
++gf_store_iter_destroy(gf_store_iter_t **iter)
+ {
+     int32_t ret = -1;
+ 
+-    if (!iter)
++    if (!(*iter))
+         return 0;
+ 
+     /* gf_store_iter_new will not return a valid iter object with iter->file
+      * being NULL*/
+-    ret = fclose(iter->file);
++    ret = fclose((*iter)->file);
+     if (ret)
+         gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+                "Unable"
+                " to close file: %s, ret: %d",
+-               iter->filepath, ret);
++               (*iter)->filepath, ret);
++
++    GF_FREE(*iter);
++    *iter = NULL;
+ 
+-    GF_FREE(iter);
+     return ret;
+ }
+ 
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index a8651d8..e027575 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -2576,7 +2576,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo)
+     ret = 0;
+ 
+ out:
+-    if (gf_store_iter_destroy(iter)) {
++    if (gf_store_iter_destroy(&iter)) {
+         gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+                "Failed to destroy store iter");
+         ret = -1;
+@@ -2895,13 +2895,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
+     ret = 0;
+ 
+ out:
+-    if (gf_store_iter_destroy(tmpiter)) {
++    if (gf_store_iter_destroy(&tmpiter)) {
+         gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+                "Failed to destroy store iter");
+         ret = -1;
+     }
+ 
+-    if (gf_store_iter_destroy(iter)) {
++    if (gf_store_iter_destroy(&iter)) {
+         gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+                "Failed to destroy store iter");
+         ret = -1;
+@@ -3067,7 +3067,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
+     ret = 0;
+ 
+ out:
+-    if (gf_store_iter_destroy(iter)) {
++    if (gf_store_iter_destroy(&iter)) {
+         gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+                "Failed to destroy store iter");
+         ret = -1;
+@@ -3379,7 +3379,7 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
+     ret = 0;
+ 
+ out:
+-    if (gf_store_iter_destroy(iter)) {
++    if (gf_store_iter_destroy(&iter)) {
+         gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+                "Failed to destroy store iter");
+         ret = -1;
+@@ -3574,7 +3574,7 @@ glusterd_store_retrieve_options(xlator_t *this)
+         goto out;
+     ret = 0;
+ out:
+-    (void)gf_store_iter_destroy(iter);
++    (void)gf_store_iter_destroy(&iter);
+     gf_store_handle_destroy(shandle);
+     return ret;
+ }
+@@ -4026,7 +4026,7 @@ glusterd_store_update_snap(glusterd_snap_t *snap)
+     ret = 0;
+ 
+ out:
+-    if (gf_store_iter_destroy(iter)) {
++    if (gf_store_iter_destroy(&iter)) {
+         gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+                "Failed to destroy store iter");
+         ret = -1;
+@@ -4774,7 +4774,7 @@ glusterd_store_retrieve_peers(xlator_t *this)
+         is_ok = _gf_true;
+ 
+     next:
+-        (void)gf_store_iter_destroy(iter);
++        (void)gf_store_iter_destroy(&iter);
+ 
+         if (!is_ok) {
+             gf_log(this->name, GF_LOG_WARNING,
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0598-glusterd-null-dereference.patch b/SOURCES/0598-glusterd-null-dereference.patch
new file mode 100644
index 0000000..fac1b8f
--- /dev/null
+++ b/SOURCES/0598-glusterd-null-dereference.patch
@@ -0,0 +1,51 @@
+From 84aaaded4e958a10c7492233c053e3c681f2d575 Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Thu, 2 Jul 2020 18:10:32 +0530
+Subject: [PATCH 598/610] glusterd: null dereference
+
+Issue:
+There has been either an explicit null
+dereference or a dereference after null
+check in some cases.
+
+Fix:
+Added the proper condition for null check
+and fixed null dereferencing.
+
+CID: 1430106 : Dereference after null check
+CID: 1430120 : Explicit null dereferenced
+CID: 1430132 : Dereference after null check
+CID: 1430134 : Dereference after null check
+
+>Change-Id: I7e795cf9f7146a633097c26a766f16b159881fa3
+>Updates: #1060
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/24664
+BUG: 1997447
+
+Change-Id: I2b2632c93094d0e7b9fbd65a2ca2b0eaf6212d79
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280083
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 05c9e11..f1807cd 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1797,7 +1797,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+     pending_node = NULL;
+     ret = 0;
+ out:
+-    if (pending_node)
++    if (pending_node && pending_node->node)
+         glusterd_pending_node_put_rpc(pending_node);
+ 
+     if (rsp_dict)
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0599-afr-null-dereference-nagative-value.patch b/SOURCES/0599-afr-null-dereference-nagative-value.patch
new file mode 100644
index 0000000..7d59cc7
--- /dev/null
+++ b/SOURCES/0599-afr-null-dereference-nagative-value.patch
@@ -0,0 +1,59 @@
+From 4186f81596a481a5c0c5a707fc9b2358ee8f49f0 Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Fri, 3 Jul 2020 17:18:33 +0530
+Subject: [PATCH 599/610] afr: null dereference & nagative value
+
+Added a check for NULL before dereferencing
+the object as it may be NULL in a few cases
+inside the function. Also, added a check for
+the negative value of gfid_idx.
+
+CID: 1430140
+CID: 1430145
+
+>Change-Id: Ib7d23459b48bbc471dbcccab6d20572261882d11
+>Updates: #1060
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/24671
+BUG: 1997447
+
+Change-Id: I7e705a106d97001b67f5cde8589413c0c24ee507
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280085
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heal-common.c | 2 +-
+ xlators/cluster/afr/src/afr-self-heal-name.c   | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index 0954d2c..cbd5117 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -140,7 +140,7 @@ heal:
+         }
+     }
+ out:
+-    if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) {
++    if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
+         ret = -afr_final_errno(local, priv);
+     }
+     loc_wipe(&loc);
+diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
+index 9ec2066..c5ab8d7 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-name.c
++++ b/xlators/cluster/afr/src/afr-self-heal-name.c
+@@ -353,7 +353,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+     ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
+                                      replies, gfid, locked_on, source, sources,
+                                      is_gfid_absent, &gfid_idx);
+-    if (ret)
++    if (ret || (gfid_idx < 0))
+         return ret;
+ 
+     ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0600-dht-xlator-integer-handling-issue.patch b/SOURCES/0600-dht-xlator-integer-handling-issue.patch
new file mode 100644
index 0000000..c3970ac
--- /dev/null
+++ b/SOURCES/0600-dht-xlator-integer-handling-issue.patch
@@ -0,0 +1,161 @@
+From 1cd16553d436fa703f5e18d71c35108d0e179e8b Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Thu, 9 Apr 2020 11:36:34 +0530
+Subject: [PATCH 600/610] dht xlator: integer handling issue
+
+Issue: The ret value is passed to the function
+instead of the proper errno value
+
+Fix: Passing the errno generated to
+the log function
+
+CID: 1415824 : Improper use of negative value
+CID: 1420205 : Improper use of negative value
+>Change-Id: Iaa7407ebd03eda46a2c027695e6bf0f598b371b2
+>Updates: #1060
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/24314
+BUG: 1997447
+
+Change-Id: Ibb7f432dbcc9ffd8dff6be6f984a6705894d6bef
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280086
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c   | 12 ++++++++----
+ xlators/cluster/dht/src/dht-common.h   |  2 +-
+ xlators/cluster/dht/src/dht-helper.c   |  9 ++++++---
+ xlators/cluster/dht/src/dht-selfheal.c |  8 +++++---
+ 4 files changed, 20 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index e6a16ff..5eaaa1e 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -672,13 +672,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
+ 
+         if (local->need_xattr_heal && !heal_path) {
+             local->need_xattr_heal = 0;
+-            ret = dht_dir_xattr_heal(this, local);
+-            if (ret)
+-                gf_msg(this->name, GF_LOG_ERROR, ret,
++            ret = dht_dir_xattr_heal(this, local, &op_errno);
++            if (ret) {
++                gf_msg(this->name, GF_LOG_ERROR, op_errno,
+                        DHT_MSG_DIR_XATTR_HEAL_FAILED,
+                        "xattr heal failed for "
+                        "directory  gfid is %s ",
+                        gfid_local);
++            }
+         }
+     }
+ 
+@@ -1205,7 +1206,7 @@ dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
+    to non hashed subvol
+ */
+ int
+-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
++dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
+ {
+     dht_local_t *copy_local = NULL;
+     call_frame_t *copy = NULL;
+@@ -1217,6 +1218,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+                "No gfid exists for path %s "
+                "so healing xattr is not possible",
+                local->loc.path);
++        *op_errno = EIO;
+         goto out;
+     }
+ 
+@@ -1230,6 +1232,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+                    "Memory allocation failed "
+                    "for path %s gfid %s ",
+                    local->loc.path, gfid_local);
++            *op_errno = ENOMEM;
+             DHT_STACK_DESTROY(copy);
+         } else {
+             copy_local->stbuf = local->stbuf;
+@@ -1244,6 +1247,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+                        "Synctask creation failed to heal xattr "
+                        "for path %s gfid %s ",
+                        local->loc.path, gfid_local);
++                *op_errno = ENOMEM;
+                 DHT_STACK_DESTROY(copy);
+             }
+         }
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index b856c68..1cb1c0c 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -1493,7 +1493,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+                        dict_t *src, int *uret, int *uflag);
+ 
+ int
+-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local);
++dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
+ 
+ int32_t
+ dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
+diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
+index 4c3940a..d3444b3 100644
+--- a/xlators/cluster/dht/src/dht-helper.c
++++ b/xlators/cluster/dht/src/dht-helper.c
+@@ -2105,6 +2105,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
+     dht_local_t *local = NULL;
+     xlator_t *this = NULL;
+     int ret = -1;
++    int op_errno = 0;
+ 
+     local = heal_frame->local;
+     main_frame = local->main_frame;
+@@ -2114,10 +2115,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
+     dht_set_fixed_dir_stat(&local->postparent);
+     if (local->need_xattr_heal) {
+         local->need_xattr_heal = 0;
+-        ret = dht_dir_xattr_heal(this, local);
+-        if (ret)
+-            gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
++        ret = dht_dir_xattr_heal(this, local, &op_errno);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, op_errno,
++                   DHT_MSG_DIR_XATTR_HEAL_FAILED,
+                    "xattr heal failed for directory  %s ", local->loc.path);
++        }
+     }
+ 
+     DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
+diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
+index 8af7301..2da9817 100644
+--- a/xlators/cluster/dht/src/dht-selfheal.c
++++ b/xlators/cluster/dht/src/dht-selfheal.c
+@@ -1471,6 +1471,7 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
+ {
+     int missing_dirs = 0;
+     int i = 0;
++    int op_errno = 0;
+     int ret = -1;
+     dht_local_t *local = NULL;
+     xlator_t *this = NULL;
+@@ -1493,13 +1494,14 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
+         if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+             if (local->need_xattr_heal) {
+                 local->need_xattr_heal = 0;
+-                ret = dht_dir_xattr_heal(this, local);
+-                if (ret)
+-                    gf_msg(this->name, GF_LOG_ERROR, ret,
++                ret = dht_dir_xattr_heal(this, local, &op_errno);
++                if (ret) {
++                    gf_msg(this->name, GF_LOG_ERROR, op_errno,
+                            DHT_MSG_DIR_XATTR_HEAL_FAILED,
+                            "%s:xattr heal failed for "
+                            "directory (gfid = %s)",
+                            local->loc.path, local->gfid);
++                }
+             } else {
+                 if (!gf_uuid_is_null(local->gfid))
+                     gf_uuid_copy(loc->gfid, local->gfid);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0601-coverity-resource-leak-2321.patch b/SOURCES/0601-coverity-resource-leak-2321.patch
new file mode 100644
index 0000000..35dc964
--- /dev/null
+++ b/SOURCES/0601-coverity-resource-leak-2321.patch
@@ -0,0 +1,99 @@
+From 6d7049a19029331266f70f68d860bbccef01a35d Mon Sep 17 00:00:00 2001
+From: Nikhil Ladha <nladha@redhat.com>
+Date: Thu, 8 Jul 2021 11:26:54 +0530
+Subject: [PATCH 601/610] coverity: resource leak (#2321)
+
+Issue:
+Variable `arg` is not freed before the function exits,
+and leads to resource leak.
+
+Fix:
+Free the arg variable if the status of function call
+`glusterd_compare_friend_volume` is
+`GLUSTERD_VOL_COMP_UPDATE_REQ`, or if the `glusterd_launch_synctask`
+fails to start the process.
+
+And, added a check for return value on calling
+`glusterd_launch_synctask` function and exit if the
+thread creation fails.
+
+CID: 1401716
+>Updates: #1060
+
+>Change-Id: I4abd621771f88853d8d01e9039cdee2f3d862c4f
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://github.com/gluster/glusterfs/pull/2321
+BUG: 1997447
+
+Change-Id: Ida81dfcd58c5ef45d3ae036d6bd6b36dc6693538
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280090
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 10 +++++++---
+ xlators/mgmt/glusterd/src/glusterd-utils.h |  2 +-
+ 2 files changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index c037933..cec9c20 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -5371,6 +5371,7 @@ glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status,
+ 
+         if (GLUSTERD_VOL_COMP_RJT == *status) {
+             ret = 0;
++            update = _gf_false;
+             goto out;
+         }
+         if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) {
+@@ -5385,11 +5386,12 @@ glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status,
+          * first brick to come up before attaching the subsequent bricks
+          * in case brick multiplexing is enabled
+          */
+-        glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg);
++        ret = glusterd_launch_synctask(glusterd_import_friend_volumes_synctask,
++                                       arg);
+     }
+ 
+ out:
+-    if (ret && arg) {
++    if ((ret || !update) && arg) {
+         dict_unref(arg->peer_data);
+         dict_unref(arg->peer_ver_data);
+         GF_FREE(arg);
+@@ -13115,7 +13117,7 @@ gd_default_synctask_cbk(int ret, call_frame_t *frame, void *opaque)
+     return ret;
+ }
+ 
+-void
++int
+ glusterd_launch_synctask(synctask_fn_t fn, void *opaque)
+ {
+     xlator_t *this = NULL;
+@@ -13131,6 +13133,8 @@ glusterd_launch_synctask(synctask_fn_t fn, void *opaque)
+         gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SPAWN_SVCS_FAIL,
+                "Failed to spawn bricks"
+                " and other volume related services");
++
++    return ret;
+ }
+ 
+ /*
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index 4541471..3f4f3b8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -681,7 +681,7 @@ int32_t
+ glusterd_take_lvm_snapshot(glusterd_brickinfo_t *brickinfo,
+                            char *origin_brick_path);
+ 
+-void
++int
+ glusterd_launch_synctask(synctask_fn_t fn, void *opaque);
+ 
+ int
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0602-coverity-null-dereference-2395.patch b/SOURCES/0602-coverity-null-dereference-2395.patch
new file mode 100644
index 0000000..6edc3aa
--- /dev/null
+++ b/SOURCES/0602-coverity-null-dereference-2395.patch
@@ -0,0 +1,87 @@
+From 2ff83650a5f05e3f06853df6d79d3b18f88dfb23 Mon Sep 17 00:00:00 2001
+From: Nikhil Ladha <nladha@redhat.com>
+Date: Thu, 6 May 2021 10:45:46 +0530
+Subject: [PATCH 602/610] coverity: null dereference (#2395)
+
+Fix:
+Updated the code to make it more readable and fixed
+the NULL dereferencing.
+
+CID: 1234622
+>Updates: #1060
+
+>Change-Id: I05bd203bc46fe84be86398bd664a3485409c3bfe
+>Signed-off-by: nik-redhat <nladha@redhat.com>
+
+Upstream link: https://github.com/gluster/glusterfs/pull/2395
+BUG: 1997447
+
+Change-Id: If39cc85115de673a83b6c97137ea8d1f0f825245
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280093
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-lock.c | 32 +++++++++++++++-----------------
+ 1 file changed, 15 insertions(+), 17 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
+index f9bac4f..6474dfa 100644
+--- a/xlators/cluster/dht/src/dht-lock.c
++++ b/xlators/cluster/dht/src/dht-lock.c
+@@ -914,37 +914,35 @@ dht_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     dht_local_t *local = NULL;
+     int lk_index = 0, call_cnt = 0;
+     char gfid[GF_UUID_BUF_SIZE] = {0};
++    dht_ilock_wrap_t *my_layout;
+ 
+     local = frame->local;
+     lk_index = (long)cookie;
+ 
++    my_layout = &(local->lock[0].layout.my_layout);
++
+     if (op_ret == -1) {
+-        local->lock[0].layout.my_layout.op_ret = -1;
+-        local->lock[0].layout.my_layout.op_errno = op_errno;
+-
+-        if (local && local->lock[0].layout.my_layout.locks[lk_index]) {
+-            uuid_utoa_r(local->lock[0]
+-                            .layout.my_layout.locks[lk_index]
+-                            ->loc.inode->gfid,
+-                        gfid);
+-
+-            gf_msg_debug(
+-                this->name, op_errno,
+-                "inodelk failed on gfid: %s "
+-                "subvolume: %s",
+-                gfid,
+-                local->lock[0].layout.my_layout.locks[lk_index]->xl->name);
++        my_layout->op_ret = -1;
++        my_layout->op_errno = op_errno;
++
++        if (my_layout->locks[lk_index]) {
++            uuid_utoa_r(my_layout->locks[lk_index]->loc.inode->gfid, gfid);
++
++            gf_msg_debug(this->name, op_errno,
++                         "inodelk failed on gfid: %s "
++                         "subvolume: %s",
++                         gfid, my_layout->locks[lk_index]->xl->name);
+         }
+ 
+         goto out;
+     }
+ 
+-    local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true;
++    my_layout->locks[lk_index]->locked = _gf_true;
+ 
+ out:
+     call_cnt = dht_frame_return(frame);
+     if (is_last_call(call_cnt)) {
+-        if (local->lock[0].layout.my_layout.op_ret < 0) {
++        if (my_layout->op_ret < 0) {
+             dht_inodelk_cleanup(frame);
+             return 0;
+         }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0603-Coverity-Resource-leak-fix-CID-1356547.patch b/SOURCES/0603-Coverity-Resource-leak-fix-CID-1356547.patch
new file mode 100644
index 0000000..8c6b53b
--- /dev/null
+++ b/SOURCES/0603-Coverity-Resource-leak-fix-CID-1356547.patch
@@ -0,0 +1,51 @@
+From 015e6cac71b0a0c330f1e4792f9d60214b191f45 Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Thu, 7 Oct 2021 21:07:46 +0530
+Subject: [PATCH 603/610] Coverity: Resource leak fix (CID: 1356547)
+
+Issue:
+In function gf_svc_readdirp() there is a chance that 'local' will be allocated
+memory but not released in the failure path.
+
+Fix:
+Assign 'local' to 'frame->local' immediately after the successful allocation, so
+it will be released by the existing failure path code itself.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2362/
+> Change-Id: I4474dc4d4be5432d169cb7d434728f211054997e
+> Signed-off-by: karthik-us <ksubrahm@redhat.com>
+> Updates: gluster#1060
+
+BUG: 1997447
+Change-Id: I4474dc4d4be5432d169cb7d434728f211054997e
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280100
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/snapview-client/src/snapview-client.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c
+index 9c789ae..e97db89 100644
+--- a/xlators/features/snapview-client/src/snapview-client.c
++++ b/xlators/features/snapview-client/src/snapview-client.c
+@@ -2156,6 +2156,7 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                "failed to allocate local");
+         goto out;
+     }
++    frame->local = local;
+ 
+     /*
+      * This is mainly for samba shares (or windows clients). As part of
+@@ -2184,7 +2185,6 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ 
+     local->subvolume = subvolume;
+     local->fd = fd_ref(fd);
+-    frame->local = local;
+ 
+     STACK_WIND(frame, gf_svc_readdirp_cbk, subvolume, subvolume->fops->readdirp,
+                fd, size, off, xdata);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0604-Coverity-Fix-dereference-before-null-check-CID-13914.patch b/SOURCES/0604-Coverity-Fix-dereference-before-null-check-CID-13914.patch
new file mode 100644
index 0000000..a680327
--- /dev/null
+++ b/SOURCES/0604-Coverity-Fix-dereference-before-null-check-CID-13914.patch
@@ -0,0 +1,50 @@
+From dee1c932df22ee12fe4568b40e58a475309e62fd Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Thu, 7 Oct 2021 21:18:49 +0530
+Subject: [PATCH 604/610] Coverity: Fix dereference before null check (CID:
+ 1391415)
+
+Problem:
+In function gf_client_dump_inodes_to_dict() there is a null check for
+a variable which is already dereferenced in the previous line. This
+means that there could be a chance that this variable is null. But it
+is not being validated for null before dereferencing it in the first
+place.
+
+Fix:
+Added null check before dereferencing the variable at the first place.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2369/
+> Change-Id: I988b0e93542782353a8059e33db1522b6a5e55f8
+> Signed-off-by: karthik-us <ksubrahm@redhat.com>
+> Updates: gluster#1060
+
+BUG: 1997447
+Change-Id: I988b0e93542782353a8059e33db1522b6a5e55f8
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280103
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/client_t.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
+index e875c8b..216900a 100644
+--- a/libglusterfs/src/client_t.c
++++ b/libglusterfs/src/client_t.c
+@@ -828,8 +828,9 @@ gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict)
+                 clienttable->cliententries[count].next_free)
+                 continue;
+             client = clienttable->cliententries[count].client;
+-            if (!strcmp(client->bound_xl->name, this->name)) {
+-                if (client->bound_xl && client->bound_xl->itable) {
++            if (client->bound_xl &&
++                !strcmp(client->bound_xl->name, this->name)) {
++                if (client->bound_xl->itable) {
+                     /* Presently every brick contains only
+                      * one bound_xl for all connections.
+                      * This will lead to duplicating of
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0605-Coverity-Fix-copy-into-fixed-size-buffer-CID-1325542.patch b/SOURCES/0605-Coverity-Fix-copy-into-fixed-size-buffer-CID-1325542.patch
new file mode 100644
index 0000000..849c959
--- /dev/null
+++ b/SOURCES/0605-Coverity-Fix-copy-into-fixed-size-buffer-CID-1325542.patch
@@ -0,0 +1,53 @@
+From 25fc2530f7ee6d7267e2ccc1b75a47a3ae539dff Mon Sep 17 00:00:00 2001
+From: karthik-us <ksubrahm@redhat.com>
+Date: Thu, 7 Oct 2021 21:29:27 +0530
+Subject: [PATCH 605/610] Coverity: Fix copy into fixed size buffer (CID:
+ 1325542)
+
+Problem:
+In __mnt3_fresh_lookup() mres->resolveloc.path is being copied into
+a fixed size string mres->remainingdir, with strncpy without checking
+the size of the source string. This could lead to string overflow.
+
+Fix:
+Copy only till the destination string length and check whether the
+source string overflows. If so, log an error message and return.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2474/
+> Change-Id: I26dd0653d2636c667ad4e356d12d3d51956c77c3
+> Signed-off-by: karthik-us <ksubrahm@redhat.com>
+> Updates: gluster#1060
+
+BUG: 1997447
+Change-Id: I26dd0653d2636c667ad4e356d12d3d51956c77c3
+Signed-off-by: karthik-us <ksubrahm@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280106
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/nfs/server/src/mount3.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
+index 734453c..3951b9e 100644
+--- a/xlators/nfs/server/src/mount3.c
++++ b/xlators/nfs/server/src/mount3.c
+@@ -1104,8 +1104,13 @@ __mnt3_fresh_lookup(mnt3_resolve_t *mres)
+ {
+     inode_unlink(mres->resolveloc.inode, mres->resolveloc.parent,
+                  mres->resolveloc.name);
+-    strncpy(mres->remainingdir, mres->resolveloc.path,
+-            strlen(mres->resolveloc.path));
++    if (snprintf(mres->remainingdir, sizeof(mres->remainingdir), "%s",
++                 mres->resolveloc.path) >= sizeof(mres->remainingdir)) {
++        gf_msg(GF_MNT, GF_LOG_ERROR, EFAULT, NFS_MSG_RESOLVE_INODE_FAIL,
++               "Failed to copy resolve path: %s", mres->resolveloc.path);
++        nfs_loc_wipe(&mres->resolveloc);
++        return -EFAULT;
++    }
+     nfs_loc_wipe(&mres->resolveloc);
+     return __mnt3_resolve_subdir(mres);
+ }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0606-dht-handle-DHT_SUBVOL_STATUS_KEY-in-dht_pt_getxattr-.patch b/SOURCES/0606-dht-handle-DHT_SUBVOL_STATUS_KEY-in-dht_pt_getxattr-.patch
new file mode 100644
index 0000000..05ca17b
--- /dev/null
+++ b/SOURCES/0606-dht-handle-DHT_SUBVOL_STATUS_KEY-in-dht_pt_getxattr-.patch
@@ -0,0 +1,69 @@
+From a6ba95b73469ad81d8c5a27293f8d09cc26928a3 Mon Sep 17 00:00:00 2001
+From: Ravishankar N <ravishankar@redhat.com>
+Date: Fri, 18 Dec 2020 16:28:29 +0530
+Subject: [PATCH 606/610] dht: handle DHT_SUBVOL_STATUS_KEY in dht_pt_getxattr
+ (#1934)
+
+In non distribute volumes (plain replicate, ec), DHT uses pass-through
+FOPs (dht_pt_getxattr) instead of the usual FOPS (dht_getxattr). The
+pass through FOP was not handling the DHT_SUBVOL_STATUS_KEY virtual
+xattr because of which geo-rep session was going into a faulty state.
+Fixing it now.
+
+> updates: #1925
+> Change-Id: I766b5b5c047c954a9957ab78aca680eedef1ff1f
+> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
+
+Upstream patch: https://github.com/gluster/glusterfs/pull/1934
+
+BUG: 2006205
+Change-Id: I766b5b5c047c954a9957ab78aca680eedef1ff1f
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280112
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 5eaaa1e..c8980e5 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -11584,9 +11584,33 @@ int
+ dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 const char *key, dict_t *xdata)
+ {
++    int op_errno = -1;
++    dht_local_t *local = NULL;
++
++    VALIDATE_OR_GOTO(frame, err);
++    VALIDATE_OR_GOTO(this, err);
++    VALIDATE_OR_GOTO(loc, err);
++    VALIDATE_OR_GOTO(loc->inode, err);
++    VALIDATE_OR_GOTO(this->private, err);
++
++    local = dht_local_init(frame, loc, NULL, GF_FOP_GETXATTR);
++    if (!local) {
++        op_errno = ENOMEM;
++        goto err;
++    }
++
++    if (key &&
++        strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
++        dht_vgetxattr_subvol_status(frame, this, key);
++        return 0;
++    }
++
+     STACK_WIND(frame, dht_pt_getxattr_cbk, FIRST_CHILD(this),
+                FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+     return 0;
++err:
++    DHT_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
++    return 0;
+ }
+ 
+ static int
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0607-SELinux-Fix-boolean-management.patch b/SOURCES/0607-SELinux-Fix-boolean-management.patch
new file mode 100644
index 0000000..4a62b03
--- /dev/null
+++ b/SOURCES/0607-SELinux-Fix-boolean-management.patch
@@ -0,0 +1,121 @@
+From 4b65ff0d1a3d70fcf3cfa8ab769135ae12f529d8 Mon Sep 17 00:00:00 2001
+From: nik-redhat <nladha@redhat.com>
+Date: Thu, 7 Oct 2021 22:02:32 +0530
+Subject: [PATCH 607/610] SELinux: Fix boolean management
+
+Remove %triggerun ganesha
+This trigger shouldn't be needed to begin with since removing
+selinux-policy-targeted means that the user is switching SELinux off, or
+is is switching the policy (to "mls" or "minimum"). In either case the
+current boolean setting is not going to be used any more. The last
+option, removal of glusterfs-ganesha, is covered by '%postun ganesha'.
+But more importantly, the trigger is called every time
+selinux-policy-targeted is updated (which can be avoided).
+%triggerun is executed after %triggerin -
+https://docs.fedoraproject.org/en-US/packaging-guidelines/Scriptlets/#ordering
+So when selinux-policy-targeted is updated, the new version is installed
+first triggering `semanage boolean -m ganesha_use_fusefs --on`,
+and then the old version is uninstalled triggering
+`semanage boolean -m ganesha_use_fusefs --off`.
+
+* use selinux_[un]set_booleans instead of "semanage boolean"
+  The macro pair properly manages SELinux stores and doesn't disable the
+  boolean in case it was enabled before ${name}-ganesha was installed.
+
+* Only change booleans when the package is first installed or
+  uninstalled
+Updating ${name}-ganesha would disable the boolean because %postun is
+called after %post (same issue as with the triggers).
+
+Signed-off-by: Vit Mojzis <vmojzis@redhat.com>
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Change-Id: Ibb926ffbe00c9f000bd740708c0a4b3435ee7871
+PR: https://github.com/gluster/glusterfs/pull/2833
+Issue: https://github.com/gluster/glusterfs/issues/2522
+Resolves: rhbz#1973566
+Resolves: rhbz#1975400
+
+BUG: 1973566
+Change-Id: Idef6cbd6bce35151518d6f76e5b74774e5756fc9
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280114
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+---
+ glusterfs.spec.in | 34 +++++++++++++++++++++-------------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 424f4ab..a9a83b1 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -954,7 +954,10 @@ exit 0
+ %if ( 0%{!?_without_server:1} )
+ %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %post ganesha
+-semanage boolean -m ganesha_use_fusefs --on
++# first install
++if [ $1 -eq 1 ]; then
++  %selinux_set_booleans ganesha_use_fusefs=1
++fi
+ exit 0
+ %endif
+ %endif
+@@ -962,7 +965,9 @@ exit 0
+ %if ( 0%{!?_without_georeplication:1} )
+ %post geo-replication
+ %if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+-%selinux_set_booleans %{selinuxbooleans}
++if [ $1 -eq 1 ]; then
++  %selinux_set_booleans %{selinuxbooleans}
++fi
+ %endif
+ if [ $1 -ge 1 ]; then
+     %systemd_postun_with_restart glusterd
+@@ -1089,29 +1094,32 @@ exit 0
+ %if ( 0%{!?_without_server:1} )
+ %if ( 0%{?fedora} && 0%{?fedora} > 25  || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %postun ganesha
+-semanage boolean -m ganesha_use_fusefs --off
++if [ $1 -eq 0 ]; then
++  # use the value of ganesha_use_fusefs from before glusterfs-ganesha was installed
++  %selinux_unset_booleans ganesha_use_fusefs=1
++fi
+ exit 0
+ %endif
+ %endif
+ 
+-##-----------------------------------------------------------------------------
+-## All %%trigger should be placed here and keep them sorted
+-##
+-%if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25  || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+-%trigger ganesha -- selinux-policy-targeted
+-semanage boolean -m ganesha_use_fusefs --on
++%if ( 0%{!?_without_georeplication:1} )
++%postun geo-replication
++%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
++if [ $1 -eq 0 ]; then
++  %selinux_unset_booleans %{selinuxbooleans}
++fi
+ exit 0
+ %endif
+ %endif
+ 
+ ##-----------------------------------------------------------------------------
+-## All %%triggerun should be placed here and keep them sorted
++## All %%trigger should be placed here and keep them sorted
+ ##
+ %if ( 0%{!?_without_server:1} )
+ %if ( 0%{?fedora} && 0%{?fedora} > 25  || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+-%triggerun ganesha -- selinux-policy-targeted
+-semanage boolean -m ganesha_use_fusefs --off
++# ensure ganesha_use_fusefs is on in case of policy mode switch (eg. mls->targeted)
++%triggerin ganesha -- selinux-policy-targeted
++semanage boolean -m ganesha_use_fusefs --on -S targeted
+ exit 0
+ %endif
+ %endif
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0608-cluster-ec-Track-heal-statistics-in-shd.patch b/SOURCES/0608-cluster-ec-Track-heal-statistics-in-shd.patch
new file mode 100644
index 0000000..b08d7a9
--- /dev/null
+++ b/SOURCES/0608-cluster-ec-Track-heal-statistics-in-shd.patch
@@ -0,0 +1,143 @@
+From d806760f1d4c78a2519b01f1c2d07aba0c533755 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Fri, 28 Aug 2020 16:03:54 +0530
+Subject: [PATCH 608/610] cluster/ec: Track heal statistics in shd
+
+With this change we should be able to inspect number of heals
+attempted and completed by each shd.
+
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24926/
+> fixes: #1453
+> Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39
+> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+
+BUG: 1853631
+Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280208
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec-heald.c | 49 ++++++++++++++++++++++++++++++++++++++-
+ xlators/cluster/ec/src/ec-types.h |  5 ++++
+ xlators/cluster/ec/src/ec.c       |  6 +++++
+ 3 files changed, 59 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
+index 4f4b6aa..cd4d3ad 100644
+--- a/xlators/cluster/ec/src/ec-heald.c
++++ b/xlators/cluster/ec/src/ec-heald.c
+@@ -152,15 +152,58 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
+     return ret;
+ }
+ 
++static gf_boolean_t
++ec_is_heal_completed(char *status)
++{
++    char *bad_pos = NULL;
++    char *zero_pos = NULL;
++
++    if (!status) {
++        return _gf_false;
++    }
++
++    /*Logic:
++     * Status will be of the form Good: <binary>, Bad: <binary>
++     * If heal completes, if we do strchr for '0' it should be present after
++     * 'Bad:' i.e. strRchr for ':'
++     * */
++
++    zero_pos = strchr(status, '0');
++    bad_pos = strrchr(status, ':');
++    if (!zero_pos || !bad_pos) {
++        /*malformed status*/
++        return _gf_false;
++    }
++
++    if (zero_pos > bad_pos) {
++        return _gf_true;
++    }
++
++    return _gf_false;
++}
++
+ int
+ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
+                 gf_boolean_t full)
+ {
+     dict_t *xdata = NULL;
++    dict_t *dict = NULL;
+     uint32_t count;
+     int32_t ret;
++    char *heal_status = NULL;
++    ec_t *ec = healer->this->private;
++
++    GF_ATOMIC_INC(ec->stats.shd.attempted);
++    ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
++                          &xdata);
++    if (ret == 0) {
++        if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
++            if (ec_is_heal_completed(heal_status)) {
++                GF_ATOMIC_INC(ec->stats.shd.completed);
++            }
++        }
++    }
+ 
+-    ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata);
+     if (!full && (loc->inode->ia_type == IA_IFDIR)) {
+         /* If we have just healed a directory, it's possible that
+          * other index entries have appeared to be healed. */
+@@ -179,6 +222,10 @@ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
+         dict_unref(xdata);
+     }
+ 
++    if (dict) {
++        dict_unref(dict);
++    }
++
+     return ret;
+ }
+ 
+diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
+index 700dc39..ef7a7fe 100644
+--- a/xlators/cluster/ec/src/ec-types.h
++++ b/xlators/cluster/ec/src/ec-types.h
+@@ -626,6 +626,11 @@ struct _ec_statistics {
+                                 requests. (Basically memory allocation
+                                 errors). */
+     } stripe_cache;
++    struct {
++        gf_atomic_t attempted; /*Number of heals attempted on
++                                files/directories*/
++        gf_atomic_t completed; /*Number of heals complted on files/directories*/
++    } shd;
+ };
+ 
+ struct _ec {
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 047cdd8..24de9e8 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -649,6 +649,8 @@ ec_statistics_init(ec_t *ec)
+     GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
+     GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
+     GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
++    GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
++    GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
+ }
+ 
+ int32_t
+@@ -1445,6 +1447,10 @@ ec_dump_private(xlator_t *this)
+                        GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
+     gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
+                        GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
++    gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
++                       GF_ATOMIC_GET(ec->stats.shd.attempted));
++    gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
++                       GF_ATOMIC_GET(ec->stats.shd.completed));
+ 
+     return 0;
+ }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0609-feature-shard-wrong-dname-results-in-dentry-not-foun.patch b/SOURCES/0609-feature-shard-wrong-dname-results-in-dentry-not-foun.patch
new file mode 100644
index 0000000..a3290cb
--- /dev/null
+++ b/SOURCES/0609-feature-shard-wrong-dname-results-in-dentry-not-foun.patch
@@ -0,0 +1,43 @@
+From 89cdfb40264c12105a1b4990fa9b45290aa6cef0 Mon Sep 17 00:00:00 2001
+From: Vinayakswami Hariharmath <vharihar@redhat.com>
+Date: Fri, 8 Oct 2021 09:40:41 +0530
+Subject: [PATCH 609/610] feature/shard: wrong dname results in dentry not
+ found error
+
+Due to wrong dname passed to inode_unlink in
+shard_evicted_inode_fsync_cbk() resulting in dentry not found
+error.
+
+This patch addresses the issue.
+
+> upstream patch: https://github.com/gluster/glusterfs/pull/2475
+> Fixes: #2470
+> Change-Id: I6c479980ae3fa7ba558327055a9e5e5c2d2a850f
+> Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com
+
+BUG: 1911665
+Change-Id: I96aa5f57303b69a08990de039ddeecad7e7ae6af
+Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280202
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/shard/src/shard.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index b828ff9..882373f 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -950,7 +950,7 @@ shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     {
+         __shard_inode_ctx_get(shard_inode, this, &ctx);
+         if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
+-            shard_make_block_bname(ctx->block_num, shard_inode->gfid,
++            shard_make_block_bname(ctx->block_num, ctx->base_gfid,
+                                    block_bname, sizeof(block_bname));
+             inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
+             /* The following unref corresponds to the ref held by
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0610-glusterfs.spec.in-remove-condtionals-from-tar-depend.patch b/SOURCES/0610-glusterfs.spec.in-remove-condtionals-from-tar-depend.patch
new file mode 100644
index 0000000..132da9c
--- /dev/null
+++ b/SOURCES/0610-glusterfs.spec.in-remove-condtionals-from-tar-depend.patch
@@ -0,0 +1,51 @@
+From b3e86a66de224107f6760157a7cb692227e42954 Mon Sep 17 00:00:00 2001
+From: Shwetha Acharya <sacharya@redhat.com>
+Date: Mon, 30 Aug 2021 18:54:15 +0530
+Subject: [PATCH 610/610] glusterfs.spec.in: remove condtionals from tar
+ dependency (#2734)
+
+* glusterfs.spec.in: remove condtionals from tar dependency
+
+The conditional on rhel minor version fails and tar is not
+marked as required.
+
+As there is not any universal macro to specify the
+minor release, removing the conditionals above the
+"Requires: tar" statement
+
+with this change irrespective of rhel 8.3 and
+above, tar will be marked required for geo-rep.
+
+> Change-Id: Id1e3320a0b1a245fc9cd8c7acb09cc119fca18b8
+> Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+
+Upstream patch: https://github.com/gluster/glusterfs/pull/2734
+
+BUG: 1901468
+Change-Id: Id1e3320a0b1a245fc9cd8c7acb09cc119fca18b8
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280116
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ glusterfs.spec.in | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index a9a83b1..8b6646f 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -521,9 +521,8 @@ Requires:         python%{_pythonver}-gluster = %{version}-%{release}
+ Requires:         rsync
+ Requires:         util-linux
+ Requires:         %{name}-libs%{?_isa} = %{version}-%{release}
+-%if ( 0%{?rhel} && ( ( 0%{?rhel} == 8 && 0%{?rhel_minor_version} >= 3 ) || 0%{?rhel} >= 9 ) )
+ Requires:         tar
+-%endif
++
+ # required for setting selinux bools
+ %if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+ Requires(post):      policycoreutils-python-utils
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0611-SELinux-Fix-boolean-management-again.patch b/SOURCES/0611-SELinux-Fix-boolean-management-again.patch
new file mode 100644
index 0000000..a5b2612
--- /dev/null
+++ b/SOURCES/0611-SELinux-Fix-boolean-management-again.patch
@@ -0,0 +1,54 @@
+From 5ad4711f40c0e8ab7c196ac1c9025bf78b8b94e0 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Thu, 18 Nov 2021 09:21:56 -0500
+Subject: [PATCH 611/611] SELinux: Fix boolean management, again
+
+When upgrading from a version of the package that does not include
+the previous fix this means the flawed scriptlet is still executed,
+undoing the setting of the boolean.
+
+In order to work the boolean needs to be set in %posttrans. This is
+a temporary change that can (or should) be removed in the next version
+of RHGS, i.e. 3.5.7.
+
+Issue: https://github.com/gluster/glusterfs/issues/2522
+Resolves: rhbz#1973566
+Resolves: rhbz#1975400
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1973566
+Change-Id: Ida39a3ee5e6b4b0d3255bfef95601890afd80709
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/292189
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 8b6646f..87176c9 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1123,6 +1123,17 @@ exit 0
+ %endif
+ %endif
+ 
++%if ( 0%{!?_without_server:1} )
++%if ( ( 0%{?fedora} && 0%{?fedora} > 25 )  || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
++# temporary fix to be removed in the next version (i.e. RHGS 3.5.7). This
++# is only needed when upgrading from the flawed versions (e.g. RHGS 3.5.5
++# and earlier.)
++%posttrans ganesha
++semanage boolean -m ganesha_use_fusefs --on -S targeted
++exit 0
++%endif
++%endif
++
+ ##-----------------------------------------------------------------------------
+ ## All %%files should be placed here and keep them grouped
+ ##
+-- 
+1.8.3.1
+
diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec
index c0e2ed4..50c331e 100644
--- a/SPECS/glusterfs.spec
+++ b/SPECS/glusterfs.spec
@@ -237,7 +237,7 @@ Release:          0.1%{?prereltag:.%{prereltag}}%{?dist}
 %else
 Name:             glusterfs
 Version:          6.0
-Release:          56.4%{?dist}
+Release:          61.2%{?dist}
 ExcludeArch:      i686
 %endif
 License:          GPLv2 or LGPLv3+
@@ -858,7 +858,74 @@ Patch0540: 0540-extras-Disable-write-behind-for-group-samba.patch
 Patch0541: 0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch
 Patch0542: 0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch
 Patch0543: 0543-glusterd-handle-custom-xlator-failure-cases.patch
-Patch0544: 0544-RHGS-3.5.4-rebuild-to-ship-with-RHEL-8.5.patch
+Patch0544: 0544-tests-avoid-empty-paths-in-environment-variables.patch
+Patch0545: 0545-tests-Excluded-tests-for-unsupported-components.patch
+Patch0546: 0546-Update-rfc.sh-to-rhgs-3.5.5.patch
+Patch0547: 0547-perf-write-behind-Clear-frame-local-on-conflict-erro.patch
+Patch0548: 0548-Add-tar-as-dependency-to-geo-rep-rpm-for-RHEL-8.3-an.patch
+Patch0549: 0549-geo-rep-Change-in-attribute-for-getting-function-nam.patch
+Patch0550: 0550-common-ha-stability-fixes-for-ganesha_grace-and-gane.patch
+Patch0551: 0551-common-ha-ensure-shared_storage-is-mounted-before-se.patch
+Patch0552: 0552-cluster-afr-Change-default-self-heal-window-size-to-.patch
+Patch0553: 0553-cluster-ec-Change-self-heal-window-size-to-4MiB-by-d.patch
+Patch0554: 0554-dht-fix-rebalance-of-sparse-files.patch
+Patch0555: 0555-geo-rep-Improve-handling-of-gfid-mismatches.patch
+Patch0556: 0556-dht-don-t-ignore-xdata-in-fgetxattr.patch
+Patch0557: 0557-cluster-dht-Fix-stack-overflow-in-readdir-p.patch
+Patch0558: 0558-afr-fix-directory-entry-count.patch
+Patch0559: 0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch
+Patch0560: 0560-posix-Avoid-dict_del-logs-in-posix_is_layout_stale-w.patch
+Patch0561: 0561-cluster-ec-Inform-failure-when-some-bricks-are-unava.patch
+Patch0562: 0562-shard.c-Fix-formatting.patch
+Patch0563: 0563-features-shard-Use-fd-lookup-post-file-open.patch
+Patch0564: 0564-store.c-glusterd-store.c-remove-sys_stat-calls.patch
+Patch0565: 0565-libglusterfs-coverity-pointer-to-local-outside-the-s.patch
+Patch0566: 0566-enahancement-debug-Option-to-generate-core-dump-with.patch
+Patch0567: 0567-inode-create-inode-outside-locked-region.patch
+Patch0568: 0568-core-tcmu-runner-process-continuous-growing-logs-lru.patch
+Patch0569: 0569-features-shard-optimization-over-shard-lookup-in-cas.patch
+Patch0570: 0570-features-shard-avoid-repeatative-calls-to-gf_uuid_un.patch
+Patch0571: 0571-NetBSD-build-fixes.patch
+Patch0572: 0572-locks-remove-unused-conditional-switch-to-spin_lock-.patch
+Patch0573: 0573-features-shard-unlink-fails-due-to-nospace-to-mknod-.patch
+Patch0574: 0574-features-shard-delay-unlink-of-a-file-that-has-fd_co.patch
+Patch0575: 0575-libglusterfs-add-functions-to-calculate-time-differe.patch
+Patch0576: 0576-rpcsvc-Add-latency-tracking-for-rpc-programs.patch
+Patch0577: 0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch
+Patch0578: 0578-protocol-client-fallback-to-anonymous-fd-for-fsync.patch
+Patch0579: 0579-cli-changing-rebal-task-ID-to-None-in-case-status-is.patch
+Patch0580: 0580-cluster-dht-suppress-file-migration-error-for-node-n.patch
+Patch0581: 0581-afr-don-t-reopen-fds-on-which-POSIX-locks-are-held.patch
+Patch0582: 0582-protocol-client-Fix-lock-memory-leak.patch
+Patch0583: 0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch
+Patch0584: 0584-dht-fixing-xattr-inconsistency.patch
+Patch0585: 0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch
+Patch0586: 0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch
+Patch0587: 0587-Update-rfc.sh-to-rhgs-3.5.6.patch
+Patch0588: 0588-locks-Fix-null-gfid-in-lock-contention-notifications.patch
+Patch0589: 0589-extras-fix-for-postscript-failure-on-logrotation-of-.patch
+Patch0590: 0590-cluster-afr-Don-t-check-for-stale-entry-index.patch
+Patch0591: 0591-afr-check-for-valid-iatt.patch
+Patch0592: 0592-md-cache-fix-integer-signedness-mismatch.patch
+Patch0593: 0593-dht-explicit-null-dereference.patch
+Patch0594: 0594-glusterd-resource-leaks.patch
+Patch0595: 0595-glusterd-use-after-free-coverity-issue.patch
+Patch0596: 0596-locks-null-dereference.patch
+Patch0597: 0597-glusterd-memory-deallocated-twice.patch
+Patch0598: 0598-glusterd-null-dereference.patch
+Patch0599: 0599-afr-null-dereference-nagative-value.patch
+Patch0600: 0600-dht-xlator-integer-handling-issue.patch
+Patch0601: 0601-coverity-resource-leak-2321.patch
+Patch0602: 0602-coverity-null-dereference-2395.patch
+Patch0603: 0603-Coverity-Resource-leak-fix-CID-1356547.patch
+Patch0604: 0604-Coverity-Fix-dereference-before-null-check-CID-13914.patch
+Patch0605: 0605-Coverity-Fix-copy-into-fixed-size-buffer-CID-1325542.patch
+Patch0606: 0606-dht-handle-DHT_SUBVOL_STATUS_KEY-in-dht_pt_getxattr-.patch
+Patch0607: 0607-SELinux-Fix-boolean-management.patch
+Patch0608: 0608-cluster-ec-Track-heal-statistics-in-shd.patch
+Patch0609: 0609-feature-shard-wrong-dname-results-in-dentry-not-foun.patch
+Patch0610: 0610-glusterfs.spec.in-remove-condtionals-from-tar-depend.patch
+Patch0611: 0611-SELinux-Fix-boolean-management-again.patch
 
 %description
 GlusterFS is a distributed file-system capable of scaling to several
@@ -1067,6 +1134,8 @@ Requires:         python%{_pythonver}-gluster = %{version}-%{release}
 Requires:         rsync
 Requires:         util-linux
 Requires:         %{name}-libs%{?_isa} = %{version}-%{release}
+Requires:         tar
+
 # required for setting selinux bools
 %if ( 0%{?rhel} && 0%{?rhel} >= 8 )
 Requires(post):      policycoreutils-python-utils
@@ -1570,7 +1639,10 @@ exit 0
 %if ( 0%{!?_without_server:1} )
 %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
 %post ganesha
-semanage boolean -m ganesha_use_fusefs --on
+# first install
+if [ $1 -eq 1 ]; then
+  %selinux_set_booleans ganesha_use_fusefs=1
+fi
 exit 0
 %endif
 %endif
@@ -1578,7 +1650,9 @@ exit 0
 %if ( 0%{!?_without_georeplication:1} )
 %post geo-replication
 %if ( 0%{?rhel} && 0%{?rhel} >= 8 )
-%selinux_set_booleans %{selinuxbooleans}
+if [ $1 -eq 1 ]; then
+  %selinux_set_booleans %{selinuxbooleans}
+fi
 %endif
 if [ $1 -ge 1 ]; then
     %systemd_postun_with_restart glusterd
@@ -1705,7 +1779,20 @@ exit 0
 %if ( 0%{!?_without_server:1} )
 %if ( 0%{?fedora} && 0%{?fedora} > 25  || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
 %postun ganesha
-semanage boolean -m ganesha_use_fusefs --off
+if [ $1 -eq 0 ]; then
+  # use the value of ganesha_use_fusefs from before glusterfs-ganesha was installed
+  %selinux_unset_booleans ganesha_use_fusefs=1
+fi
+exit 0
+%endif
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%postun geo-replication
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+if [ $1 -eq 0 ]; then
+  %selinux_unset_booleans %{selinuxbooleans}
+fi
 exit 0
 %endif
 %endif
@@ -1715,19 +1802,20 @@ exit 0
 ##
 %if ( 0%{!?_without_server:1} )
 %if ( 0%{?fedora} && 0%{?fedora} > 25  || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
-%trigger ganesha -- selinux-policy-targeted
-semanage boolean -m ganesha_use_fusefs --on
+# ensure ganesha_use_fusefs is on in case of policy mode switch (eg. mls->targeted)
+%triggerin ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --on -S targeted
 exit 0
 %endif
 %endif
 
-##-----------------------------------------------------------------------------
-## All %%triggerun should be placed here and keep them sorted
-##
 %if ( 0%{!?_without_server:1} )
-%if ( 0%{?fedora} && 0%{?fedora} > 25  || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
-%triggerun ganesha -- selinux-policy-targeted
-semanage boolean -m ganesha_use_fusefs --off
+%if ( ( 0%{?fedora} && 0%{?fedora} > 25 )  || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+# temporary fix to be removed in the next version (i.e. RHGS 3.5.7). This
+# is only needed when upgrading from the flawed versions (e.g. RHGS 3.5.5
+# and earlier.)
+%posttrans ganesha
+semanage boolean -m ganesha_use_fusefs --on -S targeted
 exit 0
 %endif
 %endif
@@ -2601,11 +2689,30 @@ fi
 %endif
 
 %changelog
-* Mon Aug 30 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56.4
-- Add gating.yaml, fixes bugs bz#1996984
+* Tue Feb 22 2022 Tamar Shacked <tshacked@redhat.com> - 6.0-61.2
+- Rebuilt with rhel-8.5.0-z-build target. fixes bugs bz#2056953
+
+* Sun Feb 20 2022 Tamar Shacked <tshacked@redhat.com> - 6.0-61.1
+- Rebuilt for rhel-8.5.0.z. fixes bugs bz#2056647
+
+* Mon Nov 29 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-61
+- fixes bugs bz#1973566
+
+* Mon Oct 11 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-60
+- fixes bugs bz#1668303 bz#1853631 bz#1901468 bz#1904137 bz#1911665 
+  bz#1962972 bz#1973566 bz#1994593 bz#1995029 bz#1997447 bz#2006205
+
+* Tue Jul 06 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-59
+- fixes bugs bz#1689375
+
+* Wed Jun 16 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-58
+- fixes bugs bz#1945143
 
-* Tue Aug 24 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56.3
-- fixes bugs bz#1996984
+* Tue Jun 08 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-57
+- fixes bugs bz#1600379 bz#1689375 bz#1782428 bz#1798897 bz#1815462 
+  bz#1889966 bz#1891403 bz#1901468 bz#1903911 bz#1908635 bz#1917488 bz#1918018 
+  bz#1919132 bz#1925425 bz#1927411 bz#1927640 bz#1928676 bz#1942816 bz#1943467 
+  bz#1945143 bz#1946171 bz#1957191 bz#1957641
 
 * Thu May 06 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56.2
 - fixes bugs bz#1953901