From d2787b90cdbddd7c866ca9b070fd190dfccb7b93 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Apr 05 2022 13:51:38 +0000 Subject: import glusterfs-6.0-61.el7 --- diff --git a/README.debrand b/README.debrand deleted file mode 100644 index 01c46d2..0000000 --- a/README.debrand +++ /dev/null @@ -1,2 +0,0 @@ -Warning: This package was configured for automatic debranding, but the changes -failed to apply. diff --git a/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch b/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch deleted file mode 100644 index dd9b0ab..0000000 --- a/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 346aa7cbc34b9bbbaca45180215a4d9ffd5055df Mon Sep 17 00:00:00 2001 -From: Rinku Kothiya -Date: Fri, 19 Feb 2021 06:19:07 +0000 -Subject: [PATCH 481/481] RHGS-3.5.3 rebuild to ship with RHEL. - -Label: DOWNSTREAM ONLY -BUG: 1930561 - -Change-Id: I9c7f30cc6bc616344b27072bfde056c7bba1e143 -Signed-off-by: Rinku Kothiya -Reviewed-on: https://code.engineering.redhat.com/gerrit/228413 -Tested-by: RHGS Build Bot -Reviewed-by: Sunil Kumar Heggodu Gopala Acharya ---- - glusterfs.spec.in | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/glusterfs.spec.in b/glusterfs.spec.in -index 30d7162..52f9b40 100644 ---- a/glusterfs.spec.in -+++ b/glusterfs.spec.in -@@ -1983,6 +1983,8 @@ fi - %endif - - %changelog -+* Fri Feb 19 2021 Rinku Kothiya -+- Build RGHS clients for RHEL (#1930561) - - * Mon May 11 2020 Sunny Kumar - - added requires policycoreutils-python-utils on rhel8 for geo-replication --- -1.8.3.1 - diff --git a/SOURCES/0481-Update-rfc.sh-to-rhgs-3.5.4.patch b/SOURCES/0481-Update-rfc.sh-to-rhgs-3.5.4.patch new file mode 100644 index 0000000..0ba12d2 --- /dev/null +++ b/SOURCES/0481-Update-rfc.sh-to-rhgs-3.5.4.patch @@ -0,0 +1,26 @@ +From 828be8e789db3c77587c708f930d7fe8c9456e3b Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Fri, 4 Dec 2020 05:18:45 +0530 +Subject: [PATCH 481/511] Update rfc.sh to rhgs-3.5.4 + +Signed-off-by: Rinku Kothiya +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index 1dca29f..c0559b9 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.3"; ++branch="rhgs-3.5.4"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0482-logger-Always-print-errors-in-english.patch b/SOURCES/0482-logger-Always-print-errors-in-english.patch new file mode 100644 index 0000000..e454bec --- /dev/null +++ b/SOURCES/0482-logger-Always-print-errors-in-english.patch @@ -0,0 +1,49 @@ +From e43af5b15d14e43c3201fd0fb7bf02663e3e0127 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Sat, 7 Nov 2020 12:09:36 +0530 +Subject: [PATCH 482/511] logger: Always print errors in english + +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1657 +> fixes: #1302 +> Change-Id: If0e21f016155276a953c64a8dd13ff3eb281d09d +> Signed-off-by: Rinku Kothiya + +BUG: 1896425 + +Change-Id: If0e21f016155276a953c64a8dd13ff3eb281d09d +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/219999 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/logging.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c +index 7f0eff6..5874c34 100644 +--- a/libglusterfs/src/logging.c ++++ b/libglusterfs/src/logging.c +@@ -513,6 +513,7 @@ gf_openlog(const char *ident, int 
option, int facility) + { + int _option = option; + int _facility = facility; ++ char *language = NULL; + + if (-1 == _option) { + _option = LOG_PID | LOG_NDELAY; +@@ -522,7 +523,10 @@ gf_openlog(const char *ident, int option, int facility) + } + + /* TODO: Should check for errors here and return appropriately */ +- setlocale(LC_ALL, ""); ++ language = setlocale(LC_ALL, "en_US.UTF-8"); ++ if (!language) ++ setlocale(LC_ALL, ""); ++ + setlocale(LC_NUMERIC, "C"); /* C-locale for strtod, ... */ + /* close the previous syslog if open as we are changing settings */ + closelog(); +-- +1.8.3.1 + diff --git a/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch b/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch new file mode 100644 index 0000000..c0f2118 --- /dev/null +++ b/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch @@ -0,0 +1,150 @@ +From 8c366f34a279a5ab2a6301bfd93534fe746a23e8 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Mon, 7 Dec 2020 09:53:27 +0530 +Subject: [PATCH 483/511] afr: more quorum checks in lookup and new entry + marking + +Problem: See upstream github issue for details. + +Fix: +-In lookup if the entry exists in 2 out of 3 bricks, don't fail the +lookup with ENOENT just because there is an entrylk on the parent. +Consider quorum before deciding. + +-If entry FOP does not succeed on quorum no. of bricks, do not perform +new entry mark. + +Upstream patch details: +> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24499/ +> Fixes: #1303 +> Change-Id: I56df8c89ad53b29fa450c7930a7b7ccec9f4a6c5 +> Signed-off-by: Ravishankar N + +BUG: 1821599 +Change-Id: If513e8a7d6088a676288927630d8e616269bf5d5 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/220363 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 2 -- + xlators/cluster/afr/src/afr-common.c | 24 ++++++++++++---------- + xlators/cluster/afr/src/afr-dir-write.c | 8 ++++++++ + xlators/cluster/afr/src/afr.h | 4 ++++ + 4 files changed, 25 insertions(+), 13 deletions(-) + +diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t +index 26f9049..49c4dea 100644 +--- a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t ++++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t +@@ -53,8 +53,6 @@ TEST ! 
ls $B0/${V0}1/file$i + TEST ls $B0/${V0}2/file$i + dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2) + TEST [ "$dirty" != "000000000000000000000000" ] +-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i +-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i + + TEST $CLI volume set $V0 self-heal-daemon on + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 89e2483..851ccad 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -1236,7 +1236,7 @@ refresh_done: + return 0; + } + +-static void ++void + afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies) + { +@@ -2290,6 +2290,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + 0, + }; + gf_boolean_t locked_entry = _gf_false; ++ gf_boolean_t in_flight_create = _gf_false; + gf_boolean_t can_interpret = _gf_true; + inode_t *parent = NULL; + ia_type_t ia_type = IA_INVAL; +@@ -2333,17 +2334,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + if (!replies[i].valid) + continue; + +- if (locked_entry && replies[i].op_ret == -1 && +- replies[i].op_errno == ENOENT) { +- /* Second, check entry is still +- "underway" in creation */ +- local->op_ret = -1; +- local->op_errno = ENOENT; +- goto error; +- } +- +- if (replies[i].op_ret == -1) ++ if (replies[i].op_ret == -1) { ++ if (locked_entry && replies[i].op_errno == ENOENT) { ++ in_flight_create = _gf_true; ++ } + continue; ++ } + + if (read_subvol == -1 || !readable[read_subvol]) { + read_subvol = i; +@@ -2353,6 +2349,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + } + } + ++ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) { ++ local->op_ret = -1; ++ local->op_errno = ENOENT; ++ goto error; ++ } ++ + if (read_subvol == -1) + goto error; + /* We now have a read_subvol, which is readable[] (if there +diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c +index 84e2a34..416c19d 100644 +--- a/xlators/cluster/afr/src/afr-dir-write.c ++++ b/xlators/cluster/afr/src/afr-dir-write.c +@@ -349,6 +349,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) + afr_private_t *priv = NULL; + int pre_op_count = 0; + int failed_count = 0; ++ unsigned char *success_replies = NULL; + + local = frame->local; + priv = this->private; +@@ -364,9 +365,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) + failed_count = AFR_COUNT(local->transaction.failed_subvols, + priv->child_count); + ++ /* FOP succeeded on all bricks. */ + if (pre_op_count == priv->child_count && !failed_count) + return; + ++ /* FOP did not suceed on quorum no. of bricks. 
*/
++    success_replies = alloca0(priv->child_count);
++    afr_fill_success_replies(local, priv, success_replies);
++    if (!afr_has_quorum(success_replies, this, NULL))
++        return;
++
+     if (priv->thin_arbiter_count) {
+         /*Mark new entry using ta file*/
+         local->is_new_entry = _gf_true;
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index ff96246..ed5096e 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -1334,4 +1334,8 @@ afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+ 
+ void
+ afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
++
++void
++afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
++                         unsigned char *replies);
+ #endif /* __AFR_H__ */
+--
+1.8.3.1
+
diff --git a/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch b/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch
new file mode 100644
index 0000000..56d4feb
--- /dev/null
+++ b/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch
@@ -0,0 +1,90 @@
+From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde
+Date: Fri, 26 Jun 2020 12:10:31 +0530
+Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
+ reboot
+
+Problem: while the rebalance is in progress, if a node is
+rebooted, rebalance v status shows the stats of this node as
+0 once the node is back.
+
+Reason: when the node is rebooted, once it is back,
+glusterd_volume_defrag_restart() starts the rebalance and
+creates the rpc. But due to a race, the rebalance process
+sends a disconnect event, so the rpc object gets destroyed. As
+the rpc object is null, the request for fetching the latest stats
+is not sent to the rebalance process, and the stats are shown as
+the default values, which are 0.
+
+Solution: When the rpc object is null, we should create the rpc if
+the rebalance process is up, so that the request can be sent to the
+rebalance process using the rpc.
+
+>fixes: #1339
+>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
+>Signed-off-by: Sanju Rakonde
+Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641
+
+BUG: 1832306
+Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
+Signed-off-by: Srijan Sivakumar
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
+ 1 file changed, 20 insertions(+), 9 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index c78983a..df78fef 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+     rpc_clnt_t *rpc = NULL;
+     dict_t *rsp_dict = NULL;
+     int32_t cmd = GF_OP_CMD_NONE;
++    glusterd_volinfo_t *volinfo = NULL;
+ 
+     this = THIS;
+     rsp_dict = dict_new();
+@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+     cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
+     {
+         rpc = glusterd_pending_node_get_rpc(pending_node);
++        /* In the case of rebalance if the rpc object is null, we try to
++         * create the rpc object. if the rebalance daemon is down, it returns
++         * -1. otherwise, rpc object will be created and referenced.
++         */
+         if (!rpc) {
+-            if (pending_node->type == GD_NODE_REBALANCE) {
+-                ret = 0;
+-                glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
++            if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
++                volinfo = pending_node->node;
++                ret = glusterd_rebalance_rpc_create(volinfo);
++                if (ret) {
++                    ret = 0;
++                    glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
++                    goto out;
++                } else {
++                    rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
++                }
++            } else {
++                ret = -1;
++                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
++                       "Brick Op failed "
++                       "due to rpc failure.");
+                 goto out;
+             }
+-
+-            ret = -1;
+-            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
+-                   "Brick Op failed "
+-                   "due to rpc failure.");
+-            goto out;
+         }
+ 
+         /* Redirect operation to be detach tier via rebalance flow. */
+--
+1.8.3.1
+
diff --git a/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch b/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch
new file mode 100644
index 0000000..6ed4f1c
--- /dev/null
+++ b/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch
@@ -0,0 +1,87 @@
+From 2e6a5e504e66bc95208420e4882e453a53ac9ea2 Mon Sep 17 00:00:00 2001
+From: schaffung
+Date: Mon, 2 Nov 2020 11:18:01 +0530
+Subject: [PATCH 485/511] cli-rpc: conditional init of global quota rpc (#1578)
+
+Issue: It seems that the initialization of the rpc to
+connect with quotad is done in every glusterfs cli command,
+irrespective of whether the quota feature is enabled or disabled.
+This seems to be overkill.
+
+Code change: The presence of the file /var/run/quotad/quotad.pid
+signals that quotad is enabled. Hence we can put a conditional
+check for whether this file exists, and if it doesn't, we
+just skip over the initialization of the global quotad rpc.
+
+This will go on to reduce the extra rpc calls and operations
+being performed in kernel space.
+
+>Fixes: #1577
+>Change-Id: Icb69d35330f76ce95626f59af75a12726eb620ff
+>Signed-off-by: srijan-sivakumar
+Upstream Patch : https://github.com/gluster/glusterfs/pull/1578
+
+BUG: 1885966
+Change-Id: Icb69d35330f76ce95626f59af75a12726eb620ff
+Signed-off-by: Srijan Sivakumar
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220371
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ cli/src/cli.c | 18 +++++++++++++-----
+ cli/src/cli.h |  3 +++
+ 2 files changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index 99a16a0..a76c5a2 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -64,8 +64,7 @@
+ extern int connected;
+ /* using argp for command line parsing */
+ 
+-const char *argp_program_version =
+-    PACKAGE_NAME" "PACKAGE_VERSION;
++const char *argp_program_version = PACKAGE_NAME " " PACKAGE_VERSION;
+ const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
+ 
+ struct rpc_clnt *global_quotad_rpc;
+@@ -840,9 +839,18 @@ main(int argc, char *argv[])
+     if (!global_rpc)
+         goto out;
+ 
+-    global_quotad_rpc = cli_quotad_clnt_rpc_init();
+-    if (!global_quotad_rpc)
+-        goto out;
++    /*
++     * Now, one doesn't need to initialize global rpc
++     * for quota unless and until quota is enabled.
++     * So why not put a check to save all the rpc related
++     * ops here.
++     */
++    ret = sys_access(QUOTAD_PID_PATH, F_OK);
++    if (!ret) {
++        global_quotad_rpc = cli_quotad_clnt_rpc_init();
++        if (!global_quotad_rpc)
++            goto out;
++    }
+ 
+     ret = cli_cmds_register(&state);
+     if (ret)
+diff --git a/cli/src/cli.h b/cli/src/cli.h
+index 37e4d9d..c30ae9c 100644
+--- a/cli/src/cli.h
++++ b/cli/src/cli.h
+@@ -30,6 +30,9 @@
+ #define CLI_TAB_LENGTH 8
+ #define CLI_BRICK_STATUS_LINE_LEN 78
+ 
++// Quotad pid path.
++#define QUOTAD_PID_PATH "/var/run/gluster/quotad/quotad.pid"
++
+ /* Geo-rep command positional arguments' index */
+ #define GEO_REP_CMD_INDEX 1
+ #define GEO_REP_CMD_CONFIG_INDEX 4
+--
+1.8.3.1
+
diff --git a/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch b/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch
new file mode 100644
index 0000000..60750db
--- /dev/null
+++ b/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch
@@ -0,0 +1,87 @@
+From 9b19d4841fc3002d30ec3e44c85ec37682c11bfb Mon Sep 17 00:00:00 2001
+From: schaffung
+Date: Thu, 22 Oct 2020 13:07:09 +0530
+Subject: [PATCH 486/511] glusterd: brick sock file deleted, log error (#1560)
+
+Issue: The status of the brick as tracked by glusterd is
+stopped if the socket file corresponding to a running
+brick process is absent in /var/run/gluster. The glusterd
+keeps on trying to reconnect (rpc layer) but it fails.
+
+Code change: Rather than registering the rpc connection
+with the given sockfilepath, which is not even present,
+and endlessly reconnecting, log this as an error and do
+not try to reconnect using the non-existent sock file
+path.
+
+>Fixes: #1526
+>Change-Id: I6c81691ab1624c66dec74f5ffcc6c383201ac757
+>Signed-off-by: srijan-sivakumar
+Upstream Patch : https://github.com/gluster/glusterfs/pull/1560
+
+BUG: 1882923
+Change-Id: I6c81691ab1624c66dec74f5ffcc6c383201ac757
+Signed-off-by: Srijan Sivakumar
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220376
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 27 +++++++++++++++++++++++++--
+ 1 file changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index d25fc8a..a72c494 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -6310,7 +6310,7 @@ find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo,
+    check if passed pid is match with running glusterfs process
+ */
+ 
+-int
++static int
+ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
+ {
+     char fname[128] = "";
+@@ -6383,7 +6383,17 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
+ 
+     if (tmpsockpath[0]) {
+         strncpy(sockpath, tmpsockpath, i);
+-        ret = 0;
++        /*
++         * Condition to check if the brick socket file is present
++         * in the stated path or not. This helps in preventing
++         * constant re-connect triggered in the RPC layer and also
++         * a log message would help out the user.
++         */
++        ret = sys_access(sockpath, F_OK);
++        if (ret) {
++            gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND,
++                    "%s not found", sockpath, NULL);
++        }
+     }
+ 
+     return ret;
+@@ -6581,7 +6591,20 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo,
+         if (!is_brick_mx_enabled()) {
+             glusterd_set_brick_socket_filepath(
+                 volinfo, brickinfo, socketpath, sizeof(socketpath));
++            /*
++             * Condition to check if the brick socket file is present
++             * in the stated path or not. This helps in preventing
++             * constant re-connect triggered in the RPC layer and also
++             * a log message would help out the user.
++             */
++            ret = sys_access(socketpath, F_OK);
++            if (ret) {
++                gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND,
++                        "%s not found", socketpath, NULL);
++                goto out;
++            }
+         }
++
+         gf_log(this->name, GF_LOG_DEBUG,
+                "Using %s as sockfile for brick %s of volume %s ",
+                socketpath, brickinfo->path, volinfo->volname);
+--
+1.8.3.1
+
diff --git a/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch b/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch
new file mode 100644
index 0000000..ac0d1cc
--- /dev/null
+++ b/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch
@@ -0,0 +1,56 @@
+From c961ee1d7c1abb2552b79ed39ed7fd1bd1b3962f Mon Sep 17 00:00:00 2001
+From: srijan-sivakumar
+Date: Fri, 7 Aug 2020 15:02:07 +0530
+Subject: [PATCH 487/511] Events: Log file not re-opened after logrotate.
+
+Issue: The logging is being done in the same file
+even after the logrotate utility has changed the file.
+This causes the logfile to grow indefinitely.
+
+Code Changes: Using the WatchedFileHandler class instead
+of the FileHandler class. This watches the file it is logging
+into, and if the file changes, it is closed and reopened
+using the file name. Hence after a file rotation, a new file
+will be used for logging instead of continuing with
+the same old file.
+
+>Fixes: #1289
+>Change-Id: I773d04f17613a03709cb682692efb39fd8e664e2
+>Signed-off-by: srijan-sivakumar
+Upstream Patch : https://review.gluster.org/c/glusterfs/+/24820
+
+BUG: 1814744
+Change-Id: I773d04f17613a03709cb682692efb39fd8e664e2
+Signed-off-by: srijan-sivakumar
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220370
+Reviewed-by: Shwetha Acharya
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ events/src/utils.py | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/events/src/utils.py b/events/src/utils.py
+index 38b707a..6d4e079 100644
+--- a/events/src/utils.py
++++ b/events/src/utils.py
+@@ -13,6 +13,7 @@ import sys
+ import json
+ import os
+ import logging
++import logging.handlers
+ import fcntl
+ from errno import EBADF
+ from threading import Thread
+@@ -98,7 +99,7 @@ def setup_logger():
+     logger.setLevel(logging.INFO)
+ 
+     # create the logging file handler
+-    fh = logging.FileHandler(LOG_FILE)
++    fh = logging.handlers.WatchedFileHandler(LOG_FILE)
+ 
+     formatter = logging.Formatter("[%(asctime)s] %(levelname)s "
+                                   "[%(module)s - %(lineno)s:%(funcName)s] "
+--
+1.8.3.1
+
diff --git a/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch b/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch
new file mode 100644
index 0000000..310bc53
--- /dev/null
+++ b/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch
@@ -0,0 +1,864 @@
+From 0502383024cbf7e4776816e0a992dccc484a3cf2 Mon Sep 17 00:00:00 2001
+From: Ravishankar N
+Date: Tue, 8 Dec 2020 17:23:22 +0530
+Subject: [PATCH 488/511] glusterd/afr: enable granular-entry-heal by default
+
+XXXXXXXXXXXXXXXXXXX
+ IMPORTANT:
+XXXXXXXXXXXXXXXXXXXX
+I see that for rhgs-3.5.3, GD_OP_VERSION_MAX is GD_OP_VERSION_7_0. Since
+this patch should only act on new volumes in rhgs-3.5.4, I am bumping
+the op-version to GD_OP_VERSION_7_1. In glusterfs upstream, the patch
+acts only if op-version >= GD_OP_VERSION_9_0 as seen in the commit
+message below.
+
+Upstream patch details:
+/------------------------------------------------------------------------------/
+1. The option has been enabled and tested for quite some time now in RHHI-V
+downstream and I think it is safe to make it 'on' by default. Since it
+is not possible to simply change it from 'off' to 'on' without breaking
+rolling upgrades, old clients etc., I have made it default only for new volumes
+starting from op-version GD_OP_VERSION_9_0.
+
+Note: If you do a volume reset, the option will be turned back off.
+This is okay as the dir's gfid will be captured in 'xattrop' folder and heals
+will proceed. There might be stale entries inside entry-changes' folder,
+which will be removed when we enable the option again.
+
+2. I encountered a customer issue where entry heal was pending on a dir. with
+236436 files in it and the glustershd.log output was just stuck at
+"performing entry selfheal", so I have added logs to give us
+more info in DEBUG level about whether entry heal and data heal are
+progressing (metadata heal doesn't take much time). That way, we have a
+quick visual indication to say things are not 'stuck' if we briefly
+enable debug logs, instead of taking statedumps or checking profile info
+etc.
+ +>Fixes: #1483 +>Change-Id: I4f116f8c92f8cd33f209b758ff14f3c7e1981422 +>Signed-off-by: Ravishankar N +Upstream Patch: https://github.com/gluster/glusterfs/pull/1621 +/------------------------------------------------------------------------------/ + +BUG: 1890506 +Change-Id: If449a1e873633616cfc508d74b5c22eb434b55ae +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/220555 +Tested-by: Sunil Kumar Heggodu Gopala Acharya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/globals.h | 4 +- + libglusterfs/src/syncop-utils.c | 4 +- + tests/basic/afr/add-brick-self-heal-non-granular.t | 75 +++++++++++++ + tests/basic/afr/add-brick-self-heal.t | 4 +- + tests/basic/afr/bug-1130892-non-granular.t | 77 ++++++++++++++ + .../basic/afr/bug-1493415-gfid-heal-non-granular.t | 79 ++++++++++++++ + ...507-type-mismatch-error-handling-non-granular.t | 117 +++++++++++++++++++++ + ...1749322-entry-heal-not-happening-non-granular.t | 90 ++++++++++++++++ + .../afr/replace-brick-self-heal-non-granular.t | 65 ++++++++++++ + tests/basic/afr/replace-brick-self-heal.t | 2 +- + tests/bugs/replicate/bug-1130892.t | 2 +- + tests/bugs/replicate/bug-1493415-gfid-heal.t | 2 +- + .../bug-1722507-type-mismatch-error-handling.t | 26 +++-- + .../bug-1749322-entry-heal-not-happening.t | 7 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 5 + + xlators/cluster/afr/src/afr-self-heal-data.c | 3 + + xlators/cluster/afr/src/afr-self-heal-entry.c | 7 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 13 +++ + 18 files changed, 558 insertions(+), 24 deletions(-) + create mode 100644 tests/basic/afr/add-brick-self-heal-non-granular.t + create mode 100644 tests/basic/afr/bug-1130892-non-granular.t + create mode 100644 tests/basic/afr/bug-1493415-gfid-heal-non-granular.t + create mode 100644 tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t + create mode 100644 tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t + create mode 100644 tests/basic/afr/replace-brick-self-heal-non-granular.t + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 31717ed..cc145cd 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,7 +50,7 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_7_1 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ +@@ -138,6 +138,8 @@ + + #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ + ++#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */ ++ + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index be03527..2269c76 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -495,9 +495,7 @@ syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data, + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + +- ret = fn(subvol, entry, loc, data); +- if (ret) +- break; ++ ret |= fn(subvol, entry, loc, data); + } + gf_dirent_free(&entries); + if (ret) +diff --git a/tests/basic/afr/add-brick-self-heal-non-granular.t b/tests/basic/afr/add-brick-self-heal-non-granular.t +new file mode 100644 +index 0000000..19caf24 +--- /dev/null ++++ 
b/tests/basic/afr/add-brick-self-heal-non-granular.t +@@ -0,0 +1,75 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++EXPECT 'Created' volinfo_field $V0 'Status'; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status'; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++ ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++ ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++for i in {1..5} ++do ++ echo $i > $M0/file$i.txt ++done ++ ++# Metadata changes ++TEST setfattr -n user.test -v qwerty $M0/file5.txt ++ ++# Add brick1 ++TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++ ++# New-brick should accuse the old-bricks (Simulating case for data-loss) ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/ ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/ ++ ++# Check if pending xattr and dirty-xattr are set for newly-added-brick ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++ ++# Wait for heal to complete ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Check if entry-heal has happened ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort) ++TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort) ++ ++# Test if data was healed ++TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt ++ ++# Test if metadata was healed and exists on both the bricks ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt ++ ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2 ++ ++cleanup; +diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t +index c847e22..7ebf4f6 100644 +--- a/tests/basic/afr/add-brick-self-heal.t ++++ b/tests/basic/afr/add-brick-self-heal.t +@@ -38,8 +38,8 @@ TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0 + TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 
$B0/${V0}2/ + + # Check if pending xattr and dirty-xattr are set for newly-added-brick +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 + EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2 + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +diff --git a/tests/basic/afr/bug-1130892-non-granular.t b/tests/basic/afr/bug-1130892-non-granular.t +new file mode 100644 +index 0000000..3cdbc7d +--- /dev/null ++++ b/tests/basic/afr/bug-1130892-non-granular.t +@@ -0,0 +1,77 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info; ++ ++# Create a 1X2 replica ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1} ++EXPECT 'Created' volinfo_field $V0 'Status'; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++ ++# Disable self-heal daemon ++TEST gluster volume set $V0 self-heal-daemon off ++ ++# Enable Client side heal ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++ ++# Disable all perf-xlators ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++ ++# Volume start ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++# FUSE Mount ++TEST ${GFS} -s $H0 --volfile-id $V0 $M0 ++ ++# Create files and dirs ++TEST mkdir -p $M0/one/two/ ++TEST `echo "Carpe diem" > $M0/one/two/three` ++ ++# Simulate disk-replacement ++TEST kill_brick $V0 $H0 $B0/${V0}-1 ++EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1 ++TEST rm -rf $B0/${V0}-1/one ++TEST rm -rf $B0/${V0}-1/.glusterfs ++ ++#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI ++#which will create .glusterfs folder. 
++mkdir $B0/${V0}-1/.glusterfs && chmod 600 $B0/${V0}-1/.glusterfs ++ ++# Start force ++TEST $CLI volume start $V0 force ++ ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++ ++TEST stat $M0/one ++ ++sleep 1 ++ ++# Check pending xattrs ++EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data ++EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry ++EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata ++ ++TEST gluster volume set $V0 self-heal-daemon on ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two/three ++ ++cleanup; +diff --git a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t +new file mode 100644 +index 0000000..aff001c +--- /dev/null ++++ b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t +@@ -0,0 +1,79 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST $CLI volume set $V0 self-heal-daemon off ++ ++# Create base entry in indices/xattrop ++echo "Data" > $M0/FILE ++ ++#------------------------------------------------------------------------------# ++TEST touch $M0/f1 ++gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1) ++gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1) ++ ++# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a ++# brick crash at the point where file got created but no xattrs were set. ++TEST setfattr -x trusted.gfid $B0/${V0}1/f1 ++TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 ++ ++# storage/posix considers that a file without gfid changed less than a second ++# before doesn't exist, so we need to wait for a second to force posix to ++# consider that this is a valid file but without gfid. ++sleep 2 ++ ++# Assume there were no pending xattrs on parent dir due to 1st brick crashing ++# too. Then name heal from client must heal the gfid. 
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST stat $M0/f1 ++EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1 ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 ++ ++#------------------------------------------------------------------------------# ++TEST mkdir $M0/dir ++TEST touch $M0/dir/f2 ++gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2) ++gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2) ++ ++# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a ++# brick crash at the point where file got created but no xattrs were set. ++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2 ++TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 ++ ++#Now simulate setting of pending entry xattr on parent dir of 1st brick. ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir ++create_brick_xattrop_entry $B0/${V0}0 dir ++ ++# storage/posix considers that a file without gfid changed less than a second ++# before doesn't exist, so we need to wait for a second to force posix to ++# consider that this is a valid file but without gfid. ++sleep 2 ++ ++#Trigger entry-heal via shd ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2 ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 ++ ++#------------------------------------------------------------------------------# ++cleanup; +diff --git a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t +new file mode 100644 +index 0000000..9079c93 +--- /dev/null ++++ b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t +@@ -0,0 +1,117 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++ ++########################################################################################## ++# GFID link file and the GFID is missing on one brick and all the bricks are being blamed. ++ ++TEST touch $M0/dir/file ++TEST `echo append>> $M0/dir/file` ++ ++#B0 and B2 must blame B1 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir to trigger index heal. 
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Remove the gfid xattr and the link file on one brick. ++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file) ++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file) ++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file ++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++# Wait for 2 second to force posix to consider that this is a valid file but ++# without gfid. ++sleep 2 ++TEST $CLI volume heal $V0 ++ ++# Heal should not fail as the file is missing gfid xattr and the link file, ++# which is not actually the gfid or type mismatch. ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file ++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++rm -f $M0/dir/file ++ ++ ++########################################################################################### ++# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed. ++ ++TEST $CLI volume heal $V0 disable ++TEST touch $M0/dir/file ++#TEST kill_brick $V0 $H0 $B0/$V0"1" ++ ++#B0 and B2 must blame B1 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir to trigger index heal. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Remove the gfid xattr and the link file on two bricks. ++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file) ++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file) ++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file ++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file ++TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++# Wait for 2 second to force posix to consider that this is a valid file but ++# without gfid. ++sleep 2 ++TEST $CLI volume heal $V0 ++ ++# Heal should not fail as the file is missing gfid xattr and the link file, ++# which is not actually the gfid or type mismatch. 
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file ++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++cleanup +diff --git a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t +new file mode 100644 +index 0000000..4f27da4 +--- /dev/null ++++ b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t +@@ -0,0 +1,90 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++function check_gfid_and_link_count ++{ ++ local file=$1 ++ ++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) ++ TEST [ ! -z $file_gfid_b0 ] ++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) ++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) ++ EXPECT $file_gfid_b0 echo $file_gfid_b1 ++ EXPECT $file_gfid_b0 echo $file_gfid_b2 ++ ++ EXPECT "2" stat -c %h $B0/${V0}0/$file ++ EXPECT "2" stat -c %h $B0/${V0}1/$file ++ EXPECT "2" stat -c %h $B0/${V0}2/$file ++} ++TESTS_EXPECTED_IN_LOOP=18 ++ ++################################################################################ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++TEST `echo "File 1 " > $M0/dir/file1` ++TEST touch $M0/dir/file{2..4} ++ ++# Remove file2 from 1st & 3rd bricks ++TEST rm -f $B0/$V0"0"/dir/file2 ++TEST rm -f $B0/$V0"2"/dir/file2 ++ ++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks ++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3) ++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3) ++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm -f $B0/$V0"0"/dir/file3 ++TEST rm -f $B0/$V0"1"/dir/file3 ++ ++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick ++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4) ++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4) ++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4 ++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 ++ ++# B0 and B2 blame each other ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir on first brick. 
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# All the files must be present on all the bricks after conservative merge and ++# should have the gfid xattr and the .glusterfs hardlink. ++check_gfid_and_link_count dir/file1 ++check_gfid_and_link_count dir/file2 ++check_gfid_and_link_count dir/file3 ++check_gfid_and_link_count dir/file4 ++ ++cleanup +diff --git a/tests/basic/afr/replace-brick-self-heal-non-granular.t b/tests/basic/afr/replace-brick-self-heal-non-granular.t +new file mode 100644 +index 0000000..c86bff1 +--- /dev/null ++++ b/tests/basic/afr/replace-brick-self-heal-non-granular.t +@@ -0,0 +1,65 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++for i in {1..5} ++do ++ echo $i > $M0/file$i.txt ++done ++ ++# Metadata changes ++TEST setfattr -n user.test -v qwerty $M0/file5.txt ++ ++# Replace brick1 ++TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force ++ ++# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss) ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/ ++ ++# Check if pending xattr and dirty-xattr are set for replaced-brick ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++ ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++ ++# Wait for heal to complete ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++# Check if entry-heal has happened ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort) ++ ++# To make sure that files were not lost from brick0 ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort) ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++ ++# Test if data was healed ++TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt ++# To make sure that data was not lost from brick0 ++TEST diff $B0/${V0}0/file1.txt 
$B0/${V0}1/file1.txt ++ ++# Test if metadata was healed and exists on both the bricks ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt ++ ++cleanup; +diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t +index 0360db7..da31c87 100644 +--- a/tests/basic/afr/replace-brick-self-heal.t ++++ b/tests/basic/afr/replace-brick-self-heal.t +@@ -30,7 +30,7 @@ TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit forc + TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/ + + # Check if pending xattr and dirty-xattr are set for replaced-brick +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 + EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t +index 0f57d66..e23eb26 100644 +--- a/tests/bugs/replicate/bug-1130892.t ++++ b/tests/bugs/replicate/bug-1130892.t +@@ -56,7 +56,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + TEST stat $M0/one + + # Check pending xattrs +-EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data + EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry + EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata + +diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t +index 125c35a..9714d5e 100644 +--- a/tests/bugs/replicate/bug-1493415-gfid-heal.t ++++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t +@@ -49,7 +49,7 @@ TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2 + TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 + + #Now simulate setting of pending entry xattr on parent dir of 1st brick. +-TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/${V0}0/dir + create_brick_xattrop_entry $B0/${V0}0 dir + + #Trigger entry-heal via shd +diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +index 0aeaaaf..1fdf7ea 100644 +--- a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t ++++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +@@ -23,19 +23,21 @@ TEST mkdir $M0/dir + ########################################################################################## + # GFID link file and the GFID is missing on one brick and all the bricks are being blamed. + +-TEST touch $M0/dir/file +-#TEST kill_brick $V0 $H0 $B0/$V0"1" ++TEST `echo append>> $M0/dir/file` + + #B0 and B2 must blame B1 +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. 
++# Another way is to create the needed entries inside indices/entry-changes ++# folder. ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir to trigger index heal. + xattrop_dir0=$(afr_get_index_path $B0/$V0"0") + base_entry_b0=`ls $xattrop_dir0` + gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) +-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str + EXPECT "^1$" get_pending_heal_count $V0 + + # Remove the gfid xattr and the link file on one brick. +@@ -70,18 +72,20 @@ rm -f $M0/dir/file + + TEST $CLI volume heal $V0 disable + TEST touch $M0/dir/file +-#TEST kill_brick $V0 $H0 $B0/$V0"1" + + #B0 and B2 must blame B1 +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. ++# Another way is to create the needed entries inside indices/entry-changes ++# folder. ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir to trigger index heal. + xattrop_dir0=$(afr_get_index_path $B0/$V0"0") + base_entry_b0=`ls $xattrop_dir0` + gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) +-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str + EXPECT "^1$" get_pending_heal_count $V0 + + # Remove the gfid xattr and the link file on two bricks. +diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +index 9627908..3da873a 100644 +--- a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t ++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +@@ -59,8 +59,11 @@ TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_ + TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 + + # B0 and B2 blame each other +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. ++# Another way is to create the needed entries inside indices/entry-changes ++# folder. ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir on first brick. 
+ xattrop_dir0=$(afr_get_index_path $B0/$V0"0") +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 1608f75..36fd3a9 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -2549,6 +2549,11 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid) + } + } + ++ gf_msg_debug( ++ this->name, 0, ++ "heals needed for %s: [entry-heal=%d, metadata-heal=%d, data-heal=%d]", ++ uuid_utoa(gfid), entry_selfheal, metadata_selfheal, data_selfheal); ++ + if (data_selfheal && priv->data_self_heal) + data_ret = afr_selfheal_data(frame, this, fd); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index cdff4a5..b97c66b 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -239,6 +239,9 @@ afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd, + sink_count = AFR_COUNT(healed_sinks, priv->child_count); + data_lock = alloca0(priv->child_count); + ++ gf_msg_debug(this->name, 0, "gfid:%s, offset=%jd, size=%zu", ++ uuid_utoa(fd->inode->gfid), offset, size); ++ + ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size, + data_lock); + { +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 40be898..00b5b2d 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -206,8 +206,11 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + replies); + } else { + if (!gf_uuid_compare(replies[i].poststat.ia_gfid, +- replies[source].poststat.ia_gfid)) ++ replies[source].poststat.ia_gfid)) { ++ gf_msg_debug(this->name, 0, "skipping %s, no heal needed.", ++ name); + continue; ++ } + + ret = afr_selfheal_recreate_entry(frame, i, source, sources, + fd->inode, name, inode, replies); +@@ -839,7 +842,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + + out: + loc_wipe(&loc); +- return 0; ++ return ret; + } + + static int +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index a72c494..bd17a82 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13181,6 +13181,19 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + goto out; + } + } ++ ++ if ((conf->op_version >= GD_OP_VERSION_7_1) && ++ (volinfo->status == GLUSTERD_STATUS_NONE)) { ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, ++ "cluster.granular-entry-heal", "on"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set option 'cluster.granular-entry-heal' " ++ "on volume %s", ++ volinfo->volname); ++ goto out; ++ } ++ } + out: + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch b/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch new file mode 100644 index 0000000..dde2156 --- /dev/null +++ b/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch @@ -0,0 +1,141 @@ +From 2d172144810956225eac3599c943416c4a7e25d0 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Tue, 8 Dec 2020 20:30:23 +0530 +Subject: [PATCH 489/511] glusterd: fix bug in enabling granular-entry-heal + +Upstream patch details: 
+/------------------------------------------------------------------------------/ +commit f5e1eb87d4af44be3b317b7f99ab88f89c2f0b1a meant to enable the +volume option only for replica volumes but inadvertently enabled +it for all volume types. Fixing it now. + +Also found a bug in glusterd where disabling the option on plain +distribute was succeeding even though setting it in the fist place +fails. Fixed that too. + +>Fixes: #1483 +>Change-Id: Icb6c169a8eec44cc4fb4dd636405d3b3485e91b4 +>Reported-by: Sheetal Pamecha +>Signed-off-by: Ravishankar N +Upstream Patch: https://github.com/gluster/glusterfs/pull/1752 +/------------------------------------------------------------------------------/ + +BUG: 1890506 +Change-Id: Id63655dac08d2cfda4899d7ee0efe96e72cd6986 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/220556 +Tested-by: Sunil Kumar Heggodu Gopala Acharya +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/afr/granular-esh/cli.t | 30 ++++++++++++++++++++----- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 12 +++++----- + 3 files changed, 34 insertions(+), 11 deletions(-) + +diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t +index 995d93e..5ab2e39 100644 +--- a/tests/basic/afr/granular-esh/cli.t ++++ b/tests/basic/afr/granular-esh/cli.t +@@ -11,25 +11,38 @@ TESTS_EXPECTED_IN_LOOP=4 + TEST glusterd + TEST pidof glusterd + +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +-# Test that enabling the option should work on a newly created volume +-TEST $CLI volume set $V0 cluster.granular-entry-heal on +-TEST $CLI volume set $V0 cluster.granular-entry-heal off +- + ######################### + ##### DISPERSE TEST ##### + ######################### + # Execute the same command on a disperse volume and make sure it fails. + TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2} ++EXPECT "no" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume start $V1 ++TEST ! $CLI volume heal $V1 granular-entry-heal enable ++TEST ! $CLI volume heal $V1 granular-entry-heal disable ++ ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 ++ ++######################### ++##### PLAIN DISTRIBUTE TEST ##### ++######################### ++# Execute the same command on a distribute volume and make sure it fails. ++TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1,2} ++EXPECT "no" volume_get_field $V1 cluster.granular-entry-heal + TEST $CLI volume start $V1 + TEST ! $CLI volume heal $V1 granular-entry-heal enable + TEST ! $CLI volume heal $V1 granular-entry-heal disable ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 + + ####################### + ###### TIER TEST ###### + ####################### + # Execute the same command on a disperse + replicate tiered volume and make + # sure the option is set on the replicate leg of the volume ++TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2} ++TEST $CLI volume start $V1 + TEST $CLI volume tier $V1 attach replica 2 $H0:$B0/${V1}{3,4} + TEST $CLI volume heal $V1 granular-entry-heal enable + EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal +@@ -52,10 +65,17 @@ TEST kill_brick $V1 $H0 $B0/${V1}3 + # failed. + TEST ! 
$CLI volume heal $V1 granular-entry-heal enable + EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 + + ###################### + ### REPLICATE TEST ### + ###################### ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++EXPECT "on" volume_get_field $V0 cluster.granular-entry-heal ++# Test that enabling the option should work on a newly created volume ++TEST $CLI volume set $V0 cluster.granular-entry-heal on ++TEST $CLI volume set $V0 cluster.granular-entry-heal off + TEST $CLI volume start $V0 + TEST $CLI volume set $V0 cluster.data-self-heal off + TEST $CLI volume set $V0 cluster.metadata-self-heal off +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index bd17a82..ad3750e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13183,7 +13183,8 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + } + + if ((conf->op_version >= GD_OP_VERSION_7_1) && +- (volinfo->status == GLUSTERD_STATUS_NONE)) { ++ (volinfo->status == GLUSTERD_STATUS_NONE) && ++ (volinfo->type == GF_CLUSTER_TYPE_REPLICATE)) { + ret = dict_set_dynstr_with_alloc(volinfo->dict, + "cluster.granular-entry-heal", "on"); + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 134b04c..09e6ead 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -621,11 +621,13 @@ glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict, + goto out; + } + +- if (((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || +- (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) && +- (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = -1; +- goto out; ++ if ((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || ++ (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) { ++ if ((volinfo->type != GF_CLUSTER_TYPE_REPLICATE) && ++ (volinfo->type != GF_CLUSTER_TYPE_TIER)) { ++ ret = -1; ++ goto out; ++ } + } + + if ((heal_op == GF_SHD_OP_HEAL_ENABLE) || +-- +1.8.3.1 + diff --git a/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch b/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch new file mode 100644 index 0000000..bd3c777 --- /dev/null +++ b/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch @@ -0,0 +1,57 @@ +From 5a110946b41619577b365cdceddc4da551ff49f0 Mon Sep 17 00:00:00 2001 +From: kinsu +Date: Thu, 19 Sep 2019 08:34:32 +0000 +Subject: [PATCH 490/511] Segmentation fault occurs during truncate + +Problem: +Segmentation fault occurs when bricks are nearly full 100% and in +parallel truncate of a file is attempted (No space left on device). +Prerequicite is that performance xlators are activated +(read-ahead, write-behind etc) +while stack unwind of the frames following an error responce +from brick (No space left on device) frame->local includes a memory +location that is not allocated via mem_get but via calloc. +The destroyed frame is always ra_truncate_cbk winded from ra_ftruncate +and the inode ptr is copied to the frame local in the wb_ftruncate. 
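+
+For context, frame->local here can originate from two different
+allocators, and only one of them produces the pool header that
+mem_put() expects. A minimal sketch, assuming the usual libglusterfs
+helpers (illustrative, not taken from the crashing code path):
+
+    /* pool-managed: mem_get0() prepends a pool header, so mem_put()
+     * can locate hdr->pool_list and return the object to its pool */
+    local = mem_get0(this->local_pool);
+
+    /* plain heap: GF_CALLOC() leaves no pool header behind the
+     * pointer, so passing this to mem_put() makes it read garbage
+     * metadata and crash */
+    local = GF_CALLOC(1, sizeof(*local), gf_common_mt_char);
+
+The guard below detects the second case and returns instead of
+dereferencing a NULL pool_list.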
+ +Fix: +extra check is added for the pool ptr + +>Change-Id: Ic5d3bd0ab7011e40b2811c6dece063b256e4d9d1 +>Fixes: bz#1797882 +>Signed-off-by: kinsu + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23445 + +BUG: 1842449 +Change-Id: Ic5d3bd0ab7011e40b2811c6dece063b256e4d9d1 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/220540 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/mem-pool.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 73503e0..1390747 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -857,6 +857,14 @@ mem_put(void *ptr) + /* Not one of ours; don't touch it. */ + return; + } ++ ++ if (!hdr->pool_list) { ++ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL, ++ LG_MSG_INVALID_ARG, ++ "invalid argument hdr->pool_list NULL"); ++ return; ++ } ++ + pool_list = hdr->pool_list; + pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST]; + +-- +1.8.3.1 + diff --git a/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch b/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch new file mode 100644 index 0000000..375cfd2 --- /dev/null +++ b/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch @@ -0,0 +1,56 @@ +From 0fed8ca9c6c9e3a9041951bc748c7936d0abc8cf Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Tue, 15 Sep 2020 16:20:19 +0530 +Subject: [PATCH 491/511] glusterd: mount directory getting truncated on + mounting shared_storage + +Issue: +In case of a user created volume the mount point +is the brick path 'ex: /data/brick' but in case of +shared_storage the mount point is '/'.So, here +we increment the array by one so as to get the exact +path of brick without '/', which works fine for other +volumes as the pointer of the brick_dir variable is +at '/', but for shared_storage it is at 'v'(where v is +starting letter of 'var' directory). So, on incrementing +the path we get in case of shared_storage starts from +'ar/lib/glusterd/...' + +Fix: +Only, increment the pointer if the current position is '/', +else the path will be wrong. 
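+
+A minimal sketch of the two cases (paths are examples only):
+
+    /* user volume:    mnt_pt = "/data", brickpath = "/data/brick"
+     *                 => brick_dir points at "/brick"; skip the '/' */
+    /* shared storage: mnt_pt = "/", brickpath = "/var/lib/glusterd/..."
+     *                 => brick_dir already points at "var/..."; an
+     *                 unconditional increment would leave "ar/lib/..." */
+    brick_dir = &brickpath[strlen(mnt_pt)];
+    if (brick_dir[0] == '/')
+        brick_dir++;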
+ +>Fixes: #1480 + +>Change-Id: Id31bb13f58134ae2099884fbc5984c4e055fb357 +>Signed-off-by: nik-redhat + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24989 + +BUG: 1878077 +Change-Id: Id31bb13f58134ae2099884fbc5984c4e055fb357 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/220536 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ad3750e..b343eee 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1221,7 +1221,8 @@ glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir) + } + + brick_dir = &brickpath[strlen(mnt_pt)]; +- brick_dir++; ++ if (brick_dir[0] == '/') ++ brick_dir++; + + snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir); + } +-- +1.8.3.1 + diff --git a/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch b/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch new file mode 100644 index 0000000..a983baa --- /dev/null +++ b/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch @@ -0,0 +1,188 @@ +From bde1ad97f8739f8370a2bbb92229b1b397ecd82c Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Tue, 8 Dec 2020 19:06:03 +0530 +Subject: [PATCH 492/511] afr/lookup: Pass xattr_req in while doing a selfheal + in lookup + +We were not passing xattr_req when doing a name self heal +as well as a meta data heal. Because of this, some xdata +was missing which causes i/o errors + +Upstream patch details: +> Change-Id: Ibfb1205a7eb0195632dc3820116ffbbb8043545f +> Fixes: bz#1728770 +> Signed-off-by: Mohammed Rafi KC +Upstream Patch : https://review.gluster.org/#/c/glusterfs/+/23024/ + +BUG: 1726673 +Change-Id: Ibfb1205a7eb0195632dc3820116ffbbb8043545f +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/220538 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/replicate/bug-1728770-pass-xattrs.t | 52 ++++++++++++++++++++++++++ + tests/include.rc | 1 + + xlators/cluster/afr/src/afr-common.c | 8 +++- + xlators/cluster/afr/src/afr-self-heal-common.c | 9 ++++- + xlators/cluster/afr/src/afr-self-heal.h | 2 +- + 5 files changed, 67 insertions(+), 5 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1728770-pass-xattrs.t + +diff --git a/tests/bugs/replicate/bug-1728770-pass-xattrs.t b/tests/bugs/replicate/bug-1728770-pass-xattrs.t +new file mode 100644 +index 0000000..159c4fc +--- /dev/null ++++ b/tests/bugs/replicate/bug-1728770-pass-xattrs.t +@@ -0,0 +1,52 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../snapshot.rc ++ ++cleanup; ++ ++function fop_on_bad_disk { ++ local path=$1 ++ mkdir $path/dir{1..1000} 2>/dev/null ++ mv $path/dir1 $path/newdir ++ touch $path/foo.txt ++ echo $? ++} ++ ++function ls_fop_on_bad_disk { ++ local path=$1 ++ ls $path ++ echo $? 
++} ++ ++TEST init_n_bricks 6; ++TEST setup_lvm 6; ++ ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3 $H0:$L4 $H0:$L5 $H0:$L6; ++TEST $CLI volume set $V0 health-check-interval 1000; ++ ++TEST $CLI volume start $V0; ++ ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0; ++#corrupt last disk ++dd if=/dev/urandom of=/dev/mapper/patchy_snap_vg_6-brick_lvm bs=512K count=200 status=progress && sync ++ ++ ++# Test the disk is now returning EIO for touch and ls ++EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^1$" fop_on_bad_disk "$L6" ++EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^2$" ls_fop_on_bad_disk "$L6" ++ ++TEST touch $M0/foo{1..100} ++TEST $CLI volume remove-brick $V0 replica 3 $H0:$L4 $H0:$L5 $H0:$L6 start ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$L4 $H0:$L5 $H0:$L6"; ++ ++#check that remove-brick status should not have any failed or skipped files ++var=`$CLI volume remove-brick $V0 $H0:$L4 $H0:$L5 $H0:$L6 status | grep completed` ++TEST [ `echo $var | awk '{print $5}'` = "0" ] ++TEST [ `echo $var | awk '{print $6}'` = "0" ] ++ ++cleanup; +diff --git a/tests/include.rc b/tests/include.rc +index 762c5e2..c925941 100644 +--- a/tests/include.rc ++++ b/tests/include.rc +@@ -89,6 +89,7 @@ GRAPH_SWITCH_TIMEOUT=10 + UNLINK_TIMEOUT=5 + MDC_TIMEOUT=5 + IO_WAIT_TIMEOUT=5 ++DISK_FAIL_TIMEOUT=80 + + LOGDIR=$(gluster --print-logdir) + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 851ccad..fca2cd5 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2609,6 +2609,10 @@ afr_lookup_sh_metadata_wrap(void *opaque) + dict = dict_new(); + if (!dict) + goto out; ++ if (local->xattr_req) { ++ dict_copy(local->xattr_req, dict); ++ } ++ + ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to set link-count in dict "); +@@ -2617,7 +2621,7 @@ afr_lookup_sh_metadata_wrap(void *opaque) + if (loc_is_nameless(&local->loc)) { + ret = afr_selfheal_unlocked_discover_on(frame, local->inode, + local->loc.gfid, local->replies, +- local->child_up); ++ local->child_up, dict); + } else { + inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent, + local->loc.name, local->replies, +@@ -2791,7 +2795,7 @@ afr_lookup_selfheal_wrap(void *opaque) + + inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent, + local->loc.name, local->replies, +- local->child_up, NULL); ++ local->child_up, local->xattr_req); + if (inode) + inode_unref(inode); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 36fd3a9..9b6575f 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1861,7 +1861,7 @@ afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict) + int + afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, +- unsigned char *discover_on) ++ unsigned char *discover_on, dict_t *dict) + { + loc_t loc = { + 0, +@@ -1876,6 +1876,8 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + xattr_req = dict_new(); + if (!xattr_req) + return -ENOMEM; ++ if (dict) ++ dict_copy(dict, xattr_req); + + if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) { + dict_unref(xattr_req); +@@ -1906,11 +1908,14 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, 
inode_t *inode, uuid_t gfid, + struct afr_reply *replies) + { + afr_local_t *local = NULL; ++ dict_t *dict = NULL; + + local = frame->local; ++ if (local && local->xattr_req) ++ dict = local->xattr_req; + + return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies, +- local->child_up); ++ local->child_up, dict); + } + + unsigned int +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index b39af02..8f6fb00 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -188,7 +188,7 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, + int + afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, +- unsigned char *discover_on); ++ unsigned char *discover_on, dict_t *dict); + inode_t * + afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent, + const char *name, struct afr_reply *replies, +-- +1.8.3.1 + diff --git a/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch b/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch new file mode 100644 index 0000000..e712886 --- /dev/null +++ b/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch @@ -0,0 +1,283 @@ +From 03de45e5fb1c8aa5369848ed9e52abd1365e1d21 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Wed, 31 Jul 2019 11:34:19 +0530 +Subject: [PATCH 493/511] geo-rep: Note section is required for ignore_deletes + +There exists a window of 15 sec, where the deletes are picked up +by history crawl when the ignore_deletes is set to true. +And it eventually deletes the file/s from slave which is/are not +supposed to be deleted. Though it is working as per design, a +note regarding this is needed. + +Added a warning message indicating the same. +Also logged info when the worker restarts after ignore-deletes +option set. 
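+
+The CLI change below follows the usual confirm-or-abort pattern; in
+outline (question text abbreviated):
+
+    answer = cli_cmd_get_confirmation(state, question);
+    if (GF_ANSWER_NO == answer) {
+        *errstr = gf_strdup("Aborted by user.");
+        ret = -1;   /* the config option is left unchanged */
+        goto out;
+    }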
+ +>fixes: bz#1708603 +>Change-Id: I103be882fac18b4cef935efa355f5037a396f7c1 +>Signed-off-by: Shwetha K Acharya +Upstream patch: https://review.gluster.org/c/glusterfs/+/22702 + +BUG: 1224906 +Change-Id: I103be882fac18b4cef935efa355f5037a396f7c1 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/220757 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-parser.c | 45 ++++++++++++++++++++------ + cli/src/cli-cmd-volume.c | 20 ++++++++---- + cli/src/cli.h | 3 +- + geo-replication/syncdaemon/gsyncd.py | 2 +- + geo-replication/syncdaemon/master.py | 6 ++++ + tests/00-geo-rep/bug-1708603.t | 63 ++++++++++++++++++++++++++++++++++++ + 6 files changed, 120 insertions(+), 19 deletions(-) + create mode 100644 tests/00-geo-rep/bug-1708603.t + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 5fd05f4..34f17c9 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -2901,7 +2901,8 @@ out: + } + + int32_t +-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) ++cli_cmd_gsync_set_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, char **errstr) + { + int32_t ret = -1; + dict_t *dict = NULL; +@@ -2918,6 +2919,8 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + char *save_ptr = NULL; + char *slave_temp = NULL; + char *token = NULL; ++ gf_answer_t answer = GF_ANSWER_NO; ++ const char *question = NULL; + + GF_ASSERT(words); + GF_ASSERT(options); +@@ -2990,8 +2993,10 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + + if (masteri && gsyncd_url_check(words[masteri])) + goto out; +- if (slavei && !glob && !gsyncd_url_check(words[slavei])) ++ if (slavei && !glob && !gsyncd_url_check(words[slavei])) { ++ gf_asprintf(errstr, "Invalid slave url: %s", words[slavei]); + goto out; ++ } + + w = str_getunamb(words[cmdi], opwords); + if (!w) +@@ -3101,16 +3106,36 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + } + if (!ret) + ret = dict_set_int32(dict, "type", type); +- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) ++ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) { ++ if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") && ++ !strcmp((char *)words[wordcount - 1], "true")) { ++ question = ++ "There exists ~15 seconds delay for the option to take" ++ " effect from stime of the corresponding brick. Please" ++ " check the log for the time, the option is effective." 
++ " Proceed"; ++ ++ answer = cli_cmd_get_confirmation(state, question); ++ ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_INFO, ++ "Operation " ++ "cancelled, exiting"); ++ *errstr = gf_strdup("Aborted by user."); ++ ret = -1; ++ goto out; ++ } ++ } ++ + ret = config_parse(words, wordcount, dict, cmdi, glob); ++ } + + out: + if (slave_temp) + GF_FREE(slave_temp); +- if (ret) { +- if (dict) +- dict_unref(dict); +- } else ++ if (ret && dict) ++ dict_unref(dict); ++ else + *options = dict; + + return ret; +@@ -5659,9 +5684,9 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + int32_t ret = -1; + char *w = NULL; + char *volname = NULL; +- char *opwords[] = { +- "enable", "disable", "scrub-throttle", "scrub-frequency", "scrub", +- "signing-time", "signer-threads", NULL}; ++ char *opwords[] = {"enable", "disable", "scrub-throttle", ++ "scrub-frequency", "scrub", "signing-time", ++ "signer-threads", NULL}; + char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL}; + char *scrub_freq_values[] = {"hourly", "daily", "weekly", "biweekly", + "monthly", "minute", NULL}; +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 72504ca..6f5bf8b 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2457,6 +2457,7 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word, + rpc_clnt_procedure_t *proc = NULL; + call_frame_t *frame = NULL; + cli_local_t *local = NULL; ++ char *errstr = NULL; + #if (USE_EVENTS) + int ret1 = -1; + int cmd_type = -1; +@@ -2468,16 +2469,21 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word, + + proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GSYNC_SET]; + +- frame = create_frame(THIS, THIS->ctx->pool); +- if (frame == NULL) { +- ret = -1; ++ ret = cli_cmd_gsync_set_parse(state, words, wordcount, &options, &errstr); ++ if (ret) { ++ if (errstr) { ++ cli_err("%s", errstr); ++ GF_FREE(errstr); ++ } else { ++ cli_usage_out(word->pattern); ++ } ++ parse_err = 1; + goto out; + } + +- ret = cli_cmd_gsync_set_parse(words, wordcount, &options); +- if (ret) { +- cli_usage_out(word->pattern); +- parse_err = 1; ++ frame = create_frame(THIS, THIS->ctx->pool); ++ if (frame == NULL) { ++ ret = -1; + goto out; + } + +diff --git a/cli/src/cli.h b/cli/src/cli.h +index c30ae9c..7b4f446 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -269,7 +269,8 @@ int32_t + cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **opt); + + int32_t +-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **opt); ++cli_cmd_gsync_set_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **opt, char **errstr); + + int32_t + cli_cmd_quota_parse(const char **words, int wordcount, dict_t **opt); +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 8940384..215c62d 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -315,7 +315,7 @@ def main(): + + # Log message for loaded config file + if config_file is not None: +- logging.info(lf("Using session config file", path=config_file)) ++ logging.debug(lf("Using session config file", path=config_file)) + + set_term_handler() + excont = FreeObject(exval=0) +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 08e98f8..98637e7 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -1549,6 +1549,12 @@ class 
GMasterChangeloghistoryMixin(GMasterChangelogMixin): + data_stime = self.get_data_stime() + + end_time = int(time.time()) ++ ++ #as start of historical crawl marks Geo-rep worker restart ++ if gconf.get("ignore-deletes"): ++ logging.info(lf('ignore-deletes config option is set', ++ stime=data_stime)) ++ + logging.info(lf('starting history crawl', + turns=self.history_turns, + stime=data_stime, +diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t +new file mode 100644 +index 0000000..26913f1 +--- /dev/null ++++ b/tests/00-geo-rep/bug-1708603.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../geo-rep.rc ++. $(dirname $0)/../env.rc ++ ++SCRIPT_TIMEOUT=300 ++ ++##Cleanup and start glusterd ++cleanup; ++TEST glusterd; ++TEST pidof glusterd ++ ++ ++##Variables ++GEOREP_CLI="gluster volume geo-replication" ++master=$GMV0 ++SH0="127.0.0.1" ++slave=${SH0}::${GSV0} ++num_active=2 ++num_passive=2 ++master_mnt=$M0 ++slave_mnt=$M1 ++ ++############################################################ ++#SETUP VOLUMES AND GEO-REPLICATION ++############################################################ ++ ++##create_and_start_master_volume ++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $GMV0 ++ ++##create_and_start_slave_volume ++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4}; ++TEST $CLI volume start $GSV0 ++ ++##Mount master ++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++#Create geo-rep session ++TEST create_georep_session $master $slave ++ ++echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1 ++EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++echo y | $GEOREP_CLI $master $slave config ignore-deletes true ++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Delete Geo-rep ++TEST $GEOREP_CLI $master $slave delete ++ ++#Cleanup authorized keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++ ++cleanup; ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +-- +1.8.3.1 + diff --git a/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch b/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch new file mode 100644 index 0000000..d11b138 --- /dev/null +++ b/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch @@ -0,0 +1,54 @@ +From 1b24bc4319203128a9ff7f97fe14f4b3622c4eec Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 26 Aug 2020 20:05:35 +0530 +Subject: [PATCH 494/511] glusterd: start the brick on a different port + +Problem: brick fails to start when the port provided by +glusterd is in use by any other process + +Solution: glusterd should check errno set by runner_run() +and if it is set to EADDRINUSE, it should allocate a new +port to the brick and try to start it again. + +Previously ret value is checked instead of errno, so the +retry part never executed. Now, we initialize errno to 0 +before calling runner framework. and afterwards store the +errno into ret to avoid modification of errno in subsequent +function calls. 
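+
+In outline, the pattern is to trust errno rather than the return value
+(this mirrors the hunk below):
+
+    errno = 0;                 /* clear any stale value first */
+    ret = runner_run(&runner);
+    if (errno != 0)
+        ret = errno;           /* capture before later calls clobber it */
+
+    if (ret == EADDRINUSE) {
+        /* allocate a fresh port and retry starting the brick */
+    }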
+ +>fixes: #1101 + +>Change-Id: I1aa048a77c5f8b035dece36976d60602d9753b1a +>Signed-off-by: Sanju Rakonde +>Signed-off-by: nik-redhat + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24923/ + +BUG: 1865796 +Change-Id: I1aa048a77c5f8b035dece36976d60602d9753b1a +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/220541 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index b343eee..f7030fb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2289,7 +2289,10 @@ retry: + + if (wait) { + synclock_unlock(&priv->big_lock); ++ errno = 0; + ret = runner_run(&runner); ++ if (errno != 0) ++ ret = errno; + synclock_lock(&priv->big_lock); + + if (ret == EADDRINUSE) { +-- +1.8.3.1 + diff --git a/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch b/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch new file mode 100644 index 0000000..6b3f6f5 --- /dev/null +++ b/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch @@ -0,0 +1,60 @@ +From 17a2a880290d2038c913c23985df620e3c9741b3 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Mon, 16 Mar 2020 15:17:23 +0000 +Subject: [PATCH 495/511] geo-rep: descriptive message when worker crashes due + to EIO + +With this patch now you can notice log if it is due to EIO: + +[2020-03-16 16:24:48.293837] E [syncdutils(worker /bricks/brick1/mbr3):348:log_raise_exception] : Getting "Input/Output error" is most likely due to a. Brick is down or b. Split brain issue. +[2020-03-16 16:24:48.293915] E [syncdutils(worker /bricks/brick1/mbr3):352:log_raise_exception] : This is expected as per design to keep the consistency of the file system. Once the above issue is resolved geo-rep would automatically proceed further. + +>Change-Id: Ie33f2440bc96089731ce12afa8dab91d9550a7ca +>Fixes: #1104 +>Signed-off-by: Sunny Kumar +>Upstream Patch : https://review.gluster.org/c/glusterfs/+/24228/ + +BUG: 1412494 +Change-Id: Ie33f2440bc96089731ce12afa8dab91d9550a7ca +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/220874 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/syncdutils.py | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index f43e13b..d5a94d4 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -22,7 +22,7 @@ import socket + from subprocess import PIPE + from threading import Lock, Thread as baseThread + from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED +-from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode ++from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO + from signal import signal, SIGTERM + import select as oselect + from os import waitpid as owaitpid +@@ -346,6 +346,17 @@ def log_raise_exception(excont): + ECONNABORTED): + logging.error(lf('Gluster Mount process exited', + error=errorcode[exc.errno])) ++ elif isinstance(exc, OSError) and exc.errno == EIO: ++ logging.error("Getting \"Input/Output error\" " ++ "is most likely due to " ++ "a. Brick is down or " ++ "b. 
Split brain issue.") ++ logging.error("This is expected as per design to " ++ "keep the consistency of the file system. " ++ "Once the above issue is resolved " ++ "geo-replication would automatically " ++ "proceed further.") ++ logtag = "FAIL" + else: + logtag = "FAIL" + if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG): +-- +1.8.3.1 + diff --git a/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch b/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch new file mode 100644 index 0000000..590aea3 --- /dev/null +++ b/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch @@ -0,0 +1,139 @@ +From 5893e64ca8c147b7acfa12cd9824f254d53ee261 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Wed, 4 Nov 2020 09:02:03 +0530 +Subject: [PATCH 496/511] posix: Use MALLOC instead of alloca to allocate + memory for xattrs list (#1730) + +In case of file is having huge xattrs on backend a brick process is +crashed while alloca(size) limit has been crossed 256k because iot_worker +stack size is 256k. + +> Fixes: #1699 +> Signed-off-by: Mohit Agrawal +> Change-Id: I100468234f83329a7d65b43cbe4e10450c1ccecd +> (Cherry pick from commit fd666caa35ac84dd1cba55399761982011b77112) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/pull/1828) + +Change-Id: I100468234f83329a7d65b43cbe4e10450c1ccecd +Bug: 1903468 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/220872 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix-gfid-path.c | 5 ++++- + xlators/storage/posix/src/posix-helpers.c | 3 ++- + xlators/storage/posix/src/posix-inode-fd-ops.c | 12 +++++++++--- + 3 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-gfid-path.c b/xlators/storage/posix/src/posix-gfid-path.c +index 64b5c6c..01315ac 100644 +--- a/xlators/storage/posix/src/posix-gfid-path.c ++++ b/xlators/storage/posix/src/posix-gfid-path.c +@@ -195,7 +195,8 @@ posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path, + if (size == 0) + goto done; + } +- list = alloca(size); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + *op_errno = errno; + goto err; +@@ -309,6 +310,7 @@ done: + GF_FREE(paths[j]); + } + ret = 0; ++ GF_FREE(list); + return ret; + err: + if (path) +@@ -317,5 +319,6 @@ err: + if (paths[j]) + GF_FREE(paths[j]); + } ++ GF_FREE(list); + return ret; + } +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 73a44be..ceac52a 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -349,7 +349,7 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler) + goto out; + } + +- list = alloca(size); ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + goto out; + } +@@ -379,6 +379,7 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler) + ret = 0; + + out: ++ GF_FREE(list); + return ret; + } + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 21119ea..1d37aed 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -3305,7 +3305,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + goto out; + } + +- list = alloca(size); ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + *op_errno = errno; + goto out; 
+@@ -3385,6 +3385,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + op_ret = 0; + + out: ++ GF_FREE(list); + return op_ret; + } + +@@ -3810,7 +3811,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + if (size == 0) + goto done; + } +- list = alloca(size); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + op_errno = errno; + goto out; +@@ -3937,6 +3939,7 @@ out: + dict_unref(dict); + } + ++ GF_FREE(list); + return 0; + } + +@@ -4136,7 +4139,8 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + if (size == 0) + goto done; + } +- list = alloca(size + 1); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + op_ret = -1; + op_errno = ENOMEM; +@@ -4240,6 +4244,8 @@ out: + if (dict) + dict_unref(dict); + ++ GF_FREE(list); ++ + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch b/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch new file mode 100644 index 0000000..9d477ae --- /dev/null +++ b/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch @@ -0,0 +1,80 @@ +From 85a5cce40dba0393e636c0eb5af9d8f8746f2315 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 2 Jan 2020 10:23:52 +0530 +Subject: [PATCH 497/511] socket: Use AES128 cipher in SSL if AES is supported + by CPU + +SSL performance is improved after configuring AES128 cipher +so use AES128 cipher as a default cipher on the CPU those +enabled AES bits otherwise ssl use AES256 cipher + +> Change-Id: I91c50fe987cbb22ed76f8012094730c592c63506 +> Fixes: #1050 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 177cc09d24515596eb51739ce0a276c26e3c52f1) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23952/) + +Change-Id: I91c50fe987cbb22ed76f8012094730c592c63506 +Bug: 1612973 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/220870 +Tested-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 54cd5df..1ee7320 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4238,6 +4238,34 @@ static void __attribute__((destructor)) fini_openssl_mt(void) + ERR_free_strings(); + } + ++/* The function returns 0 if AES bit is enabled on the CPU */ ++static int ++ssl_check_aes_bit(void) ++{ ++ FILE *fp = fopen("/proc/cpuinfo", "r"); ++ int ret = 1; ++ size_t len = 0; ++ char *line = NULL; ++ char *match = NULL; ++ ++ GF_ASSERT(fp != NULL); ++ ++ while (getline(&line, &len, fp) > 0) { ++ if (!strncmp(line, "flags", 5)) { ++ match = strstr(line, " aes"); ++ if ((match != NULL) && ((match[4] == ' ') || (match[4] == 0))) { ++ ret = 0; ++ break; ++ } ++ } ++ } ++ ++ free(line); ++ fclose(fp); ++ ++ return ret; ++} ++ + static int + ssl_setup_connection_params(rpc_transport_t *this) + { +@@ -4261,6 +4289,10 @@ ssl_setup_connection_params(rpc_transport_t *this) + return 0; + } + ++ if (!ssl_check_aes_bit()) { ++ cipher_list = "AES128:" DEFAULT_CIPHER_LIST; ++ } ++ + priv->ssl_own_cert = DEFAULT_CERT_PATH; + if (dict_get_str(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) { + if (!priv->ssl_enabled) { +-- +1.8.3.1 + diff --git 
a/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch b/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch new file mode 100644 index 0000000..078c390 --- /dev/null +++ b/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch @@ -0,0 +1,69 @@ +From 11d648660b8bd246756f87b2f40c72fbabf084d1 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Tue, 19 May 2020 16:13:01 +0100 +Subject: [PATCH 498/511] geo-rep: Fix corner case in rename on mkdir during + hybrid crawl +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Problem: +The issue is being hit during hybrid mode while handling rename on slave. +In this special case the rename is recorded as mkdir and geo-rep process it +by resolving the path form backend. + +While resolving the backend path during this special handling one corner case is not considered. + + +Traceback (most recent call last): +  File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in worker +    res = getattr(self.obj, rmeth)(*in_data[2:]) +  File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", line 588, in entry_ops +    src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) +  File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 710, in get_slv_dir_path +    dir_entry = os.path.join(pfx, pargfid, basename) +  File "/usr/lib64/python2.7/posixpath.py", line 75, in join +    if b.startswith('/'): +AttributeError: 'int' object has no attribute 'startswith' + +In pyhthon3: +Traceback (most recent call last): + File "", line 1, in + File "/usr/lib64/python3.8/posixpath.py", line 90, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.8/genericpath.py", line 152, in _check_arg_types + raise TypeError(f'{funcname}() argument must be str, bytes, or ' +TypeError: join() argument must be str, bytes, or os.PathLike object, not 'int' + + +>Change-Id: I8b926899c60ad8c4ffc886d57028ba70fd21e332 +>Fixes: #1250 +>Signed-off-by: Sunny Kumar +Upstream Patch: https://review.gluster.org/c/glusterfs/+/24468/ + +BUG: 1835229 +Change-Id: I8b926899c60ad8c4ffc886d57028ba70fd21e332 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/220867 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/syncdutils.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index d5a94d4..26c79d0 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -732,6 +732,8 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): + else: + dirpath = dirpath.strip("/") + pargfid = get_gfid_from_mnt(dirpath) ++ if isinstance(pargfid, int): ++ return None + dir_entry = os.path.join(pfx, pargfid, basename) + return dir_entry + +-- +1.8.3.1 + diff --git a/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch b/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch new file mode 100644 index 0000000..edeca1a --- /dev/null +++ b/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch @@ -0,0 +1,63 @@ +From f78a5d86c55149d80b6efdf60eae7221c238654e Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 24 Sep 2020 12:43:51 +0000 +Subject: [PATCH 499/511] gfapi: give appropriate error when size exceeds + +This patch help generate appropriate error message +when the gfapi tries to write data equal to or 
+greater than 1 Gb due to the limitation at the +socket layer. + +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1557 +> fixes: #1518 +> Change-Id: I1234a0b5a6e675a0b20c6b1afe0f4390fd721f6f +> Signed-off-by: Rinku Kothiya + +BUG: 1691320 +Change-Id: I1234a0b5a6e675a0b20c6b1afe0f4390fd721f6f +Signed-off-by: Rinku Kothiya +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/219998 +Tested-by: RHGS Build Bot +--- + api/src/gfapi-messages.h | 4 +++- + api/src/glfs-fops.c | 8 ++++++++ + 2 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/api/src/gfapi-messages.h b/api/src/gfapi-messages.h +index 68d1242..2ffd5ac 100644 +--- a/api/src/gfapi-messages.h ++++ b/api/src/gfapi-messages.h +@@ -49,6 +49,8 @@ GLFS_MSGID(API, API_MSG_MEM_ACCT_INIT_FAILED, API_MSG_MASTER_XLATOR_INIT_FAILED, + API_MSG_INODE_LINK_FAILED, API_MSG_STATEDUMP_FAILED, + API_MSG_XREADDIRP_R_FAILED, API_MSG_LOCK_INSERT_MERGE_FAILED, + API_MSG_SETTING_LOCK_TYPE_FAILED, API_MSG_INODE_FIND_FAILED, +- API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED); ++ API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED, ++ API_MSG_INVALID_ARG); + ++#define API_MSG_INVALID_ARG_STR "Invalid" + #endif /* !_GFAPI_MESSAGES_H__ */ +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index e6adea5..051541f 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -1525,6 +1525,14 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + + GF_REF_GET(glfd); + ++ if (iovec->iov_len >= GF_UNIT_GB) { ++ ret = -1; ++ errno = EINVAL; ++ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG, ++ "size >= %llu is not allowed", GF_UNIT_GB, NULL); ++ goto out; ++ } ++ + subvol = glfs_active_subvol(glfd->fs); + if (!subvol) { + ret = -1; +-- +1.8.3.1 + diff --git a/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch b/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch new file mode 100644 index 0000000..4898422 --- /dev/null +++ b/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch @@ -0,0 +1,104 @@ +From 60789c658ea22063c26168cb4ce15ac5fd279e58 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Mon, 14 Dec 2020 10:57:03 +0530 +Subject: [PATCH 500/511] features/shard: Convert shard block indices to uint64 + +This patch fixes a crash in FOPs that operate on really large sharded +files where number of participant shards could sometimes exceed +signed int32 max. + +The patch also adds GF_ASSERTs to ensure that number of participating +shards is always greater than 0 for files that do have more than one +shard. 
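+
+A worked illustration of the overflow, using example numbers (the shard
+block size is configurable; 4MB is chosen here only for the arithmetic):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        uint64_t block_size = 4ULL << 20;     /* 4MB shards */
+        uint64_t offset = 1ULL << 62;         /* huge (sparse) offset */
+        uint64_t block = offset / block_size; /* 2^40 blocks */
+        printf("block index %llu vs INT32_MAX %d\n",
+               (unsigned long long)block, INT32_MAX);
+        return 0;
+    }
+
+A signed 32-bit index cannot hold 2^40, so a FOP addressing such a
+shard would truncate the index; the uint64_t conversion below avoids
+that.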
+ +Upstream: +> https://review.gluster.org/#/c/glusterfs/+/23407/ +> Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 +> Fixes: #1348 +> Signed-off-by: Krutika Dhananjay + +BUG: 1752739 +Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 +Signed-off-by: Vinayakswami Hariharmath +Reviewed-on: https://code.engineering.redhat.com/gerrit/221061 +Tested-by: Ravishankar Narayanankutty +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + xlators/features/shard/src/shard.c | 14 ++++++++------ + xlators/features/shard/src/shard.h | 6 +++--- + 2 files changed, 11 insertions(+), 9 deletions(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 16d557b..a967f35 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1855,10 +1855,9 @@ int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, + */ + if (!inode) { + gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent" +- " in backend: %s. Directly proceeding to update " +- "file size", +- uuid_utoa(inode->gfid)); ++ "Last shard to be truncated absent in backend: " PRIu64 ++ " of gfid: %s. Directly proceeding to update file size", ++ local->first_block, uuid_utoa(local->loc.inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; +@@ -2389,6 +2388,7 @@ int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { + get_highest_block(0, local->prebuf.ia_size, local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = + (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; + +@@ -4809,6 +4809,7 @@ int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { + get_highest_block(local->offset, local->total_size, local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = local->loc.inode; + + local->inode_list = +@@ -5266,6 +5267,7 @@ int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + local->last_block = + get_highest_block(local->offset, local->total_size, local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->inode_list = + GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); + if (!local->inode_list) { +@@ -5274,8 +5276,8 @@ int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + } + + gf_msg_trace( +- this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " " +- "last_block=%" PRIu32 " num_blocks=%" PRIu32 ++ this->name, 0, "%s: gfid=%s first_block=%" PRIu64 " " ++ "last_block=%" PRIu64 " num_blocks=%" PRIu64 + " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", + gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), + local->first_block, local->last_block, local->num_blocks, local->offset, +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 1721417..4fe181b 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame, + typedef struct shard_local { + int op_ret; + int op_errno; +- int first_block; +- int last_block; +- int num_blocks; ++ uint64_t first_block; ++ uint64_t last_block; ++ uint64_t num_blocks; + int 
call_count; + int eexist_count; + int create_count; +-- +1.8.3.1 + diff --git a/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch b/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch new file mode 100644 index 0000000..5152df8 --- /dev/null +++ b/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch @@ -0,0 +1,48 @@ +From 070698ede9c3765c95364e8207c8311dbf895499 Mon Sep 17 00:00:00 2001 +From: kiyer +Date: Tue, 8 Dec 2020 15:18:49 +0530 +Subject: [PATCH 501/511] Cli: Removing old syntax of tier cmds from help menu + +Remove old syntax of attach-tier and detach-tier +commands from help menu. + +Label: DOWNSTREAM ONLY +BUG: 1813866 + +Change-Id: If86e4828b475fb593a5105ca8deac96374f9542d +Signed-off-by: kiyer +Reviewed-on: https://code.engineering.redhat.com/gerrit/220510 +Tested-by: RHGS Build Bot +Reviewed-by: Mohit Agrawal +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 13 ------------- + 1 file changed, 13 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 6f5bf8b..b6bef80 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3331,19 +3331,6 @@ struct cli_cmd tier_cmds[] = { + {"volume tier detach ", + cli_cmd_volume_tier_cbk, "Detach the hot tier from "}, + +- {"volume attach-tier [] ...", +- cli_cmd_volume_tier_cbk, +- "NOTE: this is old syntax, will be deprecated in next release. " +- "Please use gluster volume tier attach " +- "[] ..."}, +- +- {"volume detach-tier " +- "", +- cli_cmd_volume_tier_cbk, +- "NOTE: this is old syntax, will be deprecated in next release. " +- "Please use gluster volume tier detach " +- "{start|stop|commit} [force]"}, +- + {"volume tier status\n" + "volume tier start [force]\n" + "volume tier stop\n" +-- +1.8.3.1 + diff --git a/SOURCES/0502-dht-fixing-a-permission-update-issue.patch b/SOURCES/0502-dht-fixing-a-permission-update-issue.patch new file mode 100644 index 0000000..7c136d0 --- /dev/null +++ b/SOURCES/0502-dht-fixing-a-permission-update-issue.patch @@ -0,0 +1,225 @@ +From 3f1eee125a35c33ecb078e5d3bfd80d80e63881d Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman +Date: Wed, 15 Jan 2020 12:02:05 +0200 +Subject: [PATCH 502/511] dht - fixing a permission update issue + +When bringing back a downed brick and performing lookup from the client +side, the permission on said brick aren't updated on the first lookup, +but only on the second. + +This patch modifies permission update logic so the first lookup will +trigger a permission update on the downed brick. + +LIMITATIONS OF THE PATCH: +As the choice of source depends on whether the directory has layout or not. +Even the directories on the newly added brick will have layout xattr[zeroed], but the same is not true for a root directory. +Hence, in case in the entire cluster only the newly added bricks are up [and others are down], then any change in permission during this time will be overwritten by the older permissions when the cluster is restarted. 
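+
+A short sketch of the source-selection rule applied for the root
+directory (identifiers as in the dht-common.c hunk below):
+
+    /* among replies that carry a layout, take ownership and
+     * permissions from the subvolume with the newest ctime */
+    if (is_greater_time(local->prebuf.ia_ctime, local->prebuf.ia_ctime_nsec,
+                        stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+        local->prebuf.ia_uid = stbuf->ia_uid;
+        local->prebuf.ia_gid = stbuf->ia_gid;
+        local->prebuf.ia_prot = stbuf->ia_prot;
+    }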
+ +Upstream: +> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24020/ +> fixes: #999 +> Change-Id: Ieb70246d41e59f9cae9f70bc203627a433dfbd33 +> Signed-off-by: Barak Sason Rofman + +BUG: 1663821 +Change-Id: Ieb70246d41e59f9cae9f70bc203627a433dfbd33 +Signed-off-by: Barak Sason Rofman +Reviewed-on: https://code.engineering.redhat.com/gerrit/221116 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/bug-1064147.t | 71 ++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 28 ++++++++++--- + xlators/cluster/dht/src/dht-selfheal.c | 15 +++++-- + xlators/storage/posix/src/posix-common.c | 16 +++---- + 4 files changed, 111 insertions(+), 19 deletions(-) + create mode 100755 tests/bugs/bug-1064147.t + +diff --git a/tests/bugs/bug-1064147.t b/tests/bugs/bug-1064147.t +new file mode 100755 +index 0000000..617a1aa +--- /dev/null ++++ b/tests/bugs/bug-1064147.t +@@ -0,0 +1,71 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $V0 $H0:/${V0}{1,2}; ++ ++# Verify volume creation ++ EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++ EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++# Start volume and verify successful start ++ TEST $CLI volume start $V0; ++ EXPECT 'Started' volinfo_field $V0 'Status'; ++ TEST glusterfs -s $H0 --volfile-id=$V0 $M0 ++#------------------------------------------------------------ ++ ++# Test case 1 - Subvolume down + Healing ++#------------------------------------------------------------ ++# Kill 2nd brick process ++TEST kill -9 `ps aux | grep glusterfsd | grep ${V0}2 | grep -v grep | awk '{print $2}'`; ++ ++# Change root permissions ++TEST chmod 444 $M0 ++ ++# Store permission for comparision ++TEST permission_new=`stat -c "%A" $M0` ++ ++# Bring up the killed brick process ++TEST $CLI volume start $V0 force ++ ++# Perform lookup ++sleep 5 ++TEST ls $M0 ++ ++# Check brick permissions ++TEST brick_perm=`stat -c "%A" /${V0}2` ++TEST [ ${brick_perm} = ${permission_new} ] ++#------------------------------------------------------------ ++ ++# Test case 2 - Add-brick + Healing ++#------------------------------------------------------------ ++# Change root permissions ++TEST chmod 777 $M0 ++ ++# Store permission for comparision ++TEST permission_new_2=`stat -c "%A" $M0` ++ ++# Add a 3rd brick ++TEST $CLI volume add-brick $V0 $H0:/${V0}3 ++ ++# Perform lookup ++sleep 5 ++TEST ls $M0 ++ ++# Check permissions on the new brick ++TEST brick_perm2=`stat -c "%A" /${V0}3` ++ ++TEST [ ${brick_perm2} = ${permission_new_2} ] ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 4db89df..fe1d0ee 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1363,13 +1363,29 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + dht_aggregate_xattr(local->xattr, xattr); + } + ++ if (__is_root_gfid(stbuf->ia_gfid)) { ++ ret = dht_dir_has_layout(xattr, conf->xattr_name); ++ if (ret >= 0) { ++ if (is_greater_time(local->prebuf.ia_ctime, ++ local->prebuf.ia_ctime_nsec, ++ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) { ++ /* Choose source */ ++ local->prebuf.ia_gid = stbuf->ia_gid; ++ local->prebuf.ia_uid = stbuf->ia_uid; ++ ++ local->prebuf.ia_ctime = stbuf->ia_ctime; ++ 
local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec; ++ local->prebuf.ia_prot = stbuf->ia_prot; ++ } ++ } ++ } ++ + if (local->stbuf.ia_type != IA_INVAL) { + /* This is not the first subvol to respond */ +- if (!__is_root_gfid(stbuf->ia_gfid) && +- ((local->stbuf.ia_gid != stbuf->ia_gid) || +- (local->stbuf.ia_uid != stbuf->ia_uid) || +- (is_permission_different(&local->stbuf.ia_prot, +- &stbuf->ia_prot)))) { ++ if ((local->stbuf.ia_gid != stbuf->ia_gid) || ++ (local->stbuf.ia_uid != stbuf->ia_uid) || ++ (is_permission_different(&local->stbuf.ia_prot, ++ &stbuf->ia_prot))) { + local->need_attrheal = 1; + } + } +@@ -10969,7 +10985,7 @@ dht_notify(xlator_t *this, int event, void *data, ...) + if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) +- gf_defrag_status_get(conf, output, _gf_false); ++ gf_defrag_status_get(conf, output, _gf_false); + else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) + gf_defrag_start_detach_tier(defrag); + else if (cmd == GF_DEFRAG_CMD_DETACH_START) +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index f5dfff9..f4e17d1 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -2097,9 +2097,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref(this, layout); + +- if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) { +- /*Use the one in the mds_stbuf*/ +- local->stbuf = local->mds_stbuf; ++ if (local->need_attrheal) { ++ if (__is_root_gfid(local->stbuf.ia_gfid)) { ++ local->stbuf.ia_gid = local->prebuf.ia_gid; ++ local->stbuf.ia_uid = local->prebuf.ia_uid; ++ ++ local->stbuf.ia_ctime = local->prebuf.ia_ctime; ++ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec; ++ local->stbuf.ia_prot = local->prebuf.ia_prot; ++ ++ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) { ++ local->stbuf = local->mds_stbuf; ++ } + } + + if (!__is_root_gfid(local->stbuf.ia_gfid)) { +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index c5a43a1..e5c6e62 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -598,6 +598,7 @@ posix_init(xlator_t *this) + int force_directory = -1; + int create_mask = -1; + int create_directory_mask = -1; ++ char value; + + dir_data = dict_get(this->options, "directory"); + +@@ -654,16 +655,11 @@ posix_init(xlator_t *this) + } + + /* Check for Extended attribute support, if not present, log it */ +- op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working", +- 8, 0); +- if (op_ret != -1) { +- ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); +- if (ret) { +- gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, +- "failed to remove xattr: " +- "trusted.glusterfs.test"); +- } +- } else { ++ size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); ++ ++ if ((size == -1) && (errno == EOPNOTSUPP)) { ++ gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, ++ "getxattr returned %zd", size); + tmp_data = dict_get(this->options, "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { +-- +1.8.3.1 + diff --git a/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch b/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch new file mode 100644 index 
0000000..466bf4e --- /dev/null +++ b/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch @@ -0,0 +1,179 @@ +From 5946a6ec18976c0f52162fe0f47e9b5171af87ec Mon Sep 17 00:00:00 2001 +From: Soumya Koduri +Date: Mon, 6 Apr 2020 12:36:44 +0530 +Subject: [PATCH 503/511] gfapi: Suspend synctasks instead of blocking them + +There are certain conditions which blocks the current +execution thread (like waiting on mutex lock or condition +variable or I/O response). In such cases, if it is a +synctask thread, we should suspend the task instead +of blocking it (like done in SYNCOP using synctask_yield) + +This is to avoid deadlock like the one mentioned below - + +1) synctaskA sets fs->migration_in_progress to 1 and + does I/O (LOOKUP) +2) Other synctask threads wait for fs->migration_in_progress + to be reset to 0 by synctaskA and hence blocked +3) but synctaskA cannot resume as all synctask threads are blocked + on (2). + +Note: this same approach is already used by few other components +like syncbarrier etc. + +>Change-Id: If90f870d663bb242c702a5b86ac52eeda67c6f0d +>Fixes: #1146 +>Signed-off-by: Soumya Koduri +Upstream patch: https://review.gluster.org/c/glusterfs/+/24276 + +BUG: 1779238 +Change-Id: If90f870d663bb242c702a5b86ac52eeda67c6f0d +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/221081 +Tested-by: RHGS Build Bot +Reviewed-by: Soumya Koduri +--- + api/src/glfs-internal.h | 34 ++++++++++++++++++++++++++++++++-- + api/src/glfs-resolve.c | 9 +++++++++ + api/src/glfs.c | 9 +++++++++ + 3 files changed, 50 insertions(+), 2 deletions(-) + +diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h +index 55401b2..15cf0ee 100644 +--- a/api/src/glfs-internal.h ++++ b/api/src/glfs-internal.h +@@ -16,6 +16,7 @@ + #include + #include "glfs-handles.h" + #include ++#include + + #define GLFS_SYMLINK_MAX_FOLLOW 2048 + +@@ -207,6 +208,7 @@ struct glfs { + glfs_upcall_cbk up_cbk; /* upcall cbk function to be registered */ + void *up_data; /* Opaque data provided by application + * during upcall registration */ ++ struct list_head waitq; /* waiting synctasks */ + }; + + /* This enum is used to maintain the state of glfd. In case of async fops +@@ -442,6 +444,34 @@ glfs_process_upcall_event(struct glfs *fs, void *data) + THIS = glfd->fd->inode->table->xl->ctx->master; \ + } while (0) + ++#define __GLFS_LOCK_WAIT(fs) \ ++ do { \ ++ struct synctask *task = NULL; \ ++ \ ++ task = synctask_get(); \ ++ \ ++ if (task) { \ ++ list_add_tail(&task->waitq, &fs->waitq); \ ++ pthread_mutex_unlock(&fs->mutex); \ ++ synctask_yield(task, NULL); \ ++ pthread_mutex_lock(&fs->mutex); \ ++ } else { \ ++ /* non-synctask */ \ ++ pthread_cond_wait(&fs->cond, &fs->mutex); \ ++ } \ ++ } while (0) ++ ++#define __GLFS_SYNCTASK_WAKE(fs) \ ++ do { \ ++ struct synctask *waittask = NULL; \ ++ \ ++ while (!list_empty(&fs->waitq)) { \ ++ waittask = list_entry(fs->waitq.next, struct synctask, waitq); \ ++ list_del_init(&waittask->waitq); \ ++ synctask_wake(waittask); \ ++ } \ ++ } while (0) ++ + /* + By default all lock attempts from user context must + use glfs_lock() and glfs_unlock(). 
This allows +@@ -466,10 +496,10 @@ glfs_lock(struct glfs *fs, gf_boolean_t wait_for_migration) + pthread_mutex_lock(&fs->mutex); + + while (!fs->init) +- pthread_cond_wait(&fs->cond, &fs->mutex); ++ __GLFS_LOCK_WAIT(fs); + + while (wait_for_migration && fs->migration_in_progress) +- pthread_cond_wait(&fs->cond, &fs->mutex); ++ __GLFS_LOCK_WAIT(fs); + + return 0; + } +diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c +index 062b7dc..58b6ace 100644 +--- a/api/src/glfs-resolve.c ++++ b/api/src/glfs-resolve.c +@@ -65,6 +65,9 @@ __glfs_first_lookup(struct glfs *fs, xlator_t *subvol) + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return ret; + } + +@@ -154,6 +157,9 @@ __glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode, + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return newinode; + } + +@@ -841,6 +847,9 @@ __glfs_migrate_fd(struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd) + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return newfd; + } + +diff --git a/api/src/glfs.c b/api/src/glfs.c +index f36616d..ae994fa 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -740,6 +740,7 @@ glfs_new_fs(const char *volname) + + INIT_LIST_HEAD(&fs->openfds); + INIT_LIST_HEAD(&fs->upcall_list); ++ INIT_LIST_HEAD(&fs->waitq); + + PTHREAD_MUTEX_INIT(&fs->mutex, NULL, fs->pthread_flags, GLFS_INIT_MUTEX, + err); +@@ -1228,6 +1229,7 @@ pub_glfs_fini(struct glfs *fs) + call_pool_t *call_pool = NULL; + int fs_init = 0; + int err = -1; ++ struct synctask *waittask = NULL; + + DECLARE_OLD_THIS; + +@@ -1249,6 +1251,13 @@ pub_glfs_fini(struct glfs *fs) + + call_pool = fs->ctx->pool; + ++ /* Wake up any suspended synctasks */ ++ while (!list_empty(&fs->waitq)) { ++ waittask = list_entry(fs->waitq.next, struct synctask, waitq); ++ list_del_init(&waittask->waitq); ++ synctask_wake(waittask); ++ } ++ + while (countdown--) { + /* give some time for background frames to finish */ + pthread_mutex_lock(&fs->mutex); +-- +1.8.3.1 + diff --git a/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch b/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch new file mode 100644 index 0000000..21d7f7f --- /dev/null +++ b/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch @@ -0,0 +1,109 @@ +From baa566be8832a56fdea7068d84844ec1ec84d8d9 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Thu, 15 Oct 2020 16:28:58 +0530 +Subject: [PATCH 504/511] io-stats: Configure ios_sample_buf_size based on + sample_interval value (#1574) + +The io-stats xlator declares a 64k-entry (10M) ios_sample_buf_size object per +xlator, but when sample_interval is 0 this big buffer is not required, so declare +the default value only while sample_interval is not 0. The change helps reduce the +RSS size of brick and shd processes while the number of volumes is huge.
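The sizing rule above is easier to see in isolation. Below is a minimal standalone C sketch; sample_conf, SAMPLE_BUF_FULL and SAMPLE_BUF_MIN are illustrative stand-ins rather than the actual io-stats types, and the real patch takes the enabled-case size from the ios-sample-buf-size option instead of a constant:

#include <stdio.h>
#include <stdlib.h>

#define SAMPLE_BUF_FULL (64 * 1024) /* full ring buffer, only useful when sampling */
#define SAMPLE_BUF_MIN 1024         /* small fallback when sampling is off */

struct sample_conf {
    int sample_interval; /* 0 means sampling is disabled */
    size_t buf_size;
    double *buf;
};

/* Allocate the big buffer only when sampling is actually enabled. */
static int sample_conf_init(struct sample_conf *conf, int interval)
{
    conf->sample_interval = interval;
    conf->buf_size = interval ? SAMPLE_BUF_FULL : SAMPLE_BUF_MIN;
    conf->buf = calloc(conf->buf_size, sizeof(*conf->buf));
    return conf->buf ? 0 : -1;
}

int main(void)
{
    struct sample_conf c;
    if (sample_conf_init(&c, 0) == 0) {
        printf("allocated %zu slots\n", c.buf_size);
        free(c.buf);
    }
    return 0;
}

With sampling disabled, the per-xlator cost drops from the full ring buffer to the small fallback, which is what adds up when hundreds of volumes share one process.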
+ +> Change-Id: I3e82cca92e40549355edfac32580169f3ce51af8 +> Fixes: #1542 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit f71660eb879a9cd5761e5adbf10c783e959a990a) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1542) + +Change-Id: I3e82cca92e40549355edfac32580169f3ce51af8 +BUG: 1898778 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221183 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/glusterd/daemon-log-level-option.t | 8 ++++---- + xlators/debug/io-stats/src/io-stats.c | 26 ++++++++++++++++++++++---- + 2 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/tests/bugs/glusterd/daemon-log-level-option.t b/tests/bugs/glusterd/daemon-log-level-option.t +index 66e55e3..5352a63 100644 +--- a/tests/bugs/glusterd/daemon-log-level-option.t ++++ b/tests/bugs/glusterd/daemon-log-level-option.t +@@ -61,8 +61,8 @@ rm -f /var/log/glusterfs/glustershd.log + TEST $CLI volume set all cluster.daemon-log-level WARNING + TEST $CLI volume start $V0 + +-# log should not have any info messages +-EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log" ++# log does have 1 info message specific to configure ios_sample_buf_size in io-stats xlator ++EXPECT 1 Info_messages_count "/var/log/glusterfs/glustershd.log" + + # log should not have any debug messages + EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log" +@@ -78,8 +78,8 @@ rm -f /var/log/glusterfs/glustershd.log + TEST $CLI volume set all cluster.daemon-log-level ERROR + TEST $CLI volume start $V0 + +-# log should not have any info messages +-EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log" ++# log does have 1 info message specific to configure ios_sample_buf_size in io-stats xlator ++EXPECT 1 Info_messages_count "/var/log/glusterfs/glustershd.log" + + # log should not have any warning messages + EXPECT 0 Warning_messages_count "/var/log/glusterfs/glustershd.log" +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index aa91a0a..9b34895 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -3724,6 +3724,15 @@ xlator_set_loglevel(xlator_t *this, int log_level) + } + } + ++void ++ios_sample_buf_size_configure(char *name, struct ios_conf *conf) ++{ ++ conf->ios_sample_buf_size = 1024; ++ gf_log(name, GF_LOG_INFO, ++ "Configure ios_sample_buf " ++ " size is 1024 because ios_sample_interval is 0"); ++} ++ + int + reconfigure(xlator_t *this, dict_t *options) + { +@@ -3779,8 +3788,13 @@ reconfigure(xlator_t *this, dict_t *options) + int32, out); + GF_OPTION_RECONF("ios-dump-format", dump_format_str, options, str, out); + ios_set_log_format_code(conf, dump_format_str); +- GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, options, +- int32, out); ++ if (conf->ios_sample_interval) { ++ GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, ++ options, int32, out); ++ } else { ++ ios_sample_buf_size_configure(this->name, conf); ++ } ++ + GF_OPTION_RECONF("sys-log-level", sys_log_str, options, str, out); + if (sys_log_str) { + sys_log_level = glusterd_check_log_level(sys_log_str); +@@ -3947,8 +3961,12 @@ init(xlator_t *this) + GF_OPTION_INIT("ios-dump-format", dump_format_str, str, out); + ios_set_log_format_code(conf, dump_format_str); + +- GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32, +- out); ++ if (conf->ios_sample_interval) { ++ 
GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32, ++ out); ++ } else { ++ ios_sample_buf_size_configure(this->name, conf); ++ } + + ret = ios_init_sample_buf(conf); + if (ret) { +-- +1.8.3.1 + diff --git a/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch b/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch new file mode 100644 index 0000000..a0f6b62 --- /dev/null +++ b/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch @@ -0,0 +1,107 @@ +From 43a8e2c7441b14f5f238cb11d83f32f248b16abb Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 13 Oct 2020 18:56:20 +0530 +Subject: [PATCH 505/511] trash: Create inode_table only while feature is + enabled + +Currently the trash xlator creates an inode table (1M) even if the +feature is not enabled. In a brick_mux environment, while 250 +bricks are attached to a single brick process and the feature +is not enabled, the brick process increases its RSS size unnecessarily. + +Solution: Create the inode_table only while the feature is enabled. +The patch reduces RSS size by 250M per brick process +if the trash feature is not enabled. + +> Change-Id: I11a6fd2b8419fe2988f398be6ec30fb4f3b99a5d +> Fixes: #1543 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 32f25e7b1b4b080ab2640e178b407c878e629376) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1543) + +Change-Id: I11a6fd2b8419fe2988f398be6ec30fb4f3b99a5d +BUG: 1898781 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221184 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/trash/src/trash.c | 47 +++++++++++++++++++++++++++++++++++--- + 1 file changed, 44 insertions(+), 3 deletions(-) + +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index f96ed73..93f020f 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -2235,16 +2235,47 @@ reconfigure(xlator_t *this, dict_t *options) + char trash_dir[PATH_MAX] = { + 0, + }; ++ gf_boolean_t active_earlier = _gf_false; ++ gf_boolean_t active_now = _gf_false; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("trash", priv, out); + ++ active_earlier = priv->state; ++ GF_OPTION_RECONF("trash", active_now, options, bool, out); ++ ++ /* Disable of trash feature is not allowed at this point until ++ we are not able to find an approach to cleanup resource ++ gracefully. Here to disable the feature need to destroy inode ++ table and currently it is difficult to ensure inode is not ++ being used ++ */ ++ if (active_earlier && !active_now) { ++ gf_log(this->name, GF_LOG_INFO, ++ "Disable of trash feature is not allowed " ++ "during graph reconfigure"); ++ ret = 0; ++ goto out; ++ } ++ ++ if (!active_earlier && active_now) { ++ if (!priv->trash_itable) { ++ priv->trash_itable = inode_table_new(0, this); ++ if (!priv->trash_itable) { ++ ret = -ENOMEM; ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to create trash inode_table" ++ " during graph reconfigure"); ++ goto out; ++ } ++ } ++ priv->state = active_now; ++ } ++ + GF_OPTION_RECONF("trash-internal-op", priv->internal, options, bool, out); + GF_OPTION_RECONF("trash-dir", tmp, options, str, out); + +- GF_OPTION_RECONF("trash", priv->state, options, bool, out); +- + if (priv->state) { + ret = create_or_rename_trash_directory(this); + +@@ -2501,7 +2532,17 @@ init(xlator_t *this) + goto out; + } + +- priv->trash_itable = inode_table_new(0, this); ++ if (priv->state) { ++ priv->trash_itable = inode_table_new(0, this); ++ if (!priv->trash_itable) { ++ ret = -ENOMEM; ++ priv->state = _gf_false; ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to create trash inode_table disable trash"); ++ goto out; ++ } ++ } ++ + gf_log(this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path); + + this->private = (void *)priv; +-- +1.8.3.1 + diff --git a/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch b/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch new file mode 100644 index 0000000..cf978f5 --- /dev/null +++ b/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch @@ -0,0 +1,499 @@ +From 17a9ce965ef2fec9ee5c8e4b76981bb7cbcf1352 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Mon, 9 Nov 2020 17:15:42 +0530 +Subject: [PATCH 506/511] posix: Attach a posix_spawn_disk_thread with + glusterfs_ctx (#1595) + +Currently the posix xlator spawns a posix_disk_space thread per brick; in a +brick_mux environment, while glusterd attaches bricks at the maximum +level (250) to a single brick process, 250 threads are +spawned for all bricks and the brick process memory size also increases.
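A minimal standalone sketch of the per-process checker pattern this patch adopts follows; the Solution paragraph below summarizes the real implementation. All names here (struct brick, disk_check_thread) are illustrative rather than the real glusterfs_ctx_t/posix_diskxl types, and the real code additionally marks entries with is_use/detach_notify flags so a brick can detach safely while the checker is running:

#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Illustrative registry of bricks -- not the real glusterfs types. */
struct brick {
    struct brick *next;
    const char *path;
};

static struct brick *registry;
static int nbricks;
static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;

/* One thread per process visits every registered brick every 5 seconds,
 * instead of one dedicated thread per brick. */
static void *disk_check_thread(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lk);
    while (nbricks > 0) {
        struct timespec until;

        for (struct brick *b = registry; b; b = b->next)
            printf("disk-space check for %s\n", b->path); /* statvfs() here */

        clock_gettime(CLOCK_REALTIME, &until);
        until.tv_sec += 5;
        /* Signalled early when the last brick deregisters, so the thread exits. */
        pthread_cond_timedwait(&cv, &lk, &until);
    }
    pthread_mutex_unlock(&lk);
    return NULL;
}

int main(void)
{
    struct brick b2 = {NULL, "/bricks/b2"}, b1 = {&b2, "/bricks/b1"};
    pthread_t t;

    registry = &b1;
    nbricks = 2;
    pthread_create(&t, NULL, disk_check_thread, NULL);

    sleep(6); /* let the checker make a pass or two */

    pthread_mutex_lock(&lk);
    registry = NULL;
    nbricks = 0;
    pthread_cond_signal(&cv);
    pthread_mutex_unlock(&lk);
    pthread_join(t, NULL);
    return 0;
}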
+ +Solution: Attach a posix_disk_space thread with glusterfs_ctx to + spawn a thread per process basis instead of spawning a per brick + +> Fixes: #1482 +> Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7 +> Cherry-picked from commit 3f93be77e1acf5baacafa97a320e91e6879d1c0e +> Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1482 +> Signed-off-by: Mohit Agrawal + +Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7 +Bug: 1898776 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/220366 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd.c | 4 + + libglusterfs/src/glusterfs/glusterfs.h | 6 ++ + xlators/storage/posix/src/posix-common.c | 68 +++++++++++-- + xlators/storage/posix/src/posix-handle.h | 3 +- + xlators/storage/posix/src/posix-helpers.c | 131 ++++++++++++++----------- + xlators/storage/posix/src/posix-inode-fd-ops.c | 3 +- + xlators/storage/posix/src/posix-mem-types.h | 1 + + xlators/storage/posix/src/posix.h | 12 ++- + 8 files changed, 160 insertions(+), 68 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 955bf1d..ac25255 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1840,9 +1840,13 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) + INIT_LIST_HEAD(&cmd_args->xlator_options); + INIT_LIST_HEAD(&cmd_args->volfile_servers); + ctx->pxl_count = 0; ++ ctx->diskxl_count = 0; + pthread_mutex_init(&ctx->fd_lock, NULL); + pthread_cond_init(&ctx->fd_cond, NULL); + INIT_LIST_HEAD(&ctx->janitor_fds); ++ pthread_mutex_init(&ctx->xl_lock, NULL); ++ pthread_cond_init(&ctx->xl_cond, NULL); ++ INIT_LIST_HEAD(&ctx->diskth_xl); + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index bf6a987..d3400bf 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -740,7 +740,13 @@ struct _glusterfs_ctx { + pthread_t janitor; + /* The variable is use to save total posix xlator count */ + uint32_t pxl_count; ++ uint32_t diskxl_count; + ++ /* List of posix xlator use by disk thread*/ ++ struct list_head diskth_xl; ++ pthread_mutex_t xl_lock; ++ pthread_cond_t xl_cond; ++ pthread_t disk_space_check; + char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ + }; + typedef struct _glusterfs_ctx glusterfs_ctx_t; +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index e5c6e62..2c9030b 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -138,6 +138,36 @@ posix_inode(xlator_t *this) + return 0; + } + ++static void ++delete_posix_diskxl(xlator_t *this) ++{ ++ struct posix_private *priv = this->private; ++ struct posix_diskxl *pxl = priv->pxl; ++ glusterfs_ctx_t *ctx = this->ctx; ++ uint32_t count = 1; ++ ++ if (pxl) { ++ pthread_mutex_lock(&ctx->xl_lock); ++ { ++ pxl->detach_notify = _gf_true; ++ while (pxl->is_use) ++ pthread_cond_wait(&pxl->cond, &ctx->xl_lock); ++ list_del_init(&pxl->list); ++ priv->pxl = NULL; ++ count = --ctx->diskxl_count; ++ if (count == 0) ++ pthread_cond_signal(&ctx->xl_cond); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); ++ pthread_cond_destroy(&pxl->cond); ++ GF_FREE(pxl); ++ if (count == 0) { ++ pthread_join(ctx->disk_space_check, NULL); ++ ctx->disk_space_check = 0; ++ } ++ } ++} ++ + /** + * notify - when parent sends 
PARENT_UP, send CHILD_UP event from here + */ +@@ -194,6 +224,8 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + } + pthread_mutex_unlock(&ctx->fd_lock); + ++ delete_posix_diskxl(this); ++ + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); +@@ -318,6 +350,7 @@ posix_reconfigure(xlator_t *this, dict_t *options) + int32_t force_directory_mode = -1; + int32_t create_mask = -1; + int32_t create_directory_mask = -1; ++ double old_disk_reserve = 0.0; + + priv = this->private; + +@@ -383,6 +416,7 @@ posix_reconfigure(xlator_t *this, dict_t *options) + " fallback to :"); + } + ++ old_disk_reserve = priv->disk_reserve; + GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, + out); + /* option can be any one of percent or bytes */ +@@ -390,11 +424,19 @@ posix_reconfigure(xlator_t *this, dict_t *options) + if (priv->disk_reserve < 100.0) + priv->disk_unit = 'p'; + +- if (priv->disk_reserve) { ++ /* Delete a pxl object from a list of disk_reserve while something ++ is changed for reserve option during graph reconfigure ++ */ ++ if (old_disk_reserve != priv->disk_reserve) { ++ delete_posix_diskxl(this); ++ old_disk_reserve = 0; ++ } ++ ++ if (!old_disk_reserve && priv->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, +- "Getting disk space check from thread failed"); ++ "Getting disk space check from thread failed "); + goto out; + } + } +@@ -1008,13 +1050,13 @@ posix_init(xlator_t *this) + " fallback to :"); + } + +- _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; + + GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); + + /* option can be any one of percent or bytes */ + _private->disk_unit = 0; ++ pthread_cond_init(&_private->fd_cond, NULL); + if (_private->disk_reserve < 100.0) + _private->disk_unit = 'p'; + +@@ -1162,12 +1204,6 @@ posix_fini(xlator_t *this) + priv->health_check = 0; + } + +- if (priv->disk_space_check) { +- priv->disk_space_check_active = _gf_false; +- (void)gf_thread_cleanup_xint(priv->disk_space_check); +- priv->disk_space_check = 0; +- } +- + if (priv->janitor) { + /*TODO: Make sure the synctask is also complete */ + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); +@@ -1192,10 +1228,24 @@ posix_fini(xlator_t *this) + pthread_join(ctx->janitor, NULL); + } + ++ pthread_mutex_lock(&ctx->xl_lock); ++ { ++ count = --ctx->diskxl_count; ++ if (count == 0) ++ pthread_cond_signal(&ctx->xl_cond); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); ++ ++ if (count == 0) { ++ pthread_join(ctx->disk_space_check, NULL); ++ ctx->disk_space_check = 0; ++ } ++ + if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; + } ++ + /*unlock brick dir*/ + if (priv->mount_lock) + (void)sys_closedir(priv->mount_lock); +diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h +index c4d7cb1..8e4c719 100644 +--- a/xlators/storage/posix/src/posix-handle.h ++++ b/xlators/storage/posix/src/posix-handle.h +@@ -206,5 +206,6 @@ int + posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata); + + void +-posix_disk_space_check(xlator_t *this); ++posix_disk_space_check(struct posix_private* priv); ++ + #endif /* !_POSIX_HANDLE_H */ +diff --git a/xlators/storage/posix/src/posix-helpers.c 
b/xlators/storage/posix/src/posix-helpers.c +index ceac52a..110d383 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -2284,9 +2284,8 @@ unlock: + } + + void +-posix_disk_space_check(xlator_t *this) ++posix_disk_space_check(struct posix_private *priv) + { +- struct posix_private *priv = NULL; + char *subvol_path = NULL; + int op_ret = 0; + double size = 0; +@@ -2295,16 +2294,14 @@ posix_disk_space_check(xlator_t *this) + double totsz = 0; + double freesz = 0; + +- GF_VALIDATE_OR_GOTO(this->name, this, out); +- priv = this->private; +- GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ GF_VALIDATE_OR_GOTO("posix-helpers", priv, out); + + subvol_path = priv->base_path; + + op_ret = sys_statvfs(subvol_path, &buf); + + if (op_ret == -1) { +- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, ++ gf_msg("posix-disk", GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + "statvfs failed on %s", subvol_path); + goto out; + } +@@ -2328,78 +2325,102 @@ out: + } + + static void * +-posix_disk_space_check_thread_proc(void *data) ++posix_ctx_disk_thread_proc(void *data) + { +- xlator_t *this = NULL; + struct posix_private *priv = NULL; ++ glusterfs_ctx_t *ctx = NULL; + uint32_t interval = 0; +- int ret = -1; +- +- this = data; +- priv = this->private; ++ struct posix_diskxl *pthis = NULL; ++ xlator_t *this = NULL; ++ struct timespec sleep_till = { ++ 0, ++ }; + ++ ctx = data; + interval = 5; +- gf_msg_debug(this->name, 0, +- "disk-space thread started, " ++ ++ gf_msg_debug("glusterfs_ctx", 0, ++ "Ctx disk-space thread started, " + "interval = %d seconds", + interval); +- while (1) { +- /* aborting sleep() is a request to exit this thread, sleep() +- * will normally not return when cancelled */ +- ret = sleep(interval); +- if (ret > 0) +- break; +- /* prevent thread errors while doing the health-check(s) */ +- pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); +- +- /* Do the disk-check.*/ +- posix_disk_space_check(this); +- if (!priv->disk_space_check_active) +- goto out; +- pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); +- } + +-out: +- gf_msg_debug(this->name, 0, "disk space check thread exiting"); +- LOCK(&priv->lock); ++ pthread_mutex_lock(&ctx->xl_lock); + { +- priv->disk_space_check_active = _gf_false; ++ while (ctx->diskxl_count > 0) { ++ list_for_each_entry(pthis, &ctx->diskth_xl, list) ++ { ++ pthis->is_use = _gf_true; ++ pthread_mutex_unlock(&ctx->xl_lock); ++ ++ THIS = this = pthis->xl; ++ priv = this->private; ++ ++ posix_disk_space_check(priv); ++ ++ pthread_mutex_lock(&ctx->xl_lock); ++ pthis->is_use = _gf_false; ++ /* Send a signal to posix_notify function */ ++ if (pthis->detach_notify) ++ pthread_cond_signal(&pthis->cond); ++ } ++ ++ timespec_now_realtime(&sleep_till); ++ sleep_till.tv_sec += 5; ++ (void)pthread_cond_timedwait(&ctx->xl_cond, &ctx->xl_lock, ++ &sleep_till); ++ } + } +- UNLOCK(&priv->lock); ++ pthread_mutex_unlock(&ctx->xl_lock); + + return NULL; + } + + int +-posix_spawn_disk_space_check_thread(xlator_t *xl) ++posix_spawn_disk_space_check_thread(xlator_t *this) + { +- struct posix_private *priv = NULL; +- int ret = -1; ++ int ret = 0; ++ glusterfs_ctx_t *ctx = this->ctx; ++ struct posix_diskxl *pxl = NULL; ++ struct posix_private *priv = this->private; + +- priv = xl->private; ++ pxl = GF_CALLOC(1, sizeof(struct posix_diskxl), gf_posix_mt_diskxl_t); ++ if (!pxl) { ++ ret = -ENOMEM; ++ gf_log(this->name, GF_LOG_ERROR, ++ "Calloc is failed to allocate " ++ "memory for diskxl object"); ++ goto out; ++ } ++ 
pthread_cond_init(&pxl->cond, NULL); + +- LOCK(&priv->lock); ++ pthread_mutex_lock(&ctx->xl_lock); + { +- /* cancel the running thread */ +- if (priv->disk_space_check_active == _gf_true) { +- pthread_cancel(priv->disk_space_check); +- priv->disk_space_check_active = _gf_false; +- } ++ if (ctx->diskxl_count++ == 0) { ++ ret = gf_thread_create(&ctx->disk_space_check, NULL, ++ posix_ctx_disk_thread_proc, ctx, ++ "posixctxres"); + +- ret = gf_thread_create(&priv->disk_space_check, NULL, +- posix_disk_space_check_thread_proc, xl, +- "posix_reserve"); +- if (ret) { +- priv->disk_space_check_active = _gf_false; +- gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, +- "unable to setup disk space check thread"); +- goto unlock; ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, ++ "spawning disk space check thread failed"); ++ ctx->diskxl_count--; ++ pthread_mutex_unlock(&ctx->xl_lock); ++ goto out; ++ } + } ++ pxl->xl = this; ++ priv->pxl = (void *)pxl; ++ list_add_tail(&pxl->list, &ctx->diskth_xl); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); + +- priv->disk_space_check_active = _gf_true; ++out: ++ if (ret) { ++ if (pxl) { ++ pthread_cond_destroy(&pxl->cond); ++ GF_FREE(pxl); ++ } + } +-unlock: +- UNLOCK(&priv->lock); + return ret; + } + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 1d37aed..761e018 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -37,6 +37,7 @@ + #include + #endif /* HAVE_LINKAT */ + ++#include "posix-handle.h" + #include + #include + #include +@@ -713,7 +714,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + option behaviour + */ + if (priv->disk_reserve) +- posix_disk_space_check(this); ++ posix_disk_space_check(priv); + + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); + +diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h +index 2253f38..bb4c56d 100644 +--- a/xlators/storage/posix/src/posix-mem-types.h ++++ b/xlators/storage/posix/src/posix-mem-types.h +@@ -20,6 +20,7 @@ enum gf_posix_mem_types_ { + gf_posix_mt_paiocb, + gf_posix_mt_inode_ctx_t, + gf_posix_mt_mdata_attr, ++ gf_posix_mt_diskxl_t, + gf_posix_mt_end + }; + #endif +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 07f367b..4be979c 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -36,7 +36,6 @@ + #include + #include + #include "posix-mem-types.h" +-#include "posix-handle.h" + #include + + #ifdef HAVE_LIBAIO +@@ -138,6 +137,14 @@ struct posix_fd { + char _pad[4]; /* manual padding */ + }; + ++struct posix_diskxl { ++ pthread_cond_t cond; ++ struct list_head list; ++ xlator_t *xl; ++ gf_boolean_t detach_notify; ++ gf_boolean_t is_use; ++}; ++ + struct posix_private { + char *base_path; + int32_t base_path_length; +@@ -207,6 +214,7 @@ struct posix_private { + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; ++ pthread_cond_t disk_cond; + int fsync_queue_count; + + enum { +@@ -233,7 +241,6 @@ struct posix_private { + char disk_unit; + uint32_t disk_space_full; + pthread_t disk_space_check; +- gf_boolean_t disk_space_check_active; + + #ifdef GF_DARWIN_HOST_OS + enum { +@@ -263,6 +270,7 @@ struct posix_private { + gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; + uint32_t rel_fdcount; ++ void *pxl; + }; + 
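The pxl pointer added to struct posix_private above ties each brick to its entry in the per-process registry, and teardown depends on a small condition-variable handshake: delete_posix_diskxl() sets detach_notify and waits until the checker thread clears is_use before unlinking the entry. A compressed sketch of that handshake, with hypothetical simplified types:

#include <pthread.h>

/* Hypothetical, simplified registry entry. */
struct disk_entry {
    pthread_cond_t cond;
    int is_use;        /* set while the checker thread works on this entry */
    int detach_notify; /* set when the brick wants out of the registry */
};

/* Brick detach path; `lk` is the registry lock. */
static void detach_entry(struct disk_entry *e, pthread_mutex_t *lk)
{
    pthread_mutex_lock(lk);
    e->detach_notify = 1;
    while (e->is_use) /* wait for the checker to finish with this entry */
        pthread_cond_wait(&e->cond, lk);
    /* now safe to unlink the entry from the list and free it */
    pthread_mutex_unlock(lk);
}

/* Checker side: mark the entry busy, do the check unlocked, hand it back. */
static void check_entry(struct disk_entry *e, pthread_mutex_t *lk)
{
    pthread_mutex_lock(lk);
    e->is_use = 1;
    pthread_mutex_unlock(lk);

    /* ... statvfs() and the reserve calculation run here without the lock ... */

    pthread_mutex_lock(lk);
    e->is_use = 0;
    if (e->detach_notify)
        pthread_cond_signal(&e->cond); /* wake the detaching brick */
    pthread_mutex_unlock(lk);
}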
+ typedef struct { +-- +1.8.3.1 + diff --git a/SOURCES/0507-inode-make-critical-section-smaller.patch b/SOURCES/0507-inode-make-critical-section-smaller.patch new file mode 100644 index 0000000..3b1dac5 --- /dev/null +++ b/SOURCES/0507-inode-make-critical-section-smaller.patch @@ -0,0 +1,764 @@ +From b3a17b67a69142eef1b4adde3409d5e54dda1e0b Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Sat, 9 Feb 2019 13:23:06 +0530 +Subject: [PATCH 507/511] inode: make critical section smaller + +do all the 'static' tasks outside of locked region. + +* hash_dentry() and hash_gfid() are now called outside locked region. +* remove extra __dentry_hash exported in libglusterfs.sym +* avoid checks in locked functions, if the check is done in calling + function. +* implement dentry_destroy(), which handles freeing of dentry separately, + from that of dentry_unset (which takes care of separating dentry from + inode, and table) + +> Updates: bz#1670031 +> Change-Id: I584213e0748464bb427fbdef3c4ab6615d7d5eb0 +> Signed-off-by: Amar Tumballi +> (Cherry pick from commit 8a90d346b9d3f69ff11241feb0011c90a8e57e30) +> (Review on upstream link https://review.gluster.org/#/c/glusterfs/+/22184/) + +Change-Id: I584213e0748464bb427fbdef3c4ab6615d7d5eb0 +BUG: 1898777 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221189 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/inode.h | 3 - + libglusterfs/src/inode.c | 323 +++++++++++++------------------------ + libglusterfs/src/libglusterfs.sym | 1 - + 3 files changed, 111 insertions(+), 216 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index 4421c47..c875653 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -167,9 +167,6 @@ inode_rename(inode_table_t *table, inode_t *olddir, const char *oldname, + inode_t *newdir, const char *newname, inode_t *inode, + struct iatt *stbuf); + +-dentry_t * +-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name); +- + inode_t * + inode_grep(inode_table_t *table, inode_t *parent, const char *name); + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 4c3c546..71b2d2a 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -159,27 +159,15 @@ hash_dentry(inode_t *parent, const char *name, int mod) + static int + hash_gfid(uuid_t uuid, int mod) + { +- int ret = 0; +- +- ret = uuid[15] + (uuid[14] << 8); +- +- return ret; ++ return ((uuid[15] + (uuid[14] << 8)) % mod); + } + + static void +-__dentry_hash(dentry_t *dentry) ++__dentry_hash(dentry_t *dentry, const int hash) + { + inode_table_t *table = NULL; +- int hash = 0; +- +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); +- return; +- } + + table = dentry->inode->table; +- hash = hash_dentry(dentry->parent, dentry->name, table->hashsize); + + list_del_init(&dentry->hash); + list_add(&dentry->hash, &table->name_hash[hash]); +@@ -188,49 +176,44 @@ __dentry_hash(dentry_t *dentry) + static int + __is_dentry_hashed(dentry_t *dentry) + { +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); +- return 0; +- } +- + return !list_empty(&dentry->hash); + } + + static void + __dentry_unhash(dentry_t *dentry) + { +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not 
found"); +- return; +- } +- + list_del_init(&dentry->hash); + } + + static void +-__dentry_unset(dentry_t *dentry) ++dentry_destroy(dentry_t *dentry) + { +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); ++ if (!dentry) + return; +- } ++ ++ GF_FREE(dentry->name); ++ dentry->name = NULL; ++ mem_put(dentry); ++ ++ return; ++} ++ ++static dentry_t * ++__dentry_unset(dentry_t *dentry) ++{ ++ if (!dentry) ++ return NULL; + + __dentry_unhash(dentry); + + list_del_init(&dentry->inode_list); + +- GF_FREE(dentry->name); +- dentry->name = NULL; +- + if (dentry->parent) { + __inode_unref(dentry->parent, false); + dentry->parent = NULL; + } + +- mem_put(dentry); ++ return dentry; + } + + static int +@@ -289,22 +272,14 @@ static int + __is_dentry_cyclic(dentry_t *dentry) + { + int ret = 0; +- inode_t *inode = NULL; +- char *name = ""; + + ret = __foreach_ancestor_dentry(dentry, __check_cycle, dentry->inode); + if (ret) { +- inode = dentry->inode; +- +- if (dentry->name) +- name = dentry->name; +- + gf_msg(dentry->inode->table->name, GF_LOG_CRITICAL, 0, + LG_MSG_DENTRY_CYCLIC_LOOP, +- "detected cyclic loop " +- "formation during inode linkage. inode (%s) linking " +- "under itself as %s", +- uuid_utoa(inode->gfid), name); ++ "detected cyclic loop formation during inode linkage. " ++ "inode (%s) linking under itself as %s", ++ uuid_utoa(dentry->inode->gfid), dentry->name); + } + + return ret; +@@ -313,41 +288,19 @@ __is_dentry_cyclic(dentry_t *dentry) + static void + __inode_unhash(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_del_init(&inode->hash); + } + + static int + __is_inode_hashed(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return 0; +- } +- + return !list_empty(&inode->hash); + } + + static void +-__inode_hash(inode_t *inode) ++__inode_hash(inode_t *inode, const int hash) + { +- inode_table_t *table = NULL; +- int hash = 0; +- +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- +- table = inode->table; +- hash = hash_gfid(inode->gfid, 65536); ++ inode_table_t *table = inode->table; + + list_del_init(&inode->hash); + list_add(&inode->hash, &table->inode_hash[hash]); +@@ -359,12 +312,6 @@ __dentry_search_for_inode(inode_t *inode, uuid_t pargfid, const char *name) + dentry_t *dentry = NULL; + dentry_t *tmp = NULL; + +- if (!inode || !name) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, +- "inode || name not found"); +- return NULL; +- } +- + /* earlier, just the ino was sent, which could have been 0, now + we deal with gfid, and if sent gfid is null or 0, no need to + continue with the check */ +@@ -390,12 +337,6 @@ __inode_ctx_free(inode_t *inode) + xlator_t *xl = NULL; + xlator_t *old_THIS = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + if (!inode->_ctx) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_CTX_NULL, + "_ctx not found"); +@@ -424,12 +365,6 @@ noctx: + static void + __inode_destroy(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + __inode_ctx_free(inode); + + LOCK_DESTROY(&inode->lock); +@@ -472,9 
+407,6 @@ inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode) + static void + __inode_activate(inode_t *inode) + { +- if (!inode) +- return; +- + list_move(&inode->list, &inode->table->active); + inode->table->active_size++; + } +@@ -485,19 +417,13 @@ __inode_passivate(inode_t *inode) + dentry_t *dentry = NULL; + dentry_t *t = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_move_tail(&inode->list, &inode->table->lru); + inode->table->lru_size++; + + list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) + { + if (!__is_dentry_hashed(dentry)) +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + } + } + +@@ -507,12 +433,6 @@ __inode_retire(inode_t *inode) + dentry_t *dentry = NULL; + dentry_t *t = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_move_tail(&inode->list, &inode->table->purge); + inode->table->purge_size++; + +@@ -520,7 +440,7 @@ __inode_retire(inode_t *inode) + + list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) + { +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + } + } + +@@ -547,9 +467,6 @@ __inode_unref(inode_t *inode, bool clear) + xlator_t *this = NULL; + uint64_t nlookup = 0; + +- if (!inode) +- return NULL; +- + /* + * Root inode should always be in active list of inode table. So unrefs + * on root inode are no-ops. +@@ -677,16 +594,10 @@ inode_ref(inode_t *inode) + } + + static dentry_t * +-__dentry_create(inode_t *inode, inode_t *parent, const char *name) ++dentry_create(inode_t *inode, inode_t *parent, const char *name) + { + dentry_t *newd = NULL; + +- if (!inode || !parent || !name) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, +- "inode || parent || name not found"); +- return NULL; +- } +- + newd = mem_get0(parent->table->dentry_pool); + if (newd == NULL) { + goto out; +@@ -702,10 +613,6 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name) + goto out; + } + +- if (parent) +- newd->parent = __inode_ref(parent, false); +- +- list_add(&newd->inode_list, &inode->dentry_list); + newd->inode = inode; + + out: +@@ -717,14 +624,6 @@ __inode_create(inode_table_t *table) + { + inode_t *newi = NULL; + +- if (!table) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, +- LG_MSG_INODE_TABLE_NOT_FOUND, +- "table not " +- "found"); +- return NULL; +- } +- + newi = mem_get0(table->inode_pool); + if (!newi) { + goto out; +@@ -795,9 +694,6 @@ __inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) + { + uint64_t nlookup = 0; + +- if (!inode) +- return NULL; +- + GF_ASSERT(inode->ref >= nref); + + inode->ref -= nref; +@@ -837,17 +733,12 @@ inode_forget_atomic(inode_t *inode, uint64_t nlookup) + } + + dentry_t * +-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name) ++__dentry_grep(inode_table_t *table, inode_t *parent, const char *name, ++ const int hash) + { +- int hash = 0; + dentry_t *dentry = NULL; + dentry_t *tmp = NULL; + +- if (!table || !name || !parent) +- return NULL; +- +- hash = hash_dentry(parent, name, table->hashsize); +- + list_for_each_entry(tmp, &table->name_hash[hash], hash) + { + if (tmp->parent == parent && !strcmp(tmp->name, name)) { +@@ -872,15 +763,16 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) + return NULL; + } + ++ int hash = hash_dentry(parent, name, table->hashsize); ++ + 
pthread_mutex_lock(&table->lock); + { +- dentry = __dentry_grep(table, parent, name); +- +- if (dentry) ++ dentry = __dentry_grep(table, parent, name, hash); ++ if (dentry) { + inode = dentry->inode; +- +- if (inode) +- __inode_ref(inode, false); ++ if (inode) ++ __inode_ref(inode, false); ++ } + } + pthread_mutex_unlock(&table->lock); + +@@ -947,17 +839,18 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name, + return ret; + } + ++ int hash = hash_dentry(parent, name, table->hashsize); ++ + pthread_mutex_lock(&table->lock); + { +- dentry = __dentry_grep(table, parent, name); +- +- if (dentry) ++ dentry = __dentry_grep(table, parent, name, hash); ++ if (dentry) { + inode = dentry->inode; +- +- if (inode) { +- gf_uuid_copy(gfid, inode->gfid); +- *type = inode->ia_type; +- ret = 0; ++ if (inode) { ++ gf_uuid_copy(gfid, inode->gfid); ++ *type = inode->ia_type; ++ ret = 0; ++ } + } + } + pthread_mutex_unlock(&table->lock); +@@ -978,25 +871,14 @@ __is_root_gfid(uuid_t gfid) + } + + inode_t * +-__inode_find(inode_table_t *table, uuid_t gfid) ++__inode_find(inode_table_t *table, uuid_t gfid, const int hash) + { + inode_t *inode = NULL; + inode_t *tmp = NULL; +- int hash = 0; +- +- if (!table) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, +- LG_MSG_INODE_TABLE_NOT_FOUND, +- "table not " +- "found"); +- goto out; +- } + + if (__is_root_gfid(gfid)) + return table->root; + +- hash = hash_gfid(gfid, 65536); +- + list_for_each_entry(tmp, &table->inode_hash[hash], hash) + { + if (gf_uuid_compare(tmp->gfid, gfid) == 0) { +@@ -1005,7 +887,6 @@ __inode_find(inode_table_t *table, uuid_t gfid) + } + } + +-out: + return inode; + } + +@@ -1022,9 +903,11 @@ inode_find(inode_table_t *table, uuid_t gfid) + return NULL; + } + ++ int hash = hash_gfid(gfid, 65536); ++ + pthread_mutex_lock(&table->lock); + { +- inode = __inode_find(table, gfid); ++ inode = __inode_find(table, gfid, hash); + if (inode) + __inode_ref(inode, false); + } +@@ -1035,7 +918,7 @@ inode_find(inode_table_t *table, uuid_t gfid) + + static inode_t * + __inode_link(inode_t *inode, inode_t *parent, const char *name, +- struct iatt *iatt) ++ struct iatt *iatt, const int dhash) + { + dentry_t *dentry = NULL; + dentry_t *old_dentry = NULL; +@@ -1043,16 +926,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + inode_table_t *table = NULL; + inode_t *link_inode = NULL; + +- if (!inode) { +- errno = EINVAL; +- return NULL; +- } +- + table = inode->table; +- if (!table) { +- errno = EINVAL; +- return NULL; +- } + + if (parent) { + /* We should prevent inode linking between different +@@ -1090,14 +964,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + return NULL; + } + +- old_inode = __inode_find(table, iatt->ia_gfid); ++ int ihash = hash_gfid(iatt->ia_gfid, 65536); ++ ++ old_inode = __inode_find(table, iatt->ia_gfid, ihash); + + if (old_inode) { + link_inode = old_inode; + } else { + gf_uuid_copy(inode->gfid, iatt->ia_gfid); + inode->ia_type = iatt->ia_type; +- __inode_hash(inode); ++ __inode_hash(inode, ihash); + } + } else { + /* @old_inode serves another important purpose - it indicates +@@ -1112,22 +988,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + old_inode = inode; + } + +- if (name) { +- if (!strcmp(name, ".") || !strcmp(name, "..")) +- return link_inode; +- +- if (strchr(name, '/')) { +- GF_ASSERT(!"inode link attempted with '/' in name"); +- return NULL; +- } ++ if (name && (!strcmp(name, ".") || !strcmp(name, ".."))) { ++ return link_inode; + } + 
+ /* use only link_inode beyond this point */ + if (parent) { +- old_dentry = __dentry_grep(table, parent, name); ++ old_dentry = __dentry_grep(table, parent, name, dhash); + + if (!old_dentry || old_dentry->inode != link_inode) { +- dentry = __dentry_create(link_inode, parent, name); ++ dentry = dentry_create(link_inode, parent, name); + if (!dentry) { + gf_msg_callingfn( + THIS->name, GF_LOG_ERROR, 0, LG_MSG_DENTRY_CREATE_FAILED, +@@ -1137,15 +1007,20 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + errno = ENOMEM; + return NULL; + } ++ ++ /* dentry linking needs to happen inside lock */ ++ dentry->parent = __inode_ref(parent, false); ++ list_add(&dentry->inode_list, &link_inode->dentry_list); ++ + if (old_inode && __is_dentry_cyclic(dentry)) { + errno = ELOOP; +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + return NULL; + } +- __dentry_hash(dentry); ++ __dentry_hash(dentry, dhash); + + if (old_dentry) +- __dentry_unset(old_dentry); ++ dentry_destroy(__dentry_unset(old_dentry)); + } + } + +@@ -1155,6 +1030,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + inode_t * + inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + { ++ int hash = 0; + inode_table_t *table = NULL; + inode_t *linked_inode = NULL; + +@@ -1166,10 +1042,18 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + + table = inode->table; + ++ if (parent && name) { ++ hash = hash_dentry(parent, name, table->hashsize); ++ } ++ ++ if (name && strchr(name, '/')) { ++ GF_ASSERT(!"inode link attempted with '/' in name"); ++ return NULL; ++ } ++ + pthread_mutex_lock(&table->lock); + { +- linked_inode = __inode_link(inode, parent, name, iatt); +- ++ linked_inode = __inode_link(inode, parent, name, iatt, hash); + if (linked_inode) + __inode_ref(linked_inode, false); + } +@@ -1312,48 +1196,47 @@ inode_invalidate(inode_t *inode) + return ret; + } + +-static void ++static dentry_t * + __inode_unlink(inode_t *inode, inode_t *parent, const char *name) + { + dentry_t *dentry = NULL; + char pgfid[64] = {0}; + char gfid[64] = {0}; + +- if (!inode || !parent || !name) +- return; +- + dentry = __dentry_search_for_inode(inode, parent->gfid, name); + + /* dentry NULL for corrupted backend */ + if (dentry) { +- __dentry_unset(dentry); ++ dentry = __dentry_unset(dentry); + } else { + gf_msg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, + "%s/%s: dentry not found in %s", + uuid_utoa_r(parent->gfid, pgfid), name, + uuid_utoa_r(inode->gfid, gfid)); + } ++ ++ return dentry; + } + + void + inode_unlink(inode_t *inode, inode_t *parent, const char *name) + { +- inode_table_t *table = NULL; ++ inode_table_t *table; ++ dentry_t *dentry; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); ++ if (!inode || !parent || !name) + return; +- } + + table = inode->table; + + pthread_mutex_lock(&table->lock); + { +- __inode_unlink(inode, parent, name); ++ dentry = __inode_unlink(inode, parent, name); + } + pthread_mutex_unlock(&table->lock); + ++ dentry_destroy(dentry); ++ + inode_table_prune(table); + } + +@@ -1362,6 +1245,9 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + inode_t *dstdir, const char *dstname, inode_t *inode, + struct iatt *iatt) + { ++ int hash = 0; ++ dentry_t *dentry = NULL; ++ + if (!inode) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, + "inode not found"); +@@ -1370,13 +1256,26 @@ 
inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + + table = inode->table; + ++ if (dstname && strchr(dstname, '/')) { ++ GF_ASSERT(!"inode link attempted with '/' in name"); ++ return -1; ++ } ++ ++ if (dstdir && dstname) { ++ hash = hash_dentry(dstdir, dstname, table->hashsize); ++ } ++ + pthread_mutex_lock(&table->lock); + { +- __inode_link(inode, dstdir, dstname, iatt); +- __inode_unlink(inode, srcdir, srcname); ++ __inode_link(inode, dstdir, dstname, iatt, hash); ++ /* pick the old dentry */ ++ dentry = __inode_unlink(inode, srcdir, srcname); + } + pthread_mutex_unlock(&table->lock); + ++ /* free the old dentry */ ++ dentry_destroy(dentry); ++ + inode_table_prune(table); + + return 0; +@@ -1447,12 +1346,6 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name) + static int + __inode_has_dentry(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return 0; +- } +- + return !list_empty(&inode->dentry_list); + } + +@@ -1461,6 +1354,12 @@ inode_has_dentry(inode_t *inode) + { + int dentry_present = 0; + ++ if (!inode) { ++ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, ++ "inode not found"); ++ return 0; ++ } ++ + LOCK(&inode->lock); + { + dentry_present = __inode_has_dentry(inode); +@@ -1720,7 +1619,7 @@ __inode_table_init_root(inode_table_t *table) + iatt.ia_ino = 1; + iatt.ia_type = IA_IFDIR; + +- __inode_link(root, NULL, NULL, &iatt); ++ __inode_link(root, NULL, NULL, &iatt, 0); + table->root = root; + } + +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 5a721e0..d060292 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -357,7 +357,6 @@ default_copy_file_range + default_copy_file_range_cbk + default_copy_file_range_failure_cbk + default_copy_file_range_resume +-__dentry_grep + dht_is_linkfile + dict_add + dict_addn +-- +1.8.3.1 + diff --git a/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch b/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch new file mode 100644 index 0000000..9ccc1b5 --- /dev/null +++ b/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch @@ -0,0 +1,232 @@ +From 87b7689f7727a542c5afa22bdebd3781dd650a2f Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Fri, 17 Jul 2020 11:33:36 +0200 +Subject: [PATCH 508/511] fuse: fetch arbitrary number of groups from + /proc/[pid]/status + +Glusterfs so far constrained itself with an arbitrary limit (32) +for the number of groups read from /proc/[pid]/status (this was +the number of groups shown there prior to Linux commit +v3.7-9553-g8d238027b87e (v3.8-rc1~74^2~59); since this commit, all +groups are shown). + +With this change we'll read groups up to the number Glusterfs +supports in general (64k). + +Note: the actual number of groups that are made use of in a +regular Glusterfs setup shall still be capped at ~93 due to limitations +of the RPC transport. To be able to handle more groups than that, +brick side gid resolution (server.manage-gids option) can be used along +with NIS, LDAP or other such networked directory service (see +https://github.com/gluster/glusterdocs/blob/5ba15a2/docs/Administrator%20Guide/Handling-of-users-with-many-groups.md#limit-in-the-glusterfs-protocol +). + +Also adding some diagnostic messages to frame_fill_groups(). 
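As a reference for the strategy described above, here is a standalone sketch of the grow-and-rescan parse of /proc/[pid]/status; read_proc_groups and MAX_AUX_GROUPS are illustrative names, and only the approach (small first guess, grow to the counted size, rewind and rescan) mirrors the patch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#define MAX_AUX_GROUPS 65536 /* overall cap, analogous to GF_MAX_AUX_GROUPS */

/* Read every gid on the "Groups:" line of /proc/<pid>/status. The first
 * pass uses a small buffer; if the line holds more gids than fit, grow
 * the buffer to the counted size, rewind and rescan. */
static int read_proc_groups(pid_t pid, gid_t **out, int *n_out)
{
    char path[32], line[4096];
    if ((size_t)snprintf(path, sizeof path, "/proc/%d/status", pid) >= sizeof path)
        return -1;

    FILE *fp = fopen(path, "r");
    if (!fp)
        return -1;

    int capacity = 32; /* the old fixed limit becomes just a first guess */
    gid_t *groups = NULL;
    int ret = -1;

    for (;;) {
        gid_t *tmp = realloc(groups, capacity * sizeof *tmp);
        if (!tmp)
            break;
        groups = tmp;

        int found = 0, idx = 0;
        while (fgets(line, sizeof line, fp)) {
            if (strncmp(line, "Groups:", 7) == 0) {
                found = 1;
                break;
            }
        }
        if (!found)
            break; /* no Groups: line at all */

        char *save = NULL;
        for (char *tok = strtok_r(line + 7, " \t\r\n", &save); tok;
             tok = strtok_r(NULL, " \t\r\n", &save)) {
            if (idx < capacity)
                groups[idx] = (gid_t)strtol(tok, NULL, 10);
            if (++idx == MAX_AUX_GROUPS)
                break;
        }

        if (idx > capacity) { /* buffer was too small: grow and rescan */
            capacity = idx;
            rewind(fp);
            continue;
        }

        *out = groups;
        *n_out = idx;
        ret = 0;
        break;
    }

    if (ret != 0)
        free(groups);
    fclose(fp);
    return ret;
}

int main(void)
{
    gid_t *groups = NULL;
    int n = 0;
    if (read_proc_groups(getpid(), &groups, &n) == 0) {
        printf("%d groups\n", n);
        free(groups);
    }
    return 0;
}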
+ +Upstream: +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/24721 +> Change-Id: I271f3dc3e6d3c44d6d989c7a2073ea5f16c26ee0 +> fixes: #1075 +> Signed-off-by: Csaba Henk + +BUG: 1749304 +Change-Id: I80bf99d34087fb95768bf2259d8c4774d9f5d0c5 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/220920 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/stack.h | 7 ++++ + tests/bugs/fuse/many-groups-for-acl.t | 13 ++++++- + xlators/mount/fuse/src/fuse-helpers.c | 71 +++++++++++++++++++++++------------ + 3 files changed, 65 insertions(+), 26 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h +index 1758550..bd466d8 100644 +--- a/libglusterfs/src/glusterfs/stack.h ++++ b/libglusterfs/src/glusterfs/stack.h +@@ -429,6 +429,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) + if (ngrps <= SMALL_GROUP_COUNT) { + stack->groups = stack->groups_small; + } else { ++ GF_FREE(stack->groups_large); + stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t), + gf_common_mt_groups_t); + if (!stack->groups_large) +@@ -442,6 +443,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) + } + + static inline int ++call_stack_groups_capacity(call_stack_t *stack) ++{ ++ return max(stack->ngrps, SMALL_GROUP_COUNT); ++} ++ ++static inline int + call_frames_count(call_stack_t *call_stack) + { + call_frame_t *pos; +diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t +index d959f75..a51b1bc 100755 +--- a/tests/bugs/fuse/many-groups-for-acl.t ++++ b/tests/bugs/fuse/many-groups-for-acl.t +@@ -38,6 +38,13 @@ do + done + TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} + ++# Linux < 3.8 exports only first 32 gids of pid to userspace ++kernel_exports_few_gids=0 ++if [ "$OSTYPE" = Linux ] && \ ++ su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then ++ kernel_exports_few_gids=1 ++fi ++ + # preparation done, start the tests + + TEST glusterd +@@ -48,6 +55,8 @@ TEST $CLI volume set $V0 nfs.disable off + TEST $CLI volume set ${V0} server.manage-gids off + TEST $CLI volume start ${V0} + ++# This is just a synchronization hack to make sure the bricks are ++# up before going on. + EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available + + # mount the volume with POSIX ACL support, without --resolve-gids +@@ -69,8 +78,8 @@ TEST [ $? -eq 0 ] + su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null" + TEST [ $? -eq 0 ] + +-su -m ${NEW_USER} -c "touch ${M0}/gid-64/failure > /dev/null" +-TEST [ $? -ne 0 ] ++su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null" ++TEST [ $? -eq $kernel_exports_few_gids ] + + su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null" + TEST [ $? 
-ne 0 ] +diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c +index 5bfc40c..6e04cd4 100644 +--- a/xlators/mount/fuse/src/fuse-helpers.c ++++ b/xlators/mount/fuse/src/fuse-helpers.c +@@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh) + return state; + } + +-#define FUSE_MAX_AUX_GROUPS \ +- 32 /* We can get only up to 32 aux groups from /proc */ + void + frame_fill_groups(call_frame_t *frame) + { +@@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame) + char filename[32]; + char line[4096]; + char *ptr = NULL; +- FILE *fp = NULL; +- int idx = 0; + long int id = 0; + char *saveptr = NULL; + char *endptr = NULL; +@@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame) + + call_stack_set_groups(frame->root, ngroups, &mygroups); + } else { ++ FILE *fp = NULL; ++ + ret = snprintf(filename, sizeof filename, "/proc/%d/status", + frame->root->pid); +- if (ret >= sizeof filename) ++ if (ret >= sizeof filename) { ++ gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size"); + goto out; ++ } + + fp = fopen(filename, "r"); +- if (!fp) ++ if (!fp) { ++ gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename, ++ strerror(errno)); + goto out; ++ } + +- if (call_stack_alloc_groups(frame->root, ngroups) != 0) +- goto out; ++ for (;;) { ++ gf_boolean_t found_groups = _gf_false; ++ int idx = 0; + +- while ((ptr = fgets(line, sizeof line, fp))) { +- if (strncmp(ptr, "Groups:", 7) != 0) +- continue; ++ if (call_stack_alloc_groups(frame->root, ngroups) != 0) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to allocate gid buffer"); ++ goto out; ++ } + ++ while ((ptr = fgets(line, sizeof line, fp))) { ++ if (strncmp(ptr, "Groups:", 7) == 0) { ++ found_groups = _gf_true; ++ break; ++ } ++ } ++ if (!found_groups) { ++ gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s", ++ filename); ++ break; ++ } + ptr = line + 8; + + for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr; + ptr = strtok_r(NULL, " \t\r\n", &saveptr)) { + errno = 0; + id = strtol(ptr, &endptr, 0); +- if (errno == ERANGE) +- break; +- if (!endptr || *endptr) ++ if (errno == ERANGE || !endptr || *endptr) { ++ gf_log(this->name, GF_LOG_ERROR, "failed to parse %s", ++ filename); + break; +- frame->root->groups[idx++] = id; +- if (idx == FUSE_MAX_AUX_GROUPS) ++ } ++ if (idx < call_stack_groups_capacity(frame->root)) ++ frame->root->groups[idx] = id; ++ idx++; ++ if (idx == GF_MAX_AUX_GROUPS) + break; + } +- +- frame->root->ngrps = idx; +- break; ++ if (idx > call_stack_groups_capacity(frame->root)) { ++ ngroups = idx; ++ rewind(fp); ++ } else { ++ frame->root->ngrps = idx; ++ break; ++ } + } ++ out: ++ if (fp) ++ fclose(fp); + } +- +-out: +- if (fp) +- fclose(fp); + #elif defined(GF_SOLARIS_HOST_OS) + char filename[32]; + char scratch[128]; +@@ -245,7 +268,7 @@ out: + fp = fopen(filename, "r"); + if (fp != NULL) { + if (fgets(scratch, sizeof scratch, fp) != NULL) { +- ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS); ++ ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS); + if (call_stack_alloc_groups(frame->root, ngrps) != 0) { + fclose(fp); + return; +-- +1.8.3.1 + diff --git a/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch b/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch new file mode 100644 index 0000000..fdfc9bb --- /dev/null +++ b/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch @@ -0,0 +1,407 @@ +From a18f03cbf2b5652f8617cb4dd236bb4ca9838d96 Mon Sep 17 00:00:00 2001 +From: 
Mohit Agrawal +Date: Tue, 6 Oct 2020 16:54:15 +0530 +Subject: [PATCH 509/511] core: configure optimum inode table hash_size for shd + +In a brick_mux environment a shd process consumes high memory. +After printing the statedump I found that it allocates 1M per afr xlator +for all bricks. In case 4k volumes are configured it consumes almost +6G RSS size in total, of which 4G is consumed by inode_tables + +[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage] +size=1273488 +num_allocs=2 +max_size=1273488 +max_num_allocs=2 +total_allocs=2 + +The inode_new_table function allocates memory (1M) for the lists of inode and dentry hash buckets. +For shd the lru_limit size is 1, so we don't need to create a big hash table; to reduce the +RSS size of the shd process, pass an optimum bucket count at the time of creating the inode_table. + +> Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb +> Fixes: #1538 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit ca6bbc486e76fdb9a8e07119bb10d7fa45b2e93b) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1538) + +Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb +BUG: 1898777 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221191 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-master.c | 2 +- + libglusterfs/src/glusterfs/inode.h | 17 +++++---- + libglusterfs/src/inode.c | 53 +++++++++++++++++--------- + xlators/cluster/afr/src/afr.c | 10 ++++- + xlators/cluster/dht/src/dht-rebalance.c | 3 +- + xlators/cluster/ec/src/ec.c | 2 +- + xlators/features/bit-rot/src/bitd/bit-rot.c | 2 +- + xlators/features/quota/src/quotad-helpers.c | 2 +- + xlators/features/trash/src/trash.c | 4 +- + xlators/mount/fuse/src/fuse-bridge.c | 6 +-- + xlators/nfs/server/src/nfs.c | 2 +- + xlators/protocol/server/src/server-handshake.c | 3 +- + 12 files changed, 66 insertions(+), 40 deletions(-) + +diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c +index b4473b1..9e604d3 100644 +--- a/api/src/glfs-master.c ++++ b/api/src/glfs-master.c +@@ -45,7 +45,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph) + } + + if (!new_subvol->itable) { +- itable = inode_table_new(131072, new_subvol); ++ itable = inode_table_new(131072, new_subvol, 0, 0); + if (!itable) { + errno = ENOMEM; + ret = -1; +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index c875653..62c093d 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -35,11 +35,12 @@ typedef struct _dentry dentry_t; + + struct _inode_table { + pthread_mutex_t lock; +- size_t hashsize; /* bucket size of inode hash and dentry hash */ +- char *name; /* name of the inode table, just for gf_log() */ +- inode_t *root; /* root directory inode, with number 1 */ +- xlator_t *xl; /* xlator to be called to do purge */ +- uint32_t lru_limit; /* maximum LRU cache size */ ++ size_t dentry_hashsize; /* Number of buckets for dentry hash*/ ++ size_t inode_hashsize; /* Size of inode hash table */ ++ char *name; /* name of the inode table, just for gf_log() */ ++ inode_t *root; /* root directory inode, with number 1 */ ++ xlator_t *xl; /* xlator to be called to do purge */ ++ uint32_t lru_limit; /* maximum LRU cache size */ + struct list_head *inode_hash; /* buckets for inode hash table */ + struct list_head *name_hash; /* buckets for dentry hash table */ + struct list_head active; /* list of inodes currently active (in an fop) */ +@@ -116,12 +117,14 @@ struct _inode { + 
#define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1) + + inode_table_t * +-inode_table_new(uint32_t lru_limit, xlator_t *xl); ++inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size, ++ uint32_t inodehash_size); + + inode_table_t * + inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), +- xlator_t *invalidator_xl); ++ xlator_t *invalidator_xl, uint32_t dentry_hashsize, ++ uint32_t inode_hashsize); + + void + inode_table_destroy_all(glusterfs_ctx_t *ctx); +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 71b2d2a..98f8ea6 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -763,7 +763,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) + return NULL; + } + +- int hash = hash_dentry(parent, name, table->hashsize); ++ int hash = hash_dentry(parent, name, table->dentry_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -839,7 +839,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name, + return ret; + } + +- int hash = hash_dentry(parent, name, table->hashsize); ++ int hash = hash_dentry(parent, name, table->dentry_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -903,7 +903,7 @@ inode_find(inode_table_t *table, uuid_t gfid) + return NULL; + } + +- int hash = hash_gfid(gfid, 65536); ++ int hash = hash_gfid(gfid, table->inode_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -964,7 +964,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + return NULL; + } + +- int ihash = hash_gfid(iatt->ia_gfid, 65536); ++ int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize); + + old_inode = __inode_find(table, iatt->ia_gfid, ihash); + +@@ -1043,7 +1043,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + table = inode->table; + + if (parent && name) { +- hash = hash_dentry(parent, name, table->hashsize); ++ hash = hash_dentry(parent, name, table->dentry_hashsize); + } + + if (name && strchr(name, '/')) { +@@ -1262,7 +1262,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + } + + if (dstdir && dstname) { +- hash = hash_dentry(dstdir, dstname, table->hashsize); ++ hash = hash_dentry(dstdir, dstname, table->dentry_hashsize); + } + + pthread_mutex_lock(&table->lock); +@@ -1626,7 +1626,8 @@ __inode_table_init_root(inode_table_t *table) + inode_table_t * + inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), +- xlator_t *invalidator_xl) ++ xlator_t *invalidator_xl, uint32_t dentry_hashsize, ++ uint32_t inode_hashsize) + { + inode_table_t *new = NULL; + uint32_t mem_pool_size = lru_limit; +@@ -1644,7 +1645,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + new->invalidator_fn = invalidator_fn; + new->invalidator_xl = invalidator_xl; + +- new->hashsize = 14057; /* TODO: Random Number?? */ ++ if (dentry_hashsize == 0) { ++ /* Prime number for uniform distribution */ ++ new->dentry_hashsize = 14057; ++ } else { ++ new->dentry_hashsize = dentry_hashsize; ++ } ++ ++ if (inode_hashsize == 0) { ++ /* The size of hash table always should be power of 2 */ ++ new->inode_hashsize = 65536; ++ } else { ++ new->inode_hashsize = inode_hashsize; ++ } + + /* In case FUSE is initing the inode table. 
*/ + if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) +@@ -1658,13 +1671,13 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + if (!new->dentry_pool) + goto out; + +- new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head), +- gf_common_mt_list_head); ++ new->inode_hash = (void *)GF_CALLOC( ++ new->inode_hashsize, sizeof(struct list_head), gf_common_mt_list_head); + if (!new->inode_hash) + goto out; + +- new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head), +- gf_common_mt_list_head); ++ new->name_hash = (void *)GF_CALLOC( ++ new->dentry_hashsize, sizeof(struct list_head), gf_common_mt_list_head); + if (!new->name_hash) + goto out; + +@@ -1675,11 +1688,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + if (!new->fd_mem_pool) + goto out; + +- for (i = 0; i < 65536; i++) { ++ for (i = 0; i < new->inode_hashsize; i++) { + INIT_LIST_HEAD(&new->inode_hash[i]); + } + +- for (i = 0; i < new->hashsize; i++) { ++ for (i = 0; i < new->dentry_hashsize; i++) { + INIT_LIST_HEAD(&new->name_hash[i]); + } + +@@ -1717,10 +1730,12 @@ out: + } + + inode_table_t * +-inode_table_new(uint32_t lru_limit, xlator_t *xl) ++inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dentry_hashsize, ++ uint32_t inode_hashsize) + { + /* Only fuse for now requires the inode table with invalidator */ +- return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); ++ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL, ++ dentry_hashsize, inode_hashsize); + } + + int +@@ -2439,8 +2454,10 @@ inode_table_dump(inode_table_t *itable, char *prefix) + return; + } + +- gf_proc_dump_build_key(key, prefix, "hashsize"); +- gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize); ++ gf_proc_dump_build_key(key, prefix, "dentry_hashsize"); ++ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize); ++ gf_proc_dump_build_key(key, prefix, "inode_hashsize"); ++ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize); + gf_proc_dump_build_key(key, prefix, "name"); + gf_proc_dump_write(key, "%s", itable->name); + +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 8f9e71f..bfa464f 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -594,7 +594,15 @@ init(xlator_t *this) + goto out; + } + +- this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this); ++ if (priv->shd.iamshd) { ++ /* Number of hash bucket should be prime number so declare 131 ++ total dentry hash buckets ++ */ ++ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128); ++ } else { ++ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0); ++ } ++ + if (!this->itable) { + ret = -ENOMEM; + goto out; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 16ac16c..072896d 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1168,7 +1168,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + break; + } + +- + offset += ret; + total += ret; + +@@ -2467,7 +2466,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode) + 0, + }; + +- itable = inode_table_new(0, this); ++ itable = inode_table_new(0, this, 0, 0); + if (!itable) + return; + +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 3f31c74..4118c3b 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -734,7 +734,7 @@ init(xlator_t *this) 
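For context, a minimal standalone sketch of the sizing convention the patch introduces; names, values and the masking assumption here are illustrative, not the real libglusterfs symbols. Callers pass 0 to keep the old defaults, the dentry hash stays prime-sized for modulo spreading, and the inode hash stays a power of two, presumably so a bucket index can be computed with a mask:

#include <stdint.h>
#include <stdio.h>

#define DEFAULT_DENTRY_BUCKETS 14057 /* prime: index via modulo */
#define DEFAULT_INODE_BUCKETS 65536  /* power of two: index via mask */

static uint32_t
pick_buckets(uint32_t requested, uint32_t fallback)
{
    /* 0 means "use the default", mirroring the patch's convention */
    return requested ? requested : fallback;
}

static uint32_t
bucket_index(uint32_t hash, uint32_t nbuckets)
{
    /* masking is valid only when nbuckets is a power of two */
    if ((nbuckets & (nbuckets - 1)) == 0)
        return hash & (nbuckets - 1);
    return hash % nbuckets;
}

int
main(void)
{
    /* the shd case from the patch: tiny tables instead of the defaults */
    uint32_t dentry_buckets = pick_buckets(131, DEFAULT_DENTRY_BUCKETS);
    uint32_t inode_buckets = pick_buckets(128, DEFAULT_INODE_BUCKETS);

    printf("shd buckets: dentry=%u inode=%u\n", dentry_buckets, inode_buckets);
    printf("bucket of 0xdeadbeef: inode=%u dentry=%u\n",
           bucket_index(0xdeadbeefu, inode_buckets),
           bucket_index(0xdeadbeefu, dentry_buckets));
    return 0;
}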
+ GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed); + GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed); + +- this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this); ++ this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0); + if (!this->itable) + goto failed; + +diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c +index 424c0d5..4e0e798 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.c ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c +@@ -1658,7 +1658,7 @@ notify(xlator_t *this, int32_t event, void *data, ...) + child->child_up = 1; + child->xl = subvol; + if (!child->table) +- child->table = inode_table_new(4096, subvol); ++ child->table = inode_table_new(4096, subvol, 0, 0); + + _br_qchild_event(this, child, br_brick_connect); + pthread_cond_signal(&priv->cond); +diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c +index d9f0351..46ac116 100644 +--- a/xlators/features/quota/src/quotad-helpers.c ++++ b/xlators/features/quota/src/quotad-helpers.c +@@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req) + UNLOCK(&priv->lock); + + if (active_subvol->itable == NULL) +- active_subvol->itable = inode_table_new(4096, active_subvol); ++ active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0); + + state->itable = active_subvol->itable; + +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index 93f020f..099c887 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -2261,7 +2261,7 @@ reconfigure(xlator_t *this, dict_t *options) + + if (!active_earlier && active_now) { + if (!priv->trash_itable) { +- priv->trash_itable = inode_table_new(0, this); ++ priv->trash_itable = inode_table_new(0, this, 0, 0); + if (!priv->trash_itable) { + ret = -ENOMEM; + gf_log(this->name, GF_LOG_ERROR, +@@ -2533,7 +2533,7 @@ init(xlator_t *this) + } + + if (priv->state) { +- priv->trash_itable = inode_table_new(0, this); ++ priv->trash_itable = inode_table_new(0, this, 0, 0); + if (!priv->trash_itable) { + ret = -ENOMEM; + priv->state = _gf_false; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 1bddac2..919eea3 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -6298,10 +6298,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph) + } + + #if FUSE_KERNEL_MINOR_VERSION >= 11 +- itable = inode_table_with_invalidator(priv->lru_limit, graph->top, +- fuse_inode_invalidate_fn, this); ++ itable = inode_table_with_invalidator( ++ priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0); + #else +- itable = inode_table_new(0, graph->top); ++ itable = inode_table_new(0, graph->top, 0, 0); + #endif + if (!itable) { + ret = -1; +diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c +index ebded41..402be30 100644 +--- a/xlators/nfs/server/src/nfs.c ++++ b/xlators/nfs/server/src/nfs.c +@@ -564,7 +564,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl) + return -1; + + lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT; +- xl->itable = inode_table_new(lrusize, xl); ++ xl->itable = inode_table_new(lrusize, xl, 0, 0); + if (!xl->itable) { + gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY, + "Failed to allocate inode table"); +diff --git a/xlators/protocol/server/src/server-handshake.c 
b/xlators/protocol/server/src/server-handshake.c
+index 1d1177d..eeca73c 100644
+--- a/xlators/protocol/server/src/server-handshake.c
++++ b/xlators/protocol/server/src/server-handshake.c
+@@ -36,7 +36,6 @@ gf_compare_client_version(rpcsvc_request_t *req, int fop_prognum,
+ return ret;
+ }
+
+-
+ int
+ server_getspec(rpcsvc_request_t *req)
+ {
+@@ -629,7 +628,7 @@ server_setvolume(rpcsvc_request_t *req)
+
+ /* TODO: what is this ? */
+ client->bound_xl->itable = inode_table_new(conf->inode_lru_limit,
+- client->bound_xl);
++ client->bound_xl, 0, 0);
+ }
+ }
+ UNLOCK(&conf->itable_lock);
+--
+1.8.3.1
+
diff --git a/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch b/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch
new file mode 100644
index 0000000..e8a4906
--- /dev/null
+++ b/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch
@@ -0,0 +1,784 @@
+From 5294c82e0528059b10cbaab7805b20e76ffdd66b Mon Sep 17 00:00:00 2001
+From: mohit84
+Date: Mon, 30 Nov 2020 17:39:53 +0530
+Subject: [PATCH 510/511] glusterd[brick_mux]: Optimize friend handshake code
+ to avoid call_bail (#1614)
+
+During the glusterd handshake, glusterd receives a volume dictionary
+from the peer end and compares it against its own volume dictionary
+data. If the options differ, it sets a key to record that volume
+options have changed and calls the import synctask to delete/start the
+volume. In a brick_mux environment with a high number of volumes (5k),
+the dict API in the function glusterd_compare_friend_volume takes time
+because the function glusterd_handle_friend_req saves all peer volume
+data in a single dictionary. Due to the time taken by
+glusterd_handle_friend_req, RPC requests receive a call_bail from the
+peer end and gluster (CLI) is not able to show volume status.
+
+Solution: To optimize the code, the following changes were made:
+1) Populate a new, specific dictionary to save the peer end's
+ version-specific data, so that the function can quickly decide
+ whether the peer end has any volume updates.
+2) If a volume's version differs, set a bit in status_arr instead of
+ saving a key in a dictionary, which makes the operation faster.
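As a rough, self-contained sketch of change (1) above: only keys whose tail matches one of a fixed set of suffixes are copied into the small version dictionary. The suffix list below matches the one the patch hard-codes (".ckusm" kept as-is, since that is the historical key name); everything else is simplified:

#include <stdio.h>
#include <string.h>

static int
key_has_suffix(const char *key, char **suffixes, int nsuffixes)
{
    size_t keylen = strlen(key);

    for (int i = 0; i < nsuffixes; i++) {
        size_t slen = strlen(suffixes[i]);
        /* compare only the tail of the key against the suffix */
        if (keylen > slen && strcmp(key + keylen - slen, suffixes[i]) == 0)
            return 1;
    }
    return 0;
}

int
main(void)
{
    char *suffixes[] = {".quota-cksum", ".ckusm", ".version",
                        ".quota-version", ".name"};
    const char *keys[] = {"volume12.version", "volume12.brick1.path",
                          "volume12.ckusm"};

    for (int i = 0; i < 3; i++)
        printf("%-22s -> %s\n", keys[i],
               key_has_suffix(keys[i], suffixes, 5) ? "version dict"
                                                    : "full dict only");
    return 0;
}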
+ +Note: To validate the changes followed below procedure +1) Setup 5100 distributed volumes 3x1 +2) Enable brick_mux +3) Start all the volumes +4) Kill all gluster processes on 3rd node +5) Run a loop to update volume option on a 1st node + for i in {1..5100}; do gluster v set vol$i performance.open-behind off; done +6) Start the glusterd process on the 3rd node +7) Wait to finish handshake and check there should not be any call_bail message + in the logs + +> Change-Id: Ibad7c23988539cc369ecc39dea2ea6985470bee1 +> Fixes: #1613 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 12545d91eed27ff9abb0505a12c7d4e75b45a53e) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1613) + +Change-Id: Ibad7c23988539cc369ecc39dea2ea6985470bee1 +BUG: 1898784 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221193 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/ctx.c | 4 + + libglusterfs/src/dict.c | 166 ++++++++++++++++++++++++++- + libglusterfs/src/globals.c | 2 - + libglusterfs/src/glusterfs/dict.h | 5 + + libglusterfs/src/glusterfs/globals.h | 2 + + libglusterfs/src/libglusterfs.sym | 1 + + xlators/mgmt/glusterd/src/glusterd-handler.c | 39 ++++--- + xlators/mgmt/glusterd/src/glusterd-sm.c | 6 +- + xlators/mgmt/glusterd/src/glusterd-sm.h | 1 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 148 ++++++++++++++---------- + xlators/mgmt/glusterd/src/glusterd-utils.h | 2 +- + xlators/mgmt/glusterd/src/glusterd.h | 8 +- + 12 files changed, 301 insertions(+), 83 deletions(-) + +diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c +index 4a001c2..ae1a77a 100644 +--- a/libglusterfs/src/ctx.c ++++ b/libglusterfs/src/ctx.c +@@ -14,6 +14,7 @@ + #include "glusterfs/glusterfs.h" + #include "timer-wheel.h" + ++glusterfs_ctx_t *global_ctx = NULL; + glusterfs_ctx_t * + glusterfs_ctx_new() + { +@@ -51,6 +52,9 @@ glusterfs_ctx_new() + GF_ATOMIC_INIT(ctx->stats.max_dict_pairs, 0); + GF_ATOMIC_INIT(ctx->stats.total_pairs_used, 0); + GF_ATOMIC_INIT(ctx->stats.total_dicts_used, 0); ++ ++ if (!global_ctx) ++ global_ctx = ctx; + out: + return ctx; + } +diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c +index d8cdda4..e5f619c 100644 +--- a/libglusterfs/src/dict.c ++++ b/libglusterfs/src/dict.c +@@ -56,7 +56,13 @@ struct dict_cmp { + static data_t * + get_new_data() + { +- data_t *data = mem_get(THIS->ctx->dict_data_pool); ++ data_t *data = NULL; ++ ++ if (global_ctx) { ++ data = mem_get(global_ctx->dict_data_pool); ++ } else { ++ data = mem_get(THIS->ctx->dict_data_pool); ++ } + + if (!data) + return NULL; +@@ -3503,3 +3509,161 @@ unlock: + UNLOCK(&dict->lock); + return 0; + } ++ ++/* Popluate specific dictionary on the basis of passed key array at the ++ time of unserialize buffer ++*/ ++int32_t ++dict_unserialize_specific_keys(char *orig_buf, int32_t size, dict_t **fill, ++ char **suffix_key_arr, dict_t **specific_dict, ++ int totkeycount) ++{ ++ char *buf = orig_buf; ++ int ret = -1; ++ int32_t count = 0; ++ int i = 0; ++ int j = 0; ++ ++ data_t *value = NULL; ++ char *key = NULL; ++ int32_t keylen = 0; ++ int32_t vallen = 0; ++ int32_t hostord = 0; ++ xlator_t *this = NULL; ++ int32_t keylenarr[totkeycount]; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (!buf) { ++ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, ++ "buf is null!"); ++ goto out; ++ } ++ ++ if (size == 0) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "size is 0!"); 
++ goto out; ++ } ++ ++ if (!fill) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "fill is null!"); ++ goto out; ++ } ++ ++ if (!*fill) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "*fill is null!"); ++ goto out; ++ } ++ ++ if ((buf + DICT_HDR_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer " ++ "passed. available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + DICT_HDR_LEN)); ++ goto out; ++ } ++ ++ memcpy(&hostord, buf, sizeof(hostord)); ++ count = ntoh32(hostord); ++ buf += DICT_HDR_LEN; ++ ++ if (count < 0) { ++ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO, ++ "count=%d", count, NULL); ++ goto out; ++ } ++ ++ /* Compute specific key length and save in array */ ++ for (i = 0; i < totkeycount; i++) { ++ keylenarr[i] = strlen(suffix_key_arr[i]); ++ } ++ ++ for (i = 0; i < count; i++) { ++ if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized " ++ "buffer passed. available (%lu) < " ++ "required (%lu)", ++ (long)(orig_buf + size), ++ (long)(buf + DICT_DATA_HDR_KEY_LEN)); ++ goto out; ++ } ++ memcpy(&hostord, buf, sizeof(hostord)); ++ keylen = ntoh32(hostord); ++ buf += DICT_DATA_HDR_KEY_LEN; ++ ++ if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized " ++ "buffer passed. available (%lu) < " ++ "required (%lu)", ++ (long)(orig_buf + size), ++ (long)(buf + DICT_DATA_HDR_VAL_LEN)); ++ goto out; ++ } ++ memcpy(&hostord, buf, sizeof(hostord)); ++ vallen = ntoh32(hostord); ++ buf += DICT_DATA_HDR_VAL_LEN; ++ ++ if ((keylen < 0) || (vallen < 0)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized length passed " ++ "key:%d val:%d", ++ keylen, vallen); ++ goto out; ++ } ++ if ((buf + keylen) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer passed. " ++ "available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + keylen)); ++ goto out; ++ } ++ key = buf; ++ buf += keylen + 1; /* for '\0' */ ++ ++ if ((buf + vallen) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer passed. " ++ "available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + vallen)); ++ goto out; ++ } ++ value = get_new_data(); ++ ++ if (!value) { ++ ret = -1; ++ goto out; ++ } ++ value->len = vallen; ++ value->data = gf_memdup(buf, vallen); ++ value->data_type = GF_DATA_TYPE_STR_OLD; ++ value->is_static = _gf_false; ++ buf += vallen; ++ ++ ret = dict_addn(*fill, key, keylen, value); ++ if (ret < 0) { ++ data_destroy(value); ++ goto out; ++ } ++ for (j = 0; j < totkeycount; j++) { ++ if (keylen > keylenarr[j]) { ++ if (!strcmp(key + keylen - keylenarr[j], suffix_key_arr[j])) { ++ ret = dict_addn(*specific_dict, key, keylen, value); ++ break; ++ } ++ } ++ } ++ ++ if (ret < 0) ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} +diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c +index e433ee8..30c15b6 100644 +--- a/libglusterfs/src/globals.c ++++ b/libglusterfs/src/globals.c +@@ -96,7 +96,6 @@ const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { + /* This global ctx is a bad hack to prevent some of the libgfapi crashes. 
+ * This should be removed once the patch on resource pool is accepted + */ +-glusterfs_ctx_t *global_ctx = NULL; + pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; + xlator_t global_xlator; + static int gf_global_mem_acct_enable = 1; +@@ -236,7 +235,6 @@ __glusterfs_this_location() + if (*this_location == NULL) { + thread_xlator = &global_xlator; + } +- + return this_location; + } + +diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h +index 8239c7a..6e469c7 100644 +--- a/libglusterfs/src/glusterfs/dict.h ++++ b/libglusterfs/src/glusterfs/dict.h +@@ -423,4 +423,9 @@ dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result); + + int + dict_serialized_length_lk(dict_t *this); ++ ++int32_t ++dict_unserialize_specific_keys(char *orig_buf, int32_t size, dict_t **fill, ++ char **specific_key_arr, dict_t **specific_dict, ++ int totkeycount); + #endif +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index cc145cd..33fb023 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -199,4 +199,6 @@ int + gf_global_mem_acct_enable_get(void); + int + gf_global_mem_acct_enable_set(int val); ++ ++extern glusterfs_ctx_t *global_ctx; + #endif /* !_GLOBALS_H */ +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index d060292..bc770e2 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -436,6 +436,7 @@ dict_clear_flag + dict_check_flag + dict_unref + dict_unserialize ++dict_unserialize_specific_keys + drop_token + eh_destroy + eh_dump +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index b8799ab..908361c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -86,6 +86,9 @@ glusterd_big_locked_handler(rpcsvc_request_t *req, rpcsvc_actor actor_fn) + return ret; + } + ++static char *specific_key_suffix[] = {".quota-cksum", ".ckusm", ".version", ++ ".quota-version", ".name"}; ++ + static int + glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + int port, gd1_mgmt_friend_req *friend_req) +@@ -97,6 +100,8 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + char rhost[UNIX_PATH_MAX + 1] = {0}; + uuid_t friend_uuid = {0}; + dict_t *dict = NULL; ++ dict_t *peer_ver = NULL; ++ int totcount = sizeof(specific_key_suffix) / sizeof(specific_key_suffix[0]); + + gf_uuid_parse(uuid_utoa(uuid), friend_uuid); + if (!port) +@@ -104,8 +109,19 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + + ret = glusterd_remote_hostname_get(req, rhost, sizeof(rhost)); + ++ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t); ++ dict = dict_new(); ++ peer_ver = dict_new(); ++ + RCU_READ_LOCK; + ++ if (!ctx || !dict || !peer_ver) { ++ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Unable to allocate memory"); ++ ret = -1; ++ goto out; ++ } ++ + peerinfo = glusterd_peerinfo_find(uuid, rhost); + + if (peerinfo == NULL) { +@@ -130,28 +146,14 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); + +- ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t); +- +- if (!ctx) { +- gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, +- "Unable to allocate memory"); 
+- ret = -1; +- goto out; +- } +- + gf_uuid_copy(ctx->uuid, uuid); + if (hostname) + ctx->hostname = gf_strdup(hostname); + ctx->req = req; + +- dict = dict_new(); +- if (!dict) { +- ret = -1; +- goto out; +- } +- +- ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len, +- &dict); ++ ret = dict_unserialize_specific_keys( ++ friend_req->vols.vols_val, friend_req->vols.vols_len, &dict, ++ specific_key_suffix, &peer_ver, totcount); + + if (ret) + goto out; +@@ -159,6 +161,7 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + dict->extra_stdfree = friend_req->vols.vols_val; + + ctx->vols = dict; ++ ctx->peer_ver = peer_ver; + event->ctx = ctx; + + ret = glusterd_friend_sm_inject_event(event); +@@ -188,6 +191,8 @@ out: + } else { + free(friend_req->vols.vols_val); + } ++ if (peer_ver) ++ dict_unref(peer_ver); + if (event) + GF_FREE(event->peername); + GF_FREE(event); +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 044da3d..d10a792 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -106,6 +106,8 @@ glusterd_destroy_friend_req_ctx(glusterd_friend_req_ctx_t *ctx) + + if (ctx->vols) + dict_unref(ctx->vols); ++ if (ctx->peer_ver) ++ dict_unref(ctx->peer_ver); + GF_FREE(ctx->hostname); + GF_FREE(ctx); + } +@@ -936,8 +938,8 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx) + // Build comparison logic here. + pthread_mutex_lock(&conf->import_volumes); + { +- ret = glusterd_compare_friend_data(ev_ctx->vols, &status, +- event->peername); ++ ret = glusterd_compare_friend_data(ev_ctx->vols, ev_ctx->peer_ver, ++ &status, event->peername); + if (ret) { + pthread_mutex_unlock(&conf->import_volumes); + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h +index ce008ac..efdf68e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.h ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.h +@@ -174,6 +174,7 @@ typedef struct glusterd_friend_req_ctx_ { + rpcsvc_request_t *req; + int port; + dict_t *vols; ++ dict_t *peer_ver; // Dictionary to save peer ver data + } glusterd_friend_req_ctx_t; + + typedef struct glusterd_friend_update_ctx_ { +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index f7030fb..cf32bd9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3709,12 +3709,14 @@ out: + return ret; + } + +-int32_t +-glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, +- int32_t *status, char *hostname) ++static int32_t ++glusterd_compare_friend_volume(dict_t *peer_data, ++ glusterd_friend_synctask_args_t *arg, ++ int32_t count, int32_t *status, char *hostname) + { + int32_t ret = -1; + char key[64] = ""; ++ char key_prefix[32]; + int keylen; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; +@@ -3726,15 +3728,20 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + xlator_t *this = NULL; + + GF_ASSERT(peer_data); ++ GF_ASSERT(arg); + GF_ASSERT(status); + + this = THIS; + GF_ASSERT(this); + +- keylen = snprintf(key, sizeof(key), "volume%d.name", count); +- ret = dict_get_strn(peer_data, key, keylen, &volname); +- if (ret) ++ snprintf(key_prefix, sizeof(key_prefix), "volume%d", count); ++ keylen = snprintf(key, sizeof(key), "%s.name", key_prefix); ++ ret = dict_get_strn(arg->peer_ver_data, key, keylen, &volname); ++ if (ret) { 
++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { +@@ -3750,10 +3757,13 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + goto out; + } + +- keylen = snprintf(key, sizeof(key), "volume%d.version", count); +- ret = dict_get_int32n(peer_data, key, keylen, &version); +- if (ret) ++ keylen = snprintf(key, sizeof(key), "%s.version", key_prefix); ++ ret = dict_get_int32n(arg->peer_ver_data, key, keylen, &version); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + if (version > volinfo->version) { + // Mismatch detected +@@ -3772,10 +3782,13 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + + // Now, versions are same, compare cksums. + // +- snprintf(key, sizeof(key), "volume%d.ckusm", count); +- ret = dict_get_uint32(peer_data, key, &cksum); +- if (ret) ++ snprintf(key, sizeof(key), "%s.ckusm", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, &cksum); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + if (cksum != volinfo->cksum) { + ret = 0; +@@ -3790,8 +3803,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + goto skip_quota; + +- snprintf(key, sizeof(key), "volume%d.quota-version", count); +- ret = dict_get_uint32(peer_data, key, "a_version); ++ snprintf(key, sizeof(key), "%s.quota-version", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, "a_version); + if (ret) { + gf_msg_debug(this->name, 0, + "quota-version key absent for" +@@ -3809,6 +3822,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + "%d on peer %s", + volinfo->volname, volinfo->quota_conf_version, quota_version, + hostname); ++ GF_ATOMIC_INIT(volinfo->volpeerupdate, 1); + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + goto out; + } else if (quota_version < volinfo->quota_conf_version) { +@@ -3819,8 +3833,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + + // Now, versions are same, compare cksums. 
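To make the ordering of the checks above easier to follow, a standalone sketch of the per-volume decision; the status names are shortened, and the lower-version branch is an assumption based on the surrounding code:

#include <stdint.h>
#include <stdio.h>

enum vol_comp {
    COMP_IN_SYNC,     /* nothing to do */
    COMP_UPDATE_REQ,  /* peer copy is newer: import it locally */
    COMP_UPDATE_PEER, /* local copy is newer: peer must update */
    COMP_REJECT       /* same version, different contents */
};

static enum vol_comp
compare_volume(uint32_t my_ver, uint32_t my_cksum, uint32_t peer_ver,
               uint32_t peer_cksum)
{
    if (peer_ver > my_ver)
        return COMP_UPDATE_REQ;
    if (peer_ver < my_ver)
        return COMP_UPDATE_PEER;
    if (peer_cksum != my_cksum)
        return COMP_REJECT; /* versions agree but checksums do not */
    return COMP_IN_SYNC;
}

int
main(void)
{
    printf("%d\n", compare_volume(5, 0xabc, 6, 0xdef)); /* 1: update req */
    printf("%d\n", compare_volume(5, 0xabc, 5, 0xdef)); /* 3: reject */
    return 0;
}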
+ // +- snprintf(key, sizeof(key), "volume%d.quota-cksum", count); +- ret = dict_get_uint32(peer_data, key, "a_cksum); ++ snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, "a_cksum); + if (ret) { + gf_msg_debug(this->name, 0, + "quota checksum absent for " +@@ -3846,13 +3860,12 @@ skip_quota: + *status = GLUSTERD_VOL_COMP_SCS; + + out: +- keylen = snprintf(key, sizeof(key), "volume%d.update", count); +- + if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) { +- ret = dict_set_int32n(peer_data, key, keylen, 1); +- } else { +- ret = dict_set_int32n(peer_data, key, keylen, 0); ++ /*Set the status to ensure volume is updated on the peer ++ */ ++ arg->status_arr[(count / 64)] ^= 1UL << (count % 64); + } ++ + if (*status == GLUSTERD_VOL_COMP_RJT) { + gf_event(EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s", + volinfo->volname); +@@ -4935,8 +4948,9 @@ out: + return ret; + } + +-int32_t +-glusterd_import_friend_volume(dict_t *peer_data, int count) ++static int32_t ++glusterd_import_friend_volume(dict_t *peer_data, int count, ++ glusterd_friend_synctask_args_t *arg) + { + int32_t ret = -1; + glusterd_conf_t *priv = NULL; +@@ -4954,10 +4968,27 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + priv = this->private; + GF_ASSERT(priv); + +- ret = snprintf(key, sizeof(key), "volume%d.update", count); +- ret = dict_get_int32n(peer_data, key, ret, &update); +- if (ret || !update) { ++ if (arg) { ++ /*Check if the volume options are updated on the other peers ++ */ ++ update = (1UL & (arg->status_arr[(count / 64)] >> (count % 64))); ++ } else { ++ ret = snprintf(key, sizeof(key), "volume%d.update", count); ++ ret = dict_get_int32n(peer_data, key, ret, &update); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s", key, NULL); ++ goto out; ++ } ++ } ++ ++ if (!update) { + /* if update is 0 that means the volume is not imported */ ++ gf_log(this->name, GF_LOG_DEBUG, ++ "The volume%d does" ++ " not have any peer change", ++ count); ++ ret = 0; + goto out; + } + +@@ -5045,6 +5076,8 @@ glusterd_import_friend_volumes_synctask(void *opaque) + glusterd_conf_t *conf = NULL; + dict_t *peer_data = NULL; + glusterd_friend_synctask_args_t *arg = NULL; ++ uint64_t bm = 0; ++ uint64_t mask = 0; + + this = THIS; + GF_ASSERT(this); +@@ -5056,17 +5089,7 @@ glusterd_import_friend_volumes_synctask(void *opaque) + if (!arg) + goto out; + +- peer_data = dict_new(); +- if (!peer_data) { +- goto out; +- } +- +- ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data); +- if (ret) { +- errno = ENOMEM; +- goto out; +- } +- ++ peer_data = arg->peer_data; + ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); + if (ret) + goto out; +@@ -5083,11 +5106,18 @@ glusterd_import_friend_volumes_synctask(void *opaque) + conf->restart_bricks = _gf_true; + + while (i <= count) { +- ret = glusterd_import_friend_volume(peer_data, i); +- if (ret) { +- break; ++ bm = arg->status_arr[i / 64]; ++ while (bm != 0) { ++ /* mask will contain the lowest bit set from bm. 
*/ ++ mask = bm & (-bm); ++ bm ^= mask; ++ ret = glusterd_import_friend_volume(peer_data, i + ffsll(mask) - 2, ++ arg); ++ if (ret < 0) { ++ break; ++ } + } +- i++; ++ i += 64; + } + if (i > count) { + glusterd_svcs_manager(NULL); +@@ -5095,11 +5125,9 @@ glusterd_import_friend_volumes_synctask(void *opaque) + conf->restart_bricks = _gf_false; + synccond_broadcast(&conf->cond_restart_bricks); + out: +- if (peer_data) +- dict_unref(peer_data); + if (arg) { +- if (arg->dict_buf) +- GF_FREE(arg->dict_buf); ++ dict_unref(arg->peer_data); ++ dict_unref(arg->peer_ver_data); + GF_FREE(arg); + } + +@@ -5121,7 +5149,7 @@ glusterd_import_friend_volumes(dict_t *peer_data) + goto out; + + while (i <= count) { +- ret = glusterd_import_friend_volume(peer_data, i); ++ ret = glusterd_import_friend_volume(peer_data, i, NULL); + if (ret) + goto out; + i++; +@@ -5260,7 +5288,8 @@ out: + } + + int32_t +-glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) ++glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status, ++ char *hostname) + { + int32_t ret = -1; + int32_t count = 0; +@@ -5289,8 +5318,19 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + if (ret) + goto out; + ++ arg = GF_CALLOC(1, sizeof(*arg) + sizeof(uint64_t) * (count / 64), ++ gf_common_mt_char); ++ if (!arg) { ++ ret = -1; ++ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Out Of Memory"); ++ goto out; ++ } ++ arg->peer_data = dict_ref(peer_data); ++ arg->peer_ver_data = dict_ref(cmp); + while (i <= count) { +- ret = glusterd_compare_friend_volume(peer_data, i, status, hostname); ++ ret = glusterd_compare_friend_volume(peer_data, arg, i, status, ++ hostname); + if (ret) + goto out; + +@@ -5310,21 +5350,13 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + * first brick to come up before attaching the subsequent bricks + * in case brick multiplexing is enabled + */ +- arg = GF_CALLOC(1, sizeof(*arg), gf_common_mt_char); +- ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf, +- &arg->dictlen); +- if (ret < 0) { +- gf_log(this->name, GF_LOG_ERROR, +- "dict_serialize failed while handling " +- " import friend volume request"); +- goto out; +- } +- + glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg); + } + + out: + if (ret && arg) { ++ dict_unref(arg->peer_data); ++ dict_unref(arg->peer_ver_data); + GF_FREE(arg); + } + gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 5f5de82..02d85d2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -231,7 +231,7 @@ glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, + u_int *length); + + int32_t +-glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, ++glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status, + char *hostname); + + int +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index f739b5d..efe4d0e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -234,8 +234,12 @@ typedef struct glusterd_add_dict_args { + } glusterd_add_dict_args_t; + + typedef struct glusterd_friend_synctask_args { +- char *dict_buf; +- u_int dictlen; ++ dict_t *peer_data; ++ dict_t *peer_ver_data; // Dictionary to save peer version data ++ /* This 
status_arr[1] is not a real size, real size of the array ++ is dynamically allocated ++ */ ++ uint64_t status_arr[1]; + } glusterd_friend_synctask_args_t; + + typedef enum gf_brick_status { +-- +1.8.3.1 + diff --git a/SOURCES/0511-features-shard-Missing-format-specifier.patch b/SOURCES/0511-features-shard-Missing-format-specifier.patch new file mode 100644 index 0000000..baf6cf4 --- /dev/null +++ b/SOURCES/0511-features-shard-Missing-format-specifier.patch @@ -0,0 +1,39 @@ +From 868d346cc35c222d19b95bd9c367674c9ea859df Mon Sep 17 00:00:00 2001 +From: Vinayakswami Hariharmath +Date: Tue, 15 Dec 2020 16:23:49 +0530 +Subject: [PATCH 511/511] features/shard: Missing format specifier + +PRIu64 format specifier explicitly needs (percent sign) as +prefix and that was missing as part of the below commit on +downstream + +https://code.engineering.redhat.com/gerrit/#/c/221061/ + +BUG: 1752739 +Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 + +Signed-off-by: Vinayakswami Hariharmath +Change-Id: I4598893e3fcca3a2b3e6e8ef9b64b3e5e98923e6 +Reviewed-on: https://code.engineering.redhat.com/gerrit/221217 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + xlators/features/shard/src/shard.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index a967f35..099b062 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1855,7 +1855,7 @@ int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, + */ + if (!inode) { + gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent in backend: " PRIu64 ++ "Last shard to be truncated absent in backend:%" PRIu64 + " of gfid: %s. Directly proceeding to update file size", + local->first_block, uuid_utoa(local->loc.inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, +-- +1.8.3.1 + diff --git a/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch b/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch new file mode 100644 index 0000000..37de503 --- /dev/null +++ b/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch @@ -0,0 +1,105 @@ +From c963653a89c3f6466af9a3e8f19246a7907f7f8c Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Thu, 30 Jul 2020 13:04:52 +0530 +Subject: [PATCH 512/517] glusterd: shared storage mount fails in ipv6 + environment + +Issue: +In case of ipv6 environment, the mounting of glusterd_shared_storage +volume fails as it doesn't recognises the ipv6 enviornment. + +Fix: +In case of ipv6 environment, the address-family is passed +to the hooks script on creating shared-storage, then depending +upon the address-family --xlator-option=transport.address-family=inet6 +option is added to the mount command, and the mounting succeeds. 
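A self-contained sketch of how patch 0510's status_arr bitmap (the flexible-array-style member declared above) can be populated and drained with the lowest-set-bit trick; the patch's own index arithmetic differs slightly because its volume counts are 1-based:

#define _GNU_SOURCE /* ffsll */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

int
main(void)
{
    uint64_t status_arr[2];
    int changed[] = {3, 64, 70}; /* volumes flagged as updated on the peer */

    memset(status_arr, 0, sizeof(status_arr));
    for (int i = 0; i < 3; i++)
        status_arr[changed[i] / 64] |= 1ULL << (changed[i] % 64);

    for (int word = 0; word < 2; word++) {
        uint64_t bm = status_arr[word];
        while (bm != 0) {
            uint64_t mask = bm & (-bm); /* isolate the lowest set bit */
            bm ^= mask;                 /* clear it from the bitmap */
            /* ffsll() is 1-based, hence the -1 for the bit index */
            int vol = word * 64 + ffsll((long long)mask) - 1;
            printf("import volume %d\n", vol);
        }
    }
    return 0;
}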
+ +>Fixes: #1406 +> +>Change-Id: Ib1888c34d85e6c01618b0ba214cbe1f57576908d +>Signed-off-by: nik-redhat + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24797 +BUG: 1856574 + +Change-Id: Ib1888c34d85e6c01618b0ba214cbe1f57576908d +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/221844 +Tested-by: RHGS Build Bot +Reviewed-by: Srijan Sivakumar +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../set/post/S32gluster_enable_shared_storage.sh | 11 +++++++++-- + xlators/mgmt/glusterd/src/glusterd-hooks.c | 19 +++++++++++++++++++ + 2 files changed, 28 insertions(+), 2 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +index 3bae37c..9597503 100755 +--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh ++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +@@ -104,8 +104,15 @@ function check_volume_status() + echo $status + } + +-mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ +- /run/gluster/shared_storage" ++key=`echo $5 | cut -d '=' -f 1` ++val=`echo $5 | cut -d '=' -f 2` ++if [ "$key" == "transport.address-family" ]; then ++ mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \ ++ $local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage" ++else ++ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ ++ /var/run/gluster/shared_storage" ++fi + + if [ "$option" == "enable" ]; then + retry=0; +diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c +index 216cdf7..4f0d775 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c ++++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c +@@ -200,11 +200,16 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) + int i = 0; + int count = 0; + int ret = -1; ++ int flag = 0; + char query[1024] = { + 0, + }; + char *key = NULL; + char *value = NULL; ++ char *inet_family = NULL; ++ xlator_t *this = NULL; ++ this = THIS; ++ GF_ASSERT(this); + + ret = dict_get_int32(dict, "count", &count); + if (ret) +@@ -228,9 +233,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) + continue; + + runner_argprintf(runner, "%s=%s", key, value); ++ if ((strncmp(key, "cluster.enable-shared-storage", ++ SLEN("cluster.enable-shared-storage")) == 0 || ++ strncmp(key, "enable-shared-storage", ++ SLEN("enable-shared-storage")) == 0) && ++ strncmp(value, "enable", SLEN("enable")) == 0) ++ flag = 1; + } + + glusterd_hooks_add_custom_args(dict, runner); ++ if (flag == 1) { ++ ret = dict_get_str_sizen(this->options, "transport.address-family", ++ &inet_family); ++ if (!ret) { ++ runner_argprintf(runner, "transport.address-family=%s", ++ inet_family); ++ } ++ } + + ret = 0; + out: +-- +1.8.3.1 + diff --git a/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch b/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch new file mode 100644 index 0000000..ebd5609 --- /dev/null +++ b/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch @@ -0,0 +1,191 @@ +From 708c17a8a69b2657f384affaedfcf4ba0a123893 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 23 Dec 2020 14:45:07 +0530 +Subject: [PATCH 513/517] afr: mark pending xattrs as a part of metadata heal + +...if pending xattrs are zero for all children. 
+ +Problem: +If there are no pending xattrs and a metadata heal needs to be +performed, it can be possible that we end up with xattrs inadvertendly +deleted from all bricks, as explained in the BZ. + +Fix: +After picking one among the sources as the good copy, mark pending xattrs on +all sources to blame the sinks. Now even if this metadata heal fails midway, +a subsequent heal will still choose one of the valid sources that it +picked previously. + +Upstream patch details: +> Fixes: #1067 +> Change-Id: If1b050b70b0ad911e162c04db4d89b263e2b8d7b +> Signed-off-by: Ravishankar N +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/21922/ + +BUG: 1640148 +Change-Id: If1b050b70b0ad911e162c04db4d89b263e2b8d7b +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/222073 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + tests/bugs/replicate/mdata-heal-no-xattrs.t | 59 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-metadata.c | 62 +++++++++++++++++++++++- + 2 files changed, 120 insertions(+), 1 deletion(-) + create mode 100644 tests/bugs/replicate/mdata-heal-no-xattrs.t + +diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t +new file mode 100644 +index 0000000..d3b0c50 +--- /dev/null ++++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t +@@ -0,0 +1,59 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.self-heal-daemon off ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++echo "Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++ ++# Change permission on brick-0: simulates the case where there is metadata ++# mismatch but no pending xattrs. This brick will become the source for heal. ++TEST chmod +x $B0/$V0"0"/FILE ++ ++# Add gfid to xattrop ++xattrop_b0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_b0` ++gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE)) ++TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE ++EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0 ++ ++TEST $CLI volume set $V0 cluster.self-heal-daemon on ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Brick-0 should contain xattrs blaming other 2 bricks. ++# The values will be zero because heal is over. ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/FILE ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}0/FILE ++ ++# Brick-1 and Brick-2 must not contain any afr xattrs. ++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}1/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}1/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}1/FILE ++TEST ! 
getfattr -n trusted.afr.$V0-client-0 $B0/${V0}2/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}2/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE ++ ++# check permission bits. ++EXPECT '755' stat -c %a $B0/${V0}0/FILE ++EXPECT '755' stat -c %a $B0/${V0}1/FILE ++EXPECT '755' stat -c %a $B0/${V0}2/FILE ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index f4e31b6..03f43ba 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -190,6 +190,59 @@ out: + return ret; + } + ++static int ++__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this, ++ inode_t *inode, ++ struct afr_reply *replies, ++ unsigned char *sources) ++{ ++ int ret = 0; ++ int i = 0; ++ int m_idx = 0; ++ afr_private_t *priv = NULL; ++ int raw[AFR_NUM_CHANGE_LOGS] = {0}; ++ dict_t *xattr = NULL; ++ ++ priv = this->private; ++ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION); ++ raw[m_idx] = 1; ++ ++ xattr = dict_new(); ++ if (!xattr) ++ return -ENOMEM; ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (sources[i]) ++ continue; ++ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw, ++ sizeof(int) * AFR_NUM_CHANGE_LOGS); ++ if (ret) { ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (!sources[i]) ++ continue; ++ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO, ++ "Failed to set pending metadata xattr on child %d for %s", i, ++ uuid_utoa(inode->gfid)); ++ goto out; ++ } ++ } ++ ++ afr_replies_wipe(replies, priv->child_count); ++ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies); ++ ++out: ++ if (xattr) ++ dict_unref(xattr); ++ return ret; ++} ++ + /* + * Look for mismatching uid/gid or mode or user xattrs even if + * AFR xattrs don't say so, and pick one arbitrarily as winner. */ +@@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + }; + int source = -1; + int sources_count = 0; ++ int ret = 0; + + priv = this->private; + +@@ -300,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + healed_sinks[i] = 1; + } + } +- ++ if ((sources_count == priv->child_count) && (source > -1) && ++ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) { ++ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode, ++ replies, sources); ++ if (ret < 0) ++ return ret; ++ } + out: + afr_mark_active_sinks(this, sources, locked_on, healed_sinks); + return source; +-- +1.8.3.1 + diff --git a/SOURCES/0514-afr-event-gen-changes.patch b/SOURCES/0514-afr-event-gen-changes.patch new file mode 100644 index 0000000..9f9562e --- /dev/null +++ b/SOURCES/0514-afr-event-gen-changes.patch @@ -0,0 +1,308 @@ +From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 23 Dec 2020 14:57:51 +0530 +Subject: [PATCH 514/517] afr: event gen changes + +The general idea of the changes is to prevent resetting event generation +to zero in the inode ctx, since event gen is something that should +follow 'causal order'. + +Change #1: +For a read txn, in inode refresh cbk, if event_generation is +found zero, we are failing the read fop. 
This is not needed +because change in event gen is only a marker for the next inode refresh to +happen and should not be taken into account by the current read txn. + +Change #2: +The event gen being zero above can happen if there is a racing lookup, +which resets even get (in afr_lookup_done) if there are non zero afr +xattrs. The resetting is done only to trigger an inode refresh and a +possible client side heal on the next lookup. That can be acheived by +setting the need_refresh flag in the inode ctx. So replaced all +occurences of resetting even gen to zero with a call to +afr_inode_need_refresh_set(). + +Change #3: +In both lookup and discover path, we are doing an inode refresh which is +not required since all 3 essentially do the same thing- update the inode +ctx with the good/bad copies from the brick replies. Inode refresh also +triggers background heals, but I think it is okay to do it when we call +refresh during the read and write txns and not in the lookup path. + +The .ts which relied on inode refresh in lookup path to trigger heals are +now changed to do read txn so that inode refresh and the heal happens. + +Upstream patch details: +> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec +> Fixes: #1179 +> Signed-off-by: Ravishankar N +> Reported-by: Erik Jacobson +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/ + +BUG: 1640148 +Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/222074 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + ...fid-mismatch-resolution-with-fav-child-policy.t | 8 +- + xlators/cluster/afr/src/afr-common.c | 92 +++++----------------- + xlators/cluster/afr/src/afr-dir-write.c | 6 +- + xlators/cluster/afr/src/afr.h | 5 +- + 4 files changed, 29 insertions(+), 82 deletions(-) + +diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +index f4aa351..12af0c8 100644 +--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t ++++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ] + #We know that second brick has the bigger size file + BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1) + +-TEST ls $M0/f3 +-TEST cat $M0/f3 ++TEST ls $M0 #Trigger entry heal via readdir inode refresh ++TEST cat $M0/f3 #Trigger data heal via readv inode refresh + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + #gfid split-brain should be resolved +@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +-TEST ls $M0/f4 +-TEST cat $M0/f4 ++TEST ls $M0 #Trigger entry heal via readdir inode refresh ++TEST cat $M0/f4 #Trigger data heal via readv inode refresh + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + #gfid split-brain should be resolved +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index fca2cd5..90b4f14 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, + metadatamap |= (1 << index); + } + if (metadatamap_old != metadatamap) { +- event = 0; ++ __afr_inode_need_refresh_set(inode, this); + } + break; + +@@ -297,7 +297,7 @@ 
__afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, + datamap |= (1 << index); + } + if (datamap_old != datamap) +- event = 0; ++ __afr_inode_need_refresh_set(inode, this); + break; + + default: +@@ -461,34 +461,6 @@ out: + } + + int +-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- uint16_t datamap = 0; +- uint16_t metadatamap = 0; +- uint32_t event = 0; +- uint64_t val = 0; +- afr_inode_ctx_t *ctx = NULL; +- +- ret = __afr_inode_ctx_get(this, inode, &ctx); +- if (ret) +- return ret; +- +- val = ctx->read_subvol; +- +- metadatamap = (val & 0x000000000000ffff) >> 0; +- datamap = (val & 0x00000000ffff0000) >> 16; +- event = 0; +- +- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | +- (((uint64_t)event) << 32); +- +- ctx->read_subvol = val; +- +- return ret; +-} +- +-int + __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int *event_p) + { +@@ -559,22 +531,6 @@ out: + } + + int +-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) +-{ +- afr_private_t *priv = NULL; +- int ret = -1; +- +- priv = this->private; +- +- if (priv->child_count <= 16) +- ret = __afr_inode_event_gen_reset_small(inode, this); +- else +- ret = -1; +- +- return ret; +-} +- +-int + afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int *event_p) + { +@@ -723,30 +679,22 @@ out: + return need_refresh; + } + +-static int +-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) ++int ++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) + { + int ret = -1; + afr_inode_ctx_t *ctx = NULL; + +- GF_VALIDATE_OR_GOTO(this->name, inode, out); +- +- LOCK(&inode->lock); +- { +- ret = __afr_inode_ctx_get(this, inode, &ctx); +- if (ret) +- goto unlock; +- ++ ret = __afr_inode_ctx_get(this, inode, &ctx); ++ if (ret == 0) { + ctx->need_refresh = _gf_true; + } +-unlock: +- UNLOCK(&inode->lock); +-out: ++ + return ret; + } + + int +-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) ++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) + { + int ret = -1; + +@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) + + LOCK(&inode->lock); + { +- ret = __afr_inode_event_gen_reset(inode, this); ++ ret = __afr_inode_need_refresh_set(inode, this); + } + UNLOCK(&inode->lock); + out: +@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) + ret = afr_inode_get_readable(frame, inode, this, local->readable, + &event_generation, local->transaction.type); + +- if (ret == -EIO || (local->is_read_txn && !event_generation)) { ++ if (ret == -EIO) { + /* No readable subvolume even after refresh ==> splitbrain.*/ + if (!priv->fav_child_policy) { + err = EIO; +@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + if (read_subvol == -1) + goto cant_interpret; + if (ret) { +- afr_inode_event_gen_reset(local->inode, this); ++ afr_inode_need_refresh_set(local->inode, this); + dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY); + } + } else { +@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int read_subvol = -1; ++ int ret = 0; + unsigned char *data_readable = NULL; + unsigned char *success_replies = NULL; + +@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) + if (!afr_has_quorum(success_replies, this, frame)) + goto unwind; + +- 
afr_replies_interpret(frame, this, local->inode, NULL); ++ ret = afr_replies_interpret(frame, this, local->inode, NULL); ++ if (ret) { ++ afr_inode_need_refresh_set(local->inode, this); ++ } + + read_subvol = afr_read_subvol_decide(local->inode, this, NULL, + data_readable); +@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + afr_read_subvol_get(loc->inode, this, NULL, NULL, &event, + AFR_DATA_TRANSACTION, NULL); + +- if (afr_is_inode_refresh_reqd(loc->inode, this, event, +- local->event_generation)) +- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do); +- else +- afr_discover_do(frame, this, 0); ++ afr_discover_do(frame, this, 0); + + return 0; + out: +@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + afr_read_subvol_get(loc->parent, this, NULL, NULL, &event, + AFR_DATA_TRANSACTION, NULL); + +- if (afr_is_inode_refresh_reqd(loc->inode, this, event, +- local->event_generation)) +- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do); +- else +- afr_lookup_do(frame, this, 0); ++ afr_lookup_do(frame, this, 0); + + return 0; + out: +diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c +index 416c19d..d419bfc 100644 +--- a/xlators/cluster/afr/src/afr-dir-write.c ++++ b/xlators/cluster/afr/src/afr-dir-write.c +@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this) + continue; + if (local->replies[i].op_ret < 0) { + if (local->inode) +- afr_inode_event_gen_reset(local->inode, this); ++ afr_inode_need_refresh_set(local->inode, this); + if (local->parent) +- afr_inode_event_gen_reset(local->parent, this); ++ afr_inode_need_refresh_set(local->parent, this); + if (local->parent2) +- afr_inode_event_gen_reset(local->parent2, this); ++ afr_inode_need_refresh_set(local->parent2, this); + continue; + } + +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index ed5096e..3a2b26d 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, + int event_generation); + + int +-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this); ++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); ++ ++int ++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); + + int + afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this, +-- +1.8.3.1 + diff --git a/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch b/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch new file mode 100644 index 0000000..9c7693a --- /dev/null +++ b/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch @@ -0,0 +1,2155 @@ +From aab8a587360214432c4a2ab59134411f1d38c509 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 9 Dec 2020 10:46:31 +0530 +Subject: [PATCH 515/517] cluster/afr: Heal directory rename without + rmdir/mkdir + +Problem1: +When a directory is renamed while a brick +is down entry-heal always did an rm -rf on that directory on +the sink on old location and did mkdir and created the directory +hierarchy again in the new location. This is inefficient. + +Problem2: +Renamedir heal order may lead to a scenario where directory in +the new location could be created before deleting it from old +location leading to 2 directories with same gfid in posix. 
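A minimal sketch of the "park it by gfid" idea that the Fix below describes; the hidden directory name follows the test scripts in this patch, while the exact layout is a simplified assumption (requires libuuid, link with -luuid):

#include <stdio.h>
#include <uuid/uuid.h>

/* the real directory name carries a per-volume suffix */
#define ANON_DIR ".glusterfs-anonymous-inode"

static void
anon_path(const char *brick, const uuid_t gfid, char *buf, size_t len)
{
    char gfid_str[37]; /* 36 characters + NUL */

    uuid_unparse(gfid, gfid_str);
    snprintf(buf, len, "%s/%s/%s", brick, ANON_DIR, gfid_str);
}

int
main(void)
{
    uuid_t gfid;
    char path[4096];

    uuid_generate(gfid);
    anon_path("/bricks/brick0", gfid, path, sizeof(path));

    /* entry heal of the old name renames the directory here on the sink;
     * entry heal of the new name later renames it back into place,
     * instead of doing rmdir + mkdir of the whole tree */
    printf("parking spot: %s\n", path);
    return 0;
}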
+ +Fix: +As part of heal, if oldlocation is healed first and is not present in +source-brick always rename it into a hidden directory inside the +sink-brick so that when heal is triggered in new-location shd can +rename it from this hidden directory to the new-location. + +If new-location heal is triggered first and it detects that the +directory already exists in the brick, then it should skip healing the +directory until it appears in the hidden directory. + +Credits: Ravi for rename-data-loss.t script + +Upstream patch details: +> Fixes: #1211 +> Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843 +> Signed-off-by: Pranith Kumar K +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24373/ + +BUG: 1640148 +Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/220660 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + tests/afr.rc | 16 + + tests/basic/afr/afr-anon-inode-no-quorum.t | 63 ++++ + tests/basic/afr/afr-anon-inode.t | 114 ++++++ + tests/basic/afr/entry-self-heal-anon-dir-off.t | 464 ++++++++++++++++++++++++ + tests/basic/afr/rename-data-loss.t | 72 ++++ + tests/bugs/replicate/bug-1744548-heal-timeout.t | 6 +- + tests/features/trash.t | 74 ++-- + xlators/cluster/afr/src/afr-common.c | 46 ++- + xlators/cluster/afr/src/afr-dir-read.c | 12 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 182 ++++++++++ + xlators/cluster/afr/src/afr-self-heal-entry.c | 206 +++++++++-- + xlators/cluster/afr/src/afr-self-heal-name.c | 33 +- + xlators/cluster/afr/src/afr-self-heal.h | 5 + + xlators/cluster/afr/src/afr-self-heald.c | 178 ++++++++- + xlators/cluster/afr/src/afr-self-heald.h | 2 +- + xlators/cluster/afr/src/afr.c | 40 +- + xlators/cluster/afr/src/afr.h | 11 + + xlators/mgmt/glusterd/src/glusterd-volgen.c | 39 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 + + 19 files changed, 1442 insertions(+), 127 deletions(-) + create mode 100644 tests/basic/afr/afr-anon-inode-no-quorum.t + create mode 100644 tests/basic/afr/afr-anon-inode.t + create mode 100644 tests/basic/afr/entry-self-heal-anon-dir-off.t + create mode 100644 tests/basic/afr/rename-data-loss.t + +diff --git a/tests/afr.rc b/tests/afr.rc +index 35f352d..2417899 100644 +--- a/tests/afr.rc ++++ b/tests/afr.rc +@@ -105,3 +105,19 @@ function get_quorum_type() + local repl_id="$3" + cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}' + } ++ ++function afr_private_key_value() ++{ ++ local v=$1 ++ local m=$2 ++ local replica_id=$3 ++ local key=$4 ++#xargs at the end will strip leading spaces ++ grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs ++} ++ ++function afr_anon_entry_count() ++{ ++ local b=$1 ++ ls $b/.glusterfs-anonymous-inode* | wc -l ++} +diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t +new file mode 100644 +index 0000000..896ba0c +--- /dev/null ++++ b/tests/basic/afr/afr-anon-inode-no-quorum.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++#Test that anon-inode entry is not cleaned up as long as there exists at least ++#one valid entry ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.readdir-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST touch $M0/a $M0/b ++ ++gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a)) ++gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b)) ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST mv $M0/a $M0/a-new ++TEST mv $M0/b $M0/b-new ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++TEST ! ls $M0/a ++TEST ! ls $M0/b ++anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b ++#Make sure index heal doesn't happen after enabling heal ++TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 ++TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/* ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++#Allow time for a scan ++sleep 5 ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b ++inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b) ++TEST rm -f $M0/a-new ++TEST stat $M0/b-new ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 ++EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new ++ ++cleanup +diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t +new file mode 100644 +index 0000000..f4cf37a +--- /dev/null ++++ b/tests/basic/afr/afr-anon-inode.t +@@ -0,0 +1,114 @@ ++#!/bin/bash ++#Tests that afr-anon-inode test cases work fine as expected ++#These are cases where in entry-heal/name-heal we dont know entry for an inode ++#so these inodes are kept in a special directory ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST $CLI volume set $V0 cluster.use-anonymous-inode no ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST $CLI volume set $V0 cluster.use-anonymous-inode yes ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST mkdir -p $M0/d1/b $M0/d2/a ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST mv $M0/d2/a $M0/d1 ++TEST mv $M0/d1/b $M0/d2 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) ++TEST [[ -d $B0/${V0}1/$anon_inode_name ]] ++TEST [[ -d $B0/${V0}2/$anon_inode_name ]] ++anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name) ++EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name ++EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name ++ ++TEST ! ls $M0/$anon_inode_name ++EXPECT "^4$" echo $(ls -a $M0 | wc -l) ++ ++#Test purging code path by shd ++TEST $CLI volume heal $V0 disable ++TEST mkdir $M0/l0 $M0/l1 $M0/l2 ++TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file ++TEST ln $M0/del-file $M0/del-file-link ++TEST ln $M0/l0/file $M0/l1/file-link1 ++TEST ln $M0/l0/file $M0/l2/file-link2 ++TEST mkdir -p $M0/del-recursive-dir/d1 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST rm -f $M0/del-file $M0/del-file-nolink ++TEST rm -rf $M0/del-recursive-dir ++TEST mv $M0/d1/a $M0/d2 ++TEST mv $M0/l0/file $M0/l0/renamed-file ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0 ++ ++nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink)) ++link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file)) ++dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir)) ++rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a)) ++rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file)) ++TEST ! stat $M0/del-file ++TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid ++TEST ! stat $M0/del-file-nolink ++TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid ++TEST ! stat $M0/del-recursive-dir ++TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid ++TEST ! stat $M0/d1/a ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid ++TEST ! stat $M0/l0/file ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid ++ ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 ++TEST ! stat $M0/l1/file-link1 ++TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid ++ ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2 ++TEST ! 
stat $M0/l2/file-link2 ++TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid ++ ++#Simulate only anon-inodes present in all bricks ++TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2 ++ ++#Test that shd doesn't cleanup anon-inodes when some bricks are down ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST $CLI volume heal $V0 enable ++$CLI volume heal $V0 ++sleep 5 #Allow time for completion of one scan ++TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid ++TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid ++rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid) ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2 ++ ++#Test that rename indeed happened instead of rmdir/mkdir ++renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a) ++EXPECT "$rename_dir_inum" echo $renamed_dir_inum ++cleanup; +diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t +new file mode 100644 +index 0000000..0803a08 +--- /dev/null ++++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t +@@ -0,0 +1,464 @@ ++#!/bin/bash ++ ++#This file checks if missing entry self-heal and entry self-heal are working ++#as expected. ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++function get_file_type { ++ stat -c "%a:%F:%g:%t:%T:%u" $1 ++} ++ ++function diff_dirs { ++ diff <(ls $1 | sort) <(ls $2 | sort) ++} ++ ++function heal_status { ++ local f1_path="${1}/${3}" ++ local f2_path="${2}/${3}" ++ local insync="" ++ diff_dirs $f1_path $f2_path ++ if [ $? 
-eq 0 ]; ++ then ++ insync="Y" ++ else ++ insync="N" ++ fi ++ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path) ++ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path) ++ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path) ++ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path) ++ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path) ++ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path) ++ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi ++ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi ++ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi ++ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi ++ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi ++ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi ++ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2} ++} ++ ++function is_heal_done { ++ local zero_xattr="000000000000000000000000" ++ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ]; ++ then ++ echo "Y" ++ else ++ echo "N" ++ fi ++} ++ ++function print_pending_heals { ++ local result=":" ++ for i in "$@"; ++ do ++ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ]; ++ then ++ result="$result:$i" ++ fi ++ done ++#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names ++ if [ $result == ":" ]; then result="~"; fi ++ echo $result ++} ++ ++zero_xattr="000000000000000000000000" ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 cluster.use-anonymous-inode off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.readdir-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 cluster.data-self-heal on ++TEST $CLI volume set $V0 cluster.metadata-self-heal on ++TEST $CLI volume set $V0 cluster.entry-self-heal on ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0 ++cd $M0 ++#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens ++#spb is split-brain, fool is all fool ++ ++#source_self_accusing means there exists source and a sink which self-accuses. ++#This simulates failures where fops failed on the bricks without it going down. 
++#Something like EACCESS/EDQUOT etc ++ ++TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing ++TEST mkfifo source_deletions_heal/fifo ++TEST mknod source_deletions_heal/block b 4 5 ++TEST mknod source_deletions_heal/char c 1 5 ++TEST touch source_deletions_heal/file ++TEST ln -s source_deletions_heal/file source_deletions_heal/slink ++TEST mkdir source_deletions_heal/dir1 ++TEST mkdir source_deletions_heal/dir1/dir2 ++ ++TEST mkfifo source_deletions_me/fifo ++TEST mknod source_deletions_me/block b 4 5 ++TEST mknod source_deletions_me/char c 1 5 ++TEST touch source_deletions_me/file ++TEST ln -s source_deletions_me/file source_deletions_me/slink ++TEST mkdir source_deletions_me/dir1 ++TEST mkdir source_deletions_me/dir1/dir2 ++ ++TEST mkfifo source_self_accusing/fifo ++TEST mknod source_self_accusing/block b 4 5 ++TEST mknod source_self_accusing/char c 1 5 ++TEST touch source_self_accusing/file ++TEST ln -s source_self_accusing/file source_self_accusing/slink ++TEST mkdir source_self_accusing/dir1 ++TEST mkdir source_self_accusing/dir1/dir2 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++ ++TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0 ++TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1 ++TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1 ++TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1 ++ ++#Test that the files are deleted ++TEST ! stat $B0/${V0}1/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}1/source_deletions_heal/block ++TEST ! stat $B0/${V0}1/source_deletions_heal/char ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/slink ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 ++TEST ! stat $B0/${V0}1/source_deletions_me/fifo ++TEST ! stat $B0/${V0}1/source_deletions_me/block ++TEST ! stat $B0/${V0}1/source_deletions_me/char ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/slink ++TEST ! stat $B0/${V0}1/source_deletions_me/dir1 ++TEST ! stat $B0/${V0}1/source_self_accusing/fifo ++TEST ! stat $B0/${V0}1/source_self_accusing/block ++TEST ! stat $B0/${V0}1/source_self_accusing/char ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/slink ++TEST ! 
stat $B0/${V0}1/source_self_accusing/dir1 ++ ++ ++TEST mkfifo source_creations_heal/fifo ++TEST mknod source_creations_heal/block b 4 5 ++TEST mknod source_creations_heal/char c 1 5 ++TEST touch source_creations_heal/file ++TEST ln -s source_creations_heal/file source_creations_heal/slink ++TEST mkdir source_creations_heal/dir1 ++TEST mkdir source_creations_heal/dir1/dir2 ++ ++TEST mkfifo source_creations_me/fifo ++TEST mknod source_creations_me/block b 4 5 ++TEST mknod source_creations_me/char c 1 5 ++TEST touch source_creations_me/file ++TEST ln -s source_creations_me/file source_creations_me/slink ++TEST mkdir source_creations_me/dir1 ++TEST mkdir source_creations_me/dir1/dir2 ++ ++$CLI volume stop $V0 ++ ++#simulate fool fool scenario for fool_* dirs ++setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} ++ ++#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty ++setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me} ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me} ++ ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++ ++TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1 ++ ++$CLI volume stop $V0 ++ ++#simulate fool fool scenario for fool_* dirs ++setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me} ++setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} ++ ++#simulate self-accusing for source_self_accusing ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing ++ ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++# Check if conservative merges happened correctly on _me_ dirs ++TEST stat spb_me_heal/1 ++TEST stat $B0/${V0}0/spb_me_heal/1 ++TEST stat $B0/${V0}1/spb_me_heal/1 ++ ++TEST stat spb_me_heal/0 ++TEST stat $B0/${V0}0/spb_me_heal/0 ++TEST stat $B0/${V0}1/spb_me_heal/0 ++ ++TEST stat fool_me/1 ++TEST stat $B0/${V0}0/fool_me/1 ++TEST stat $B0/${V0}1/fool_me/1 ++ ++TEST stat fool_me/0 ++TEST stat $B0/${V0}0/fool_me/0 ++TEST stat $B0/${V0}1/fool_me/0 ++ ++TEST stat v1_fool_me/0 ++TEST stat $B0/${V0}0/v1_fool_me/0 ++TEST stat $B0/${V0}1/v1_fool_me/0 ++ ++TEST stat v1_fool_me/1 ++TEST stat $B0/${V0}0/v1_fool_me/1 ++TEST stat $B0/${V0}1/v1_fool_me/1 ++ ++TEST stat v1_dirty_me/0 ++TEST stat $B0/${V0}0/v1_dirty_me/0 ++TEST stat $B0/${V0}1/v1_dirty_me/0 ++ ++#Check if files that have gfid-mismatches in _me_ are giving EIO ++TEST ! stat spb_me/0 ++ ++#Check if stale files are deleted on access ++TEST ! stat source_deletions_me/fifo ++TEST ! stat $B0/${V0}0/source_deletions_me/fifo ++TEST ! stat $B0/${V0}1/source_deletions_me/fifo ++TEST ! stat source_deletions_me/block ++TEST ! stat $B0/${V0}0/source_deletions_me/block ++TEST ! stat $B0/${V0}1/source_deletions_me/block ++TEST ! stat source_deletions_me/char ++TEST ! stat $B0/${V0}0/source_deletions_me/char ++TEST ! 
stat $B0/${V0}1/source_deletions_me/char ++TEST ! stat source_deletions_me/file ++TEST ! stat $B0/${V0}0/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat source_deletions_me/file ++TEST ! stat $B0/${V0}0/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat source_deletions_me/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2 ++TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2 ++TEST ! stat source_deletions_me/dir1 ++TEST ! stat $B0/${V0}0/source_deletions_me/dir1 ++TEST ! stat $B0/${V0}1/source_deletions_me/dir1 ++ ++#Test if the files created as part of access are healed correctly ++r=$(get_file_type source_creations_me/fifo) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo ++TEST [ -p source_creations_me/fifo ] ++ ++r=$(get_file_type source_creations_me/block) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block ++TEST [ -b source_creations_me/block ] ++ ++r=$(get_file_type source_creations_me/char) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char ++TEST [ -c source_creations_me/char ] ++ ++r=$(get_file_type source_creations_me/file) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file ++TEST [ -f source_creations_me/file ] ++ ++r=$(get_file_type source_creations_me/slink) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink ++TEST [ -h source_creations_me/slink ] ++ ++r=$(get_file_type source_creations_me/dir1/dir2) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2 ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2 ++TEST [ -d source_creations_me/dir1/dir2 ] ++ ++r=$(get_file_type source_creations_me/dir1) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1 ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1 ++TEST [ -d source_creations_me/dir1 ] ++ ++#Trigger heal and check _heal dirs are healed properly ++#Trigger change in event generation number. 
That way inodes would get refreshed during lookup ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++TEST stat spb_heal ++TEST stat spb_me_heal ++TEST stat fool_heal ++TEST stat fool_me ++TEST stat v1_fool_heal ++TEST stat v1_fool_me ++TEST stat source_deletions_heal ++TEST stat source_deletions_me ++TEST stat source_self_accusing ++TEST stat source_creations_heal ++TEST stat source_creations_me ++TEST stat v1_dirty_heal ++TEST stat v1_dirty_me ++TEST $CLI volume stop $V0 ++TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/* ++ ++$CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++#Create base entry in indices/xattrop ++echo "Data" > $M0/FILE ++rm -f $M0/FILE ++EXPECT "1" count_index_entries $B0/${V0}0 ++EXPECT "1" count_index_entries $B0/${V0}1 ++ ++TEST $CLI volume stop $V0; ++ ++#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal ++create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 ++ ++$CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++$CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++ ++TEST $CLI volume heal $V0; ++EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing ++ ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me ++EXPECT 
"Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me ++ ++#Don't access the files/dirs from mount point as that may cause self-heals ++# Check if conservative merges happened correctly on heal dirs ++TEST stat $B0/${V0}0/spb_heal/1 ++TEST stat $B0/${V0}1/spb_heal/1 ++ ++TEST stat $B0/${V0}0/spb_heal/0 ++TEST stat $B0/${V0}1/spb_heal/0 ++ ++TEST stat $B0/${V0}0/fool_heal/1 ++TEST stat $B0/${V0}1/fool_heal/1 ++ ++TEST stat $B0/${V0}0/fool_heal/0 ++TEST stat $B0/${V0}1/fool_heal/0 ++ ++TEST stat $B0/${V0}0/v1_fool_heal/0 ++TEST stat $B0/${V0}1/v1_fool_heal/0 ++ ++TEST stat $B0/${V0}0/v1_fool_heal/1 ++TEST stat $B0/${V0}1/v1_fool_heal/1 ++ ++TEST stat $B0/${V0}0/v1_dirty_heal/0 ++TEST stat $B0/${V0}1/v1_dirty_heal/0 ++ ++#Check if files that have gfid-mismatches in spb are giving EIO ++TEST ! stat spb/0 ++ ++#Check if stale files are deleted on access ++TEST ! stat $B0/${V0}0/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}1/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}0/source_deletions_heal/block ++TEST ! stat $B0/${V0}1/source_deletions_heal/block ++TEST ! stat $B0/${V0}0/source_deletions_heal/char ++TEST ! stat $B0/${V0}1/source_deletions_heal/char ++TEST ! stat $B0/${V0}0/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}0/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2 ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_deletions_heal/dir1 ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 ++ ++#Check if stale files are deleted on access ++TEST ! stat $B0/${V0}0/source_self_accusing/fifo ++TEST ! stat $B0/${V0}1/source_self_accusing/fifo ++TEST ! stat $B0/${V0}0/source_self_accusing/block ++TEST ! stat $B0/${V0}1/source_self_accusing/block ++TEST ! stat $B0/${V0}0/source_self_accusing/char ++TEST ! stat $B0/${V0}1/source_self_accusing/char ++TEST ! stat $B0/${V0}0/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}0/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2 ++TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_self_accusing/dir1 ++TEST ! 
stat $B0/${V0}1/source_self_accusing/dir1 ++ ++#Test if the files created as part of full self-heal correctly ++r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo ++TEST [ -p $B0/${V0}0/source_creations_heal/fifo ] ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/block) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/char) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/file) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file ++TEST [ -f $B0/${V0}0/source_creations_heal/file ] ++ ++r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink ++TEST [ -h $B0/${V0}0/source_creations_heal/slink ] ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2 ++TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ] ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1 ++TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ] ++ ++cd - ++ ++#Anonymous directory shouldn't be created ++TEST mkdir $M0/rename-dir ++before_rename=$(STAT_INO $B0/${V0}1/rename-dir) ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST mv $M0/rename-dir $M0/new-name ++TEST $CLI volume start $V0 force ++#Since features.ctime is not enabled by default in downstream, the below test ++#will fail. If ctime feature is enabled, there will be trusted.glusterfs.mdata ++#xattr set which will differ for the parent in the gfid split-brain scenario ++#and when lookup is triggered, the gfid gets added to indices/xattrop leading ++#the below test to pass in upstream. Hence commenting it here. ++#'spb' is in split-brain so pending-heal-count will be 2 ++#EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++after_rename=$(STAT_INO $B0/${V0}1/new-name) ++EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l) ++EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l) ++EXPECT_NOT "$before_rename" echo $after_rename ++cleanup +diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t +new file mode 100644 +index 0000000..256ee2a +--- /dev/null ++++ b/tests/basic/afr/rename-data-loss.t +@@ -0,0 +1,72 @@ ++#!/bin/bash ++#Self-heal tests ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} ++TEST $CLI volume set $V0 write-behind off ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $CLI volume set $V0 data-self-heal off ++TEST $CLI volume set $V0 metadata-self-heal off ++TEST $CLI volume set $V0 entry-self-heal off ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++cd $M0 ++TEST `echo "line1" >> file1` ++TEST mkdir dir1 ++TEST mkdir dir2 ++TEST mkdir -p dir1/dira/dirb ++TEST `echo "line1">>dir1/dira/dirb/file1` ++TEST mkdir delete_me ++TEST `echo "line1" >> delete_me/file1` ++ ++#brick0 has witnessed the second write while brick1 is down. ++TEST kill_brick $V0 $H0 $B0/brick1 ++TEST `echo "line2" >> file1` ++TEST `echo "line2" >> dir1/dira/dirb/file1` ++TEST `echo "line2" >> delete_me/file1` ++ ++#Toggle the bricks that are up/down. ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST kill_brick $V0 $H0 $B0/brick0 ++ ++#Rename when the 'source' brick0 for data-selfheals is down. ++mv file1 file2 ++mv dir1/dira dir2 ++ ++#Delete a dir when brick0 is down. ++rm -rf delete_me ++cd - ++ ++#Bring everything up and trigger heal ++TEST $CLI volume set $V0 self-heal-daemon on ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1 ++ ++#Remount to avoid reading from caches ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; ++EXPECT "line2" tail -1 $M0/file2 ++EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1 ++TEST ! stat $M0/delete_me/file1 ++TEST ! stat $M0/delete_me ++ ++anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode) ++TEST [[ -d $B0/brick0/$anon_inode_name ]] ++TEST [[ -d $B0/brick1/$anon_inode_name ]] ++cleanup +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +index c208112..0115350 100644 +--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0 + TEST $CLI volume profile $V0 start + TEST $CLI volume profile $V0 info clear + TEST $CLI volume heal $V0 enable +-# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes +-EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count ++# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode ++EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count + + # Check that a change in heal-timeout is honoured immediately. + TEST $CLI volume set $V0 cluster.heal-timeout 5 + sleep 10 + # Two crawls must have happened. +-EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count + + # shd must not heal if it is disabled and heal-timeout is changed. 
+ TEST $CLI volume heal $V0 disable +diff --git a/tests/features/trash.t b/tests/features/trash.t +index 472e909..da5b50b 100755 +--- a/tests/features/trash.t ++++ b/tests/features/trash.t +@@ -94,105 +94,105 @@ wildcard_not_exists() { + if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi + } + +-# testing glusterd [1-3] ++# testing glusterd + TEST glusterd + TEST pidof glusterd + TEST $CLI volume info + +-# creating distributed volume [4] ++# creating distributed volume + TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2} + +-# checking volume status [5-7] ++# checking volume status + EXPECT "$V0" volinfo_field $V0 'Volume Name' + EXPECT 'Created' volinfo_field $V0 'Status' + EXPECT '2' brick_count $V0 + +-# test without enabling trash translator [8] ++# test without enabling trash translator + TEST start_vol $V0 $M0 + +-# test on enabling trash translator [9-10] ++# test on enabling trash translator + TEST $CLI volume set $V0 features.trash on + EXPECT 'on' volinfo_field $V0 'features.trash' + +-# files directly under mount point [11] ++# files directly under mount point + create_files $M0/file1 $M0/file2 + TEST file_exists $V0 file1 file2 + +-# perform unlink [12] ++# perform unlink + TEST unlink_op file1 + +-# perform truncate [13] ++# perform truncate + TEST truncate_op file2 4 + +-# create files directory hierarchy and check [14] ++# create files directory hierarchy and check + mkdir -p $M0/1/2/3 + create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2 + TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2 + +-# perform unlink [15] ++# perform unlink + TEST unlink_op 1/2/3/foo1 + +-# perform truncate [16] ++# perform truncate + TEST truncate_op 1/2/3/foo2 4 + + # create a directory for eliminate pattern + mkdir $M0/a + +-# set the eliminate pattern [17-18] ++# set the eliminate pattern + TEST $CLI volume set $V0 features.trash-eliminate-path /a + EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path' + +-# create two files and check [19] ++# create two files and check + create_files $M0/a/test1 $M0/a/test2 + TEST file_exists $V0 a/test1 a/test2 + +-# remove from eliminate pattern [20] ++# remove from eliminate pattern + rm -f $M0/a/test1 + EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1* + +-# truncate from eliminate path [21-23] ++# truncate from eliminate path + truncate -s 2 $M0/a/test2 + TEST [ -e $M0/a/test2 ] + TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ] + EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2* + +-# set internal op on [24-25] ++# set internal op on + TEST $CLI volume set $V0 features.trash-internal-op on + EXPECT 'on' volinfo_field $V0 'features.trash-internal-op' + +-# again create two files and check [26] ++# again create two files and check + create_files $M0/inop1 $M0/inop2 + TEST file_exists $V0 inop1 inop2 + +-# perform unlink [27] ++# perform unlink + TEST unlink_op inop1 + +-# perform truncate [28] ++# perform truncate + TEST truncate_op inop2 4 + +-# remove one brick and restart the volume [28-31] ++# remove one brick and restart the volume + TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $CLI volume stop $V0 + TEST start_vol $V0 $M0 $M0/.trashcan + +-# again create two files and check [33] ++# again create two files and check + create_files $M0/rebal1 $M0/rebal2 + TEST file_exists $V0 rebal1 rebal2 + +-# add one brick [34-35] ++# add one brick + TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 + TEST [ -d $B0/${V0}3 ] + + +-# perform rebalance [36] ++# perform rebalance + TEST $CLI volume 
rebalance $V0 start force + EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed + + #Find out which file was migrated to the new brick + file_name=$(ls $B0/${V0}3/rebal*| xargs basename) + +-# check whether rebalance was succesful [37-40] ++# check whether rebalance was succesful + EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name* + EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name* + +@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + # force required in case rebalance is not over + TEST $CLI volume stop $V0 force + +-# create a replicated volume [41] ++# create a replicated volume + TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2} + +-# checking volume status [42-45] ++# checking volume status + EXPECT "$V1" volinfo_field $V1 'Volume Name' + EXPECT 'Replicate' volinfo_field $V1 'Type' + EXPECT 'Created' volinfo_field $V1 'Status' + EXPECT '2' brick_count $V1 + +-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50] ++# enable trash with options and start the replicate volume by disabling automatic self-heal + TEST $CLI volume set $V1 features.trash on + TEST $CLI volume set $V1 features.trash-internal-op on + EXPECT 'on' volinfo_field $V1 'features.trash' + EXPECT 'on' volinfo_field $V1 'features.trash-internal-op' + TEST start_vol $V1 $M1 $M1/.trashcan + +-# mount and check for trash directory [51] ++# mount and check for trash directory + TEST [ -d $M1/.trashcan/internal_op ] + +-# create a file and check [52] ++# create a file and check + touch $M1/self + TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ] + +-# kill one brick and delete the file from mount point [53-54] ++# kill one brick and delete the file from mount point + kill_brick $V1 $H0 $B0/${V1}1 + EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count + rm -f $M1/self + EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self* + +-# force start the volume and trigger the self-heal manually [55-57] +-TEST $CLI volume start $V1 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-# Since we created the file under root of the volume, it will be +-# healed automatically +- +-# check for the removed file in trashcan [58] +-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self* +- +-# check renaming of trash directory through cli [59-62] ++# check renaming of trash directory through cli + TEST $CLI volume set $V0 trash-dir abc + TEST start_vol $V0 $M0 $M0/abc + TEST [ -e $M0/abc -a ! 
-e $M0/.trashcan ] + EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal* + +-# ensure that rename and delete operation on trash directory fails [63-65] ++# ensure that rename and delete operation on trash directory fails + rm -rf $M0/abc/internal_op + TEST [ -e $M0/abc/internal_op ] + rm -rf $M0/abc/ +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 90b4f14..6f2da11 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -47,6 +47,41 @@ afr_quorum_errno(afr_private_t *priv) + return ENOTCONN; + } + ++gf_boolean_t ++afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, ++ pid_t pid) ++{ ++ if (!__is_root_gfid(pargfid)) { ++ return _gf_false; ++ } ++ ++ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { ++ /*For backward compatibility /.landfill is private*/ ++ return _gf_true; ++ } ++ ++ if (pid == GF_CLIENT_PID_GSYNCD) { ++ /*geo-rep needs to create/sync private directory on slave because ++ * it appears in changelog*/ ++ return _gf_false; ++ } ++ ++ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { ++ if (strcmp(name, priv->anon_inode_name) == 0) { ++ /* anonymous-inode dir is private*/ ++ return _gf_true; ++ } ++ } else { ++ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == ++ 0) { ++ /* anonymous-inode dir prefix is private for geo-rep to work*/ ++ return _gf_true; ++ } ++ } ++ ++ return _gf_false; ++} ++ + int + afr_fav_child_reset_sink_xattrs(void *opaque); + +@@ -3301,11 +3336,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + return 0; + } + +- if (__is_root_gfid(loc->parent->gfid)) { +- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { +- op_errno = EPERM; +- goto out; +- } ++ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, ++ frame->root->pid)) { ++ op_errno = EPERM; ++ goto out; + } + + local = AFR_FRAME_INIT(frame, op_errno); +@@ -4832,6 +4866,7 @@ afr_priv_dump(xlator_t *this) + priv->background_self_heal_count); + gf_proc_dump_write("healers", "%d", priv->healers); + gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); ++ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); + if (priv->quorum_count == AFR_QUORUM_AUTO) { + gf_proc_dump_write("quorum-type", "auto"); + } else if (priv->quorum_count == 0) { +@@ -5792,6 +5827,7 @@ afr_priv_destroy(afr_private_t *priv) + GF_FREE(priv->local); + GF_FREE(priv->pending_key); + GF_FREE(priv->children); ++ GF_FREE(priv->anon_inode); + GF_FREE(priv->child_up); + GF_FREE(priv->child_latency); + LOCK_DESTROY(&priv->lock); +diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c +index 6307b63..d64b6a9 100644 +--- a/xlators/cluster/afr/src/afr-dir-read.c ++++ b/xlators/cluster/afr/src/afr-dir-read.c +@@ -158,8 +158,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) + } + + static void +-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, +- gf_dirent_t *entries, fd_t *fd) ++afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, ++ int subvol, gf_dirent_t *entries, fd_t *fd) + { + int ret = -1; + gf_dirent_t *entry = NULL; +@@ -177,8 +177,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, + + list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) + { +- if (__is_root_gfid(fd->inode->gfid) && +- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { ++ 
if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, ++ frame->root->pid)) { + continue; + } + +@@ -222,8 +222,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + } + + if (op_ret >= 0) +- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, +- local->fd); ++ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, ++ &entries, local->fd); + + AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 9b6575f..0a8a7fd 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -2753,3 +2753,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, + out: + return source; + } ++ ++static int ++afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ afr_local_t *local = frame->local; ++ int i = (long)cookie; ++ ++ local->replies[i].valid = 1; ++ local->replies[i].op_ret = op_ret; ++ local->replies[i].op_errno = op_errno; ++ if (op_ret == 0) { ++ local->op_ret = 0; ++ local->replies[i].poststat = *buf; ++ local->replies[i].preparent = *preparent; ++ local->replies[i].postparent = *postparent; ++ } ++ if (xdata) { ++ local->replies[i].xdata = dict_ref(xdata); ++ } ++ ++ syncbarrier_wake(&local->barrier); ++ return 0; ++} ++ ++int ++afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) ++{ ++ call_frame_t *frame = NULL; ++ afr_local_t *local = NULL; ++ afr_private_t *priv = this->private; ++ unsigned char *mkdir_on = alloca0(priv->child_count); ++ unsigned char *lookup_on = alloca0(priv->child_count); ++ loc_t loc = {0}; ++ int32_t op_errno = 0; ++ int32_t child_op_errno = 0; ++ struct iatt iatt = {0}; ++ dict_t *xdata = NULL; ++ uuid_t anon_inode_gfid = {0}; ++ int mkdir_count = 0; ++ int i = 0; ++ ++ /*Try to mkdir everywhere and return success if the dir exists on 'child' ++ */ ++ ++ if (!priv->use_anon_inode) { ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ frame = afr_frame_create(this, &op_errno); ++ if (op_errno) { ++ goto out; ++ } ++ local = frame->local; ++ if (!local->child_up[child]) { ++ /*Other bricks may need mkdir so don't error out yet*/ ++ child_op_errno = ENOTCONN; ++ } ++ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); ++ for (i = 0; i < priv->child_count; i++) { ++ if (!local->child_up[i]) ++ continue; ++ ++ if (priv->anon_inode[i]) { ++ mkdir_on[i] = 0; ++ } else { ++ mkdir_on[i] = 1; ++ mkdir_count++; ++ } ++ } ++ ++ if (mkdir_count == 0) { ++ *linked_inode = inode_find(this->itable, anon_inode_gfid); ++ if (*linked_inode) { ++ op_errno = 0; ++ goto out; ++ } ++ } ++ ++ loc.parent = inode_ref(this->itable->root); ++ loc.name = priv->anon_inode_name; ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ ++ xdata = dict_new(); ++ if (!xdata) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ ++ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); ++ if (op_errno) { ++ goto out; ++ } ++ ++ if (mkdir_count == 0) { ++ memcpy(lookup_on, local->child_up, priv->child_count); ++ goto lookup; ++ } ++ ++ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, ++ xdata); ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (!mkdir_on[i]) { ++ 
continue; ++ } ++ ++ if (local->replies[i].op_ret == 0) { ++ priv->anon_inode[i] = 1; ++ iatt = local->replies[i].poststat; ++ } else if (local->replies[i].op_ret < 0 && ++ local->replies[i].op_errno == EEXIST) { ++ lookup_on[i] = 1; ++ } else if (i == child) { ++ child_op_errno = local->replies[i].op_errno; ++ } ++ } ++ ++ if (AFR_COUNT(lookup_on, priv->child_count) == 0) { ++ goto link; ++ } ++ ++lookup: ++ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ xdata); ++ for (i = 0; i < priv->child_count; i++) { ++ if (!lookup_on[i]) { ++ continue; ++ } ++ ++ if (local->replies[i].op_ret == 0) { ++ if (gf_uuid_compare(anon_inode_gfid, ++ local->replies[i].poststat.ia_gfid) == 0) { ++ priv->anon_inode[i] = 1; ++ iatt = local->replies[i].poststat; ++ } else { ++ if (i == child) ++ child_op_errno = EINVAL; ++ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, ++ "%s has gfid: %s", priv->anon_inode_name, ++ uuid_utoa(local->replies[i].poststat.ia_gfid)); ++ } ++ } else if (i == child) { ++ child_op_errno = local->replies[i].op_errno; ++ } ++ } ++link: ++ if (!gf_uuid_is_null(iatt.ia_gfid)) { ++ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); ++ if (*linked_inode) { ++ op_errno = 0; ++ inode_lookup(*linked_inode); ++ } else { ++ op_errno = ENOMEM; ++ } ++ goto out; ++ } ++ ++out: ++ if (xdata) ++ dict_unref(xdata); ++ loc_wipe(&loc); ++ /*child_op_errno takes precedence*/ ++ if (child_op_errno == 0) { ++ child_op_errno = op_errno; ++ } ++ ++ if (child_op_errno && *linked_inode) { ++ inode_unref(*linked_inode); ++ *linked_inode = NULL; ++ } ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ return -child_op_errno; ++} +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 00b5b2d..20b07dd 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -16,54 +16,170 @@ + #include + #include + +-static int +-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, +- inode_t *inode, int child, struct afr_reply *replies) ++int ++afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, ++ struct afr_reply *replies, ++ gf_boolean_t *anon_inode) + { + afr_private_t *priv = NULL; ++ afr_local_t *local = NULL; + xlator_t *subvol = NULL; + int ret = 0; ++ int i = 0; ++ char g[64] = {0}; ++ unsigned char *lookup_success = NULL; ++ call_frame_t *frame = NULL; ++ loc_t loc2 = { ++ 0, ++ }; + loc_t loc = { + 0, + }; +- char g[64]; + + priv = this->private; +- + subvol = priv->children[child]; ++ lookup_success = alloca0(priv->child_count); ++ uuid_utoa_r(replies[child].poststat.ia_gfid, g); ++ loc.inode = inode_new(inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ if (replies[child].poststat.ia_type == IA_IFDIR) { ++ /* This directory may have sub-directory hierarchy which may need to ++ * be preserved for subsequent heals. 
So unconditionally move the ++ * directory to anonymous-inode directory*/ ++ *anon_inode = _gf_true; ++ goto anon_inode; ++ } ++ ++ frame = afr_frame_create(this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ local = frame->local; ++ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ NULL); ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ lookup_success[i] = 1; ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ ret = -local->replies[i].op_errno; ++ } ++ } ++ ++ if (priv->quorum_count) { ++ if (afr_has_quorum(lookup_success, this, NULL)) { ++ *anon_inode = _gf_true; ++ } ++ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { ++ *anon_inode = _gf_true; ++ } else if (ret) { ++ goto out; ++ } ++ ++anon_inode: ++ if (!*anon_inode) { ++ ret = 0; ++ goto out; ++ } + + loc.parent = inode_ref(dir); + gf_uuid_copy(loc.pargfid, dir->gfid); + loc.name = name; +- loc.inode = inode_ref(inode); + +- if (replies[child].valid && replies[child].op_ret == 0) { +- switch (replies[child].poststat.ia_type) { +- case IA_IFDIR: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), +- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), +- subvol->name); +- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), +- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), +- subvol->name); +- ret = syncop_unlink(subvol, &loc, NULL, NULL); +- break; +- } ++ ret = afr_anon_inode_create(this, child, &loc2.parent); ++ if (ret < 0) ++ goto out; ++ ++ loc2.name = g; ++ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "Rename to %s dir %s/%s (%s) on %s failed", ++ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, ++ subvol->name); ++ } else { ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "Rename to %s dir %s/%s (%s) on %s successful", ++ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, ++ subvol->name); + } + ++out: + loc_wipe(&loc); ++ loc_wipe(&loc2); ++ if (frame) { ++ AFR_STACK_DESTROY(frame); ++ } + + return ret; + } + + int ++afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, struct afr_reply *replies) ++{ ++ char g[64] = {0}; ++ afr_private_t *priv = NULL; ++ xlator_t *subvol = NULL; ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t anon_inode = _gf_false; ++ ++ priv = this->private; ++ subvol = priv->children[child]; ++ ++ if ((!replies[child].valid) || (replies[child].op_ret < 0)) { ++ /*Nothing to do*/ ++ ret = 0; ++ goto out; ++ } ++ ++ if (priv->use_anon_inode) { ++ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, ++ replies, &anon_inode); ++ if (ret < 0 || anon_inode) ++ goto out; ++ } ++ ++ loc.parent = inode_ref(dir); ++ loc.inode = inode_new(inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ loc.name = name; ++ switch (replies[child].poststat.ia_type) { ++ case IA_IFDIR: ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, ++ 
uuid_utoa_r(replies[child].poststat.ia_gfid, g), ++ subvol->name); ++ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), ++ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), ++ subvol->name); ++ ret = syncop_unlink(subvol, &loc, NULL, NULL); ++ break; ++ } ++ ++out: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int + afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + unsigned char *sources, inode_t *dir, + const char *name, inode_t *inode, +@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + loc_t srcloc = { + 0, + }; ++ loc_t anonloc = { ++ 0, ++ }; + xlator_t *this = frame->this; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; +@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + 0, + }; + unsigned char *newentry = NULL; ++ char iatt_uuid_str[64] = {0}; ++ char dir_uuid_str[64] = {0}; + + priv = this->private; + iatt = &replies[source].poststat; ++ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); + if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, + "Invalid ia_type (%d) or gfid(%s). source brick=%d, " + "pargfid=%s, name=%s", +- iatt->ia_type, uuid_utoa(iatt->ia_gfid), source, +- uuid_utoa(dir->gfid), name); ++ iatt->ia_type, iatt_uuid_str, source, ++ uuid_utoa_r(dir->gfid, dir_uuid_str), name); + ret = -EINVAL; + goto out; + } +@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + + srcloc.inode = inode_ref(inode); + gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); +- if (iatt->ia_type != IA_IFDIR) +- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); +- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { ++ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); ++ if (ret == -ENOENT || ret == -ESTALE) { + newentry[dst] = 1; + ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, + sources, newentry); + if (ret) + goto out; ++ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { ++ // Try rename from hidden directory ++ ret = afr_anon_inode_create(this, dst, &anonloc.parent); ++ if (ret < 0) ++ goto out; ++ anonloc.inode = inode_ref(inode); ++ anonloc.name = iatt_uuid_str; ++ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); ++ if (ret == -ENOENT || ret == -ESTALE) ++ ret = -1; /*This sets 'mismatch' to true*/ ++ goto out; + } + + mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); +@@ -165,6 +297,7 @@ out: + GF_FREE(linkname); + loc_wipe(&loc); + loc_wipe(&srcloc); ++ loc_wipe(&anonloc); + return ret; + } + +@@ -580,6 +713,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + + priv = this->private; + ++ if (afr_is_private_directory(priv, fd->inode->gfid, name, ++ GF_CLIENT_PID_SELF_HEALD)) { ++ return 0; ++ } ++ + xattr = dict_new(); + if (!xattr) + return -ENOMEM; +@@ -628,7 +766,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + replies); + + if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { +- ret = afr_shd_index_purge(subvol, parent_idx_inode, name, ++ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, + inode->ia_type); + /* Why is ret force-set to 0? 
We do not care about + * index purge failing for full heal as it is quite +@@ -758,10 +896,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + +- if (__is_root_gfid(fd->inode->gfid) && +- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) +- continue; +- + ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, + loc.inode, subvol, + local->need_full_crawl); +@@ -824,7 +958,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + /* The name indices under the pgfid index dir are guaranteed + * to be regular files. Hence the hardcoding. + */ +- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ++ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + ret = 0; + goto out; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index dace071..51e3d8c 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + struct afr_reply *replies) + { +- loc_t loc = { +- 0, +- }; + int i = 0; + afr_private_t *priv = NULL; +- char g[64]; + int ret = 0; + + priv = this->private; + +- loc.parent = inode_ref(parent); +- gf_uuid_copy(loc.pargfid, pargfid); +- loc.name = bname; +- loc.inode = inode_ref(inode); +- + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; +@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, + if (replies[i].op_ret) + continue; + +- switch (replies[i].poststat.ia_type) { +- case IA_IFDIR: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), +- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), +- priv->children[i]->name); +- +- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), +- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), +- priv->children[i]->name); +- +- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); +- break; +- } ++ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, ++ replies); + } + +- loc_wipe(&loc); +- + return ret; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 8f6fb00..c8dc384 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -370,4 +370,9 @@ gf_boolean_t + afr_is_file_empty_on_all_children(afr_private_t *priv, + struct afr_reply *replies); + ++int ++afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, struct afr_reply *replies); ++int ++afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); + #endif /* !_AFR_SELFHEAL_H */ +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 95ac5f2..939a135 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -222,7 +222,7 @@ out: + } + + int +-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, ++afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, + ia_type_t 
type) + { + int ret = 0; +@@ -422,7 +422,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + ret = afr_shd_selfheal(healer, healer->subvol, gfid); + + if (ret == -ENOENT || ret == -ESTALE) +- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); ++ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); + + if (ret == 2) + /* If bricks crashed in pre-op after creating indices/xattrop +@@ -798,6 +798,176 @@ afr_bricks_available_for_heal(afr_private_t *priv) + return _gf_true; + } + ++static int ++afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ++ void *data) ++{ ++ struct subvol_healer *healer = data; ++ afr_private_t *priv = healer->this->private; ++ call_frame_t *frame = NULL; ++ afr_local_t *local = NULL; ++ int ret = 0; ++ loc_t loc = {0}; ++ int count = 0; ++ int i = 0; ++ int op_errno = 0; ++ struct iatt *iatt = NULL; ++ gf_boolean_t multiple_links = _gf_false; ++ unsigned char *gfid_present = alloca0(priv->child_count); ++ unsigned char *entry_present = alloca0(priv->child_count); ++ char *type = "file"; ++ ++ frame = afr_frame_create(healer->this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ local = frame->local; ++ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { ++ gf_msg_debug(healer->this->name, 0, ++ "Not all bricks are up. Skipping " ++ "cleanup of %s on %s", ++ entry->d_name, subvol->name); ++ ret = 0; ++ goto out; ++ } ++ ++ loc.inode = inode_new(parent->inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ret = gf_uuid_parse(entry->d_name, loc.gfid); ++ if (ret) { ++ ret = 0; ++ goto out; ++ } ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ NULL); ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ count++; ++ gfid_present[i] = 1; ++ iatt = &local->replies[i].poststat; ++ if (iatt->ia_type == IA_IFDIR) { ++ type = "dir"; ++ } ++ ++ if (i == healer->subvol) { ++ if (local->replies[i].poststat.ia_nlink > 1) { ++ multiple_links = _gf_true; ++ } ++ } ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ /*We don't have complete view. Skip the entry*/ ++ gf_msg_debug(healer->this->name, local->replies[i].op_errno, ++ "Skipping cleanup of %s on %s", entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ ++ /*Inode is deleted from subvol*/ ++ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { ++ gf_msg(healer->this->name, GF_LOG_WARNING, 0, ++ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, ++ priv->anon_inode_name, entry->d_name, subvol->name); ++ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, ++ iatt->ia_type); ++ if (ret == -ENOENT || ret == -ESTALE) ++ ret = 0; ++ } else if (count > 1) { ++ loc_wipe(&loc); ++ loc.parent = inode_ref(parent->inode); ++ loc.name = entry->d_name; ++ loc.inode = inode_new(parent->inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, ++ &loc, NULL); ++ count = 0; ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ count++; ++ entry_present[i] = 1; ++ iatt = &local->replies[i].poststat; ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ /*We don't have complete view. 
Skip the entry*/ ++ gf_msg_debug(healer->this->name, local->replies[i].op_errno, ++ "Skipping cleanup of %s on %s", entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ for (i = 0; i < priv->child_count; i++) { ++ if (gfid_present[i] && !entry_present[i]) { ++ /*Entry is not anonymous on at least one subvol*/ ++ gf_msg_debug(healer->this->name, 0, ++ "Valid entry present on %s " ++ "Skipping cleanup of %s on %s", ++ priv->children[i]->name, entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ ++ gf_msg(healer->this->name, GF_LOG_WARNING, 0, ++ AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, ++ entry->d_name); ++ ret = 0; ++ for (i = 0; i < priv->child_count; i++) { ++ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, ++ entry->d_name, iatt->ia_type); ++ if (op_errno != ENOENT && op_errno != ESTALE) { ++ ret |= -op_errno; ++ } ++ } ++ } ++ ++out: ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++static void ++afr_cleanup_anon_inode_dir(struct subvol_healer *healer) ++{ ++ int ret = 0; ++ call_frame_t *frame = NULL; ++ afr_private_t *priv = healer->this->private; ++ loc_t loc = {0}; ++ ++ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); ++ if (ret) ++ goto out; ++ ++ frame = afr_frame_create(healer->this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ ++ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, ++ GF_CLIENT_PID_SELF_HEALD, healer, ++ afr_shd_anon_inode_cleaner, NULL, ++ priv->shd.max_threads, priv->shd.wait_qlength); ++out: ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ loc_wipe(&loc); ++ return; ++} ++ + void * + afr_shd_index_healer(void *data) + { +@@ -854,6 +1024,10 @@ afr_shd_index_healer(void *data) + sleep(1); + } while (ret > 0); + ++ if (ret == 0) { ++ afr_cleanup_anon_inode_dir(healer); ++ } ++ + if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { + afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, + pre_crawl_xdata); +diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h +index 1990539..acd567e 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.h ++++ b/xlators/cluster/afr/src/afr-self-heald.h +@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, + char **path_p); + + int +-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, ++afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, + ia_type_t type); + #endif /* !_AFR_SELF_HEALD_H */ +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index bfa464f..33fe4d8 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) + } + } + ++void ++afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) ++{ ++ char *volfile_id_str = NULL; ++ uuid_t anon_inode_gfid = {0}; ++ ++ /*If volume id is not present don't enable anything*/ ++ if (dict_get_str(options, "volume-id", &volfile_id_str)) ++ return; ++ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); ++ /*anon_inode_name is not supposed to change once assigned*/ ++ if (!priv->anon_inode_name[0]) { ++ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", ++ AFR_ANON_DIR_PREFIX, volfile_id_str); ++ gf_uuid_parse(volfile_id_str, anon_inode_gfid); ++ /*Flip a bit to make sure volfile-id and anon-gfid 
are not same*/ ++ anon_inode_gfid[0] ^= 1; ++ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); ++ } ++} ++ + int + reconfigure(xlator_t *this, dict_t *options) + { +@@ -287,6 +308,10 @@ reconfigure(xlator_t *this, dict_t *options) + consistent_io = _gf_false; + priv->consistent_io = consistent_io; + ++ afr_handle_anon_inode_options(priv, options); ++ ++ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, ++ out); + if (priv->shd.enabled) { + if ((priv->shd.enabled != enabled_old) || + (timeout_old != priv->shd.timeout)) +@@ -535,7 +560,9 @@ init(xlator_t *this) + + GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); + GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); ++ afr_handle_anon_inode_options(priv, this->options); + ++ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); + if (priv->quorum_count != 0) + priv->consistent_io = _gf_false; + +@@ -547,13 +574,16 @@ init(xlator_t *this) + goto out; + } + ++ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, ++ gf_afr_mt_char); ++ + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + + priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count, + gf_afr_mt_child_latency_t); + +- if (!priv->child_up || !priv->child_latency) { ++ if (!priv->child_up || !priv->child_latency || !priv->anon_inode) { + ret = -ENOMEM; + goto out; + } +@@ -1218,6 +1248,14 @@ struct volume_options options[] = { + .tags = {"replicate"}, + .description = "This option exists only for backward compatibility " + "and configuring it doesn't have any effect"}, ++ {.key = {"use-anonymous-inode"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .default_value = "no", ++ .op_version = {GD_OP_VERSION_7_0}, ++ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, ++ .tags = {"replicate"}, ++ .description = "Setting this option heals directory renames efficiently"}, ++ + {.key = {NULL}}, + }; + +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 3a2b26d..6a9a763 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -40,6 +40,8 @@ + #define AFR_TA_DOM_MODIFY "afr.ta.dom-modify" + + #define AFR_HALO_MAX_LATENCY 99999 ++#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" ++ + + #define PFLAG_PENDING (1 << 0) + #define PFLAG_SBRAIN (1 << 1) +@@ -155,6 +157,7 @@ typedef struct _afr_private { + struct list_head ta_waitq; + struct list_head ta_onwireq; + ++ unsigned char *anon_inode; + unsigned char *child_up; + int64_t *child_latency; + unsigned char *local; +@@ -240,6 +243,11 @@ typedef struct _afr_private { + gf_boolean_t esh_granular; + gf_boolean_t consistent_io; + gf_boolean_t data_self_heal; /* on/off */ ++ gf_boolean_t use_anon_inode; ++ ++ /*For anon-inode handling */ ++ char anon_inode_name[NAME_MAX + 1]; ++ char anon_gfid_str[UUID_SIZE + 1]; + } afr_private_t; + + typedef enum { +@@ -1341,4 +1349,7 @@ afr_selfheal_childup(xlator_t *this, afr_private_t *priv); + void + afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies); ++gf_boolean_t ++afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, ++ pid_t pid); + #endif /* __AFR_H__ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 094a71f..1920284 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -3867,6 +3867,38 @@ out: + } + + static int 
++set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
++                      int clusters)
++{
++    xlator_t *xlator = NULL;
++    int i = 0;
++    int ret = -1;
++    glusterd_conf_t *conf = NULL;
++    xlator_t *this = NULL;
++
++    this = THIS;
++    GF_VALIDATE_OR_GOTO("glusterd", this, out);
++    conf = this->private;
++    GF_VALIDATE_OR_GOTO(this->name, conf, out);
++
++    if (conf->op_version < GD_OP_VERSION_7_1)
++        return 0;
++    xlator = first_of(graph);
++
++    for (i = 0; i < clusters; i++) {
++        ret = xlator_set_fixed_option(xlator, "volume-id",
++                                      uuid_utoa(volinfo->volume_id));
++        if (ret)
++            goto out;
++
++        xlator = xlator->next;
++    }
++
++out:
++    return ret;
++}
++
++static int
+ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
+                                 glusterd_volinfo_t *volinfo)
+ {
+@@ -3906,6 +3938,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
+         clusters = -1;
+         goto out;
+     }
++
++    ret = set_volfile_id_option(graph, volinfo, clusters);
++    if (ret) {
++        clusters = -1;
++        goto out;
++    }
++
+     if (!volinfo->arbiter_count)
+         goto out;
+ 
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 62acadf..c1ca190 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -3789,4 +3789,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+      .voltype = "features/cloudsync",
+      .op_version = GD_OP_VERSION_7_0,
+      .flags = VOLOPT_FLAG_CLIENT_OPT},
++
++    {.key = "cluster.use-anonymous-inode",
++     .voltype = "cluster/replicate",
++     .op_version = GD_OP_VERSION_7_1,
++     .value = "yes",
++     .flags = VOLOPT_FLAG_CLIENT_OPT},
+     {.key = NULL}};
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch b/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch
new file mode 100644
index 0000000..0f6249e
--- /dev/null
+++ b/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch
@@ -0,0 +1,338 @@
+From 8d24d891aade910b0bb86b27c25a8d2382e19ba0 Mon Sep 17 00:00:00 2001
+From: karthik-us
+Date: Tue, 15 Dec 2020 15:04:19 +0530
+Subject: [PATCH 516/517] afr: return -EIO for gfid split-brains.
+
+Problem:
+entry-self-heal-anon-dir-off.t was failing occasionally because
+afr_gfid_split_brain_source() returned -1 instead of -EIO for
+split-brains, causing the code to proceed to afr_lookup_done(), which
+in turn succeeded the lookup if there was a parallel client side heal
+going on.
+
+Fix:
+Return -EIO instead of -1 so that lookup fails.
+
+Also, afr_selfheal_name() was using the same dict to get and set values. This
+could be problematic if the caller passed local->xdata_req, since
+setting a response in a request dict can lead to bugs. So changed it to use
+separate request and response dicts.
+ +Upstream patch details: +> Fixes: #1739 +> Credits Pranith Karampuri +> Signed-off-by: Ravishankar N +>Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443 +Upstream patch: https://github.com/gluster/glusterfs/pull/1819/ + +BUG: 1640148 +Signed-off-by: karthik-us +Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443 +Reviewed-on: https://code.engineering.redhat.com/gerrit/221209 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + xlators/cluster/afr/src/afr-common.c | 12 ++++++++---- + xlators/cluster/afr/src/afr-self-heal-common.c | 27 +++++++++++++------------- + xlators/cluster/afr/src/afr-self-heal-entry.c | 8 ++++---- + xlators/cluster/afr/src/afr-self-heal-name.c | 23 +++++++++++----------- + xlators/cluster/afr/src/afr-self-heal.h | 5 +++-- + xlators/cluster/afr/src/afr-self-heald.c | 2 +- + 6 files changed, 42 insertions(+), 35 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 6f2da11..416012c 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2366,7 +2366,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + /* If we were called from glfsheal and there is still a gfid + * mismatch, succeed the lookup and let glfsheal print the + * response via gfid-heal-msg.*/ +- if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", ++ if (!dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", + &gfid_heal_msg)) + goto cant_interpret; + +@@ -2421,7 +2421,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + goto error; + } + +- ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); ++ ret = dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", &gfid_heal_msg); + if (!ret) { + ret = dict_set_str_sizen(local->replies[read_subvol].xdata, + "gfid-heal-msg", gfid_heal_msg); +@@ -2768,9 +2768,12 @@ afr_lookup_selfheal_wrap(void *opaque) + local = frame->local; + this = frame->this; + loc_pargfid(&local->loc, pargfid); ++ if (!local->xattr_rsp) ++ local->xattr_rsp = dict_new(); + + ret = afr_selfheal_name(frame->this, pargfid, local->loc.name, +- &local->cont.lookup.gfid_req, local->xattr_req); ++ &local->cont.lookup.gfid_req, local->xattr_req, ++ local->xattr_rsp); + if (ret == -EIO) + goto unwind; + +@@ -2786,7 +2789,8 @@ afr_lookup_selfheal_wrap(void *opaque) + return 0; + + unwind: +- AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL); ++ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, local->xattr_rsp, ++ NULL); + return 0; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 0a8a7fd..0954d2c 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -245,7 +245,8 @@ int + afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, +- unsigned char *locked_on, int *src, dict_t *xdata) ++ unsigned char *locked_on, int *src, dict_t *req, ++ dict_t *rsp) + { + afr_private_t *priv = NULL; + char g1[64] = { +@@ -266,8 +267,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "All the bricks should be up to resolve the gfid split " + "barin"); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + 
SALL_BRICKS_UP_TO_RESOLVE); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, +@@ -277,8 +278,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + goto out; + } + +- if (xdata) { +- ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op); ++ if (req) { ++ ret = dict_get_int32_sizen(req, "heal-op", &heal_op); + if (ret) + goto fav_child; + } else { +@@ -292,8 +293,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SNO_BIGGER_FILE); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SNO_BIGGER_FILE); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -310,8 +311,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SNO_DIFF_IN_MTIME); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SNO_DIFF_IN_MTIME); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -323,7 +324,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + break; + + case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: +- ret = dict_get_str_sizen(xdata, "child-name", &src_brick); ++ ret = dict_get_str_sizen(req, "child-name", &src_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Error getting the source " +@@ -335,8 +336,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SERROR_GETTING_SRC_BRICK); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SERROR_GETTING_SRC_BRICK); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -400,7 +401,7 @@ out: + uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx, + priv->children[src_idx]->name, src_idx, + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2)); +- return -1; ++ return -EIO; + } + return 0; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 20b07dd..a17dd93 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -399,7 +399,7 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this, + (ia_type == replies[i].poststat.ia_type)) { + ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, + bname, src_idx, i, locked_on, src, +- NULL); ++ NULL, NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Skipping conservative merge on the " +@@ -474,7 +474,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + return ret; + + /* In case of type mismatch / unable to resolve gfid mismatch on the +- * entry, return -1.*/ ++ * entry, return -EIO.*/ + ret = afr_selfheal_detect_gfid_and_type_mismatch( + this, replies, inode, fd->inode->gfid, name, source, locked_on, &src); + +@@ -905,7 +905,7 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, + break; + } + +- if (ret == -1) { ++ if (ret == -EIO) { + /* gfid or type mismatch. 
*/ + mismatch = _gf_true; + ret = 0; +@@ -1072,7 +1072,7 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, + else + ret = afr_selfheal_entry_do_subvol(frame, this, fd, i); + +- if (ret == -1) { ++ if (ret == -EIO) { + /* gfid or type mismatch. */ + mismatch = _gf_true; + ret = 0; +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 51e3d8c..9ec2066 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -217,7 +217,8 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, + int source, unsigned char *sources, + int *gfid_idx, uuid_t pargfid, + const char *bname, inode_t *inode, +- unsigned char *locked_on, dict_t *xdata) ++ unsigned char *locked_on, dict_t *req, ++ dict_t *rsp) + { + int i = 0; + int gfid_idx_iter = -1; +@@ -245,11 +246,11 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, + if (sources[i] || source == -1) { + if ((sources[gfid_idx_iter] || source == -1) && + gf_uuid_compare(gfid, gfid1)) { +- ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, +- bname, gfid_idx_iter, i, +- locked_on, gfid_idx, xdata); ++ ret = afr_gfid_split_brain_source( ++ this, replies, inode, pargfid, bname, gfid_idx_iter, i, ++ locked_on, gfid_idx, req, rsp); + if (!ret && *gfid_idx >= 0) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + "GFID split-brain resolved"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -303,7 +304,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, int source, + unsigned char *locked_on, struct afr_reply *replies, +- void *gfid_req, dict_t *xdata) ++ void *gfid_req, dict_t *req, dict_t *rsp) + { + int gfid_idx = -1; + int ret = -1; +@@ -333,7 +334,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + + ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources, + &gfid_idx, pargfid, bname, +- inode, locked_on, xdata); ++ inode, locked_on, req, rsp); + if (ret) + return ret; + +@@ -450,7 +451,7 @@ out: + int + afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + uuid_t pargfid, const char *bname, void *gfid_req, +- dict_t *xdata) ++ dict_t *req, dict_t *rsp) + { + afr_private_t *priv = NULL; + unsigned char *sources = NULL; +@@ -505,7 +506,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + + ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode, + sources, sinks, healed_sinks, source, +- locked_on, replies, gfid_req, xdata); ++ locked_on, replies, gfid_req, req, rsp); + } + unlock: + afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on, +@@ -578,7 +579,7 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this, + + int + afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, +- void *gfid_req, dict_t *xdata) ++ void *gfid_req, dict_t *req, dict_t *rsp) + { + inode_t *parent = NULL; + call_frame_t *frame = NULL; +@@ -600,7 +601,7 @@ afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, + + if (need_heal) { + ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname, +- gfid_req, xdata); ++ gfid_req, req, rsp); + if (ret) + goto out; + } +diff --git 
a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index c8dc384..6b0bf69 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -127,7 +127,7 @@ afr_throttled_selfheal(call_frame_t *frame, xlator_t *this); + + int + afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req, +- dict_t *xdata); ++ dict_t *req, dict_t *rsp); + + int + afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd); +@@ -357,7 +357,8 @@ int + afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, +- unsigned char *locked_on, int *src, dict_t *xdata); ++ unsigned char *locked_on, int *src, dict_t *req, ++ dict_t *rsp); + int + afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources, + unsigned char *sinks, +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 939a135..18aed93 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -295,7 +295,7 @@ afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent, + { + int ret = -1; + +- ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL); ++ ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL, NULL); + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch b/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch new file mode 100644 index 0000000..bc1b263 --- /dev/null +++ b/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch @@ -0,0 +1,388 @@ +From da75c2857fd8b173d47fb7fc3b925ffd14105f64 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Wed, 23 Dec 2020 07:39:13 -0500 +Subject: [PATCH 517/517] gfapi: 'glfs_h_creat_open' - new API to create handle + and open fd + +Right now we have two separate APIs, one +- 'glfs_h_creat_handle' to create handle & another +- 'glfs_h_open' to create a glfd to return to application + +Having two separate routines can result in access errors +while trying to create and write into a read-only file. + +Since a fd is opened even during file/directory creation, +introducing a new API to make these two operations atomic i.e, +which can create both handle & fd and pass them to application + +This is backport of below mainline patch - +- https://review.gluster.org/#/c/glusterfs/+/23448/ +- bz#1753569 + +> Signed-off-by: Soumya Koduri +> Change-Id: Ibf513fcfcdad175f4d7eb6fa7a61b8feec6d33b5 +> release-6: commit 5a2af2fd06356f6fc79d591c352caffd4c511c9e +> master: commit 41a0f2aa755ec7162facd30209f2fa3f40308766 + +BUG: 1910119 +Change-Id: Ib397dbe82a6928d8f24251809d30febddd007bfc +Signed-off-by: Kaleb S. 
KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/222083 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/gfapi.aliases | 1 + + api/src/gfapi.map | 5 ++ + api/src/glfs-handleops.c | 135 ++++++++++++++++++++++++++++++++++ + api/src/glfs-handles.h | 5 ++ + tests/basic/gfapi/glfs_h_creat_open.c | 118 +++++++++++++++++++++++++++++ + tests/basic/gfapi/glfs_h_creat_open.t | 27 +++++++ + 6 files changed, 291 insertions(+) + create mode 100644 tests/basic/gfapi/glfs_h_creat_open.c + create mode 100755 tests/basic/gfapi/glfs_h_creat_open.t + +diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases +index 692ae13..3d3415c 100644 +--- a/api/src/gfapi.aliases ++++ b/api/src/gfapi.aliases +@@ -197,3 +197,4 @@ _pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0 + _pub_glfs_setattr _glfs_setattr$GFAPI_6.0 + + _pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_6.4 ++_pub_glfs_h_creat_open _glfs_h_creat_open@GFAPI_6.6 +diff --git a/api/src/gfapi.map b/api/src/gfapi.map +index df65837..614f3f6 100644 +--- a/api/src/gfapi.map ++++ b/api/src/gfapi.map +@@ -276,3 +276,8 @@ GFAPI_6.4 { + global: + glfs_set_statedump_path; + } GFAPI_PRIVATE_6.1; ++ ++GFAPI_6.6 { ++ global: ++ glfs_h_creat_open; ++} GFAPI_6.4; +diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c +index d4e1545..7b8ff14 100644 +--- a/api/src/glfs-handleops.c ++++ b/api/src/glfs-handleops.c +@@ -843,6 +843,141 @@ invalid_fs: + GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2); + + struct glfs_object * ++pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, ++ const char *path, int flags, mode_t mode, ++ struct stat *stat, struct glfs_fd **out_fd) ++{ ++ int ret = -1; ++ struct glfs_fd *glfd = NULL; ++ xlator_t *subvol = NULL; ++ inode_t *inode = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ struct iatt iatt = { ++ 0, ++ }; ++ uuid_t gfid; ++ dict_t *xattr_req = NULL; ++ struct glfs_object *object = NULL; ++ dict_t *fop_attr = NULL; ++ ++ /* validate in args */ ++ if ((fs == NULL) || (parent == NULL) || (path == NULL) || ++ (out_fd == NULL)) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ DECLARE_OLD_THIS; ++ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs); ++ ++ /* get the active volume */ ++ subvol = glfs_active_subvol(fs); ++ if (!subvol) { ++ ret = -1; ++ goto out; ++ } ++ ++ /* get/refresh the in arg objects inode in correlation to the xlator */ ++ inode = glfs_resolve_inode(fs, subvol, parent); ++ if (!inode) { ++ ret = -1; ++ goto out; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ gf_uuid_generate(gfid); ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true); ++ if (ret) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path); ++ ++ glfd = glfs_fd_new(fs); ++ if (!glfd) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ glfd->fd = fd_create(loc.inode, getpid()); ++ if (!glfd->fd) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ glfd->fd->flags = flags; ++ ++ ret = get_fop_attr_thrd_key(&fop_attr); ++ if (ret) ++ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed"); ++ ++ /* fop/op */ ++ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt, xattr_req, ++ NULL); ++ DECODE_SYNCOP_ERR(ret); ++ ++ /* populate out args */ ++ if (ret == 0) { ++ glfd->fd->flags = flags; ++ ++ ret = glfs_loc_link(&loc, &iatt); ++ if (ret != 0) { ++ goto out; ++ } ++ ++ if (stat) ++ glfs_iatt_to_stat(fs, &iatt, 
stat); ++ ++ ret = glfs_create_object(&loc, &object); ++ } ++ ++out: ++ if (ret && object != NULL) { ++ /* Release the held reference */ ++ glfs_h_close(object); ++ object = NULL; ++ } ++ ++ loc_wipe(&loc); ++ ++ if (inode) ++ inode_unref(inode); ++ ++ if (fop_attr) ++ dict_unref(fop_attr); ++ ++ if (xattr_req) ++ dict_unref(xattr_req); ++ ++ if (ret && glfd) { ++ GF_REF_PUT(glfd); ++ } else if (glfd) { ++ glfd_set_state_bind(glfd); ++ *out_fd = glfd; ++ } ++ ++ glfs_subvol_done(fs, subvol); ++ ++ __GLFS_EXIT_FS; ++ ++invalid_fs: ++ return object; ++} ++ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6); ++ ++struct glfs_object * + pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, struct stat *stat) + { +diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h +index f7e6a06..4d039b9 100644 +--- a/api/src/glfs-handles.h ++++ b/api/src/glfs-handles.h +@@ -250,6 +250,11 @@ int + glfs_h_access(glfs_t *fs, glfs_object_t *object, int mask) __THROW + GFAPI_PUBLIC(glfs_h_access, 3.6.0); + ++struct glfs_object * ++glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, const char *path, ++ int flags, mode_t mode, struct stat *stat, ++ struct glfs_fd **out_fd) __THROW ++ GFAPI_PUBLIC(glfs_h_creat_open, 6.6); + /* + SYNOPSIS + +diff --git a/tests/basic/gfapi/glfs_h_creat_open.c b/tests/basic/gfapi/glfs_h_creat_open.c +new file mode 100644 +index 0000000..7672561 +--- /dev/null ++++ b/tests/basic/gfapi/glfs_h_creat_open.c +@@ -0,0 +1,118 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define LOG_ERR(func, ret) \ ++ do { \ ++ if (ret != 0) { \ ++ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \ ++ ret, errno); \ ++ exit(1); \ ++ } else { \ ++ fprintf(stderr, "%s : returned %d\n", func, ret); \ ++ } \ ++ } while (0) ++#define LOG_IF_NO_ERR(func, ret) \ ++ do { \ ++ if (ret == 0) { \ ++ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \ ++ exit(1); \ ++ } else { \ ++ fprintf(stderr, "%s : returned %d\n", func, ret); \ ++ } \ ++ } while (0) ++int ++main(int argc, char *argv[]) ++{ ++ glfs_t *fs = NULL; ++ int ret = 0; ++ struct glfs_object *root = NULL, *leaf = NULL; ++ glfs_fd_t *fd = NULL; ++ char *filename = "/ro-file"; ++ struct stat sb = { ++ 0, ++ }; ++ char *logfile = NULL; ++ char *volname = NULL; ++ char *hostname = NULL; ++ char buf[32] = "abcdefghijklmnopqrstuvwxyz012345"; ++ ++ fprintf(stderr, "Starting glfs_h_creat_open\n"); ++ ++ if (argc != 4) { ++ fprintf(stderr, "Invalid argument\n"); ++ exit(1); ++ } ++ ++ hostname = argv[1]; ++ volname = argv[2]; ++ logfile = argv[3]; ++ ++ fs = glfs_new(volname); ++ if (!fs) { ++ fprintf(stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); ++ LOG_ERR("glfs_set_volfile_server", ret); ++ ++ ret = glfs_set_logging(fs, logfile, 7); ++ LOG_ERR("glfs_set_logging", ret); ++ ++ ret = glfs_init(fs); ++ LOG_ERR("glfs_init", ret); ++ ++ sleep(2); ++ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0); ++ if (!root) { ++ ret = -1; ++ LOG_ERR("glfs_h_lookupat root", ret); ++ } ++ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0); ++ if (!leaf) { ++ ret = -1; ++ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret); ++ } ++ ++ leaf = glfs_h_creat_open(fs, root, filename, O_RDONLY, 00444, &sb, &fd); ++ if (!leaf || !fd) { ++ ret = -1; ++ LOG_ERR("glfs_h_creat leaf", ret); ++ } ++ fprintf(stderr, "glfs_h_create_open leaf - %p\n", leaf); ++ ++ ret = 
glfs_write(fd, buf, 32, 0); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_write: error writing to file %s, %s\n", filename, ++ strerror(errno)); ++ goto out; ++ } ++ ++ ret = glfs_h_getattrs(fs, leaf, &sb); ++ LOG_ERR("glfs_h_getattrs", ret); ++ ++ if (sb.st_size != 32) { ++ fprintf(stderr, "glfs_write: post size mismatch\n"); ++ goto out; ++ } ++ ++ fprintf(stderr, "Successfully opened and written to a read-only file \n"); ++out: ++ if (fd) ++ glfs_close(fd); ++ ++ ret = glfs_fini(fs); ++ LOG_ERR("glfs_fini", ret); ++ ++ fprintf(stderr, "End of libgfapi_fini\n"); ++ ++ exit(0); ++} +diff --git a/tests/basic/gfapi/glfs_h_creat_open.t b/tests/basic/gfapi/glfs_h_creat_open.t +new file mode 100755 +index 0000000..f24ae73 +--- /dev/null ++++ b/tests/basic/gfapi/glfs_h_creat_open.t +@@ -0,0 +1,27 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/brick1; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/glfs_h_creat_open.c -lgfapi ++ ++TEST ./$(dirname $0)/glfs_h_creat_open $H0 $V0 $logdir/glfs.log ++ ++cleanup_tester $(dirname $0)/glfs_h_creat_open ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch b/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch new file mode 100644 index 0000000..00d29b9 --- /dev/null +++ b/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch @@ -0,0 +1,41 @@ +From 818025e467ea98b32a855c92ba6aef6e172e029f Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha +Date: Fri, 8 Jan 2021 13:12:46 +0530 +Subject: [PATCH 518/526] glusterd: Fix for shared storage in ipv6 env + +Issue: +Mounting shared storage volume was failing in ipv6 env if the hostnames were FQDNs. +The brickname for the volume was being cut off, as a result, volume creation was failing. 
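+
+For illustration only (not part of the original fix), here is how the
+one-character change behaves on a hypothetical peer-status line carrying
+an IPv6 address: the colon-delimited cut truncates the value, while the
+space-delimited cut keeps it intact:
+
+    $ line="Hostname: fd00::10"
+    $ echo "$line" | cut -d ':' -f 2 | xargs   # old code: prints "fd00"
+    $ echo "$line" | cut -d ' ' -f 2 | xargs   # fixed code: prints "fd00::10"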
+
+>Change-Id: Ib38993724c709b35b603f9ac666630c50c932c3e
+>Fixes: #1406
+>Signed-off-by: nik-redhat
+Upstream patch: https://github.com/gluster/glusterfs/pull/1972
+
+BUG: 1856574
+
+Change-Id: Ib38993724c709b35b603f9ac666630c50c932c3e
+Signed-off-by: nik-redhat
+Reviewed-on: https://code.engineering.redhat.com/gerrit/223248
+Tested-by: RHGS Build Bot
+Reviewed-by: Ravishankar Narayanankutty
+---
+ extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+index 9597503..e9261af 100755
+--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+@@ -46,7 +46,7 @@ do
+ 
+     key=`echo $line | cut -d ':' -f 1`
+     if [ "$key" == "Hostname" ]; then
+-        hostname=`echo $line | cut -d ':' -f 2 | xargs`
++        hostname=`echo $line | cut -d ' ' -f 2 | xargs`
+     fi
+ 
+     if [ "$key" == "State" ]; then
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch b/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch
new file mode 100644
index 0000000..f37acfd
--- /dev/null
+++ b/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch
@@ -0,0 +1,58 @@
+From 6ed227367b6eb7d6d7afde3859ad0a711a3adf36 Mon Sep 17 00:00:00 2001
+From: Leela Venkaiah G
+Date: Wed, 13 Jan 2021 16:02:25 +0530
+Subject: [PATCH 519/526] glusterfs-events: Fix incorrect attribute access
+ (#2002)
+
+Issue: When GlusterCmdException is raised, the current code tries to
+access a message attribute which doesn't exist, resulting in a
+malformed error string on failed operations
+
+Code Change: Replace `message` with `args[0]`
+
+>Fixes: #2001
+>Change-Id: I65c9f0ee79310937a384025b8d454acda154e4bb
+>Signed-off-by: Leela Venkaiah G
+Upstream patch: https://github.com/gluster/glusterfs/pull/2002
+
+BUG: 1600459
+Change-Id: I65c9f0ee79310937a384025b8d454acda154e4bb
+Signed-off-by: srijan-sivakumar
+Reviewed-on: https://code.engineering.redhat.com/gerrit/223584
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ events/src/peer_eventsapi.py | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py
+index 26b77a0..c388da4 100644
+--- a/events/src/peer_eventsapi.py
++++ b/events/src/peer_eventsapi.py
+@@ -174,9 +174,9 @@ def sync_to_peers(args):
+             sync_file_to_peers(WEBHOOKS_FILE_TO_SYNC)
+         except GlusterCmdException as e:
+             # Print stdout if stderr is empty
+-            errmsg = e.message[2] if e.message[2] else e.message[1]
++            errmsg = e.args[0][2] if e.args[0][2] else e.args[0][1]
+             handle_output_error("Failed to sync Webhooks file: [Error: {0}]"
+-                                "{1}".format(e.message[0], errmsg),
++                                "{1}".format(e.args[0][0], errmsg),
+                                 errcode=ERROR_WEBHOOK_SYNC_FAILED,
+                                 json_output=args.json)
+ 
+@@ -185,9 +185,9 @@ def sync_to_peers(args):
+             sync_file_to_peers(CUSTOM_CONFIG_FILE_TO_SYNC)
+         except GlusterCmdException as e:
+             # Print stdout if stderr is empty
+-            errmsg = e.message[2] if e.message[2] else e.message[1]
++            errmsg = e.args[0][2] if e.args[0][2] else e.args[0][1]
+             handle_output_error("Failed to sync Config file: [Error: {0}]"
+-                                "{1}".format(e.message[0], errmsg),
++                                "{1}".format(e.args[0][0], errmsg),
+                                 errcode=ERROR_CONFIG_SYNC_FAILED,
+                                 json_output=args.json)
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch b/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch
new file mode 100644
index 0000000..c46a9ca
--- /dev/null
+++ b/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch
@@ -0,0 +1,70 @@
+From a3fd2c9d85bbd23131c985599d9c9d74f66f32d2 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K
+Date: Thu, 10 Oct 2019 10:50:59 +0530
+Subject: [PATCH 520/526] performance/open-behind: seek fop should
+ open_and_resume
+
+Upstream patch:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/23530
+> fixes: bz#1760187
+> Change-Id: I4c6ad13194d4fc5c7705e35bf9a27fce504b51f9
+> Signed-off-by: Pranith Kumar K
+
+BUG: 1830713
+Change-Id: I4c6ad13194d4fc5c7705e35bf9a27fce504b51f9
+Signed-off-by: Pranith Kumar K
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224484
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/performance/open-behind/src/open-behind.c | 27 +++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index 268c717..3ee3c40 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -711,6 +711,32 @@ err:
+ }
+ 
+ int
++ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
++        gf_seek_what_t what, dict_t *xdata)
++{
++    call_stub_t *stub = NULL;
++    fd_t *wind_fd = NULL;
++
++    wind_fd = ob_get_wind_fd(this, fd, NULL);
++
++    stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what,
++                         xdata);
++
++    fd_unref(wind_fd);
++
++    if (!stub)
++        goto err;
++
++    open_and_resume(this, wind_fd, stub);
++
++    return 0;
++err:
++    STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0);
++
++    return 0;
++}
++
++int
+ ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+ {
+     call_stub_t *stub = NULL;
+@@ -1276,6 +1302,7 @@ struct xlator_fops fops = {
+     .flush = ob_flush,
+     .fsync = ob_fsync,
+     .fstat = ob_fstat,
++    .seek = ob_seek,
+     .ftruncate = ob_ftruncate,
+     .fsetxattr = ob_fsetxattr,
+     .setxattr = ob_setxattr,
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0521-open-behind-fix-missing-fd-reference.patch b/SOURCES/0521-open-behind-fix-missing-fd-reference.patch
new file mode 100644
index 0000000..8e18af8
--- /dev/null
+++ b/SOURCES/0521-open-behind-fix-missing-fd-reference.patch
@@ -0,0 +1,121 @@
+From 211d0f7dbb4991b2191925973222ebc79f010e84 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez
+Date: Sun, 8 Mar 2020 18:36:45 +0100
+Subject: [PATCH 521/526] open-behind: fix missing fd reference
+
+Open behind was not keeping any reference on fd's pending to be
+opened. This makes it possible that a concurrent close and an entry
+fop (unlink, rename, ...) caused destruction of the fd while it
+was still being used.
+ +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24204 +> Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9 +> Fixes: bz#1810934 +> Signed-off-by: Xavi Hernandez + +Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9 +BUG: 1830713 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/224485 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 27 ++++++++++++++--------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 3ee3c40..dd2f2fd 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -206,8 +206,13 @@ ob_fd_free(ob_fd_t *ob_fd) + if (ob_fd->xdata) + dict_unref(ob_fd->xdata); + +- if (ob_fd->open_frame) ++ if (ob_fd->open_frame) { ++ /* If we sill have a frame it means that background open has never ++ * been triggered. We need to release the pending reference. */ ++ fd_unref(ob_fd->fd); ++ + STACK_DESTROY(ob_fd->open_frame->root); ++ } + + GF_FREE(ob_fd); + } +@@ -297,6 +302,7 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + call_resume(stub); + } + ++ /* The background open is completed. We can release the 'fd' reference. */ + fd_unref(fd); + + STACK_DESTROY(frame->root); +@@ -331,7 +337,9 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) + } + + if (frame) { +- frame->local = fd_ref(fd); ++ /* We don't need to take a reference here. We already have a reference ++ * while the open is pending. */ ++ frame->local = fd; + + STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +@@ -345,15 +353,12 @@ void + ob_inode_wake(xlator_t *this, struct list_head *ob_fds) + { + ob_fd_t *ob_fd = NULL, *tmp = NULL; +- fd_t *fd = NULL; + + if (!list_empty(ob_fds)) { + list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) + { + ob_fd_wake(this, ob_fd->fd, ob_fd); +- fd = ob_fd->fd; + ob_fd_free(ob_fd); +- fd_unref(fd); + } + } + } +@@ -365,7 +370,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) + if (!src || !dst) + goto out; + +- dst->fd = __fd_ref(src->fd); ++ dst->fd = src->fd; + dst->loc.inode = inode_ref(src->loc.inode); + gf_uuid_copy(dst->loc.gfid, src->loc.gfid); + dst->flags = src->flags; +@@ -509,7 +514,6 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + + ob_fd->ob_inode = ob_inode; + +- /* don't do fd_ref, it'll cause leaks */ + ob_fd->fd = fd; + + ob_fd->open_frame = copy_frame(frame); +@@ -539,15 +543,16 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + } + UNLOCK(&fd->inode->lock); + +- if (!open_in_progress && !unlinked) { +- fd_ref(fd); ++ /* We take a reference while the background open is pending or being ++ * processed. If we finally wind the request in the foreground, then ++ * ob_fd_free() will take care of this additional reference. 
*/ ++ fd_ref(fd); + ++ if (!open_in_progress && !unlinked) { + STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake(this, fd, NULL); +- +- fd_unref(fd); + } else { + ob_fd_free(ob_fd); + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +-- +1.8.3.1 + diff --git a/SOURCES/0522-lcov-improve-line-coverage.patch b/SOURCES/0522-lcov-improve-line-coverage.patch new file mode 100644 index 0000000..13ece12 --- /dev/null +++ b/SOURCES/0522-lcov-improve-line-coverage.patch @@ -0,0 +1,746 @@ +From 46e2bbd52d4427c1348fa38dcb5d2b5f125555f1 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Thu, 30 May 2019 15:25:01 +0530 +Subject: [PATCH 522/526] lcov: improve line coverage + +upcall: remove extra variable assignment and use just one + initialization. +open-behind: reduce the overall number of lines, in functions + not frequently called +selinux: reduce some lines in init failure cases + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/22789 +> updates: bz#1693692 +> Change-Id: I7c1de94f2ec76a5bfe1f48a9632879b18e5fbb95 +> Signed-off-by: Amar Tumballi + +BUG: 1830713 +Change-Id: I7c1de94f2ec76a5bfe1f48a9632879b18e5fbb95 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/224486 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/selinux/src/selinux.c | 6 +- + xlators/features/upcall/src/upcall.c | 108 +++++++--------------- + xlators/performance/open-behind/src/open-behind.c | 58 ++++-------- + 3 files changed, 55 insertions(+), 117 deletions(-) + +diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c +index 58b4c5d..e8e16cd 100644 +--- a/xlators/features/selinux/src/selinux.c ++++ b/xlators/features/selinux/src/selinux.c +@@ -234,7 +234,6 @@ init(xlator_t *this) + priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t); + if (!priv) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); +- ret = ENOMEM; + goto out; + } + +@@ -242,7 +241,6 @@ init(xlator_t *this) + + this->local_pool = mem_pool_new(selinux_priv_t, 64); + if (!this->local_pool) { +- ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM, + "Failed to create local_t's memory pool"); + goto out; +@@ -252,9 +250,7 @@ init(xlator_t *this) + ret = 0; + out: + if (ret) { +- if (priv) { +- GF_FREE(priv); +- } ++ GF_FREE(priv); + mem_pool_destroy(this->local_pool); + } + return ret; +diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c +index 2583c50..0795f58 100644 +--- a/xlators/features/upcall/src/upcall.c ++++ b/xlators/features/upcall/src/upcall.c +@@ -57,14 +57,13 @@ static int32_t + up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -111,14 +110,13 @@ up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -167,14 +165,13 @@ 
static int32_t + up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -220,14 +217,13 @@ static int32_t + up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -274,14 +270,13 @@ static int32_t + up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -343,14 +338,13 @@ static int32_t + up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -410,14 +404,13 @@ static int32_t + up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -472,14 +465,13 @@ static int32_t + up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -531,14 +523,13 @@ static int32_t + up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -592,14 +583,13 @@ static int32_t + up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -653,14 +643,13 @@ static int32_t + up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *params) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ 
-717,15 +706,13 @@ static int32_t + up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); +- + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -774,14 +761,13 @@ out: + static int32_t + up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -826,14 +812,13 @@ out: + static int32_t + up_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -852,14 +837,13 @@ err: + static int32_t + up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -879,14 +863,13 @@ static int32_t + up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -932,14 +915,13 @@ static int32_t + up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -986,14 +968,13 @@ static int32_t + up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1047,14 +1028,13 @@ static int32_t + up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1110,14 +1090,13 @@ static int32_t + up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ 
-1164,14 +1143,13 @@ static int32_t + up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1216,14 +1194,13 @@ out: + static int32_t + up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1270,14 +1247,13 @@ static int32_t + up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1334,14 +1310,13 @@ static int32_t + up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1361,14 +1336,13 @@ static int32_t + up_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1415,14 +1389,13 @@ static int32_t + up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1470,14 +1443,13 @@ static int32_t + up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1524,14 +1496,13 @@ static int + up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1577,14 +1548,13 @@ static int32_t + up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = 
ENOMEM; + goto err; + } + +@@ -1652,14 +1622,13 @@ static int32_t + up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1727,14 +1696,13 @@ static int32_t + up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, fd, fd->inode, dict); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1800,7 +1768,7 @@ static int32_t + up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + dict_t *xattr = NULL; + +@@ -1808,13 +1776,11 @@ up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + + xattr = dict_for_key_value(name, "", 1, _gf_true); + if (!xattr) { +- op_errno = ENOMEM; + goto err; + } + + local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1885,7 +1851,7 @@ static int32_t + up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + dict_t *xattr = NULL; + +@@ -1893,13 +1859,11 @@ up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + + xattr = dict_for_key_value(name, "", 1, _gf_true); + if (!xattr) { +- op_errno = ENOMEM; + goto err; + } + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1950,14 +1914,13 @@ static int32_t + up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -2000,14 +1963,13 @@ static int32_t + up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index dd2f2fd..cbe89ec 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -581,7 +581,7 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + { + fd_t *old_fd = NULL; + int ret = -1; +- int op_errno = 0; ++ int op_errno = ENOMEM; + call_stub_t *stub = NULL; + + old_fd = fd_lookup(fd->inode, 0); +@@ -589,7 +589,6 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + /* open-behind only when this is the first FD */ + stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); + if 
(!stub) { +- op_errno = ENOMEM; + fd_unref(old_fd); + goto err; + } +@@ -603,7 +602,6 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + + ret = ob_open_behind(frame, this, loc, flags, fd, xdata); + if (ret) { +- op_errno = ENOMEM; + goto err; + } + +@@ -900,18 +898,12 @@ int + ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int cmd, struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_finodelk_stub(frame, default_finodelk_resume, volume, fd, cmd, +- flock, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); ++ call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, ++ volume, fd, cmd, flock, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); + + return 0; + } +@@ -921,18 +913,12 @@ ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fentrylk_stub(frame, default_fentrylk_resume, volume, fd, +- basename, cmd, type, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); ++ call_stub_t *stub = fop_fentrylk_stub( ++ frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); + + return 0; + } +@@ -941,18 +927,12 @@ int + ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, optype, xattr, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); ++ call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, ++ optype, xattr, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); + + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch b/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch new file mode 100644 index 0000000..621d5ae --- /dev/null +++ b/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch @@ -0,0 +1,2720 @@ +From b924c8ca8a133fc9413c8ed1407e63f1658c7e79 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Tue, 12 May 2020 23:54:54 +0200 +Subject: [PATCH 523/526] open-behind: rewrite of internal logic + +There was a critical flaw in the previous implementation of open-behind. + +When an open is done in the background, it's necessary to take a +reference on the fd_t object because once we "fake" the open answer, +the fd could be destroyed. However as long as there's a reference, +the release function won't be called. So, if the application closes +the file descriptor without having actually opened it, there will +always remain at least 1 reference, causing a leak. + +To avoid this problem, the previous implementation didn't take a +reference on the fd_t, so there were races where the fd could be +destroyed while it was still in use. 
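+
+To illustrate the leak (a minimal standalone sketch, not part of this
+patch; fake_fd_t and its helpers are invented stand-ins for fd_t and
+its reference counting), note that a release callback driven by a
+reference counter only runs when the last reference is dropped, so the
+reference held for a never-triggered background open keeps the fd
+alive after the application has already closed it:
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    typedef struct {
+        int refs; /* stand-in for fd_t's reference counter */
+    } fake_fd_t;
+
+    static void fake_release(fake_fd_t *fd)
+    {
+        /* in GlusterFS this is where the release cbk would run */
+        printf("released\n");
+        free(fd);
+    }
+
+    static void fake_unref(fake_fd_t *fd)
+    {
+        if (--fd->refs == 0)
+            fake_release(fd); /* only called on the last unref */
+    }
+
+    int main(void)
+    {
+        fake_fd_t *fd = calloc(1, sizeof(*fd));
+        if (fd == NULL)
+            return 1;
+
+        fd->refs = 1;   /* the application's reference */
+        fd->refs++;     /* extra reference taken for the background open */
+
+        fake_unref(fd); /* application closes the file descriptor */
+
+        /* fake_release() never ran: the background-open reference is
+         * still held and, if the open never triggers, nothing will
+         * ever drop it. This is the leak described above. */
+        return 0;
+    }
+
+With the fdclose cbk introduced below, the xlator is notified at the
+moment the application closes the descriptor, so it can drop the
+pending-open state instead of leaking it.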
+ +To fix this, I've implemented a new xlator cbk that gets called from +fuse when the application closes a file descriptor. + +The whole logic of handling background opens has been simplified and +it's more efficient now. Only if the fop needs to be delayed until an +open completes, a stub is created. Otherwise no memory allocations are +needed. + +Correctly handling the close request while the open is still pending +has added a bit of complexity, but overall normal operation is simpler. + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24451 +> Change-Id: I6376a5491368e0e1c283cc452849032636261592 +> Fixes: #1225 +> Signed-off-by: Xavi Hernandez + +BUG: 1830713 +Change-Id: I6376a5491368e0e1c283cc452849032636261592 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/224487 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/fd.c | 26 + + libglusterfs/src/glusterfs/fd.h | 3 + + libglusterfs/src/glusterfs/xlator.h | 4 + + libglusterfs/src/libglusterfs.sym | 1 + + tests/basic/open-behind/open-behind.t | 183 +++ + tests/basic/open-behind/tester-fd.c | 99 ++ + tests/basic/open-behind/tester.c | 444 +++++++ + tests/basic/open-behind/tester.h | 145 +++ + tests/bugs/glusterfs/bug-873962-spb.t | 1 + + xlators/mount/fuse/src/fuse-bridge.c | 2 + + .../open-behind/src/open-behind-messages.h | 6 +- + xlators/performance/open-behind/src/open-behind.c | 1302 ++++++++------------ + 12 files changed, 1393 insertions(+), 823 deletions(-) + create mode 100644 tests/basic/open-behind/open-behind.t + create mode 100644 tests/basic/open-behind/tester-fd.c + create mode 100644 tests/basic/open-behind/tester.c + create mode 100644 tests/basic/open-behind/tester.h + +diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c +index 314546a..e4ec401 100644 +--- a/libglusterfs/src/fd.c ++++ b/libglusterfs/src/fd.c +@@ -501,6 +501,32 @@ out: + } + + void ++fd_close(fd_t *fd) ++{ ++ xlator_t *xl, *old_THIS; ++ ++ old_THIS = THIS; ++ ++ for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) { ++ if (!xl->call_cleanup) { ++ THIS = xl; ++ ++ if (IA_ISDIR(fd->inode->ia_type)) { ++ if (xl->cbks->fdclosedir != NULL) { ++ xl->cbks->fdclosedir(xl, fd); ++ } ++ } else { ++ if (xl->cbks->fdclose != NULL) { ++ xl->cbks->fdclose(xl, fd); ++ } ++ } ++ } ++ } ++ ++ THIS = old_THIS; ++} ++ ++void + fd_unref(fd_t *fd) + { + int32_t refcount = 0; +diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h +index cdbe289..4d157c4 100644 +--- a/libglusterfs/src/glusterfs/fd.h ++++ b/libglusterfs/src/glusterfs/fd.h +@@ -107,6 +107,9 @@ fd_ref(fd_t *fd); + void + fd_unref(fd_t *fd); + ++void ++fd_close(fd_t *fd); ++ + fd_t * + fd_create(struct _inode *inode, pid_t pid); + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 8650ccc..273039a 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -705,6 +705,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode); + + typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd); + ++typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd); ++ + struct xlator_cbks { + cbk_forget_t forget; + cbk_release_t release; +@@ -715,6 +717,8 @@ struct xlator_cbks { + cbk_ictxmerge_t ictxmerge; + cbk_inodectx_size_t ictxsize; + cbk_fdctx_size_t fdctxsize; ++ cbk_fdclose_t fdclose; ++ cbk_fdclose_t fdclosedir; + }; + + typedef int32_t 
(*dumpop_priv_t)(xlator_t *this); +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index bc770e2..0a0862e 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -456,6 +456,7 @@ event_unregister_close + fd_anonymous + fd_anonymous_with_flags + fd_bind ++fd_close + fd_create + fd_create_uint64 + __fd_ctx_del +diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t +new file mode 100644 +index 0000000..5e865d6 +--- /dev/null ++++ b/tests/basic/open-behind/open-behind.t +@@ -0,0 +1,183 @@ ++#!/bin/bash ++ ++WD="$(dirname "${0}")" ++ ++. ${WD}/../../include.rc ++. ${WD}/../../volume.rc ++ ++function assign() { ++ local _assign_var="${1}" ++ local _assign_value="${2}" ++ ++ printf -v "${_assign_var}" "%s" "${_assign_value}" ++} ++ ++function pipe_create() { ++ local _pipe_create_var="${1}" ++ local _pipe_create_name ++ local _pipe_create_fd ++ ++ _pipe_create_name="$(mktemp -u)" ++ mkfifo "${_pipe_create_name}" ++ exec {_pipe_create_fd}<>"${_pipe_create_name}" ++ rm "${_pipe_create_name}" ++ ++ assign "${_pipe_create_var}" "${_pipe_create_fd}" ++} ++ ++function pipe_close() { ++ local _pipe_close_fd="${!1}" ++ ++ exec {_pipe_close_fd}>&- ++} ++ ++function tester_start() { ++ declare -ag tester ++ local tester_in ++ local tester_out ++ ++ pipe_create tester_in ++ pipe_create tester_out ++ ++ ${WD}/tester <&${tester_in} >&${tester_out} & ++ ++ tester=("$!" "${tester_in}" "${tester_out}") ++} ++ ++function tester_send() { ++ declare -ag tester ++ local tester_res ++ local tester_extra ++ ++ echo "${*}" >&${tester[1]} ++ ++ read -t 3 -u ${tester[2]} tester_res tester_extra ++ echo "${tester_res} ${tester_extra}" ++ if [[ "${tester_res}" == "OK" ]]; then ++ return 0 ++ fi ++ ++ return 1 ++} ++ ++function tester_stop() { ++ declare -ag tester ++ local tester_res ++ ++ tester_send "quit" ++ ++ tester_res=0 ++ if ! wait ${tester[0]}; then ++ tester_res=$? 
++ fi ++ ++ unset tester ++ ++ return ${tester_res} ++} ++ ++function count_open() { ++ local file="$(realpath "${B0}/${V0}/${1}")" ++ local count="0" ++ local inode ++ local ref ++ ++ inode="$(stat -c %i "${file}")" ++ ++ for fd in /proc/${BRICK_PID}/fd/*; do ++ ref="$(readlink "${fd}")" ++ if [[ "${ref}" == "${B0}/${V0}/"* ]]; then ++ if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then ++ count="$((${count} + 1))" ++ fi ++ fi ++ done ++ ++ echo "${count}" ++} ++ ++cleanup ++ ++TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c ++ ++TEST glusterd ++TEST pidof glusterd ++TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0} ++TEST ${CLI} volume set ${V0} flush-behind off ++TEST ${CLI} volume set ${V0} write-behind off ++TEST ${CLI} volume set ${V0} quick-read off ++TEST ${CLI} volume set ${V0} stat-prefetch on ++TEST ${CLI} volume set ${V0} io-cache off ++TEST ${CLI} volume set ${V0} open-behind on ++TEST ${CLI} volume set ${V0} lazy-open off ++TEST ${CLI} volume set ${V0} read-after-open off ++TEST ${CLI} volume start ${V0} ++ ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})" ++ ++TEST touch "${M0}/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_start ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT_WITHIN 5 "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${CLI} volume set ${V0} lazy-open on ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++sleep 2 ++EXPECT "0" count_open "/test" ++TEST tester_send fd write 0 "test" ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++EXPECT "test" tester_send fd read 0 64 ++# Even though read-after-open is disabled, use-anonymous-fd is also disabled, ++# so reads need to open the file first. ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++TEST tester_send fd open 1 "${M0}/test" ++EXPECT "2" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "1" count_open "/test" ++TEST tester_send fd close 1 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${CLI} volume set ${V0} read-after-open on ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++EXPECT "test" tester_send fd read 0 64 ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++TEST tester_stop ++ ++cleanup +diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c +new file mode 100644 +index 0000000..00f02bc +--- /dev/null ++++ b/tests/basic/open-behind/tester-fd.c +@@ -0,0 +1,99 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. ++ This file is part of GlusterFS. 
++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include "tester.h" ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <string.h> ++#include <errno.h> ++#include <unistd.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++ ++static int32_t ++fd_open(context_t *ctx, command_t *cmd) ++{ ++ obj_t *obj; ++ int32_t fd; ++ ++ obj = cmd->args[0].obj.ref; ++ ++ fd = open(cmd->args[1].str.data, O_RDWR); ++ if (fd < 0) { ++ return error(errno, "open() failed"); ++ } ++ ++ obj->type = OBJ_TYPE_FD; ++ obj->fd = fd; ++ ++ out_ok("%d", fd); ++ ++ return 0; ++} ++ ++static int32_t ++fd_close(context_t *ctx, command_t *cmd) ++{ ++ obj_t *obj; ++ ++ obj = cmd->args[0].obj.ref; ++ obj->type = OBJ_TYPE_NONE; ++ ++ if (close(obj->fd) != 0) { ++ return error(errno, "close() failed"); ++ } ++ ++ out_ok(); ++ ++ return 0; ++} ++ ++static int32_t ++fd_write(context_t *ctx, command_t *cmd) ++{ ++ ssize_t len, ret; ++ ++ len = strlen(cmd->args[1].str.data); ++ ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len); ++ if (ret < 0) { ++ return error(errno, "write() failed"); ++ } ++ ++ out_ok("%zd", ret); ++ ++ return 0; ++} ++ ++static int32_t ++fd_read(context_t *ctx, command_t *cmd) ++{ ++ char data[cmd->args[1].num.value + 1]; ++ ssize_t ret; ++ ++ ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value); ++ if (ret < 0) { ++ return error(errno, "read() failed"); ++ } ++ ++ data[ret] = 0; ++ ++ out_ok("%zd %s", ret, data); ++ ++ return 0; ++} ++ ++command_t fd_commands[] = { ++ {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))}, ++ {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))}, ++ {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))}, ++ {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))}, ++ CMD_END}; +diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c +new file mode 100644 +index 0000000..b2da71c +--- /dev/null ++++ b/tests/basic/open-behind/tester.c +@@ -0,0 +1,444 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. ++ This file is part of GlusterFS. 
++*/ ++ ++#include "tester.h" ++ ++#include <stdlib.h> ++#include <unistd.h> ++#include <ctype.h> ++#include <string.h> ++#include <errno.h> ++ ++static void * ++mem_alloc(size_t size) ++{ ++ void *ptr; ++ ++ ptr = malloc(size); ++ if (ptr == NULL) { ++ error(ENOMEM, "Failed to allocate memory (%zu bytes)", size); ++ } ++ ++ return ptr; ++} ++ ++static void ++mem_free(void *ptr) ++{ ++ free(ptr); ++} ++ ++static bool ++buffer_create(context_t *ctx, size_t size) ++{ ++ ctx->buffer.base = mem_alloc(size); ++ if (ctx->buffer.base == NULL) { ++ return false; ++ } ++ ++ ctx->buffer.size = size; ++ ctx->buffer.len = 0; ++ ctx->buffer.pos = 0; ++ ++ return true; ++} ++ ++static void ++buffer_destroy(context_t *ctx) ++{ ++ mem_free(ctx->buffer.base); ++ ctx->buffer.size = 0; ++ ctx->buffer.len = 0; ++} ++ ++static int32_t ++buffer_get(context_t *ctx) ++{ ++ ssize_t len; ++ ++ if (ctx->buffer.pos >= ctx->buffer.len) { ++ len = read(0, ctx->buffer.base, ctx->buffer.size); ++ if (len < 0) { ++ return error(errno, "read() failed"); ++ } ++ if (len == 0) { ++ return 0; ++ } ++ ++ ctx->buffer.len = len; ++ ctx->buffer.pos = 0; ++ } ++ ++ return ctx->buffer.base[ctx->buffer.pos++]; ++} ++ ++static int32_t ++str_skip_spaces(context_t *ctx, int32_t current) ++{ ++ while ((current > 0) && (current != '\n') && isspace(current)) { ++ current = buffer_get(ctx); ++ } ++ ++ return current; ++} ++ ++static int32_t ++str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current) ++{ ++ uint32_t len; ++ ++ current = str_skip_spaces(ctx, current); ++ ++ len = 0; ++ while ((size > 0) && (current > 0) && (current != '\n') && ++ !isspace(current)) { ++ len++; ++ *buffer++ = current; ++ size--; ++ current = buffer_get(ctx); ++ } ++ ++ if (len == 0) { ++ return error(ENODATA, "Expecting a token"); ++ } ++ ++ if (size == 0) { ++ return error(ENOBUFS, "Token too long"); ++ } ++ ++ *buffer = 0; ++ ++ return current; ++} ++ ++static int32_t ++str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value, ++ int32_t current) ++{ ++ char text[32], *ptr; ++ uint64_t num; ++ ++ current = str_token(ctx, text, sizeof(text), current); ++ if (current > 0) { ++ num = strtoul(text, &ptr, 0); ++ if ((*ptr != 0) || (num < min) || (num > max)) { ++ return error(ERANGE, "Invalid number"); ++ } ++ *value = num; ++ } ++ ++ return current; ++} ++ ++static int32_t ++str_eol(context_t *ctx, int32_t current) ++{ ++ current = str_skip_spaces(ctx, current); ++ if (current != '\n') { ++ return error(EINVAL, "Expecting end of command"); ++ } ++ ++ return current; ++} ++ ++static void ++str_skip(context_t *ctx, int32_t current) ++{ ++ while ((current > 0) && (current != '\n')) { ++ current = buffer_get(ctx); ++ } ++} ++ ++static int32_t ++cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ obj_t *obj; ++ uint64_t id; ++ ++ current = str_number(ctx, 0, ctx->obj_count, &id, current); ++ if (current <= 0) { ++ return current; ++ } ++ ++ obj = &ctx->objs[id]; ++ if (obj->type != arg->obj.type) { ++ if (obj->type != OBJ_TYPE_NONE) { ++ return error(EBUSY, "Object is in use"); ++ } ++ return error(ENOENT, "Object is not defined"); ++ } ++ ++ arg->obj.ref = obj; ++ ++ return current; ++} ++ ++static int32_t ++cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value, ++ current); ++} ++ ++static int32_t ++cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ return str_token(ctx, arg->str.data, arg->str.size, current); ++} ++ ++static int32_t ++cmd_parse_args(context_t *ctx, command_t *cmd, 
int32_t current) ++{ ++ arg_t *arg; ++ ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_OBJ: ++ current = cmd_parse_obj(ctx, arg, current); ++ break; ++ case ARG_TYPE_NUM: ++ current = cmd_parse_num(ctx, arg, current); ++ break; ++ case ARG_TYPE_STR: ++ current = cmd_parse_str(ctx, arg, current); ++ break; ++ default: ++ return error(EINVAL, "Unknown argument type"); ++ } ++ } ++ ++ if (current < 0) { ++ return current; ++ } ++ ++ current = str_eol(ctx, current); ++ if (current <= 0) { ++ return error(EINVAL, "Syntax error"); ++ } ++ ++ return cmd->handler(ctx, cmd); ++} ++ ++static int32_t ++cmd_parse(context_t *ctx, command_t *cmds) ++{ ++ char text[32]; ++ command_t *cmd; ++ int32_t current; ++ ++ cmd = cmds; ++ do { ++ current = str_token(ctx, text, sizeof(text), buffer_get(ctx)); ++ if (current <= 0) { ++ return current; ++ } ++ ++ while (cmd->name != NULL) { ++ if (strcmp(cmd->name, text) == 0) { ++ if (cmd->handler != NULL) { ++ return cmd_parse_args(ctx, cmd, current); ++ } ++ cmd = cmd->cmds; ++ break; ++ } ++ cmd++; ++ } ++ } while (cmd->name != NULL); ++ ++ str_skip(ctx, current); ++ ++ return error(ENOTSUP, "Unknown command"); ++} ++ ++static void ++cmd_fini(context_t *ctx, command_t *cmds) ++{ ++ command_t *cmd; ++ arg_t *arg; ++ ++ for (cmd = cmds; cmd->name != NULL; cmd++) { ++ if (cmd->handler == NULL) { ++ cmd_fini(ctx, cmd->cmds); ++ } else { ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_STR: ++ mem_free(arg->str.data); ++ arg->str.data = NULL; ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ } ++} ++ ++static bool ++cmd_init(context_t *ctx, command_t *cmds) ++{ ++ command_t *cmd; ++ arg_t *arg; ++ ++ for (cmd = cmds; cmd->name != NULL; cmd++) { ++ if (cmd->handler == NULL) { ++ if (!cmd_init(ctx, cmd->cmds)) { ++ return false; ++ } ++ } else { ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_STR: ++ arg->str.data = mem_alloc(arg->str.size); ++ if (arg->str.data == NULL) { ++ return false; ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ } ++ ++ return true; ++} ++ ++static bool ++objs_create(context_t *ctx, uint32_t count) ++{ ++ uint32_t i; ++ ++ ctx->objs = mem_alloc(sizeof(obj_t) * count); ++ if (ctx->objs == NULL) { ++ return false; ++ } ++ ctx->obj_count = count; ++ ++ for (i = 0; i < count; i++) { ++ ctx->objs[i].type = OBJ_TYPE_NONE; ++ } ++ ++ return true; ++} ++ ++static int32_t ++objs_destroy(context_t *ctx) ++{ ++ uint32_t i; ++ int32_t err; ++ ++ err = 0; ++ for (i = 0; i < ctx->obj_count; i++) { ++ if (ctx->objs[i].type != OBJ_TYPE_NONE) { ++ err = error(ENOTEMPTY, "Objects not destroyed"); ++ break; ++ } ++ } ++ ++ mem_free(ctx->objs); ++ ctx->objs = NULL; ++ ctx->obj_count = 0; ++ ++ return err; ++} ++ ++static context_t * ++init(size_t size, uint32_t objs, command_t *cmds) ++{ ++ context_t *ctx; ++ ++ ctx = mem_alloc(sizeof(context_t)); ++ if (ctx == NULL) { ++ goto failed; ++ } ++ ++ if (!buffer_create(ctx, size)) { ++ goto failed_ctx; ++ } ++ ++ if (!objs_create(ctx, objs)) { ++ goto failed_buffer; ++ } ++ ++ if (!cmd_init(ctx, cmds)) { ++ goto failed_objs; ++ } ++ ++ ctx->active = true; ++ ++ return ctx; ++ ++failed_objs: ++ cmd_fini(ctx, cmds); ++ objs_destroy(ctx); ++failed_buffer: ++ buffer_destroy(ctx); ++failed_ctx: ++ mem_free(ctx); ++failed: ++ return NULL; ++} ++ ++static int32_t ++fini(context_t *ctx, command_t *cmds) ++{ ++ int32_t ret; ++ ++ cmd_fini(ctx, cmds); ++ 
buffer_destroy(ctx); ++ ++ ret = objs_destroy(ctx); ++ ++ ctx->active = false; ++ ++ return ret; ++} ++ ++static int32_t ++exec_quit(context_t *ctx, command_t *cmd) ++{ ++ ctx->active = false; ++ ++ return 0; ++} ++ ++static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)}, ++ {"quit", exec_quit, CMD_ARGS()}, ++ CMD_END}; ++ ++int32_t ++main(int32_t argc, char *argv[]) ++{ ++ context_t *ctx; ++ int32_t res; ++ ++ ctx = init(1024, 16, commands); ++ if (ctx == NULL) { ++ return 1; ++ } ++ ++ do { ++ res = cmd_parse(ctx, commands); ++ if (res < 0) { ++ out_err(-res); ++ } ++ } while (ctx->active); ++ ++ res = fini(ctx, commands); ++ if (res >= 0) { ++ out_ok(); ++ return 0; ++ } ++ ++ out_err(-res); ++ ++ return 1; ++} +diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h +new file mode 100644 +index 0000000..64e940c +--- /dev/null ++++ b/tests/basic/open-behind/tester.h +@@ -0,0 +1,145 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __TESTER_H__ ++#define __TESTER_H__ ++ ++#include <stdio.h> ++#include <stdint.h> ++#include <stdbool.h> ++ ++enum _obj_type; ++typedef enum _obj_type obj_type_t; ++ ++enum _arg_type; ++typedef enum _arg_type arg_type_t; ++ ++struct _buffer; ++typedef struct _buffer buffer_t; ++ ++struct _obj; ++typedef struct _obj obj_t; ++ ++struct _context; ++typedef struct _context context_t; ++ ++struct _arg; ++typedef struct _arg arg_t; ++ ++struct _command; ++typedef struct _command command_t; ++ ++enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD }; ++ ++enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR }; ++ ++struct _buffer { ++ char *base; ++ uint32_t size; ++ uint32_t len; ++ uint32_t pos; ++}; ++ ++struct _obj { ++ obj_type_t type; ++ union { ++ int32_t fd; ++ }; ++}; ++ ++struct _context { ++ obj_t *objs; ++ buffer_t buffer; ++ uint32_t obj_count; ++ bool active; ++}; ++ ++struct _arg { ++ arg_type_t type; ++ union { ++ struct { ++ obj_type_t type; ++ obj_t *ref; ++ } obj; ++ struct { ++ uint64_t value; ++ uint64_t min; ++ uint64_t max; ++ } num; ++ struct { ++ uint32_t size; ++ char *data; ++ } str; ++ }; ++}; ++ ++struct _command { ++ const char *name; ++ int32_t (*handler)(context_t *ctx, command_t *cmd); ++ union { ++ arg_t *args; ++ command_t *cmds; ++ }; ++}; ++ ++#define msg(_stream, _fmt, _args...) \ ++ do { \ ++ fprintf(_stream, _fmt "\n", ##_args); \ ++ fflush(_stream); \ ++ } while (0) ++ ++#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args) ++#define msg_err(_err, _fmt, _args...) \ ++ ({ \ ++ int32_t __msg_err = (_err); \ ++ msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err, \ ++ ##_args); \ ++ -__msg_err; \ ++ }) ++ ++#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args) ++#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args) ++#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args) ++ ++#define out_ok(_args...) msg_out("OK " _args) ++#define out_err(_err) msg_out("ERR %d", _err) ++ ++#define ARG_END \ ++ { \ ++ ARG_TYPE_NONE \ ++ } ++ ++#define CMD_ARGS1(_x, _args...) \ ++ .args = (arg_t[]) { _args } ++#define CMD_ARGS(_args...) 
CMD_ARGS1(, ##_args, ARG_END) ++ ++#define CMD_SUB(_cmds) .cmds = _cmds ++ ++#define CMD_END \ ++ { \ ++ NULL, NULL, CMD_SUB(NULL) \ ++ } ++ ++#define ARG_VAL(_type) \ ++ { \ ++ ARG_TYPE_OBJ, .obj = {.type = _type } \ ++ } ++#define ARG_NUM(_min, _max) \ ++ { \ ++ ARG_TYPE_NUM, .num = {.min = _min, .max = _max } \ ++ } ++#define ARG_STR(_size) \ ++ { \ ++ ARG_TYPE_STR, .str = {.size = _size } \ ++ } ++ ++extern command_t fd_commands[]; ++ ++#endif /* __TESTER_H__ */ +\ No newline at end of file +diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t +index db84a22..db71cc0 100644 +--- a/tests/bugs/glusterfs/bug-873962-spb.t ++++ b/tests/bugs/glusterfs/bug-873962-spb.t +@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.io-cache off + TEST $CLI volume set $V0 performance.write-behind off + TEST $CLI volume set $V0 performance.stat-prefetch off + TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.open-behind off + TEST $CLI volume set $V0 cluster.background-self-heal-count 0 + TEST $CLI volume start $V0 + TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 919eea3..76b5809 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -3398,6 +3398,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg, + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd); + ++ fd_close(state->fd); ++ + fuse_fd_ctx_destroy(this, state->fd); + fd_unref(fd); + +diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h +index f250824..0e78917 100644 +--- a/xlators/performance/open-behind/src/open-behind-messages.h ++++ b/xlators/performance/open-behind/src/open-behind-messages.h +@@ -23,6 +23,10 @@ + */ + + GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED, +- OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY); ++ OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY, ++ OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE); ++ ++#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop" ++#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state" + + #endif /* _OPEN_BEHIND_MESSAGES_H_ */ +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index cbe89ec..e43fe73 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -16,6 +16,18 @@ + #include "open-behind-messages.h" + #include + ++/* Note: The initial design of open-behind was made to cover the simple case ++ * of open, read, close for small files. This pattern combined with ++ * quick-read can do the whole operation without a single request to the ++ * bricks (except the initial lookup). ++ * ++ * The way to do this has been improved, but the logic remains the same. ++ * Basically, this means that any operation sent to the fd or the inode ++ * that is not a read causes the open request to be sent to the ++ * bricks, and all future operations will be executed synchronously, ++ * including opens (it's reset once all fd's are closed). 
++ */ ++ + typedef struct ob_conf { + gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe + e.g - fstat() readv() +@@ -32,1096 +44,754 @@ typedef struct ob_conf { + */ + } ob_conf_t; + +-typedef struct ob_inode { +- inode_t *inode; +- struct list_head resume_fops; +- struct list_head ob_fds; +- int count; +- int op_ret; +- int op_errno; +- gf_boolean_t open_in_progress; +- int unlinked; +-} ob_inode_t; ++/* A negative state represents an errno value negated. In this case the ++ * current operation cannot be processed. */ ++typedef enum _ob_state { ++ /* There are no opens on the inode or the first open is already ++ * completed. The current operation can be sent directly. */ ++ OB_STATE_READY = 0, + +-typedef struct ob_fd { +- call_frame_t *open_frame; +- loc_t loc; +- dict_t *xdata; +- int flags; +- int op_errno; +- ob_inode_t *ob_inode; +- fd_t *fd; +- gf_boolean_t opened; +- gf_boolean_t ob_inode_fops_waiting; +- struct list_head list; +- struct list_head ob_fds_on_inode; +-} ob_fd_t; ++ /* There's an open pending and it has been triggered. The current ++ * operation should be "stubbified" and processed with ++ * ob_stub_dispatch(). */ ++ OB_STATE_OPEN_TRIGGERED, + +-ob_inode_t * +-ob_inode_alloc(inode_t *inode) +-{ +- ob_inode_t *ob_inode = NULL; ++ /* There's an open pending but it has not been triggered. The current ++ * operation can be processed directly but using an anonymous fd. */ ++ OB_STATE_OPEN_PENDING, + +- ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); +- if (ob_inode == NULL) +- goto out; ++ /* The current operation is the first open on the inode. */ ++ OB_STATE_FIRST_OPEN ++} ob_state_t; + +- ob_inode->inode = inode; +- INIT_LIST_HEAD(&ob_inode->resume_fops); +- INIT_LIST_HEAD(&ob_inode->ob_fds); +-out: +- return ob_inode; +-} +- +-void +-ob_inode_free(ob_inode_t *ob_inode) +-{ +- if (ob_inode == NULL) +- goto out; ++typedef struct ob_inode { ++ /* List of stubs pending on the first open. Once the first open is ++ * complete, all these stubs will be resubmitted, and dependencies ++ * will be checked again. */ ++ struct list_head resume_fops; + +- list_del_init(&ob_inode->resume_fops); +- list_del_init(&ob_inode->ob_fds); ++ /* The inode this object references. */ ++ inode_t *inode; + +- GF_FREE(ob_inode); +-out: +- return; +-} ++ /* The fd from the first open sent to this inode. It will be set ++ * from the moment the open is processed until the open is fully ++ * executed or closed before actually opened. It's NULL in all ++ * other cases. */ ++ fd_t *first_fd; ++ ++ /* The stub from the first open operation. When open fop starts ++ * being processed, it's assigned the OB_OPEN_PREPARING value ++ * until the actual stub is created. This is necessary to avoid ++ * creating the stub inside a locked region. Once the stub is ++ * successfully created, it's assigned here. This value is set ++ * to NULL once the stub is resumed. */ ++ call_stub_t *first_open; ++ ++ /* The total number of currently open fd's on this inode. */ ++ int32_t open_count; ++ ++ /* This flag is set as soon as we know that the open will be ++ * sent to the bricks, even before the stub is ready. */ ++ bool triggered; ++} ob_inode_t; + +-ob_inode_t * +-ob_inode_get(xlator_t *this, inode_t *inode) ++/* Dummy pointer used temporarily while the actual open stub is being created */ ++#define OB_OPEN_PREPARING ((call_stub_t *)-1) ++ ++#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) 
\ ++ case OB_STATE_FIRST_OPEN: \ ++ gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \ ++ "fop=%s", #_fop, "state=%d", __ob_state, NULL); \ ++ default_##_fop##_failure_cbk(_frame, EINVAL); \ ++ break; \ ++ case OB_STATE_READY: \ ++ default_##_fop(_frame, _xl, ##_args); \ ++ break; \ ++ case OB_STATE_OPEN_TRIGGERED: { \ ++ call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \ ++ ##_args); \ ++ if (__ob_stub != NULL) { \ ++ ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \ ++ break; \ ++ } \ ++ __ob_state = -ENOMEM; \ ++ } \ ++ default: \ ++ gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \ ++ OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \ ++ default_##_fop##_failure_cbk(_frame, -__ob_state) ++ ++#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_fd( \ ++ _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ if (!(_trigger)) { \ ++ fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \ ++ (_fd)->flags); \ ++ if (__ob_fd != NULL) { \ ++ default_##_fop(_frame, _xl, ##_args); \ ++ fd_unref(__ob_fd); \ ++ break; \ ++ } \ ++ __ob_state = -ENOMEM; \ ++ } \ ++ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_fd( \ ++ _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \ ++ break; \ ++ OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) 
\ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_inode( \ ++ _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++static ob_inode_t * ++ob_inode_get_locked(xlator_t *this, inode_t *inode) + { + ob_inode_t *ob_inode = NULL; + uint64_t value = 0; +- int ret = 0; + +- if (!inode) +- goto out; ++ if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) { ++ return (ob_inode_t *)(uintptr_t)value; ++ } + +- LOCK(&inode->lock); +- { +- __inode_ctx_get(inode, this, &value); +- if (value == 0) { +- ob_inode = ob_inode_alloc(inode); +- if (ob_inode == NULL) +- goto unlock; +- +- value = (uint64_t)(uintptr_t)ob_inode; +- ret = __inode_ctx_set(inode, this, &value); +- if (ret < 0) { +- ob_inode_free(ob_inode); +- ob_inode = NULL; +- } +- } else { +- ob_inode = (ob_inode_t *)(uintptr_t)value; ++ ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); ++ if (ob_inode != NULL) { ++ ob_inode->inode = inode; ++ INIT_LIST_HEAD(&ob_inode->resume_fops); ++ ++ value = (uint64_t)(uintptr_t)ob_inode; ++ if (__inode_ctx_set(inode, this, &value) < 0) { ++ GF_FREE(ob_inode); ++ ob_inode = NULL; + } + } +-unlock: +- UNLOCK(&inode->lock); + +-out: + return ob_inode; + } + +-ob_fd_t * +-__ob_fd_ctx_get(xlator_t *this, fd_t *fd) ++static ob_state_t ++ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd, ++ int32_t open_count, bool synchronous, bool trigger, ++ ob_inode_t **pob_inode, fd_t **pfd) + { +- uint64_t value = 0; +- int ret = -1; +- ob_fd_t *ob_fd = NULL; ++ ob_conf_t *conf; ++ ob_inode_t *ob_inode; ++ call_stub_t *open_stub; + +- ret = __fd_ctx_get(fd, this, &value); +- if (ret) +- return NULL; ++ if (inode == NULL) { ++ return OB_STATE_READY; ++ } + +- ob_fd = (void *)((long)value); ++ conf = xl->private; + +- return ob_fd; +-} ++ *pfd = NULL; + +-ob_fd_t * +-ob_fd_ctx_get(xlator_t *this, fd_t *fd) +-{ +- ob_fd_t *ob_fd = NULL; +- +- LOCK(&fd->lock); ++ LOCK(&inode->lock); + { +- ob_fd = __ob_fd_ctx_get(this, fd); +- } +- UNLOCK(&fd->lock); +- +- return ob_fd; +-} ++ ob_inode = ob_inode_get_locked(xl, inode); ++ if (ob_inode == NULL) { ++ UNLOCK(&inode->lock); + +-int +-__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +-{ +- uint64_t value = 0; +- int ret = -1; ++ return -ENOMEM; ++ } ++ *pob_inode = ob_inode; ++ ++ ob_inode->open_count += open_count; ++ ++ /* If first_fd is not NULL, it means that there's a previous open not ++ * yet completed. */ ++ if (ob_inode->first_fd != NULL) { ++ *pfd = ob_inode->first_fd; ++ /* If the current request doesn't trigger the open and it hasn't ++ * been triggered yet, we can continue without issuing the open ++ * only if the current request belongs to the same fd as the ++ * first one. */ ++ if (!trigger && !ob_inode->triggered && ++ (ob_inode->first_fd == fd)) { ++ UNLOCK(&inode->lock); ++ ++ return OB_STATE_OPEN_PENDING; ++ } + +- value = (long)((void *)ob_fd); ++ /* We need to issue the open. It could have already been triggered ++ * before. In this case open_stub will be NULL. Or the initial open ++ * may not be completely ready yet. In this case open_stub will be ++ * OB_OPEN_PREPARING. 
*/ ++ open_stub = ob_inode->first_open; ++ ob_inode->first_open = NULL; ++ ob_inode->triggered = true; + +- value = (long)((void *)ob_fd); ++ UNLOCK(&inode->lock); + +- ret = __fd_ctx_set(fd, this, value); ++ if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) { ++ call_resume(open_stub); ++ } + +- return ret; +-} ++ return OB_STATE_OPEN_TRIGGERED; ++ } + +-int +-ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +-{ +- int ret = -1; ++ /* There's no pending open. Only opens can be non-synchronous, so all ++ * regular fops will be processed directly. For non-synchronous opens, ++ * we'll still process them normally (i.e. synchronously) if there are ++ * more file descriptors open. */ ++ if (synchronous || (ob_inode->open_count > open_count)) { ++ UNLOCK(&inode->lock); + +- LOCK(&fd->lock); +- { +- ret = __ob_fd_ctx_set(this, fd, ob_fd); +- } +- UNLOCK(&fd->lock); ++ return OB_STATE_READY; ++ } + +- return ret; +-} ++ *pfd = fd; + +-ob_fd_t * +-ob_fd_new(void) +-{ +- ob_fd_t *ob_fd = NULL; ++ /* This is the first open. We keep a reference on the fd and set ++ * first_open stub to OB_OPEN_PREPARING until the actual stub can ++ * be assigned (we don't create the stub here to avoid doing memory ++ * allocations inside the mutex). */ ++ ob_inode->first_fd = __fd_ref(fd); ++ ob_inode->first_open = OB_OPEN_PREPARING; + +- ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t); ++ /* If lazy_open is not set, we'll need to immediately send the open, ++ * so we set triggered right now. */ ++ ob_inode->triggered = !conf->lazy_open; ++ } ++ UNLOCK(&inode->lock); + +- INIT_LIST_HEAD(&ob_fd->list); +- INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode); ++ return OB_STATE_FIRST_OPEN; + +- return ob_fd; + } + +-void +-ob_fd_free(ob_fd_t *ob_fd) ++static ob_state_t ++ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count, ++ bool synchronous, bool trigger, ob_inode_t **pob_inode, ++ fd_t **pfd) + { +- LOCK(&ob_fd->fd->inode->lock); +- { +- list_del_init(&ob_fd->ob_fds_on_inode); +- } +- UNLOCK(&ob_fd->fd->inode->lock); +- +- loc_wipe(&ob_fd->loc); +- +- if (ob_fd->xdata) +- dict_unref(ob_fd->xdata); ++ uint64_t err; + +- if (ob_fd->open_frame) { +- /* If we sill have a frame it means that background open has never +- * been triggered. We need to release the pending reference. */ +- fd_unref(ob_fd->fd); +- +- STACK_DESTROY(ob_fd->open_frame->root); ++ if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) { ++ return (ob_state_t)-err; + } + +- GF_FREE(ob_fd); ++ return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous, ++ trigger, pob_inode, pfd); + } + +-int +-ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, +- int op_errno, fd_t *fd_ret, dict_t *xdata) ++static ob_state_t ++ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode, ++ fd_t **pfd) + { +- fd_t *fd = NULL; +- int count = 0; +- int ob_inode_op_ret = 0; +- int ob_inode_op_errno = 0; +- ob_fd_t *ob_fd = NULL; +- call_stub_t *stub = NULL, *tmp = NULL; +- ob_inode_t *ob_inode = NULL; +- gf_boolean_t ob_inode_fops_waiting = _gf_false; +- struct list_head fops_waiting_on_fd, fops_waiting_on_inode; ++ bool synchronous; + +- fd = frame->local; +- frame->local = NULL; +- +- INIT_LIST_HEAD(&fops_waiting_on_fd); +- INIT_LIST_HEAD(&fops_waiting_on_inode); ++ /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't ++ * we also execute this open synchronously? 
*/ ++ synchronous = (flags & O_TRUNC) != 0; + +- ob_inode = ob_inode_get(this, fd->inode); ++ return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd); ++} + +- LOCK(&fd->lock); ++static int32_t ++ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, ++ call_stub_t *stub) ++{ ++ LOCK(&ob_inode->inode->lock); + { +- ob_fd = __ob_fd_ctx_get(this, fd); +- ob_fd->opened = _gf_true; +- +- ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting; +- +- list_splice_init(&ob_fd->list, &fops_waiting_on_fd); +- +- if (op_ret < 0) { +- /* mark fd BAD for ever */ +- ob_fd->op_errno = op_errno; +- ob_fd = NULL; /*shouldn't be freed*/ +- } else { +- __fd_ctx_del(fd, this, NULL); +- } +- } +- UNLOCK(&fd->lock); +- +- if (ob_inode_fops_waiting) { +- LOCK(&fd->inode->lock); +- { +- count = --ob_inode->count; +- if (op_ret < 0) { +- /* TODO: when to reset the error? */ +- ob_inode->op_ret = -1; +- ob_inode->op_errno = op_errno; +- } +- +- if (count == 0) { +- ob_inode->open_in_progress = _gf_false; +- ob_inode_op_ret = ob_inode->op_ret; +- ob_inode_op_errno = ob_inode->op_errno; +- list_splice_init(&ob_inode->resume_fops, +- &fops_waiting_on_inode); +- } ++ /* We only queue a stub if the open has not been completed or ++ * cancelled. */ ++ if (ob_inode->first_fd == fd) { ++ list_add_tail(&stub->list, &ob_inode->resume_fops); ++ stub = NULL; + } +- UNLOCK(&fd->inode->lock); +- } +- +- if (ob_fd) +- ob_fd_free(ob_fd); +- +- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list) +- { +- list_del_init(&stub->list); +- +- if (op_ret < 0) +- call_unwind_error(stub, -1, op_errno); +- else +- call_resume(stub); + } ++ UNLOCK(&ob_inode->inode->lock); + +- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list) +- { +- list_del_init(&stub->list); +- +- if (ob_inode_op_ret < 0) +- call_unwind_error(stub, -1, ob_inode_op_errno); +- else +- call_resume(stub); ++ if (stub != NULL) { ++ call_resume(stub); + } + +- /* The background open is completed. We can release the 'fd' reference. */ +- fd_unref(fd); +- +- STACK_DESTROY(frame->root); +- + return 0; + } + +-int +-ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) ++static int32_t ++ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, ++ call_stub_t *stub) + { +- call_frame_t *frame = NULL; +- +- if (ob_fd == NULL) { +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) +- goto unlock; ++ bool closed; + +- frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; +- } +- unlock: +- UNLOCK(&fd->lock); +- } else { +- LOCK(&fd->lock); +- { +- frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; ++ LOCK(&ob_inode->inode->lock); ++ { ++ closed = ob_inode->first_fd != fd; ++ if (!closed) { ++ if (ob_inode->triggered) { ++ ob_inode->first_open = NULL; ++ } else { ++ ob_inode->first_open = stub; ++ stub = NULL; ++ } + } +- UNLOCK(&fd->lock); + } ++ UNLOCK(&ob_inode->inode->lock); + +- if (frame) { +- /* We don't need to take a reference here. We already have a reference +- * while the open is pending. 
*/ +- frame->local = fd; +- +- STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +- ob_fd->xdata); ++ if (stub != NULL) { ++ if (closed) { ++ call_stub_destroy(stub); ++ fd_unref(fd); ++ } else { ++ call_resume(stub); ++ } + } + + return 0; + } + +-void +-ob_inode_wake(xlator_t *this, struct list_head *ob_fds) ++static void ++ob_resume_pending(struct list_head *list) + { +- ob_fd_t *ob_fd = NULL, *tmp = NULL; ++ call_stub_t *stub; + +- if (!list_empty(ob_fds)) { +- list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) +- { +- ob_fd_wake(this, ob_fd->fd, ob_fd); +- ob_fd_free(ob_fd); +- } +- } +-} ++ while (!list_empty(list)) { ++ stub = list_first_entry(list, call_stub_t, list); ++ list_del_init(&stub->list); + +-/* called holding inode->lock and fd->lock */ +-void +-ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) +-{ +- if (!src || !dst) +- goto out; +- +- dst->fd = src->fd; +- dst->loc.inode = inode_ref(src->loc.inode); +- gf_uuid_copy(dst->loc.gfid, src->loc.gfid); +- dst->flags = src->flags; +- dst->xdata = dict_ref(src->xdata); +- dst->ob_inode = src->ob_inode; +-out: +- return; ++ call_resume(stub); ++ } + } + +-int +-open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode, +- call_stub_t *stub) ++static void ++ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret, ++ int32_t op_errno) + { +- ob_inode_t *ob_inode = NULL; +- ob_fd_t *ob_fd = NULL, *tmp = NULL; +- gf_boolean_t was_open_in_progress = _gf_false; +- gf_boolean_t wait_for_open = _gf_false; +- struct list_head ob_fds; ++ struct list_head list; + +- ob_inode = ob_inode_get(this, inode); +- if (ob_inode == NULL) +- goto out; ++ INIT_LIST_HEAD(&list); + +- INIT_LIST_HEAD(&ob_fds); ++ if (op_ret < 0) { ++ fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno); ++ } + +- LOCK(&inode->lock); ++ LOCK(&ob_inode->inode->lock); + { +- was_open_in_progress = ob_inode->open_in_progress; +- ob_inode->unlinked = 1; +- +- if (was_open_in_progress) { +- list_add_tail(&stub->list, &ob_inode->resume_fops); +- goto inode_unlock; +- } +- +- list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode) +- { +- LOCK(&ob_fd->fd->lock); +- { +- if (ob_fd->opened) +- goto fd_unlock; +- +- ob_inode->count++; +- ob_fd->ob_inode_fops_waiting = _gf_true; +- +- if (ob_fd->open_frame == NULL) { +- /* open in progress no need of wake */ +- } else { +- tmp = ob_fd_new(); +- tmp->open_frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; +- +- ob_fd_copy(ob_fd, tmp); +- list_add_tail(&tmp->ob_fds_on_inode, &ob_fds); +- } +- } +- fd_unlock: +- UNLOCK(&ob_fd->fd->lock); +- } +- +- if (ob_inode->count) { +- wait_for_open = ob_inode->open_in_progress = _gf_true; +- list_add_tail(&stub->list, &ob_inode->resume_fops); ++ /* Only update the fields if the file has not been closed before ++ * getting here. 
*/ ++ if (ob_inode->first_fd == fd) { ++ list_splice_init(&ob_inode->resume_fops, &list); ++ ob_inode->first_fd = NULL; ++ ob_inode->first_open = NULL; ++ ob_inode->triggered = false; + } + } +-inode_unlock: +- UNLOCK(&inode->lock); ++ UNLOCK(&ob_inode->inode->lock); + +-out: +- if (!was_open_in_progress) { +- if (!wait_for_open) { +- call_resume(stub); +- } else { +- ob_inode_wake(this, &ob_fds); +- } +- } ++ ob_resume_pending(&list); + +- return 0; ++ fd_unref(fd); + } + +-int +-open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub) ++static int32_t ++ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, ++ int32_t op_errno, fd_t *fd, dict_t *xdata) + { +- ob_fd_t *ob_fd = NULL; +- int op_errno = 0; +- +- if (!fd) +- goto nofd; +- +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) +- goto unlock; ++ ob_inode_t *ob_inode; + +- if (ob_fd->op_errno) { +- op_errno = ob_fd->op_errno; +- goto unlock; +- } ++ ob_inode = frame->local; ++ frame->local = NULL; + +- list_add_tail(&stub->list, &ob_fd->list); +- } +-unlock: +- UNLOCK(&fd->lock); ++ ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno); + +-nofd: +- if (op_errno) +- call_unwind_error(stub, -1, op_errno); +- else if (ob_fd) +- ob_fd_wake(this, fd, NULL); +- else +- call_resume(stub); ++ STACK_DESTROY(frame->root); + + return 0; + } + +-int +-ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, ++static int32_t ++ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) + { +- ob_fd_t *ob_fd = NULL; +- int ret = -1; +- ob_conf_t *conf = NULL; +- ob_inode_t *ob_inode = NULL; +- gf_boolean_t open_in_progress = _gf_false; +- int unlinked = 0; +- +- conf = this->private; +- +- if (flags & O_TRUNC) { +- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- return 0; +- } +- +- ob_inode = ob_inode_get(this, fd->inode); +- +- ob_fd = ob_fd_new(); +- if (!ob_fd) +- goto enomem; +- +- ob_fd->ob_inode = ob_inode; +- +- ob_fd->fd = fd; +- +- ob_fd->open_frame = copy_frame(frame); +- if (!ob_fd->open_frame) +- goto enomem; +- ret = loc_copy(&ob_fd->loc, loc); +- if (ret) +- goto enomem; +- +- ob_fd->flags = flags; +- if (xdata) +- ob_fd->xdata = dict_ref(xdata); +- +- LOCK(&fd->inode->lock); +- { +- open_in_progress = ob_inode->open_in_progress; +- unlinked = ob_inode->unlinked; +- if (!open_in_progress && !unlinked) { +- ret = ob_fd_ctx_set(this, fd, ob_fd); +- if (ret) { +- UNLOCK(&fd->inode->lock); +- goto enomem; +- } +- +- list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds); +- } +- } +- UNLOCK(&fd->inode->lock); +- +- /* We take a reference while the background open is pending or being +- * processed. If we finally wind the request in the foreground, then +- * ob_fd_free() will take care of this additional reference. 
*/ +- fd_ref(fd); +- +- if (!open_in_progress && !unlinked) { +- STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); +- +- if (!conf->lazy_open) +- ob_fd_wake(this, fd, NULL); +- } else { +- ob_fd_free(ob_fd); +- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- } ++ STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + + return 0; +-enomem: +- if (ob_fd) { +- if (ob_fd->open_frame) +- STACK_DESTROY(ob_fd->open_frame->root); +- +- loc_wipe(&ob_fd->loc); +- if (ob_fd->xdata) +- dict_unref(ob_fd->xdata); +- +- GF_FREE(ob_fd); +- } +- +- return -1; + } + +-int ++static int32_t + ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) + { +- fd_t *old_fd = NULL; +- int ret = -1; +- int op_errno = ENOMEM; +- call_stub_t *stub = NULL; +- +- old_fd = fd_lookup(fd->inode, 0); +- if (old_fd) { +- /* open-behind only when this is the first FD */ +- stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); +- if (!stub) { +- fd_unref(old_fd); +- goto err; +- } +- +- open_and_resume(this, old_fd, stub); ++ ob_inode_t *ob_inode; ++ call_frame_t *open_frame; ++ call_stub_t *stub; ++ fd_t *first_fd; ++ ob_state_t state; ++ ++ state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd); ++ if (state == OB_STATE_READY) { ++ /* There's no pending open, but there are other file descriptors opened ++ * or the current flags require a synchronous open. */ ++ return default_open(frame, this, loc, flags, fd, xdata); ++ } + +- fd_unref(old_fd); ++ if (state == OB_STATE_OPEN_TRIGGERED) { ++ /* The first open is in progress (either because it was already issued ++ * or because this request triggered it). We try to create a new stub ++ * to retry the operation once the initial open completes. */ ++ stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata); ++ if (stub != NULL) { ++ return ob_stub_dispatch(this, ob_inode, first_fd, stub); ++ } + +- return 0; ++ state = -ENOMEM; + } + +- ret = ob_open_behind(frame, this, loc, flags, fd, xdata); +- if (ret) { +- goto err; +- } ++ if (state == OB_STATE_FIRST_OPEN) { ++ /* We try to create a stub for the new open. A new frame needs to be ++ * used because the current one may be destroyed soon after sending ++ * the open's reply. */ ++ open_frame = copy_frame(frame); ++ if (open_frame != NULL) { ++ stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd, ++ xdata); ++ if (stub != NULL) { ++ open_frame->local = ob_inode; + +- return 0; +-err: +- gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s", +- loc->path); ++ /* TODO: Previous version passed xdata back to the caller, but ++ * probably this doesn't make sense since it won't contain ++ * any requested data. I think it would be better to pass ++ * NULL for xdata. */ ++ default_open_cbk(frame, NULL, this, 0, 0, fd, xdata); + +- STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0); ++ return ob_open_dispatch(this, ob_inode, first_fd, stub); ++ } + +- return 0; +-} ++ STACK_DESTROY(open_frame->root); ++ } + +-fd_t * +-ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag) +-{ +- fd_t *wind_fd = NULL; +- ob_fd_t *ob_fd = NULL; +- ob_conf_t *conf = NULL; ++ /* In case of error, simulate a regular completion but with an error ++ * code. 
*/ ++ ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM); + +- conf = this->private; ++ state = -ENOMEM; ++ } + +- ob_fd = ob_fd_ctx_get(this, fd); ++ /* In case of failure we need to decrement the number of open files because ++ * ob_fdclose() won't be called. */ + +- if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) { +- wind_fd = fd_anonymous(fd->inode); +- if ((ob_fd->flags & O_DIRECT) && (flag)) +- *flag = *flag | O_DIRECT; +- } else { +- wind_fd = fd_ref(fd); ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode->open_count--; + } ++ UNLOCK(&fd->inode->lock); + +- return wind_fd; ++ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", ++ "open", "path=%s", loc->path, NULL); ++ ++ return default_open_failure_cbk(frame, -state); + } + +-int ++static int32_t + ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- ob_conf_t *conf = NULL; ++ ob_conf_t *conf = this->private; ++ bool trigger = conf->read_after_open || !conf->use_anonymous_fd; + +- conf = this->private; +- +- if (!conf->read_after_open) +- wind_fd = ob_get_wind_fd(this, fd, &flags); +- else +- wind_fd = fd_ref(fd); +- +- stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset, +- flags, xdata); +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); ++ OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata); + + return 0; + } + +-int ++static int32_t + ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset, +- flags, iobref, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags, ++ iobref, xdata); + + return 0; + } + +-int ++static int32_t + ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- +- wind_fd = ob_get_wind_fd(this, fd, NULL); +- +- stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata); ++ ob_conf_t *conf = this->private; ++ bool trigger = !conf->use_anonymous_fd; + +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata); + + return 0; + } + +-int ++static int32_t + ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- +- wind_fd = ob_get_wind_fd(this, fd, NULL); ++ ob_conf_t *conf = this->private; ++ bool trigger = !conf->use_anonymous_fd; + +- stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what, +- xdata); +- +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata); + + return 0; + } + +-int ++static int32_t + ob_flush(call_frame_t 
*frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- ob_fd_t *ob_fd = NULL; +- gf_boolean_t unwind = _gf_false; +- +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (ob_fd && ob_fd->open_frame) +- /* if open() was never wound to backend, +- no need to wind flush() either. +- */ +- unwind = _gf_true; +- } +- UNLOCK(&fd->lock); +- +- if (unwind) +- goto unwind; +- +- stub = fop_flush_stub(frame, default_flush_resume, fd, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0); +- +- return 0; +- +-unwind: +- STACK_UNWIND_STRICT(flush, frame, 0, 0, 0); ++ OB_POST_FLUSH(this, frame, fd, fd, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata); + + return 0; + } + +-int ++static int32_t + ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata); + + return 0; + } + +-int ++static int32_t + ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, + int flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata); + + return 0; + } + +-int ++static int32_t + ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata); + + return 0; + } + +-int ++static int32_t + ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fremovexattr, this, frame, 
fd, true, fd, name, xdata); + + return 0; + } + +-int ++static int32_t + ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int cmd, struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, +- volume, fd, cmd, flock, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); ++ OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata); + + return 0; + } + +-int ++static int32_t + ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) + { +- call_stub_t *stub = fop_fentrylk_stub( +- frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type, ++ xdata); + + return 0; + } + +-int ++static int32_t + ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { +- call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, +- optype, xattr, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt, + int valid, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata); + + return 0; + } + +-int ++static int32_t + ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset, +- len, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); ++ OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); +- return 0; + } + +-int ++static int32_t + ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); ++ OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); +- return 0; + } + +-int ++static int32_t + ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len, +- xdata); +- if (!stub) +- goto err; ++ OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata); + +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + +-int ++static int32_t + 
ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, loc->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata); + + return 0; + } + +-int ++static int32_t + ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, dst->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0); ++ OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata); + + return 0; + } + +-int32_t ++static int32_t + ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid, +- xdata); +- if (!stub) +- goto err; ++ OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid, ++ xdata); + +- open_all_pending_fds_and_resume(this, loc->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + +-int32_t ++static int32_t + ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- gf_boolean_t access_xattr = _gf_false; +- + if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) || + dict_get(dict, POSIX_ACL_ACCESS_XATTR) || +- dict_get(dict, GF_SELINUX_XATTR_KEY)) +- access_xattr = _gf_true; +- +- if (!access_xattr) ++ dict_get(dict, GF_SELINUX_XATTR_KEY)) { + return default_setxattr(frame, this, loc, dict, flags, xdata); ++ } + +- stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags, +- xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, loc->inode, stub); ++ OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags, ++ xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL); +- return 0; + } + +-int +-ob_release(xlator_t *this, fd_t *fd) ++static void ++ob_fdclose(xlator_t *this, fd_t *fd) + { +- ob_fd_t *ob_fd = NULL; ++ struct list_head list; ++ ob_inode_t *ob_inode; ++ call_stub_t *stub; ++ ++ INIT_LIST_HEAD(&list); ++ stub = NULL; + +- ob_fd = ob_fd_ctx_get(this, fd); ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode = ob_inode_get_locked(this, fd->inode); ++ if (ob_inode != NULL) { ++ ob_inode->open_count--; ++ ++ /* If this fd is the same as ob_inode->first_fd, it means that ++ * the initial open has not fully completed. We'll try to cancel ++ * it. */ ++ if (ob_inode->first_fd == fd) { ++ if (ob_inode->first_open == OB_OPEN_PREPARING) { ++ /* In this case ob_open_dispatch() has not been called yet. ++ * We clear first_fd and first_open to allow that function ++ * to know that the open is not really needed. This also ++ * allows other requests to work as expected if they ++ * arrive before the dispatch function is called. If there ++ * are pending fops, we can directly process them here. ++ * (note that there shouldn't be any fd related fops, but ++ * if there are, it's fine if they fail). 
*/ ++ ob_inode->first_fd = NULL; ++ ob_inode->first_open = NULL; ++ ob_inode->triggered = false; ++ list_splice_init(&ob_inode->resume_fops, &list); ++ } else if (!ob_inode->triggered) { ++ /* If the open has already been dispatched, we can only ++ * cancel it if it has not been triggered. Otherwise we ++ * simply wait until it completes. While it's not triggered, ++ * first_open must be a valid stub and there can't be any ++ * pending fops. */ ++ GF_ASSERT((ob_inode->first_open != NULL) && ++ list_empty(&ob_inode->resume_fops)); ++ ++ ob_inode->first_fd = NULL; ++ stub = ob_inode->first_open; ++ ob_inode->first_open = NULL; ++ } ++ } ++ } ++ } ++ UNLOCK(&fd->inode->lock); + +- ob_fd_free(ob_fd); ++ if (stub != NULL) { ++ call_stub_destroy(stub); ++ fd_unref(fd); ++ } + +- return 0; ++ ob_resume_pending(&list); + } + + int + ob_forget(xlator_t *this, inode_t *inode) + { +- ob_inode_t *ob_inode = NULL; ++ ob_inode_t *ob_inode; + uint64_t value = 0; + +- inode_ctx_del(inode, this, &value); +- +- if (value) { ++ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) { + ob_inode = (ob_inode_t *)(uintptr_t)value; +- ob_inode_free(ob_inode); ++ GF_FREE(ob_inode); + } + + return 0; +@@ -1153,20 +823,18 @@ ob_priv_dump(xlator_t *this) + int + ob_fdctx_dump(xlator_t *this, fd_t *fd) + { +- ob_fd_t *ob_fd = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; +- int ret = 0; ++ uint64_t value = 0; ++ int ret = 0, error = 0; + + ret = TRY_LOCK(&fd->lock); + if (ret) + return 0; + +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) { +- UNLOCK(&fd->lock); +- return 0; ++ if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) { ++ error = (int32_t)value; + } + + gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind", +@@ -1175,17 +843,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd) + + gf_proc_dump_write("fd", "%p", fd); + +- gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame); +- +- if (ob_fd->open_frame) +- gf_proc_dump_write("open_frame.root.unique", "%" PRIu64, +- ob_fd->open_frame->root->unique); +- +- gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path); +- +- gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid)); +- +- gf_proc_dump_write("flags", "%d", ob_fd->flags); ++ gf_proc_dump_write("error", "%d", error); + + UNLOCK(&fd->lock); + +@@ -1307,7 +965,7 @@ struct xlator_fops fops = { + }; + + struct xlator_cbks cbks = { +- .release = ob_release, ++ .fdclose = ob_fdclose, + .forget = ob_forget, + }; + +-- +1.8.3.1 + diff --git a/SOURCES/0524-open-behind-fix-call_frame-leak.patch b/SOURCES/0524-open-behind-fix-call_frame-leak.patch new file mode 100644 index 0000000..75a243d --- /dev/null +++ b/SOURCES/0524-open-behind-fix-call_frame-leak.patch @@ -0,0 +1,70 @@ +From 36dddf59a02d91d3db5b124be626ab6bc235ed5a Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 19 Aug 2020 23:27:38 +0200 +Subject: [PATCH 524/526] open-behind: fix call_frame leak + +When an open was delayed, a copy of the frame was created because the +current frame was used to unwind the "fake" open. When the open was +actually sent, the frame was correctly destroyed. However if the file +was closed before needing to send the open, the frame was not destroyed. + +This patch correctly destroys the frame in all cases. 
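+
+In short, the fix centralizes the teardown in one helper that both the
+dispatch path and ob_fdclose() now call, so every frame created with
+copy_frame() for a delayed open is released exactly once. A minimal
+sketch of its shape (matching the diff below):
+
+    static void
+    ob_open_destroy(call_stub_t *stub, fd_t *fd)
+    {
+        /* release the frame copied for the delayed open */
+        STACK_DESTROY(stub->frame->root);
+        call_stub_destroy(stub);
+        fd_unref(fd);
+    }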
+ +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24892 +> Change-Id: I8c00fc7f15545c240e8151305d9e4cf06d653926 +> Signed-off-by: Xavi Hernandez +> Fixes: #1440 + +BUG: 1830713 +Change-Id: I8c00fc7f15545c240e8151305d9e4cf06d653926 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/224488 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index e43fe73..1ab635e 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -333,6 +333,14 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + return 0; + } + ++static void ++ob_open_destroy(call_stub_t *stub, fd_t *fd) ++{ ++ STACK_DESTROY(stub->frame->root); ++ call_stub_destroy(stub); ++ fd_unref(fd); ++} ++ + static int32_t + ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) +@@ -355,8 +363,7 @@ ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + + if (stub != NULL) { + if (closed) { +- call_stub_destroy(stub); +- fd_unref(fd); ++ ob_open_destroy(stub, fd); + } else { + call_resume(stub); + } +@@ -776,8 +783,7 @@ ob_fdclose(xlator_t *this, fd_t *fd) + UNLOCK(&fd->inode->lock); + + if (stub != NULL) { +- call_stub_destroy(stub); +- fd_unref(fd); ++ ob_open_destroy(stub, fd); + } + + ob_resume_pending(&list); +-- +1.8.3.1 + diff --git a/SOURCES/0525-open-behind-implement-create-fop.patch b/SOURCES/0525-open-behind-implement-create-fop.patch new file mode 100644 index 0000000..c7a5329 --- /dev/null +++ b/SOURCES/0525-open-behind-implement-create-fop.patch @@ -0,0 +1,109 @@ +From 41aae052b5e3afe64d3e0668643726bab0e77265 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 4 Sep 2020 14:49:50 +0200 +Subject: [PATCH 525/526] open-behind: implement create fop + +Open behind didn't implement create fop. This caused that files created +were not accounted for the number of open fd's. This could cause future +opens to be delayed when they shouldn't. + +This patch implements the create fop. It also fixes a problem when +destroying the stack: when frame->local was not NULL, STACK_DESTROY() +tried to mem_put() it, which is not correct. 
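+
+A minimal sketch of the stack-destroy fix (it mirrors the one-line
+change in the diff below): frame->local carries the ob_inode_t, which
+is not pool-allocated, so it has to be cleared before the frame is
+destroyed:
+
+    static void
+    ob_open_destroy(call_stub_t *stub, fd_t *fd)
+    {
+        /* local is not mem_pool memory; clear it so STACK_DESTROY()
+         * does not try to mem_put() it */
+        stub->frame->local = NULL;
+        STACK_DESTROY(stub->frame->root);
+        call_stub_destroy(stub);
+        fd_unref(fd);
+    }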
+ +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24953 +> Fixes: #1440 +> Change-Id: Ic982bad07d4af30b915d7eb1fbcef7a847a45869 +> Signed-off-by: Xavi Hernandez + +BUG: 1830713 +Change-Id: Ic982bad07d4af30b915d7eb1fbcef7a847a45869 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/224489 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 52 +++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 1ab635e..600c3b6 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -336,6 +336,7 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + static void + ob_open_destroy(call_stub_t *stub, fd_t *fd) + { ++ stub->frame->local = NULL; + STACK_DESTROY(stub->frame->root); + call_stub_destroy(stub); + fd_unref(fd); +@@ -516,6 +517,56 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + } + + static int32_t ++ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) ++{ ++ ob_inode_t *ob_inode; ++ call_stub_t *stub; ++ fd_t *first_fd; ++ ob_state_t state; ++ ++ /* Create requests are never delayed. We always send them synchronously. */ ++ state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode, ++ &first_fd); ++ if (state == OB_STATE_READY) { ++ /* There's no pending open, but there are other file descriptors opened ++ * so we simply forward the request synchronously. */ ++ return default_create(frame, this, loc, flags, mode, umask, fd, xdata); ++ } ++ ++ if (state == OB_STATE_OPEN_TRIGGERED) { ++ /* The first open is in progress (either because it was already issued ++ * or because this request triggered it). We try to create a new stub ++ * to retry the operation once the initial open completes. */ ++ stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd, ++ xdata); ++ if (stub != NULL) { ++ return ob_stub_dispatch(this, ob_inode, first_fd, stub); ++ } ++ ++ state = -ENOMEM; ++ } ++ ++ /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never ++ * be returned by ob_open_and_resume_fd(). If we are here it can only be ++ * because there has been a problem. */ ++ ++ /* In case of failure we need to decrement the number of open files because ++ * ob_fdclose() won't be called. 
*/ ++ ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode->open_count--; ++ } ++ UNLOCK(&fd->inode->lock); ++ ++ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", ++ "create", "path=%s", loc->path, NULL); ++ ++ return default_create_failure_cbk(frame, -state); ++} ++ ++static int32_t + ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +@@ -946,6 +997,7 @@ fini(xlator_t *this) + + struct xlator_fops fops = { + .open = ob_open, ++ .create = ob_create, + .readv = ob_readv, + .writev = ob_writev, + .flush = ob_flush, +-- +1.8.3.1 + diff --git a/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch b/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch new file mode 100644 index 0000000..fb74fd8 --- /dev/null +++ b/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch @@ -0,0 +1,44 @@ +From baeca3c9b70548463ceea0ae27e6f98cf06e96b7 Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar +Date: Tue, 28 Jul 2020 22:27:34 +0530 +Subject: [PATCH 526/526] Quota quota_fsck.py, converting byte string to string + +Issue: The quota_fsck.py script throws an TypeError +due to the fact that the data is read as bytes and then +the string operations are applied on the. Now, in python3 +string is unicode and hence we get the type error. + +Code Changes: +Decoding the bytes value into utf-8 format. + +>Change-Id: Ia1ff52a821d664a371c8166692ff506ae39f6e40 +>Signed-off-by: srijan-sivakumar +>Fixes: #1401 +Upstream patch: https://review.gluster.org/c/glusterfs/+/24785 + +BUG: 1719171 +Change-Id: Ia1ff52a821d664a371c8166692ff506ae39f6e40 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/224780 +Tested-by: RHGS Build Bot +Reviewed-by: Kshithij Iyer +Reviewed-by: Rinku Kothiya +--- + extras/quota/quota_fsck.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py +index 174f2a2..ea8d638 100755 +--- a/extras/quota/quota_fsck.py ++++ b/extras/quota/quota_fsck.py +@@ -157,6 +157,7 @@ def get_quota_xattr_brick(dpath): + xattr_dict['parents'] = {} + + for xattr in pairs: ++ xattr = xattr.decode("utf-8") + xattr_key = xattr.split("=")[0] + if re.search("# file:", xattr_key): + # skip the file comment +-- +1.8.3.1 + diff --git a/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch b/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch new file mode 100644 index 0000000..133a24e --- /dev/null +++ b/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch @@ -0,0 +1,200 @@ +From 4152c77defac24ace3b1b6b9cc81a4f614254e4f Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar +Date: Sat, 18 Jul 2020 05:59:09 +0530 +Subject: [PATCH 527/532] Events: Socket creation after getaddrinfo and IPv4 + and IPv6 packet capture + +Issue: Currently, the socket creation is done +prior to getaddrinfo function being invoked. This +can cause mismatch in the protocol and address +families of the created socket and the result +of the getaddrinfo api. Also, the glustereventsd +UDP server by default only captures IPv4 packets +hence IPv6 packets are not even captured. + +Code Changes: +1. Modified the socket creation in such a way that +the parameters taken in are dependent upon the +result of the getaddrinfo function. +2. Created a subclass for adding address family +in glustereventsd.py for both AF_INET and AF_INET6. +3. 
Modified addresses in the eventsapiconf.py.in + +Reasoning behind the approach: +1. If we are using getaddrinfo function then +socket creation should happen only after we +check if we received back valid addresses. +Hence socket creation should come after the call +to getaddrinfo +2. The listening server which pushes the events +to the webhook has to listen for both IPv4 +and IPv6 messages as we would not be sure as to +what address family is picked in _gf_event. + +>Fixes: #1377 +>Change-Id: I568dcd1a977c8832f0fef981e1f81cac7043c760 +>Signed-off-by: srijan-sivakumar +Upstream patch: https://review.gluster.org/c/glusterfs/+/24722 + +BUG: 1814744 +Change-Id: I568dcd1a977c8832f0fef981e1f81cac7043c760 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/225567 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + events/src/eventsapiconf.py.in | 2 ++ + events/src/glustereventsd.py | 37 ++++++++++++++++++++++++++++++------- + libglusterfs/src/events.c | 27 +++++++++++++++++++-------- + 3 files changed, 51 insertions(+), 15 deletions(-) + +diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in +index 76b5954..700093b 100644 +--- a/events/src/eventsapiconf.py.in ++++ b/events/src/eventsapiconf.py.in +@@ -28,6 +28,8 @@ def get_glusterd_workdir(): + return glusterd_workdir + + SERVER_ADDRESS = "0.0.0.0" ++SERVER_ADDRESSv4 = "0.0.0.0" ++SERVER_ADDRESSv6 = "::1" + DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json" + CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json" + CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC +diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py +index c4c7b65..341a3b6 100644 +--- a/events/src/glustereventsd.py ++++ b/events/src/glustereventsd.py +@@ -13,6 +13,7 @@ + from __future__ import print_function + import sys + import signal ++import threading + try: + import socketserver + except ImportError: +@@ -23,10 +24,17 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter + from eventtypes import all_events + import handlers + import utils +-from eventsapiconf import SERVER_ADDRESS, PID_FILE ++from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE + from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES + from utils import logger, PidFile, PidFileLockFailed, boolify + ++# Subclass so that specifically IPv4 packets are captured ++class UDPServerv4(socketserver.ThreadingUDPServer): ++ address_family = socket.AF_INET ++ ++# Subclass so that specifically IPv6 packets are captured ++class UDPServerv6(socketserver.ThreadingUDPServer): ++ address_family = socket.AF_INET6 + + class GlusterEventsRequestHandler(socketserver.BaseRequestHandler): + +@@ -89,6 +97,10 @@ def signal_handler_sigusr2(sig, frame): + utils.restart_webhook_pool() + + ++def UDP_server_thread(sock): ++ sock.serve_forever() ++ ++ + def init_event_server(): + utils.setup_logger() + utils.load_all() +@@ -99,15 +111,26 @@ def init_event_server(): + sys.stderr.write("Unable to get Port details from Config\n") + sys.exit(1) + +- # Start the Eventing Server, UDP Server ++ # Creating the Eventing Server, UDP Server for IPv4 packets ++ try: ++ serverv4 = UDPServerv4((SERVER_ADDRESSv4, port), ++ GlusterEventsRequestHandler) ++ except socket.error as e: ++ sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e)) ++ sys.exit(1) ++ # Creating the Eventing Server, UDP Server for IPv6 packets + try: +- server = 
socketserver.ThreadingUDPServer( +- (SERVER_ADDRESS, port), +- GlusterEventsRequestHandler) ++ serverv6 = UDPServerv6((SERVER_ADDRESSv6, port), ++ GlusterEventsRequestHandler) + except socket.error as e: +- sys.stderr.write("Failed to start Eventsd: {0}\n".format(e)) ++ sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e)) + sys.exit(1) +- server.serve_forever() ++ server_thread1 = threading.Thread(target=UDP_server_thread, ++ args=(serverv4,)) ++ server_thread2 = threading.Thread(target=UDP_server_thread, ++ args=(serverv6,)) ++ server_thread1.start() ++ server_thread2.start() + + + def get_args(): +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 6d1e383..4d720ca 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -40,6 +40,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + char *host = NULL; + struct addrinfo hints; + struct addrinfo *result = NULL; ++ struct addrinfo *iter_result_ptr = NULL; + xlator_t *this = THIS; + char *volfile_server_transport = NULL; + +@@ -51,13 +52,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + goto out; + } + +- /* Initialize UDP socket */ +- sock = socket(AF_INET, SOCK_DGRAM, 0); +- if (sock < 0) { +- ret = EVENT_ERROR_SOCKET; +- goto out; +- } +- + if (ctx) { + volfile_server_transport = ctx->cmd_args.volfile_server_transport; + } +@@ -66,7 +60,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + } + + /* host = NULL returns localhost */ +- host = NULL; + if (ctx && ctx->cmd_args.volfile_server && + (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set +@@ -84,6 +77,24 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + goto out; + } + ++ // iterate over the result and break when socket creation is success. ++ for (iter_result_ptr = result; iter_result_ptr != NULL; ++ iter_result_ptr = iter_result_ptr->ai_next) { ++ sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype, ++ iter_result_ptr->ai_protocol); ++ if (sock != -1) { ++ break; ++ } ++ } ++ /* ++ * If none of the addrinfo structures lead to a successful socket ++ * creation, socket creation has failed. ++ */ ++ if (sock < 0) { ++ ret = EVENT_ERROR_SOCKET; ++ goto out; ++ } ++ + va_start(arguments, fmt); + ret = gf_vasprintf(&msg, fmt, arguments); + va_end(arguments); +-- +1.8.3.1 + diff --git a/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch b/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch new file mode 100644 index 0000000..d04068d --- /dev/null +++ b/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch @@ -0,0 +1,134 @@ +From 3fc74ce6c282f0f43fdcfeda47b71a1b19945b6d Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar +Date: Wed, 3 Feb 2021 10:11:04 +0530 +Subject: [PATCH 528/532] Extras: Removing xattr_analysis script + +The xattr_analysis.py script is used rarely for +debugging and seeing that it has some dependencies, +removing it from the release. + +If need be, it would be directly shared with the cu. 
+ +Label: DOWNSTREAM ONLY +BUG: 1719171 + +Change-Id: I4bb0df3ebfa7e43e13858b4b6e3efbb02ea79d5f +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/226301 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/quota/Makefile.am | 4 +-- + extras/quota/xattr_analysis.py | 73 ------------------------------------------ + glusterfs.spec.in | 1 - + 3 files changed, 2 insertions(+), 76 deletions(-) + delete mode 100755 extras/quota/xattr_analysis.py + +diff --git a/extras/quota/Makefile.am b/extras/quota/Makefile.am +index cdb6be1..e4d9322 100644 +--- a/extras/quota/Makefile.am ++++ b/extras/quota/Makefile.am +@@ -2,7 +2,7 @@ scriptsdir = $(datadir)/glusterfs/scripts + scripts_SCRIPTS = log_accounting.sh + + if WITH_SERVER +-scripts_SCRIPTS += xattr_analysis.py quota_fsck.py ++scripts_SCRIPTS += quota_fsck.py + endif + +-EXTRA_DIST = log_accounting.sh xattr_analysis.py quota_fsck.py ++EXTRA_DIST = log_accounting.sh quota_fsck.py +diff --git a/extras/quota/xattr_analysis.py b/extras/quota/xattr_analysis.py +deleted file mode 100755 +index 7bd7d96..0000000 +--- a/extras/quota/xattr_analysis.py ++++ /dev/null +@@ -1,73 +0,0 @@ +-#!/usr/bin/python3 +-# Below script has two purposes +-# 1. Display xattr of entire FS tree in a human readable form +-# 2. Display all the directory where contri and size mismatch. +-# (If there are any directory with contri and size mismatch that are not dirty +-# then that highlights a propagation issue) +-# The script takes only one input LOG _FILE generated from the command, +-# find | xargs getfattr -d -m. -e hex > log_gluster_xattr +- +-from __future__ import print_function +-import re +-import subprocess +-import sys +-from hurry.filesize import size +- +-if len(sys.argv) < 2: +- sys.exit('Usage: %s log_gluster_xattr \n' +- 'to generate log_gluster_xattr use: \n' +- 'find | xargs getfattr -d -m. 
-e hex > log_gluster_xattr' +- % sys.argv[0]) +-LOG_FILE=sys.argv[1] +- +-def get_quota_xattr_brick(): +- out = subprocess.check_output (["/usr/bin/cat", LOG_FILE]) +- pairs = out.splitlines() +- +- xdict = {} +- mismatch_size = [('====contri_size===', '====size====')] +- for xattr in pairs: +- k = xattr.split("=")[0] +- if re.search("# file:", k): +- print(xdict) +- filename=k +- print("=====" + filename + "=======") +- xdict = {} +- elif k is "": +- pass +- else: +- print(xattr) +- v = xattr.split("=")[1] +- if re.search("contri", k): +- if len(v) == 34: +- # for files size is obtained in iatt, file count should be 1, dir count=0 +- xdict['contri_file_count'] = int(v[18:34], 16) +- xdict['contri_dir_count'] = 0 +- else: +- xdict['contri_size'] = size(int(v[2:18], 16)) +- xdict['contri_file_count'] = int(v[18:34], 16) +- xdict['contri_dir_count'] = int(v[34:], 16) +- elif re.search("size", k): +- xdict['size'] = size(int(v[2:18], 16)) +- xdict['file_count'] = int(v[18:34], 16) +- xdict['dir_count'] = int(v[34:], 16) +- elif re.search("dirty", k): +- if v == '0x3000': +- xdict['dirty'] = False +- elif v == '0x3100': +- xdict['dirty'] = True +- elif re.search("limit_objects", k): +- xdict['limit_objects'] = int(v[2:18], 16) +- elif re.search("limit_set", k): +- xdict['limit_set'] = size(int(v[2:18], 16)) +- +- if 'size' in xdict and 'contri_size' in xdict and xdict['size'] != xdict['contri_size']: +- mismatch_size.append((xdict['contri_size'], xdict['size'], filename)) +- +- for values in mismatch_size: +- print(values) +- +- +-if __name__ == '__main__': +- get_quota_xattr_brick() +- +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 30d7162..2be7677 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1380,7 +1380,6 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %files server + %doc extras/clear_xattrs.sh +-%{_datadir}/glusterfs/scripts/xattr_analysis.py* + %{_datadir}/glusterfs/scripts/quota_fsck.py* + # sysconf + %config(noreplace) %{_sysconfdir}/glusterfs +-- +1.8.3.1 + diff --git a/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch b/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch new file mode 100644 index 0000000..671451d --- /dev/null +++ b/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch @@ -0,0 +1,75 @@ +From 1c7e96e73273b7891ea6ef0d768c2bf7ff5de7b0 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Thu, 4 Feb 2021 16:29:39 +0530 +Subject: [PATCH 529/532] geo-rep: prompt should work for ignore_deletes + +The python cli is intelligent enough to parse both "-" and "_" alike: + +Example: +geo-replication config updated successfully +sync_job 4 +geo-replication config updated successfully +gluster volume geo-replication primary 127.0.0.1::secondary config | grep sync_jobs +sync_jobs:5 + +Thus the prompt which appears after ignore-deletes true should +work for both ignore-deletes and ignore_deletes. 
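+
+A sketch of the resulting check in the cli parser (slightly
+de-parenthesized here; the exact change is in the diff below):
+
+    if ((!strcmp((char *)words[wordcount - 2], "ignore_deletes") ||
+         !strcmp((char *)words[wordcount - 2], "ignore-deletes")) &&
+        !strcmp((char *)words[wordcount - 1], "true")) {
+        /* raise the ~15 seconds delay confirmation prompt */
+    }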
+ +Label: DOWNSTREAM ONLY + +BUG: 1224906 +Change-Id: I89f854200a604d07d3ac6c374fe6d445ce9f22ca +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/226599 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-parser.c | 5 +++-- + tests/00-geo-rep/bug-1708603.t | 12 ++++++++++-- + 2 files changed, 13 insertions(+), 4 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 34f17c9..dda8979 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -3107,8 +3107,9 @@ cli_cmd_gsync_set_parse(struct cli_state *state, const char **words, + if (!ret) + ret = dict_set_int32(dict, "type", type); + if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) { +- if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") && +- !strcmp((char *)words[wordcount - 1], "true")) { ++ if ((((!strcmp((char *)words[wordcount - 2], "ignore_deletes")) || ++ (!strcmp((char *)words[wordcount - 2], "ignore-deletes")))) && ++ ((!strcmp((char *)words[wordcount - 1], "true")))) { + question = + "There exists ~15 seconds delay for the option to take" + " effect from stime of the corresponding brick. Please" +diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t +index 26913f1..edafb48 100644 +--- a/tests/00-geo-rep/bug-1708603.t ++++ b/tests/00-geo-rep/bug-1708603.t +@@ -44,11 +44,19 @@ TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 + #Create geo-rep session + TEST create_georep_session $master $slave + +-echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1 +-EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++echo n | $GEOREP_CLI $master $slave config ignore_deletes true >/dev/null 2>&1 ++EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore_deletes) ++ ++echo y | $GEOREP_CLI $master $slave config ignore_deletes true ++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore_deletes) ++ ++$GEOREP_CLI $master $slave config ignore_deletes false + echo y | $GEOREP_CLI $master $slave config ignore-deletes true + EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes) + ++echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1 ++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++ + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave stop + +-- +1.8.3.1 + diff --git a/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch b/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch new file mode 100644 index 0000000..aec73b7 --- /dev/null +++ b/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch @@ -0,0 +1,41 @@ +From 5a7348a266587704dae4f1ddda16b7c95f547251 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Sun, 7 Feb 2021 13:40:24 +0000 +Subject: [PATCH 530/532] gfapi: avoid crash while logging message. + +Breaking parameter into two different parameter +to avoid a crash. 
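+
+Concretely (both calls are taken from the diff below), the single
+format-string argument is split into a plain message plus a separate
+"key=format" pair:
+
+    /* before: message and format mixed in one argument */
+    gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+            "size >= %llu is not allowed", GF_UNIT_GB, NULL);
+
+    /* after: plain message, size detail passed separately */
+    gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+            "Data size too large", "size=%llu", GF_UNIT_GB, NULL);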
+ +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/2139 +> fixes: #2138 +> Change-Id: Idd5f3631488c1d892748f83e6847fb6fd2d0802a +> Signed-off-by: Rinku Kothiya + +BUG: 1691320 + +Change-Id: Ifd6a96982ffd4e5334f8be2297de2ad826f3145b +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/226851 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-fops.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index 051541f..6dc3b66 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -1529,7 +1529,7 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + ret = -1; + errno = EINVAL; + gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG, +- "size >= %llu is not allowed", GF_UNIT_GB, NULL); ++ "Data size too large", "size=%llu", GF_UNIT_GB, NULL); + goto out; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch b/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch new file mode 100644 index 0000000..8c2ecbf --- /dev/null +++ b/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch @@ -0,0 +1,69 @@ +From 058a853a1438b2a62586c545f71150ade3de23b7 Mon Sep 17 00:00:00 2001 +From: schaffung +Date: Wed, 10 Feb 2021 13:43:48 +0530 +Subject: [PATCH 531/532] Glustereventsd Default port change (#2091) + +Issue : The default port of glustereventsd is currently 24009 +which is preventing glustereventsd from binding to the UDP port +due to selinux policies. + +Fix: Changing the default port to be bound by chanding it to something +in the ephemeral range. + +>Fixes: #2080 +>Change-Id: Ibdc87f83f82f69660dca95d6d14b226e10d8bd33 +>Signed-off-by: srijan-sivakumar +Upstream Patch : https://github.com/gluster/glusterfs/pull/2091 + +BUG: 1814744 +Change-Id: Ibdc87f83f82f69660dca95d6d14b226e10d8bd33 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/227249 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + events/src/eventsconfig.json | 2 +- + extras/firewalld/glusterfs.xml | 2 +- + libglusterfs/src/events.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/events/src/eventsconfig.json b/events/src/eventsconfig.json +index 89e5b9c..14d8f84 100644 +--- a/events/src/eventsconfig.json ++++ b/events/src/eventsconfig.json +@@ -1,5 +1,5 @@ + { + "log-level": "INFO", +- "port": 24009, ++ "port": 55555, + "disable-events-log": false + } +diff --git a/extras/firewalld/glusterfs.xml b/extras/firewalld/glusterfs.xml +index 7e17644..dc74b2e 100644 +--- a/extras/firewalld/glusterfs.xml ++++ b/extras/firewalld/glusterfs.xml +@@ -4,7 +4,7 @@ + Default ports for gluster-distributed storage + + +- ++ + + + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 4d720ca..3659606 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -26,7 +26,7 @@ + #include "glusterfs/events.h" + + #define EVENT_HOST "127.0.0.1" +-#define EVENT_PORT 24009 ++#define EVENT_PORT 55555 + + int + _gf_event(eventtypes_t event, const char *fmt, ...) 
+-- +1.8.3.1 + diff --git a/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch b/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch new file mode 100644 index 0000000..97e5aa7 --- /dev/null +++ b/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch @@ -0,0 +1,79 @@ +From 2dad17fdbaab2ab2cda6a05dec9dcd2d37ea32ff Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha +Date: Wed, 10 Feb 2021 15:07:32 +0530 +Subject: [PATCH 532/532] glusterd: fix for starting brick on new port + +The Errno set by the runner code was not correct when the bind() fails +to assign an already occupied port in the __socket_server_bind(). + +Fix: +Updated the code to return the correct errno from the +__socket_server_bind() if the bind() fails due to EADDRINUSE error. And, +use the returned errno from runner_run() to retry allocating a new port +to the brick process. + +>Fixes: #1101 + +>Change-Id: If124337f41344a04f050754e402490529ef4ecdc +>Signed-off-by: nik-redhat nladha@redhat.com + +Upstream patch: https://github.com/gluster/glusterfs/pull/2090 + +BUG: 1865796 + +Change-Id: If124337f41344a04f050754e402490529ef4ecdc +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/227261 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez Juan +--- + rpc/rpc-transport/socket/src/socket.c | 3 +++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 6 ++---- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 1ee7320..96ed9f1 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -973,8 +973,11 @@ __socket_server_bind(rpc_transport_t *this) + this->myinfo.identifier, strerror(errno)); + if (errno == EADDRINUSE) { + gf_log(this->name, GF_LOG_ERROR, "Port is already in use"); ++ ret = -EADDRINUSE; ++ goto out; + } + } ++ + if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) { + if (getsockname(priv->sock, SA(&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len) != 0) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index cf32bd9..bc188a2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2151,6 +2151,7 @@ glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo, + ret = -1; + goto out; + } ++ + /* Build the exp_path, before starting the glusterfsd even in + valgrind mode. Otherwise all the glusterfsd processes start + writing the valgrind log to the same file. 
+@@ -2289,13 +2290,10 @@ retry: + + if (wait) { + synclock_unlock(&priv->big_lock); +- errno = 0; + ret = runner_run(&runner); +- if (errno != 0) +- ret = errno; + synclock_lock(&priv->big_lock); + +- if (ret == EADDRINUSE) { ++ if (ret == -EADDRINUSE) { + /* retry after getting a new port */ + gf_msg(this->name, GF_LOG_WARNING, -ret, + GD_MSG_SRC_BRICK_PORT_UNAVAIL, +-- +1.8.3.1 + diff --git a/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch b/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch new file mode 100644 index 0000000..158b4b7 --- /dev/null +++ b/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch @@ -0,0 +1,250 @@ +From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Thu, 18 Feb 2021 09:40:44 +0530 +Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status + after reboot (#2172) + +Rebalance cli is not showing correct status after reboot. + +The CLI is not correct status because defrag object is not +valid at the time of creating a rpc connection to show the status. +The defrag object is not valid because at the time of start a glusterd +glusterd_restart_rebalance can be call almost at the same time by two +different synctask and glusterd got a disconnect on rpc object and it +cleanup the defrag object. + +Solution: To avoid the defrag object populate a reference count before + create a defrag rpc object. +>Fixes: #1339 +>Signed-off-by: Mohit Agrawal +>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba +Upstream Patch : https://github.com/gluster/glusterfs/pull/2172 + +BUG: 1832306 +Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/228249 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++----- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 59 +++++++++++++++++++++++++- + xlators/mgmt/glusterd/src/glusterd-utils.h | 5 +++ + xlators/mgmt/glusterd/src/glusterd.h | 1 + + 5 files changed, 90 insertions(+), 11 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index b419a89..fcd5318 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int pid = -1; ++ int refcnt = 0; + + this = THIS; + if (!this) +@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + } + + case RPC_CLNT_DISCONNECT: { +- if (!defrag->connected) +- return 0; +- + LOCK(&defrag->lock); + { ++ if (!defrag->connected) { ++ UNLOCK(&defrag->lock); ++ return 0; ++ } + defrag->connected = 0; + } + UNLOCK(&defrag->lock); +@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + glusterd_defrag_rpc_put(defrag); + if (defrag->cbk_fn) + defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status); +- +- GF_FREE(defrag); ++ refcnt = glusterd_defrag_unref(defrag); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED, +- "Rebalance process for volume %s has disconnected.", +- volinfo->volname); ++ "Rebalance process for volume %s has disconnected" ++ " and defrag refcnt is %d.", ++ volinfo->volname, refcnt); + break; + } + case 
RPC_CLNT_DESTROY: +@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, + gf_msg_debug("glusterd", 0, "rebalance command failed"); + goto out; + } +- ++ /* Take reference before sleep to save defrag object cleanup while ++ glusterd_restart_rebalance call for other bricks by syncktask ++ at the time of restart a glusterd. ++ */ ++ glusterd_defrag_ref(defrag); + sleep(5); + + ret = glusterd_rebalance_rpc_create(volinfo); +@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); ++ struct rpc_clnt *rpc = NULL; + + // rebalance process is not started + if (!defrag) +@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + } + + glusterd_volinfo_ref(volinfo); +- ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify, +- volinfo, _gf_true); ++ ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo, ++ _gf_false); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL, + "Glusterd RPC creation failed"); + goto out; + } ++ LOCK(&defrag->lock); ++ { ++ if (!defrag->rpc) ++ defrag->rpc = rpc; ++ else ++ rpc_clnt_unref(rpc); ++ } ++ UNLOCK(&defrag->lock); + ret = 0; + out: + if (options) +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index df78fef..05c9e11 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + if (!rpc) { + if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) { + volinfo = pending_node->node; ++ glusterd_defrag_ref(volinfo->rebal.defrag); + ret = glusterd_rebalance_rpc_create(volinfo); + if (ret) { + ret = 0; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index bc188a2..9fb8eab 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -93,6 +93,44 @@ + #define NLMV4_VERSION 4 + #define NLMV1_VERSION 1 + ++int ++glusterd_defrag_ref(glusterd_defrag_info_t *defrag) ++{ ++ int refcnt = 0; ++ ++ if (!defrag) ++ goto out; ++ ++ LOCK(&defrag->lock); ++ { ++ refcnt = ++defrag->refcnt; ++ } ++ UNLOCK(&defrag->lock); ++ ++out: ++ return refcnt; ++} ++ ++int ++glusterd_defrag_unref(glusterd_defrag_info_t *defrag) ++{ ++ int refcnt = -1; ++ ++ if (!defrag) ++ goto out; ++ ++ LOCK(&defrag->lock); ++ { ++ refcnt = --defrag->refcnt; ++ if (refcnt <= 0) ++ GF_FREE(defrag); ++ } ++ UNLOCK(&defrag->lock); ++ ++out: ++ return refcnt; ++} ++ + gf_boolean_t + is_brick_mx_enabled(void) + { +@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, + char pidfile[PATH_MAX] = ""; + int ret = -1; + pid_t pid = 0; ++ int refcnt = 0; + + this = THIS; + GF_ASSERT(this); +@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, + volinfo->volname); + goto out; + } +- ret = glusterd_rebalance_rpc_create(volinfo); ++ refcnt = glusterd_defrag_ref(volinfo->rebal.defrag); ++ /* If refcnt value is 1 it means either defrag object is ++ poulated by glusterd_rebalance_defrag_init or previous ++ rpc creation was failed.If it is not 1 it means it(defrag) ++ was populated at the time of start a rebalance daemon. 
++ We need to create a rpc object only while a previous ++ rpc connection was not established successfully at the ++ time of restart a rebalance daemon by ++ glusterd_handle_defrag_start otherwise rebalance cli ++ does not show correct status after just reboot a node and try ++ to print the rebalance status because defrag object has been ++ destroyed during handling of rpc disconnect. ++ */ ++ if (refcnt == 1) { ++ ret = glusterd_rebalance_rpc_create(volinfo); ++ } else { ++ ret = 0; ++ glusterd_defrag_unref(volinfo->rebal.defrag); ++ } + break; + } + case GF_DEFRAG_STATUS_NOT_STARTED: +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 02d85d2..4541471 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -886,4 +886,9 @@ int32_t + glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + int32_t sub_count); + ++int ++glusterd_defrag_ref(glusterd_defrag_info_t *defrag); ++ ++int ++glusterd_defrag_unref(glusterd_defrag_info_t *defrag); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index efe4d0e..9de3f28 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ { + uint64_t total_data; + uint64_t num_files_lookedup; + uint64_t total_failures; ++ int refcnt; + gf_lock_t lock; + int cmd; + pthread_t th; +-- +1.8.3.1 + diff --git a/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch b/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch new file mode 100644 index 0000000..2dc72c1 --- /dev/null +++ b/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch @@ -0,0 +1,47 @@ +From b3647eb5415b2e3d9e1a11ad6c4689e520f17b39 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Mon, 22 Feb 2021 10:09:34 +0530 +Subject: [PATCH 534/538] glusterd: Resolve use after free bug (#2181) + +In the commit 61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea +introduced a coverity bug use object after cleanup +the object. 
+ +Cleanup memory after comeout from a critical section +>Fixes: #2180 + +>Change-Id: Iee2050c4883a0dd44b8523bb822b664462ab6041 +>Signed-off-by: Mohit Agrawal +Upstream Patch : https://github.com/gluster/glusterfs/pull/2181 + +BUG: 1832306 +Change-Id: Iee2050c4883a0dd44b8523bb822b664462ab6041 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/228578 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 9fb8eab..6d40be5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -122,11 +122,10 @@ glusterd_defrag_unref(glusterd_defrag_info_t *defrag) + LOCK(&defrag->lock); + { + refcnt = --defrag->refcnt; +- if (refcnt <= 0) +- GF_FREE(defrag); + } + UNLOCK(&defrag->lock); +- ++ if (refcnt <= 0) ++ GF_FREE(defrag); + out: + return refcnt; + } +-- +1.8.3.1 + diff --git a/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch b/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch new file mode 100644 index 0000000..e1622de --- /dev/null +++ b/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch @@ -0,0 +1,270 @@ +From 775d500cd136bd8c940faaeffde1217c25a87e3d Mon Sep 17 00:00:00 2001 +From: Yaniv Kaul +Date: Sun, 2 Jun 2019 21:14:18 +0300 +Subject: [PATCH 535/538] (multiple files) use dict_allocate_and_serialize() + where applicable. + +This function does length, allocation and serialization for you. + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/22800 +> Change-Id: I142a259952a2fe83dd719442afaefe4a43a8e55e +> updates: bz#1193929 +> Signed-off-by: Yaniv Kaul + +Change-Id: I142a259952a2fe83dd719442afaefe4a43a8e55e +BUG: 1911292 +Signed-off-by: Yaniv Kaul +Reviewed-on: https://code.engineering.redhat.com/gerrit/228611 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-inode-read.c | 34 +++++--------------------- + xlators/cluster/ec/src/ec-combine.c | 16 +++--------- + xlators/features/locks/src/posix.c | 23 +++-------------- + xlators/protocol/client/src/client-handshake.c | 14 +++-------- + xlators/protocol/server/src/server-handshake.c | 24 +++++++----------- + xlators/protocol/server/src/server-helpers.c | 27 +++----------------- + 6 files changed, 28 insertions(+), 110 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index 523a5b4..cf305af 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -948,24 +948,13 @@ unlock: + goto unwind; + } + +- len = dict_serialized_length(local->dict); +- if (len <= 0) { +- goto unwind; +- } +- +- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char); +- if (!lockinfo_buf) { ++ op_ret = dict_allocate_and_serialize( ++ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); ++ if (op_ret != 0) { + local->op_ret = -1; +- local->op_errno = ENOMEM; + goto unwind; + } + +- op_ret = dict_serialize(local->dict, lockinfo_buf); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- } +- + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +@@ -1064,24 +1053,13 @@ unlock: + goto unwind; + } + +- 
len = dict_serialized_length(local->dict); +- if (len <= 0) { +- goto unwind; +- } +- +- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char); +- if (!lockinfo_buf) { ++ op_ret = dict_allocate_and_serialize( ++ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); ++ if (op_ret != 0) { + local->op_ret = -1; +- local->op_errno = ENOMEM; + goto unwind; + } + +- op_ret = dict_serialize(local->dict, lockinfo_buf); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- } +- + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c +index 99e5534..9d712b3 100644 +--- a/xlators/cluster/ec/src/ec-combine.c ++++ b/xlators/cluster/ec/src/ec-combine.c +@@ -486,22 +486,12 @@ ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key) + + tmp = NULL; + +- len = dict_serialized_length(lockinfo); +- if (len < 0) { +- err = len; +- +- goto out; +- } +- ptr = GF_MALLOC(len, gf_common_mt_char); +- if (ptr == NULL) { +- err = -ENOMEM; +- +- goto out; +- } +- err = dict_serialize(lockinfo, ptr); ++ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr, ++ (unsigned int *)&len); + if (err != 0) { + goto out; + } ++ + dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict; + err = dict_set_dynptr(dict, key, ptr, len); + if (err != 0) { +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 5ae0125..cdd1ff7 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -1547,8 +1547,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, + goto out; + } + +- len = dict_serialized_length(tmp); +- if (len < 0) { ++ op_ret = dict_allocate_and_serialize(tmp, (char **)&buf, ++ (unsigned int *)&len); ++ if (op_ret != 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log(this->name, GF_LOG_WARNING, +@@ -1558,24 +1559,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, + goto out; + } + +- buf = GF_CALLOC(1, len, gf_common_mt_char); +- if (buf == NULL) { +- op_ret = -1; +- *op_errno = ENOMEM; +- goto out; +- } +- +- op_ret = dict_serialize(tmp, buf); +- if (op_ret < 0) { +- *op_errno = -op_ret; +- op_ret = -1; +- gf_log(this->name, GF_LOG_WARNING, +- "dict_serialize failed (%s) while handling lockinfo " +- "for fd (ptr: %p inode-gfid:%s)", +- strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid)); +- goto out; +- } +- + op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len); + if (op_ret < 0) { + *op_errno = -op_ret; +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index 0002361..6b20d92 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -1286,18 +1286,10 @@ client_setvolume(xlator_t *this, struct rpc_clnt *rpc) + "Failed to set client opversion in handshake message"); + } + +- ret = dict_serialized_length(options); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_ERROR, +- "failed to get serialized length of dict"); ++ ret = dict_allocate_and_serialize(options, (char **)&req.dict.dict_val, ++ &req.dict.dict_len); ++ if (ret != 0) { + ret = -1; +- goto fail; +- } +- req.dict.dict_len = ret; +- req.dict.dict_val = GF_CALLOC(1, req.dict.dict_len, +- gf_client_mt_clnt_req_buf_t); +- ret = dict_serialize(options, req.dict.dict_val); +- if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 
PC_MSG_DICT_SERIALIZE_FAIL, + "failed to serialize " + "dictionary"); +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index eeca73c..54dc030 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ b/xlators/protocol/server/src/server-handshake.c +@@ -676,22 +676,16 @@ fail: + GF_ASSERT(rsp); + + rsp->op_ret = 0; +- ret = dict_serialized_length(reply); +- if (ret > 0) { +- rsp->dict.dict_len = ret; +- rsp->dict.dict_val = GF_CALLOC(1, rsp->dict.dict_len, +- gf_server_mt_rsp_buf_t); +- if (rsp->dict.dict_val) { +- ret = dict_serialize(reply, rsp->dict.dict_val); +- if (ret < 0) { +- gf_msg_debug("server-handshake", 0, +- "failed " +- "to serialize reply dict"); +- op_ret = -1; +- op_errno = -ret; +- } +- } ++ ++ ret = dict_allocate_and_serialize(reply, (char **)&rsp->dict.dict_val, ++ &rsp->dict.dict_len); ++ if (ret != 0) { ++ ret = -1; ++ gf_msg_debug("server-handshake", 0, "failed to serialize reply dict"); ++ op_ret = -1; ++ op_errno = -ret; + } ++ + rsp->op_ret = op_ret; + rsp->op_errno = gf_errno_to_error(op_errno); + +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index e74a24d..33959b5 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -902,7 +902,6 @@ serialize_rsp_direntp(gf_dirent_t *entries, gfs3_readdirp_rsp *rsp) + gfs3_dirplist *trav = NULL; + gfs3_dirplist *prev = NULL; + int ret = -1; +- int temp = 0; + + GF_VALIDATE_OR_GOTO("server", entries, out); + GF_VALIDATE_OR_GOTO("server", rsp, out); +@@ -923,28 +922,10 @@ serialize_rsp_direntp(gf_dirent_t *entries, gfs3_readdirp_rsp *rsp) + + /* if 'dict' is present, pack it */ + if (entry->dict) { +- temp = dict_serialized_length(entry->dict); +- +- if (temp < 0) { +- gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, PS_MSG_INVALID_ENTRY, +- "failed to get " +- "serialized length of reply dict"); +- errno = EINVAL; +- trav->dict.dict_len = 0; +- goto out; +- } +- trav->dict.dict_len = temp; +- +- trav->dict.dict_val = GF_CALLOC(1, trav->dict.dict_len, +- gf_server_mt_rsp_buf_t); +- if (!trav->dict.dict_val) { +- errno = ENOMEM; +- trav->dict.dict_len = 0; +- goto out; +- } +- +- ret = dict_serialize(entry->dict, trav->dict.dict_val); +- if (ret < 0) { ++ ret = dict_allocate_and_serialize(entry->dict, ++ (char **)&trav->dict.dict_val, ++ &trav->dict.dict_len); ++ if (ret != 0) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, PS_MSG_DICT_SERIALIZE_FAIL, + "failed to serialize reply dict"); + errno = -ret; +-- +1.8.3.1 + diff --git a/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch b/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch new file mode 100644 index 0000000..94e0b64 --- /dev/null +++ b/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch @@ -0,0 +1,102 @@ +From 32281b4b5cf79d0ef6f0c65775bb81093e1ba479 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 24 Feb 2021 18:44:12 +0530 +Subject: [PATCH 536/538] dht: Ongoing IO is failed during volume shrink + operation (#2188) + +In the commit (c878174) we have introduced a check +to avoid stale layout issue.To avoid a stale layout +issue dht has set a key along with layout at the time +of wind a create fop and posix validates the parent +layout based on the key value. 
If layout does not match,
+it throws an error. In case of a volume shrink, the layout has
+been changed by the rebalance daemon, and if the layout does not
+match, dht is not able to wind a create fop successfully.
+
+Solution: To avoid the issue, populate the key only when
+          dht winds the fop for the first time. After getting an
+          error on the 2nd attempt, dht takes a lock and then
+          reattempts to wind the fop again.
+
+> Fixes: #2187
+> Change-Id: Ie018386e7823a11eea415496bb226ca032453a55
+> Signed-off-by: Mohit Agrawal
+> (Cherry pick from commit da6ce622b722f7d12619c5860293faf03f7cd00c
+> Reviewed on upstream link https://github.com/gluster/glusterfs/pull/2188
+
+Bug: 1924044
+Change-Id: I7670dbe2d562b83db0af3753f994653ffdd49591
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/228941
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/cluster/dht/src/dht-common.c | 41 ++++++++++++++++++++++++++----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index fe1d0ee..7425c1a 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -8526,15 +8526,32 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ {
+     dht_local_t *local = NULL;
+     xlator_t *avail_subvol = NULL;
++    int lk_count = 0;
+
+     local = frame->local;
+
+     if (!dht_is_subvol_filled(this, subvol)) {
+-        gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+-                     subvol->name);
+-
+-        dht_set_parent_layout_in_dict(loc, this, local);
+-
++        lk_count = local->lock[0].layout.parent_layout.lk_count;
++        gf_msg_debug(this->name, 0, "creating %s on %s with lock_count %d",
++                     loc->path, subvol->name, lk_count);
++        /*The function dht_set_parent_layout_in_dict sets the layout
++          in dictionary and posix_create validates a layout before
++          creating a file.In case if parent layout does not match
++          with disk layout posix xlator throw an error but in case
++          if volume is shrunk layout has been changed by rebalance daemon
++          so we need to call this function only while a function is calling
++          without taking any lock otherwise we would not able to populate a
++          layout on disk in case if layout has changed.
++ */ ++ if (!lk_count) { ++ dht_set_parent_layout_in_dict(loc, this, local); ++ } else { ++ /* Delete a key to avoid layout validate if it was set by ++ previous STACK_WIND attempt when a lock was not taken ++ by dht_create ++ */ ++ (void)dict_del_sizen(local->params, GF_PREOP_PARENT_KEY); ++ } + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +@@ -8554,12 +8571,14 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + + goto out; + } +- +- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, +- subvol->name); +- +- dht_set_parent_layout_in_dict(loc, this, local); +- ++ lk_count = local->lock[0].layout.parent_layout.lk_count; ++ gf_msg_debug(this->name, 0, "creating %s on %s with lk_count %d", ++ loc->path, subvol->name, lk_count); ++ if (!lk_count) { ++ dht_set_parent_layout_in_dict(loc, this, local); ++ } else { ++ (void)dict_del_sizen(local->params, GF_PREOP_PARENT_KEY); ++ } + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +-- +1.8.3.1 + diff --git a/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch b/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch new file mode 100644 index 0000000..dcf0940 --- /dev/null +++ b/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch @@ -0,0 +1,387 @@ +From 7b7ec67680415c22773ebb2a5daacf298b6b1e06 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Sat, 13 Feb 2021 18:37:32 +0100 +Subject: [PATCH 537/538] cluster/afr: Fix race in lockinfo (f)getxattr + +A shared dictionary was updated outside the lock after having updated +the number of remaining answers. This means that one thread may be +processing the last answer and unwinding the request before another +thread completes updating the dict. + + Thread 1 Thread 2 + + LOCK() + call_cnt-- (=1) + UNLOCK() + LOCK() + call_cnt-- (=0) + UNLOCK() + update_dict(dict) + if (call_cnt == 0) { + STACK_UNWIND(dict); + } + update_dict(dict) + if (call_cnt == 0) { + STACK_UNWIND(dict); + } + +The updates from thread 1 are lost. + +This patch also reduces the work done inside the locked region and +reduces code duplication. 
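To see the fixed ordering in isolation, here is a minimal standalone sketch in C (gather_t and gather_answer are invented names; the real code merges GlusterFS dicts and uses uatomic counters): each answer publishes its contribution to the shared result before the pending counter is decremented, so the thread that observes zero is guaranteed to see every update.

    #include <pthread.h>

    typedef struct gather {
        pthread_mutex_t lock;
        int pending; /* answers still expected */
        int merged;  /* stands in for the shared reply dict */
    } gather_t;

    /* Returns 1 when the caller handled the last answer and must unwind. */
    int
    gather_answer(gather_t *g, int value)
    {
        int last;

        pthread_mutex_lock(&g->lock);
        g->merged += value;         /* publish the update first...   */
        last = (--g->pending == 0); /* ...then decrement the counter */
        pthread_mutex_unlock(&g->lock);

        return last; /* only the last caller may read g->merged */
    }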
+ +Upstream-patch: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2162 +> Fixes: #2161 +> Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa +> Signed-off-by: Xavi Hernandez + +BUG: 1911292 +Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/228924 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-inode-read.c | 254 ++++++++++++++----------------- + 1 file changed, 112 insertions(+), 142 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index cf305af..98e195a 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -15,6 +15,8 @@ + #include + #include + ++#include ++ + #include + #include "afr.h" + #include +@@ -868,188 +870,121 @@ afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + +-int32_t +-afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) ++static int32_t ++afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata) + { +- int call_cnt = 0, len = 0; +- char *lockinfo_buf = NULL; +- dict_t *lockinfo = NULL, *newdict = NULL; +- afr_local_t *local = NULL; ++ afr_local_t *local; ++ dict_t *local_dict; ++ dict_t *local_xdata; ++ int32_t ret; + +- LOCK(&frame->lock); +- { +- local = frame->local; ++ local = frame->local; ++ local_dict = NULL; ++ local_xdata = NULL; + +- call_cnt = --local->call_count; ++ ret = -ENOMEM; + +- if ((op_ret < 0) || (!dict && !xdata)) { +- goto unlock; +- } +- +- if (xdata) { +- if (!local->xdata_rsp) { +- local->xdata_rsp = dict_new(); +- if (!local->xdata_rsp) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } +- } ++ if ((dict != NULL) && (local->dict == NULL)) { ++ local_dict = dict_new(); ++ if (local_dict == NULL) { ++ goto done; + } ++ } + +- if (!dict) { +- goto unlock; ++ if ((xdata != NULL) && (local->xdata_rsp == NULL)) { ++ local_xdata = dict_new(); ++ if (local_xdata == NULL) { ++ goto done; + } ++ } + +- op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, +- (void **)&lockinfo_buf, &len); ++ if ((local_dict != NULL) || (local_xdata != NULL)) { ++ /* TODO: Maybe it would be better to preallocate both dicts before ++ * sending the requests. This way we don't need to use a LOCK() ++ * here. 
*/ ++ LOCK(&frame->lock); + +- if (!lockinfo_buf) { +- goto unlock; ++ if ((local_dict != NULL) && (local->dict == NULL)) { ++ local->dict = local_dict; ++ local_dict = NULL; + } + +- if (!local->dict) { +- local->dict = dict_new(); +- if (!local->dict) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } ++ if ((local_xdata != NULL) && (local->xdata_rsp == NULL)) { ++ local->xdata_rsp = local_xdata; ++ local_xdata = NULL; + } +- } +-unlock: +- UNLOCK(&frame->lock); + +- if (lockinfo_buf != NULL) { +- lockinfo = dict_new(); +- if (lockinfo == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- } else { +- op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); +- +- if (lockinfo && local->dict) { +- dict_copy(lockinfo, local->dict); +- } +- } +- } +- +- if (xdata && local->xdata_rsp) { +- dict_copy(xdata, local->xdata_rsp); ++ UNLOCK(&frame->lock); + } + +- if (!call_cnt) { +- newdict = dict_new(); +- if (!newdict) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unwind; ++ if (dict != NULL) { ++ if (dict_copy(dict, local->dict) < 0) { ++ goto done; + } ++ } + +- op_ret = dict_allocate_and_serialize( +- local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); +- if (op_ret != 0) { +- local->op_ret = -1; +- goto unwind; ++ if (xdata != NULL) { ++ if (dict_copy(xdata, local->xdata_rsp) < 0) { ++ goto done; + } ++ } + +- op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, +- (void *)lockinfo_buf, len); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- goto unwind; +- } ++ ret = 0; + +- unwind: +- AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict, +- local->xdata_rsp); ++done: ++ if (local_dict != NULL) { ++ dict_unref(local_dict); + } + +- dict_unref(lockinfo); ++ if (local_xdata != NULL) { ++ dict_unref(local_xdata); ++ } + +- return 0; ++ return ret; + } + +-int32_t +-afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) ++static void ++afr_getxattr_lockinfo_cbk_common(call_frame_t *frame, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, dict_t *xdata, ++ bool is_fgetxattr) + { +- int call_cnt = 0, len = 0; ++ int len = 0; + char *lockinfo_buf = NULL; + dict_t *lockinfo = NULL, *newdict = NULL; + afr_local_t *local = NULL; + +- LOCK(&frame->lock); +- { +- local = frame->local; +- +- call_cnt = --local->call_count; +- +- if ((op_ret < 0) || (!dict && !xdata)) { +- goto unlock; +- } +- +- if (xdata) { +- if (!local->xdata_rsp) { +- local->xdata_rsp = dict_new(); +- if (!local->xdata_rsp) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } +- } +- } +- +- if (!dict) { +- goto unlock; +- } ++ local = frame->local; + ++ if ((op_ret >= 0) && (dict != NULL)) { + op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, + (void **)&lockinfo_buf, &len); +- +- if (!lockinfo_buf) { +- goto unlock; +- } +- +- if (!local->dict) { +- local->dict = dict_new(); +- if (!local->dict) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; ++ if (lockinfo_buf != NULL) { ++ lockinfo = dict_new(); ++ if (lockinfo == NULL) { ++ op_ret = -1; ++ } else { ++ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); + } + } + } +-unlock: +- UNLOCK(&frame->lock); + +- if (lockinfo_buf != NULL) { +- lockinfo = dict_new(); +- if (lockinfo == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- } else { +- op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); +- +- if (lockinfo && 
local->dict) { +- dict_copy(lockinfo, local->dict); +- } ++ if ((op_ret >= 0) && ((lockinfo != NULL) || (xdata != NULL))) { ++ op_ret = afr_update_local_dicts(frame, lockinfo, xdata); ++ if (lockinfo != NULL) { ++ dict_unref(lockinfo); + } + } + +- if (xdata && local->xdata_rsp) { +- dict_copy(xdata, local->xdata_rsp); ++ if (op_ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; + } + +- if (!call_cnt) { ++ if (uatomic_sub_return(&local->call_count, 1) == 0) { + newdict = dict_new(); + if (!newdict) { + local->op_ret = -1; +- local->op_errno = ENOMEM; ++ local->op_errno = op_errno = ENOMEM; + goto unwind; + } + +@@ -1057,23 +992,58 @@ unlock: + local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); + if (op_ret != 0) { + local->op_ret = -1; ++ local->op_errno = op_errno = ENOMEM; + goto unwind; + } + + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; ++ GF_FREE(lockinfo_buf); ++ local->op_ret = op_ret = -1; ++ local->op_errno = op_errno = -op_ret; + goto unwind; + } + + unwind: +- AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict, +- local->xdata_rsp); ++ /* TODO: These unwinds use op_ret and op_errno instead of local->op_ret ++ * and local->op_errno. This doesn't seem right because any ++ * failure during processing of each answer could be silently ++ * ignored. This is kept this was the old behavior and because ++ * local->op_ret is initialized as -1 and local->op_errno is ++ * initialized as EUCLEAN, which makes these values useless. */ ++ if (is_fgetxattr) { ++ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict, ++ local->xdata_rsp); ++ } else { ++ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict, ++ local->xdata_rsp); ++ } ++ ++ if (newdict != NULL) { ++ dict_unref(newdict); ++ } + } ++} ++ ++static int32_t ++afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata, ++ false); + +- dict_unref(lockinfo); ++ return 0; ++} ++ ++static int32_t ++afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata, ++ true); + + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch b/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch new file mode 100644 index 0000000..de164a3 --- /dev/null +++ b/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch @@ -0,0 +1,46 @@ +From 31cd7627ff329a39691239322df3bc88e962ad02 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Mon, 1 Mar 2021 05:19:39 +0100 +Subject: [PATCH 538/538] afr: fix coverity issue introduced by 90cefde + +Fixes coverity issues 1447029 and 1447028. 
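The pitfall behind these two coverity reports can be shown with a small hypothetical sketch in C (table_copy is invented; it stands in for any API that reports failure with a NULL pointer rather than a negative integer):

    #include <stddef.h>

    struct table;

    /* Hypothetical deep copy: returns dst on success, NULL on failure. */
    struct table *
    table_copy(const struct table *src, struct table *dst);

    int
    table_merge(const struct table *src, struct table *dst)
    {
        /* Broken check: a valid pointer is never negative, so
         *     if (table_copy(src, dst) < 0) { ... }
         * can never detect the failure. The correct check compares
         * against NULL, which is what this patch does: */
        if (table_copy(src, dst) == NULL)
            return -1;

        return 0;
    }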
+ +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2201 +> Updates: #2161 +> Change-Id: I6a564231d6aeb76de20675b7ced5d45eed8c377f +> Signed-off-by: Xavi Hernandez + +BUG: 1911292 +Change-Id: I6a564231d6aeb76de20675b7ced5d45eed8c377f +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/229200 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-inode-read.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index 98e195a..d874172 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -918,13 +918,13 @@ afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata) + } + + if (dict != NULL) { +- if (dict_copy(dict, local->dict) < 0) { ++ if (dict_copy(dict, local->dict) == NULL) { + goto done; + } + } + + if (xdata != NULL) { +- if (dict_copy(xdata, local->xdata_rsp) < 0) { ++ if (dict_copy(xdata, local->xdata_rsp) == NULL) { + goto done; + } + } +-- +1.8.3.1 + diff --git a/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch b/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch new file mode 100644 index 0000000..18f851f --- /dev/null +++ b/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch @@ -0,0 +1,62 @@ +From 88523814fe296c9cc9f7619e06210830f59c5edf Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 12 Mar 2021 10:32:09 +0100 +Subject: [PATCH 539/539] extras: disable lookup-optimize in virt and block + groups + +lookup-optimize doesn't provide any benefit for virtualized +environments and gluster-block workloads, but it's known to cause +corruption in some cases when sharding is also enabled and the volume +is expanded or shrunk. + +For this reason, we disable lookup-optimize by default on those +environments. 
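As a usage note (an administrative assumption, not part of the patch): these group profiles only take effect for volumes configured with the corresponding group, so for an already existing virt or gluster-block volume the same setting would presumably be applied by hand with "gluster volume set <VOLNAME> cluster.lookup-optimize off", where <VOLNAME> is a placeholder.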
+ +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2254 +> Fixes: #2253 +> Change-Id: I25861aa50b335556a995a9c33318dd3afb41bf71 +> Signed-off-by: Xavi Hernandez + +BUG: 1939372 +Change-Id: I25861aa50b335556a995a9c33318dd3afb41bf71 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/231173 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-distributed-virt | 1 + + extras/group-gluster-block | 1 + + extras/group-virt.example | 1 + + 3 files changed, 3 insertions(+) + +diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt +index a960b76..6da3de0 100644 +--- a/extras/group-distributed-virt ++++ b/extras/group-distributed-virt +@@ -8,3 +8,4 @@ user.cifs=off + client.event-threads=4 + server.event-threads=4 + performance.client-io-threads=on ++cluster.lookup-optimize=off +diff --git a/extras/group-gluster-block b/extras/group-gluster-block +index 1e39801..b8d3e8d 100644 +--- a/extras/group-gluster-block ++++ b/extras/group-gluster-block +@@ -25,3 +25,4 @@ features.shard-block-size=64MB + user.cifs=off + server.allow-insecure=on + cluster.choose-local=off ++cluster.lookup-optimize=off +diff --git a/extras/group-virt.example b/extras/group-virt.example +index 3a441eb..155f5f5 100644 +--- a/extras/group-virt.example ++++ b/extras/group-virt.example +@@ -21,3 +21,4 @@ server.tcp-user-timeout=20 + server.keepalive-time=10 + server.keepalive-interval=2 + server.keepalive-count=5 ++cluster.lookup-optimize=off +-- +1.8.3.1 + diff --git a/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch b/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch new file mode 100644 index 0000000..0a89c64 --- /dev/null +++ b/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch @@ -0,0 +1,37 @@ +From 6895b6c67e9c29af3f966b4d9ee5cb40da763d24 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 14 Apr 2021 12:38:45 +0530 +Subject: [PATCH 540/540] extras: Disable write-behind for group samba. + +when write-behind is enabled with Samba it could be a +source of data corruption. The translator, while +processing a write call, immediately returns success but continues +writing the data to the server in the background. This can cause data +corruption when two clients relying on Samba to provide data consistency +are operating on the same file. 
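Likewise, for a Samba-exported volume that was not created with this group profile, the equivalent manual step would presumably be "gluster volume set <VOLNAME> performance.write-behind off" (<VOLNAME> is a placeholder), trading some write performance for the consistency guarantee described above.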
+
+> fixes: https://github.com/gluster/glusterfs/issues/2329
+
+Change-Id: I5265056ff315a5f3cd97ea11b18db0831b1b901d
+Solution: Disable write-behind for samba group
+BUG: 1948547
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/235876
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ extras/group-samba | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/extras/group-samba b/extras/group-samba
+index eeee6e0..9611a1f 100644
+--- a/extras/group-samba
++++ b/extras/group-samba
+@@ -9,3 +9,4 @@ performance.nl-cache=on
+ performance.nl-cache-timeout=600
+ performance.readdir-ahead=on
+ performance.parallel-readdir=on
++performance.write-behind=off
+--
+1.8.3.1
+
diff --git a/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch b/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch
new file mode 100644
index 0000000..29135df
--- /dev/null
+++ b/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch
@@ -0,0 +1,545 @@
+From 23ab7175e64ab4d75fbcb6874008843cc78b65b8 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey
+Date: Fri, 16 Apr 2021 18:48:56 +0530
+Subject: [PATCH 541/542] glusterd-volgen: Add functionality to accept any
+ custom xlator
+
+Add a new function that allows users to insert any custom xlator.
+This provides a way to add custom processing into file operations.
+
+Users can deploy the plugin (xlator shared object) and integrate it into glusterfsd.
+
+If users want to enable a custom xlator, do the following:
+
+1. put the xlator object (.so file) into "XLATOR_DIR/user/"
+2. set the option user.xlator.<name> to an existing xlator name to specify the position in the graph
+3. restart the gluster volume
+
+Options for a custom xlator can be set via "user.xlator.<name>.<option>".
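The key grammar used above can be summarized with a condensed C sketch of the classification the feature performs (simplified from the patch, error handling omitted): fnmatch() distinguishes position keys, user.xlator.<name>, from option keys, user.xlator.<name>.<option>.

    #include <fnmatch.h>

    /* Returns 1 for a position key (user.xlator.<name>), 0 otherwise,
     * including option keys (user.xlator.<name>.<option>). */
    static int
    is_user_xlator_position_key(const char *key)
    {
        if (fnmatch("user.xlator.*", key, 0) != 0)
            return 0; /* not a user-xlator key at all */

        if (fnmatch("user.xlator.*.*", key, 0) == 0)
            return 0; /* an option key, not a position key */

        return 1;
    }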
+ +Backport of : +>https://github.com/gluster/glusterfs/commit/ea86b664f3b1f54901ce1b7d7fba7d80456f2089 +>Fixes: https://github.com/gluster/glusterfs/issues/1943 +>Change-Id: Ife3ae1514ea474f5dae2897223012f9d04b64674 +>Signed-off-by:Ryo Furuhashi +>Co-authored-by: Yaniv Kaul +>Co-authored-by: Xavi Hernandez + +Change-Id: Ic8f28bfcfde67213eb1092b0ebf4822c874d37bb +BUG: 1927235 +Signed-off-by: Ashish Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/236830 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Xavi Hernandez Juan +--- + cli/src/cli-rpc-ops.c | 148 ++++++++++++++++++++------ + cli/src/cli.h | 2 - + tests/basic/user-xlator.t | 65 ++++++++++++ + tests/env.rc.in | 3 + + xlators/mgmt/glusterd/src/glusterd-volgen.c | 155 ++++++++++++++++++++++++++++ + 5 files changed, 342 insertions(+), 31 deletions(-) + create mode 100755 tests/basic/user-xlator.t + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 4e91265..51b5447 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -2269,49 +2269,131 @@ out: + return ret; + } + +-char * +-is_server_debug_xlator(void *myframe) ++/* ++ * returns ++ * 1 : is server debug xlator ++ * 0 : is not server debug xlator ++ * <0 : error ++ */ ++static int ++is_server_debug_xlator(char *key, char *value) ++{ ++ if (!key || !value) ++ return -1; ++ ++ if (strcmp("debug.trace", key) == 0 || ++ strcmp("debug.error-gen", key) == 0) { ++ if (strcmp("client", value) == 0) ++ return 0; ++ else ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * returns ++ * 1 : is user xlator ++ * 0 : is not user xlator ++ * <0 : error ++ */ ++static int ++is_server_user_xlator(char *key, char *value) ++{ ++ int ret = 0; ++ ++ if (!key || !value) ++ return -1; ++ ++ ret = fnmatch("user.xlator.*", key, 0); ++ if (ret < 0) { ++ ret = -1; ++ goto out; ++ } else if (ret == FNM_NOMATCH) { ++ ret = 0; ++ goto out; ++ } ++ ++ ret = fnmatch("user.xlator.*.*", key, 0); ++ if (ret < 0) { ++ ret = -1; ++ goto out; ++ } else if (ret != FNM_NOMATCH) { // this is user xlator's option key ++ ret = 0; ++ goto out; ++ } ++ ++ ret = 1; ++ ++out: ++ return ret; ++} ++ ++static int ++added_server_xlator(void *myframe, char **added_xlator) + { + call_frame_t *frame = NULL; + cli_local_t *local = NULL; + char **words = NULL; + char *key = NULL; + char *value = NULL; +- char *debug_xlator = NULL; ++ int ret = 0; + + frame = myframe; + local = frame->local; + words = (char **)local->words; + + while (*words != NULL) { +- if (strstr(*words, "trace") == NULL && +- strstr(*words, "error-gen") == NULL) { +- words++; +- continue; +- } +- + key = *words; + words++; + value = *words; +- if (value == NULL) ++ ++ if (!value) { + break; +- if (strstr(value, "client")) { +- words++; +- continue; +- } else { +- if (!(strstr(value, "posix") || strstr(value, "acl") || +- strstr(value, "locks") || strstr(value, "io-threads") || +- strstr(value, "marker") || strstr(value, "index"))) { +- words++; +- continue; +- } else { +- debug_xlator = gf_strdup(key); +- break; ++ } ++ ++ ret = is_server_debug_xlator(key, value); ++ if (ret < 0) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "failed to check that debug xlator was added"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (ret) { ++ *added_xlator = gf_strdup(key); ++ if (!*added_xlator) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "Out of memory"); ++ ret = -1; ++ goto out; ++ } ++ break; ++ } ++ ++ ret = is_server_user_xlator(key, value); ++ if (ret < 0) { ++ 
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "failed to check that user xlator was added"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (ret) { ++ *added_xlator = gf_strdup(key); ++ if (!*added_xlator) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "Out of memory"); ++ ret = -1; ++ goto out; + } ++ break; + } + } + +- return debug_xlator; ++out: ++ return ret; + } + + int +@@ -2327,7 +2409,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + char msg[1024] = { + 0, + }; +- char *debug_xlator = NULL; ++ char *added_xlator = NULL; + char tmp_str[512] = { + 0, + }; +@@ -2365,18 +2447,26 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + * The process has to be restarted. So this is a check from the + * volume set option such that if debug xlators such as trace/errorgen + * are provided in the set command, warn the user. ++ * volume set option such that if user custom xlators or debug ++ * xlators such as trace/errorgen are provided in the set command, ++ * warn the user. + */ +- debug_xlator = is_server_debug_xlator(myframe); ++ ret = added_server_xlator(myframe, &added_xlator); ++ if (ret < 0) { ++ gf_log("cli", GF_LOG_ERROR, ++ "failed to check that server graph has been changed"); ++ goto out; ++ } + + if (dict_get_str(dict, "help-str", &help_str) && !msg[0]) + snprintf(msg, sizeof(msg), "Set volume %s", + (rsp.op_ret) ? "unsuccessful" : "successful"); +- if (rsp.op_ret == 0 && debug_xlator) { ++ if (rsp.op_ret == 0 && added_xlator) { + snprintf(tmp_str, sizeof(tmp_str), + "\n%s translator has been " + "added to the server volume file. Please restart the" + " volume for enabling the translator", +- debug_xlator); ++ added_xlator); + } + + if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) { +@@ -2394,7 +2484,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + cli_err("volume set: failed"); + } else { + if (help_str == NULL) { +- if (debug_xlator == NULL) ++ if (added_xlator == NULL) + cli_out("volume set: success"); + else + cli_out("volume set: success%s", tmp_str); +@@ -2408,7 +2498,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + out: + if (dict) + dict_unref(dict); +- GF_FREE(debug_xlator); ++ GF_FREE(added_xlator); + cli_cmd_broadcast_response(ret); + gf_free_xdr_cli_rsp(rsp); + return ret; +diff --git a/cli/src/cli.h b/cli/src/cli.h +index 7b4f446..b5b69ea 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -502,8 +502,6 @@ cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno, + int + cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict, + char *key); +-char * +-is_server_debug_xlator(void *myframe); + + int32_t + cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options, +diff --git a/tests/basic/user-xlator.t b/tests/basic/user-xlator.t +new file mode 100755 +index 0000000..a711f9f +--- /dev/null ++++ b/tests/basic/user-xlator.t +@@ -0,0 +1,65 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++#### patchy.dev.d-backends-patchy1.vol ++brick=${B0//\//-} ++SERVER_VOLFILE="/var/lib/glusterd/vols/${V0}/${V0}.${H0}.${brick:1}-${V0}1.vol" ++ ++cleanup; ++ ++TEST mkdir -p $B0/single-brick ++TEST mkdir -p ${GLUSTER_XLATOR_DIR}/user ++ ++## deploy dummy user xlator ++TEST cp ${GLUSTER_XLATOR_DIR}/playground/template.so ${GLUSTER_XLATOR_DIR}/user/hoge.so ++ ++TEST glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6}; ++TEST $CLI volume set $V0 user.xlator.hoge posix ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++ ++TEST $CLI volume set $V0 user.xlator.hoge.opt1 10 ++TEST grep -q '"option opt1 10"' ${SERVER_VOLFILE} ++TEST $CLI volume set $V0 user.xlator.hoge.opt2 hogehoge ++TEST grep -q '"option opt2 hogehoge"' ${SERVER_VOLFILE} ++TEST $CLI volume set $V0 user.xlator.hoge.opt3 true ++TEST grep -q '"option opt3 true"' ${SERVER_VOLFILE} ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ ++TEST $CLI volume set $V0 user.xlator.hoge trash ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ ++TEST ! $CLI volume set $V0 user.xlator.hoge unknown ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} # When the CLI fails, the volfile is not modified. 
++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ ++#### teardown ++ ++TEST rm -f ${GLUSTER_XLATOR_DIR}/user/hoge.so ++cleanup; +diff --git a/tests/env.rc.in b/tests/env.rc.in +index c7472a7..1f0ca88 100644 +--- a/tests/env.rc.in ++++ b/tests/env.rc.in +@@ -40,3 +40,6 @@ export GLUSTER_LIBEXECDIR + + RUN_NFS_TESTS=@BUILD_GNFS@ + export RUN_NFS_TESTS ++ ++GLUSTER_XLATOR_DIR=@libdir@/glusterfs/@PACKAGE_VERSION@/xlator ++export GLUSTER_XLATOR_DIR +\ No newline at end of file +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 1920284..a242b5c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -45,6 +45,11 @@ struct gd_validate_reconf_opts { + + extern struct volopt_map_entry glusterd_volopt_map[]; + ++struct check_and_add_user_xlator_t { ++ volgen_graph_t *graph; ++ char *volname; ++}; ++ + #define RPC_SET_OPT(XL, CLI_OPT, XLATOR_OPT, ERROR_CMD) \ + do { \ + char *_value = NULL; \ +@@ -2822,6 +2827,145 @@ out: + return ret; + } + ++static gf_boolean_t ++check_user_xlator_position(dict_t *dict, char *key, data_t *value, ++ void *prev_xlname) ++{ ++ if (strncmp(key, "user.xlator.", SLEN("user.xlator.")) != 0) { ++ return false; ++ } ++ ++ if (fnmatch("user.xlator.*.*", key, 0) == 0) { ++ return false; ++ } ++ ++ char *value_str = data_to_str(value); ++ if (!value_str) { ++ return false; ++ } ++ ++ if (strcmp(value_str, prev_xlname) == 0) { ++ gf_log("glusterd", GF_LOG_INFO, ++ "found insert position of user-xlator(%s)", key); ++ return true; ++ } ++ ++ return false; ++} ++ ++static int ++set_user_xlator_option(dict_t *set_dict, char *key, data_t *value, void *data) ++{ ++ xlator_t *xl = data; ++ char *optname = strrchr(key, '.') + 1; ++ ++ gf_log("glusterd", GF_LOG_DEBUG, "set user xlator option %s = %s", key, ++ value->data); ++ ++ return xlator_set_option(xl, optname, strlen(optname), data_to_str(value)); ++} ++ ++static int ++insert_user_xlator_to_graph(dict_t *set_dict, char *key, data_t *value, ++ void *action_data) ++{ ++ int ret = -1; ++ ++ struct check_and_add_user_xlator_t *data = action_data; ++ ++ char *xlator_name = strrchr(key, '.') + 1; // user.xlator. 
++ char *xlator_option_matcher = NULL; ++ char *type = NULL; ++ xlator_t *xl = NULL; ++ ++ // convert optkey to xlator type ++ if (gf_asprintf(&type, "user/%s", xlator_name) < 0) { ++ gf_log("glusterd", GF_LOG_ERROR, "failed to generate user-xlator type"); ++ goto out; ++ } ++ ++ gf_log("glusterd", GF_LOG_INFO, "add user xlator=%s to graph", type); ++ ++ xl = volgen_graph_add(data->graph, type, data->volname); ++ if (!xl) { ++ goto out; ++ } ++ ++ ret = gf_asprintf(&xlator_option_matcher, "user.xlator.%s.*", xlator_name); ++ if (ret < 0) { ++ gf_log("glusterd", GF_LOG_ERROR, ++ "failed to generate user-xlator option matcher"); ++ goto out; ++ } ++ ++ dict_foreach_fnmatch(set_dict, xlator_option_matcher, ++ set_user_xlator_option, xl); ++ ++out: ++ if (type) ++ GF_FREE(type); ++ if (xlator_option_matcher) ++ GF_FREE(xlator_option_matcher); ++ ++ return ret; ++} ++ ++static int ++validate_user_xlator_position(dict_t *this, char *key, data_t *value, ++ void *unused) ++{ ++ int ret = -1; ++ int i = 0; ++ ++ if (!value) ++ goto out; ++ ++ if (fnmatch("user.xlator.*.*", key, 0) == 0) { ++ ret = 0; ++ goto out; ++ } ++ ++ char *value_str = data_to_str(value); ++ if (!value_str) ++ goto out; ++ ++ int num_xlators = sizeof(server_graph_table) / ++ sizeof(server_graph_table[0]); ++ for (i = 0; i < num_xlators; i++) { ++ if (server_graph_table[i].dbg_key && ++ strcmp(value_str, server_graph_table[i].dbg_key) == 0) { ++ ret = 0; ++ goto out; ++ } ++ } ++ ++out: ++ if (ret == -1) ++ gf_log("glusterd", GF_LOG_ERROR, "invalid user xlator position %s = %s", ++ key, value->data); ++ ++ return ret; ++} ++ ++static int ++check_and_add_user_xl(volgen_graph_t *graph, dict_t *set_dict, char *volname, ++ char *prev_xlname) ++{ ++ if (!prev_xlname) ++ goto out; ++ ++ struct check_and_add_user_xlator_t data = {.graph = graph, ++ .volname = volname}; ++ ++ if (dict_foreach_match(set_dict, check_user_xlator_position, prev_xlname, ++ insert_user_xlator_to_graph, &data) < 0) { ++ return -1; ++ } ++ ++out: ++ return 0; ++} ++ + static int + server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +@@ -2831,6 +2975,12 @@ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + char *loglevel = NULL; + int i = 0; + ++ if (dict_foreach_fnmatch(set_dict, "user.xlator.*", ++ validate_user_xlator_position, NULL) < 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ + i = sizeof(server_graph_table) / sizeof(server_graph_table[0]) - 1; + + while (i >= 0) { +@@ -2848,6 +2998,11 @@ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + if (ret) + goto out; + ++ ret = check_and_add_user_xl(graph, set_dict, volinfo->volname, ++ server_graph_table[i].dbg_key); ++ if (ret) ++ goto out; ++ + i--; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch b/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch new file mode 100644 index 0000000..f6e0641 --- /dev/null +++ b/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch @@ -0,0 +1,64 @@ +From f3db0c99faf813e0f2e9ffcf599416555a59df1f Mon Sep 17 00:00:00 2001 +From: Ashish Pandey +Date: Tue, 9 Feb 2021 16:43:35 +0530 +Subject: [PATCH 542/542] xlaotrs/mgmt: Fixing coverity issue 1445996 + +Backport of https://github.com/gluster/glusterfs/pull/2148/commits/9785e96e0bdf6e60896570fdf5e4a6976a6f60ba + +Fixing "Null pointer dereferences" + +BUG: 1927235 +Change-Id: Idbc014e1302d2450f97bccd028681198c0d97424 +Signed-off-by: Ashish Pandey +Signed-off-by: 
Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/237433 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index a242b5c..71aed08 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -2916,21 +2916,23 @@ validate_user_xlator_position(dict_t *this, char *key, data_t *value, + { + int ret = -1; + int i = 0; ++ char *value_str = NULL; + + if (!value) + goto out; + ++ value_str = data_to_str(value); ++ if (!value_str) ++ goto out; ++ + if (fnmatch("user.xlator.*.*", key, 0) == 0) { + ret = 0; + goto out; + } + +- char *value_str = data_to_str(value); +- if (!value_str) +- goto out; +- + int num_xlators = sizeof(server_graph_table) / + sizeof(server_graph_table[0]); ++ + for (i = 0; i < num_xlators; i++) { + if (server_graph_table[i].dbg_key && + strcmp(value_str, server_graph_table[i].dbg_key) == 0) { +@@ -2942,7 +2944,7 @@ validate_user_xlator_position(dict_t *this, char *key, data_t *value, + out: + if (ret == -1) + gf_log("glusterd", GF_LOG_ERROR, "invalid user xlator position %s = %s", +- key, value->data); ++ key, value_str); + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch b/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch new file mode 100644 index 0000000..c6194c7 --- /dev/null +++ b/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch @@ -0,0 +1,162 @@ +From 71fc5b7949e00c4448f5ec1291e756b201a70082 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 29 Apr 2021 18:34:57 +0530 +Subject: [PATCH 543/543] glusterd: handle custom xlator failure cases + +Problem-1: +custom xlator insertion was failing for those xlators in the brick graph +whose dbg_key was NULL in the server_graph_table. Looking at the git log, +the dbg_key was added in commit d1397dbd7d6cdbd2d81d5d36d608b6175d449db4 +for inserting debug xlators. + +Fix: I think it is fine to define it for all brick xlators below server. + +Problem-2: +In the commit-op phase, glusterd_op_set_volume() updates the volinfo +dict with the key-value pairs and then proceeds to create the volfiles. +If any of the steps fail, the volinfo dict retains those key-values, +until glusterd is restarted or `gluster vol reset $VOLNAME` is issued. + +Fix: +Make a copy of the volinfo dict and if there are any failures in +proceeding with the set volume logic, restore the dict to its original +state. 
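The rollback in the fix for Problem-2 follows a snapshot-and-restore pattern; here is a minimal standalone sketch in C (dup_table, restore_table, free_table and apply_options are invented stand-ins for dict_new/dict_copy/dict_reset and the option-setting logic): the mutable state is copied up front and restored on any failure, so a rejected operation leaves no stale key-value pairs behind.

    #include <stdbool.h>
    #include <stddef.h>

    struct table;

    struct table *dup_table(const struct table *t); /* deep copy, NULL on OOM */
    void restore_table(struct table *t, const struct table *snap);
    void free_table(struct table *t);
    bool apply_options(struct table *t); /* may fail part-way through */

    int
    set_options(struct table *live)
    {
        int ret = -1;
        struct table *snap = dup_table(live); /* snapshot before mutating */

        if (snap == NULL)
            return -1;

        if (apply_options(live)) {
            ret = 0; /* success: keep the changes */
        } else {
            restore_table(live, snap); /* failure: roll back */
        }

        free_table(snap);
        return ret;
    }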
+ +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2371 +> Change-Id: I9010dab33d0139b8e6d603308e331b6d220a4849 +> Updates: #2370 +> Signed-off-by: Ravishankar N + +Change-Id: I9010dab33d0139b8e6d603308e331b6d220a4849 +BUG: 1953901 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/239889 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/user-xlator.t | 16 ++++++++++++++-- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 16 ++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 14 +++++++------- + 3 files changed, 37 insertions(+), 9 deletions(-) + +diff --git a/tests/basic/user-xlator.t b/tests/basic/user-xlator.t +index a711f9f..ed2d831 100755 +--- a/tests/basic/user-xlator.t ++++ b/tests/basic/user-xlator.t +@@ -35,8 +35,18 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 + +-TEST $CLI volume set $V0 user.xlator.hoge trash +-TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++# Test that the insertion at all positions between server and posix is successful. ++# It is not guaranteed that the brick process will start/work in all positions though. ++TESTS_EXPECTED_IN_LOOP=34 ++declare -a brick_side_xlators=("decompounder" "io-stats" "quota" "index" "barrier" ++ "marker" "selinux" "io-threads" "upcall" "leases" ++ "read-only" "worm" "locks" "access-control" ++ "bitrot-stub" "changelog" "trash") ++for xlator in "${brick_side_xlators[@]}" ++ do ++ TEST_IN_LOOP $CLI volume set $V0 user.xlator.hoge $xlator ++ TEST_IN_LOOP grep -q 'user/hoge' ${SERVER_VOLFILE} ++ done + + TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 +@@ -49,6 +59,8 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 + + TEST ! $CLI volume set $V0 user.xlator.hoge unknown + TEST grep -q 'user/hoge' ${SERVER_VOLFILE} # When the CLI fails, the volfile is not modified. ++# User xlator insert failures must not prevent setting other volume options. 
++TEST $CLI volume set $V0 storage.reserve 10% + + TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 1e84f5f..893af29 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2911,6 +2911,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + uint32_t new_op_version = 0; + gf_boolean_t quorum_action = _gf_false; + glusterd_svc_t *svc = NULL; ++ dict_t *volinfo_dict_orig = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -2918,6 +2919,10 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + priv = this->private; + GF_ASSERT(priv); + ++ volinfo_dict_orig = dict_new(); ++ if (!volinfo_dict_orig) ++ goto out; ++ + ret = dict_get_int32n(dict, "count", SLEN("count"), &dict_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +@@ -2949,6 +2954,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + } + ++ if (dict_copy(volinfo->dict, volinfo_dict_orig) == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ + /* TODO: Remove this once v3.3 compatibility is not required */ + check_op_version = dict_get_str_boolean(dict, "check-op-version", + _gf_false); +@@ -3171,6 +3181,12 @@ out: + gf_msg_debug(this->name, 0, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action(); ++ if (ret < 0 && count > 1) { ++ if (dict_reset(volinfo->dict) == 0) ++ dict_copy(volinfo_dict_orig, volinfo->dict); ++ } ++ if (volinfo_dict_orig) ++ dict_unref(volinfo_dict_orig); + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 71aed08..aa85bdb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -2706,24 +2706,24 @@ out: + static volgen_brick_xlator_t server_graph_table[] = { + {brick_graph_add_server, NULL}, + {brick_graph_add_decompounder, "decompounder"}, +- {brick_graph_add_io_stats, "NULL"}, ++ {brick_graph_add_io_stats, "io-stats"}, + {brick_graph_add_sdfs, "sdfs"}, + {brick_graph_add_namespace, "namespace"}, +- {brick_graph_add_cdc, NULL}, ++ {brick_graph_add_cdc, "cdc" }, + {brick_graph_add_quota, "quota"}, + {brick_graph_add_index, "index"}, +- {brick_graph_add_barrier, NULL}, ++ {brick_graph_add_barrier, "barrier" }, + {brick_graph_add_marker, "marker"}, + {brick_graph_add_selinux, "selinux"}, + {brick_graph_add_fdl, "fdl"}, + {brick_graph_add_iot, "io-threads"}, + {brick_graph_add_upcall, "upcall"}, + {brick_graph_add_leases, "leases"}, +- {brick_graph_add_pump, NULL}, +- {brick_graph_add_ro, NULL}, +- {brick_graph_add_worm, NULL}, ++ {brick_graph_add_pump, "pump" }, ++ {brick_graph_add_ro, "read-only" }, ++ {brick_graph_add_worm, "worm" }, + {brick_graph_add_locks, "locks"}, +- {brick_graph_add_acl, "acl"}, ++ {brick_graph_add_acl, "access-control"}, + {brick_graph_add_bitrot_stub, "bitrot-stub"}, + {brick_graph_add_changelog, "changelog"}, + #if USE_GFDB /* changetimerecorder depends on gfdb */ +-- +1.8.3.1 + diff --git a/SOURCES/0544-tests-avoid-empty-paths-in-environment-variables.patch b/SOURCES/0544-tests-avoid-empty-paths-in-environment-variables.patch new file mode 100644 index 0000000..cb5e80b --- /dev/null +++ b/SOURCES/0544-tests-avoid-empty-paths-in-environment-variables.patch @@ -0,0 +1,86 @@ +From 3eaf937e69fe4219738c93d39af1cc909b1ee3f8 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Fri, 23 Apr 2021 09:30:35 +0000 +Subject: [PATCH 
544/584] tests: avoid empty paths in environment variables
+
+Many variables containing paths in env.rc.in are defined in a way
+that leaves a trailing ':' in the variable when the previous value
+was empty or undefined.
+
+In the particular case of the 'LD_PRELOAD_PATH' variable, this causes
+the system to look for dynamic libraries in the current working
+directory. When this directory is inside a Gluster mount point, a
+significant delay is caused each time a program is run (and the testing
+framework can run lots of programs for each test).
+
+This patch prevents variables containing paths from ending with
+a trailing ':'.
+
+Backport of :
+>Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2349
+>Fixes: #2348
+>Change-Id: I669f5a78e14f176c0a58824ba577330989d84769
+>Signed-off-by: Xavi Hernandez
+>Signed-off-by: Rinku Kothiya
+
+Change-Id: Ie903ca443aa4789553ac4687818a7f69c113af41
+Signed-off-by: Rinku Kothiya
+---
+ tests/env.rc.in | 17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+diff --git a/tests/env.rc.in b/tests/env.rc.in
+index 1f0ca88..2d8ff0e 100644
+--- a/tests/env.rc.in
++++ b/tests/env.rc.in
+@@ -2,34 +2,31 @@ prefix=@prefix@
+ exec_prefix=@exec_prefix@
+ libdir=@libdir@
+
+-PATH=@sbindir@:$PATH
++PATH=@bindir@:@sbindir@${PATH:+:${PATH}}
+ export PATH
+
+ GLUSTERD_PIDFILEDIR=@localstatedir@/run/gluster
+ export GLUSTERD_PIDFILEDIR
+
+-LD_LIBRARY_PATH=@libdir@:$LD_LIBRARY_PATH
++LD_LIBRARY_PATH=@libdir@${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+ export LD_LIBRARY_PATH
+
+-LIBRARY_PATH=@libdir@:$LIBRARY_PATH
++LIBRARY_PATH=@libdir@${LIBRARY_PATH:+:${LIBRARY_PATH}}
+ export LIBRARY_PATH
+
+-CPATH=@includedir@:$CPATH
++CPATH=@includedir@${CPATH:+:${CPATH}}
+ export CPATH
+
+ GLUSTERD_WORKDIR=@GLUSTERD_WORKDIR@
+ export GLUSTERD_WORKDIR
+
+-PKG_CONFIG_PATH=@pkgconfigdir@:$PKG_CONFIG_PATH
++PKG_CONFIG_PATH=@pkgconfigdir@${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}
+ export PKG_CONFIG_PATH
+
+-PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
+-export PYTHONPATH
+-
+ PYTHON=@PYTHON@
+ export PYTHON
+
+-PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
++PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@${PYTHONPATH:+:${PYTHONPATH}}
+ export PYTHONPATH
+
+ GLUSTER_CMD_DIR=@sbindir@
+@@ -42,4 +39,4 @@ RUN_NFS_TESTS=@BUILD_GNFS@
+ export RUN_NFS_TESTS
+
+ GLUSTER_XLATOR_DIR=@libdir@/glusterfs/@PACKAGE_VERSION@/xlator
+-export GLUSTER_XLATOR_DIR
+\ No newline at end of file
++export GLUSTER_XLATOR_DIR
+--
+1.8.3.1
+
diff --git a/SOURCES/0545-tests-Excluded-tests-for-unsupported-components.patch b/SOURCES/0545-tests-Excluded-tests-for-unsupported-components.patch
new file mode 100644
index 0000000..add8025
--- /dev/null
+++ b/SOURCES/0545-tests-Excluded-tests-for-unsupported-components.patch
@@ -0,0 +1,32 @@
+From 6b340470e01dc177767fae990cf19037202140b7 Mon Sep 17 00:00:00 2001
+From: Tamar Shacked
+Date: Mon, 31 May 2021 21:27:41 +0300
+Subject: [PATCH 545/584] tests: Excluded tests for unsupported components
+
+Quota and Tier are deprecated as of RHGS-3.5.5.
+Stop running regression tests for them.
+ +Label: DOWNSTREAM ONLY + +Signed-off-by: Tamar Shacked +Change-Id: I3ca1aacba9a31129f5e68fcffdd80e69e51f7bcc +--- + run-tests.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/run-tests.sh b/run-tests.sh +index c835d93..5cc18b0 100755 +--- a/run-tests.sh ++++ b/run-tests.sh +@@ -349,7 +349,7 @@ function run_tests() + fi + + for t in $(find ${regression_testsdir}/tests -name '*.t' \ +- | LC_COLLATE=C sort) ; do ++ | egrep -v "tier|quota" | LC_COLLATE=C sort) ; do + old_cores=$(ls /*-*.core 2> /dev/null | wc -l) + total_tests=$((total_tests+1)) + if match $t "$@" ; then +-- +1.8.3.1 + diff --git a/SOURCES/0546-Update-rfc.sh-to-rhgs-3.5.5.patch b/SOURCES/0546-Update-rfc.sh-to-rhgs-3.5.5.patch new file mode 100644 index 0000000..935f533 --- /dev/null +++ b/SOURCES/0546-Update-rfc.sh-to-rhgs-3.5.5.patch @@ -0,0 +1,36 @@ +From 6ff3314f24687c8224a5520f9c4d2b3c39e730b7 Mon Sep 17 00:00:00 2001 +From: Tamar Shacked +Date: Tue, 1 Jun 2021 13:02:24 +0300 +Subject: [PATCH 546/584] Update rfc.sh to rhgs-3.5.5 + +Signed-off-by: Tamar Shacked +Change-Id: Iff543dc77174f983dd39f9fb7cc5005b49594750 +--- + rfc.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/rfc.sh b/rfc.sh +index c0559b9..daeff32 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.4"; ++branch="rhgs-3.5.5"; + + set_hooks_commit_msg() + { +@@ -315,7 +315,7 @@ main() + if [ -z "${reference}" ]; then + $drier git push $ORIGIN HEAD:refs/for/$branch/rfc; + else +- $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference}; ++ $drier git push $ORIGIN HEAD:refs/for/$branch; + fi + } + +-- +1.8.3.1 + diff --git a/SOURCES/0547-perf-write-behind-Clear-frame-local-on-conflict-erro.patch b/SOURCES/0547-perf-write-behind-Clear-frame-local-on-conflict-erro.patch new file mode 100644 index 0000000..2bd8e28 --- /dev/null +++ b/SOURCES/0547-perf-write-behind-Clear-frame-local-on-conflict-erro.patch @@ -0,0 +1,47 @@ +From 08c57926118b1ab8fa1fcd5b16913ff22d97d065 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Wed, 25 Sep 2019 19:50:27 +0530 +Subject: [PATCH 547/584] perf/write-behind: Clear frame->local on conflict + error + +WB saves the wb_inode in frame->local for the truncate and +ftruncate fops. This value is not cleared in case of error +on a conflicting write request. FRAME_DESTROY finds a non-null +frame->local and tries to free it using mem_put. However, +wb_inode is allocated using GF_CALLOC, causing the +process to crash. 
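A standalone C sketch of the allocator mismatch described above (all names are invented): the generic teardown assumes that anything left in the local field came from its own pool allocator, so an error path that leaves behind a pointer obtained from a different allocator must clear the field before unwinding, exactly as this patch does for the truncate and ftruncate requests.

    #include <stdlib.h>

    struct frame {
        void *local; /* teardown assumes pool-allocated memory */
    };

    void pool_put(void *p); /* hypothetical pool free, not malloc's free() */

    void
    frame_destroy(struct frame *f)
    {
        if (f->local)
            pool_put(f->local); /* would crash if local was malloc()ed */
        free(f);
    }

    void
    fail_request(struct frame *f)
    {
        /* local held a borrowed, malloc()ed object: detach it before
         * the generic teardown so pool_put() is never applied to it. */
        f->local = NULL;
        frame_destroy(f);
    }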
+ +credit: vpolakis@gmail.com + +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/23485/ +>Change-Id: I217f61470445775e05145aebe44c814731c1b8c5 +>Fixes: bz#1753592 +>Signed-off-by: N Balachandran + +BUG: 1917488 +Change-Id: I217f61470445775e05145aebe44c814731c1b8c5 +Signed-off-by: Sunil Kumar H G +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244277 +Tested-by: RHGS Build Bot +--- + xlators/performance/write-behind/src/write-behind.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c +index 90a0bcf..31ab723 100644 +--- a/xlators/performance/write-behind/src/write-behind.c ++++ b/xlators/performance/write-behind/src/write-behind.c +@@ -1523,6 +1523,10 @@ __wb_handle_failed_conflict(wb_request_t *req, wb_request_t *conflict, + */ + req->op_ret = -1; + req->op_errno = conflict->op_errno; ++ if ((req->stub->fop == GF_FOP_TRUNCATE) || ++ (req->stub->fop == GF_FOP_FTRUNCATE)) { ++ req->stub->frame->local = NULL; ++ } + + list_del_init(&req->todo); + list_add_tail(&req->winds, tasks); +-- +1.8.3.1 + diff --git a/SOURCES/0548-Add-tar-as-dependency-to-geo-rep-rpm-for-RHEL-8.3-an.patch b/SOURCES/0548-Add-tar-as-dependency-to-geo-rep-rpm-for-RHEL-8.3-an.patch new file mode 100644 index 0000000..aed347c --- /dev/null +++ b/SOURCES/0548-Add-tar-as-dependency-to-geo-rep-rpm-for-RHEL-8.3-an.patch @@ -0,0 +1,49 @@ +From cb7e72bce8b6a46605753b72919c1c839ecb4cc9 Mon Sep 17 00:00:00 2001 +From: root +Date: Thu, 3 Jun 2021 12:08:24 +0530 +Subject: [PATCH 548/584] Add tar as dependency to geo-rep rpm for RHEL 8.3 and + above + +Reason: from RHEL 8.3, tar is not bundled by default + +>Fixes: #1849 +>Signed-off-by: Shwetha K Acharya +>Change-Id: Ic1424e0550cef6a78e3e9e7b42665ab01016436f +Upstream Patch: https://github.com/gluster/glusterfs/pull/1850 + +BUG: 1901468 +Change-Id: Ic1424e0550cef6a78e3e9e7b42665ab01016436f +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244896 +Tested-by: RHGS Build Bot +Reviewed-by: Srijan Sivakumar +--- + glusterfs.spec.in | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 2be7677..424f4ab 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -521,6 +521,9 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release} + Requires: rsync + Requires: util-linux + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++%if ( 0%{?rhel} && ( ( 0%{?rhel} == 8 && 0%{?rhel_minor_version} >= 3 ) || 0%{?rhel} >= 9 ) ) ++Requires: tar ++%endif + # required for setting selinux bools + %if ( 0%{?rhel} && 0%{?rhel} >= 8 ) + Requires(post): policycoreutils-python-utils +@@ -1982,6 +1985,8 @@ fi + %endif + + %changelog ++* Thu Nov 26 2020 Shwetha K Acharya ++- Add tar as dependency to georeplication rpm for RHEL version >= 8.3 + + * Mon May 11 2020 Sunny Kumar + - added requires policycoreutils-python-utils on rhel8 for geo-replication +-- +1.8.3.1 + diff --git a/SOURCES/0549-geo-rep-Change-in-attribute-for-getting-function-nam.patch b/SOURCES/0549-geo-rep-Change-in-attribute-for-getting-function-nam.patch new file mode 100644 index 0000000..b61e5ea --- /dev/null +++ b/SOURCES/0549-geo-rep-Change-in-attribute-for-getting-function-nam.patch @@ -0,0 +1,45 @@ +From f90c13912a9c64e4479b55fee4ba4ac50e509302 Mon Sep 17 00:00:00 2001 +From: schaffung +Date: Sat, 9 Jan 2021 15:41:15 +0530 +Subject: [PATCH 549/584] geo-rep : Change in 
attribute for getting function
+ name in py 3 (#1900)
+
+Issue: The schedule_geo-rep script uses `func_name` to obtain
+the name of the function being referred to, but from Python 3
+onwards, the attribute has been renamed to `__name__`.
+
+Code Change:
+ Changing `func_name` to `__name__`.
+
+>Fixes: #1898
+>Signed-off-by: srijan-sivakumar
+>Change-Id: I4ed69a06cffed9db17c8f8949b8000c74be1d717
+Upstream Patch: https://github.com/gluster/glusterfs/pull/1900
+
+BUG: 1903911
+Change-Id: I4ed69a06cffed9db17c8f8949b8000c74be1d717
+Signed-off-by: srijan-sivakumar
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244570
+Tested-by: RHGS Build Bot
+Reviewed-by: Shwetha Acharya
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ extras/geo-rep/schedule_georep.py.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in
+index ac93716..9bb3df5 100644
+--- a/extras/geo-rep/schedule_georep.py.in
++++ b/extras/geo-rep/schedule_georep.py.in
+@@ -102,7 +102,7 @@ def cache_output_with_args(func):
+     """
+     def wrapper(*args, **kwargs):
+         global cache_data
+-        key = "_".join([func.func_name] + list(args))
++        key = "_".join([func.__name__] + list(args))
+         if cache_data.get(key, None) is None:
+             cache_data[key] = func(*args, **kwargs)
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0550-common-ha-stability-fixes-for-ganesha_grace-and-gane.patch b/SOURCES/0550-common-ha-stability-fixes-for-ganesha_grace-and-gane.patch
new file mode 100644
index 0000000..8bc6694
--- /dev/null
+++ b/SOURCES/0550-common-ha-stability-fixes-for-ganesha_grace-and-gane.patch
@@ -0,0 +1,184 @@
+From 053bb9c7356eae82b1089582bb2844388ae4df57 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY"
+Date: Wed, 2 Jun 2021 07:49:12 -0400
+Subject: [PATCH 550/584] common-ha: stability fixes for ganesha_grace and
+ ganesha_mon RAs
+
+Include fixes suggested by ClusterHA devs.
+
+1) It turns out that crm_attribute attrs and attrd_updater attrs really
+are one and the same, despite what I was told years ago.
+
+attrs created with crm_attribute ... --lifetime=reboot ... or
+attrd_updater are one and the same. As per ClusterHA devs, having an attr
+created with crm_attribute ... --lifetime=forever and also
+creating/updating the same attr with attrd_updater is a recipe for
+weird things to happen that will be difficult to debug.
+
+2) Using hostname -s or hostname for node names in crm_attribute and
+attrd_updater could potentially use the wrong name if the host has
+been renamed; use ocf_local_nodename() (in ocf-shellfuncs) instead.
+
+https://github.com/gluster/glusterfs/issues/2276
+https://github.com/gluster/glusterfs/pull/2283
+commit 9bd2c697686ec40e2c4f711df961860c8a735baa
+
+Change-Id: If572d396fae9206628714fb2ce00f72e94f2258f
+BUG: 1945143
+Signed-off-by: Kaleb S.
KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244593 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/ocf/ganesha_grace | 28 +++++++++--------------- + extras/ganesha/ocf/ganesha_mon | 47 ++++++++++++++-------------------------- + 2 files changed, 26 insertions(+), 49 deletions(-) + +diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace +index 825f716..edc6fa2 100644 +--- a/extras/ganesha/ocf/ganesha_grace ++++ b/extras/ganesha/ocf/ganesha_grace +@@ -94,25 +94,21 @@ esac + ganesha_grace_start() + { + local rc=${OCF_ERR_GENERIC} +- local host=$(hostname -s) ++ local host=$(ocf_local_nodename) + +- ocf_log debug "ganesha_grace_start()" +- # give ganesha_mon RA a chance to set the crm_attr first ++ ocf_log debug "ganesha_grace_start ${host}" ++ # give ganesha_mon RA a chance to set the attr first + # I mislike the sleep, but it's not clear that looping + # with a small sleep is necessarily better + # start has a 40sec timeout, so a 5sec sleep here is okay + sleep 5 +- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) ++ attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then +- host=$(hostname) +- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null ) +- if [ $? -ne 0 ]; then +- ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" +- fi ++ ocf_log info "grace start: attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" + fi + + # Three possibilities: +- # 1. There is no attribute at all and attr_updater returns ++ # 1. There is no attribute at all and attrd_updater returns + # a zero length string. This happens when + # ganesha_mon::monitor hasn't run at least once to set + # the attribute. The assumption here is that the system +@@ -164,17 +160,13 @@ ganesha_grace_notify() + + ganesha_grace_monitor() + { +- local host=$(hostname -s) ++ local host=$(ocf_local_nodename) + +- ocf_log debug "monitor" ++ ocf_log debug "ganesha_grace monitor ${host}" + +- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) ++ attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then +- host=$(hostname) +- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) +- if [ $? -ne 0 ]; then +- ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" +- fi ++ ocf_log info "attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" + fi + + # if there is no attribute (yet), maybe it's because +diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon +index 2b4a9d6..7fbbf70 100644 +--- a/extras/ganesha/ocf/ganesha_mon ++++ b/extras/ganesha/ocf/ganesha_mon +@@ -124,7 +124,6 @@ ganesha_mon_stop() + + ganesha_mon_monitor() + { +- local host=$(hostname -s) + local pid_file="/var/run/ganesha.pid" + local rhel6_pid_file="/var/run/ganesha.nfsd.pid" + local proc_pid="/proc/" +@@ -141,31 +140,27 @@ ganesha_mon_monitor() + + if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then + +- attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 ++ attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1 + if [ $? 
-ne 0 ]; then +- ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed" ++ ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1 failed" + fi + + # ganesha_grace (nfs-grace) RA follows grace-active attr + # w/ constraint location +- attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 ++ attrd_updater --name ${OCF_RESKEY_grace_active} -v 1 + if [ $? -ne 0 ]; then +- ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed" ++ ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_grace_active} -v 1 failed" + fi + + # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace) +- # track grace-active crm_attr (attr != crm_attr) +- # we can't just use the attr as there's no way to query +- # its value in RHEL6 pacemaker +- +- crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null +- if [ $? -ne 0 ]; then +- host=$(hostname) +- crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null +- if [ $? -ne 0 ]; then +- ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed" +- fi +- fi ++ # track grace-active attr. ++ # ++ # Originally we were told that attrs set with attrd_updater ++ # are different/distinct than attrs set with crm_attribute. ++ # Now, years later, we are told that they are the same and ++ # that the values of attrs set with attrd_updater can be ++ # retrieved with crm_attribute. Or with attrd_updater -Q ++ # now that we no longer have to deal with rhel6. + + return ${OCF_SUCCESS} + fi +@@ -182,26 +177,16 @@ ganesha_mon_monitor() + # the remaining ganesha.nfsds into grace before + # initiating the VIP fail-over. + +- attrd_updater -D -n ${OCF_RESKEY_grace_active} +- if [ $? -ne 0 ]; then +- ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed" +- fi +- +- host=$(hostname -s) +- crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null ++ attrd_updater --delete --name ${OCF_RESKEY_grace_active} + if [ $? -ne 0 ]; then +- host=$(hostname) +- crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null +- if [ $? -ne 0 ]; then +- ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed" +- fi ++ ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_grace_active} failed" + fi + + sleep ${OCF_RESKEY_grace_delay} + +- attrd_updater -D -n ${OCF_RESKEY_ganesha_active} ++ attrd_updater --delete --name ${OCF_RESKEY_ganesha_active} + if [ $? -ne 0 ]; then +- ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed" ++ ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_ganesha_active} failed" + fi + + return ${OCF_SUCCESS} +-- +1.8.3.1 + diff --git a/SOURCES/0551-common-ha-ensure-shared_storage-is-mounted-before-se.patch b/SOURCES/0551-common-ha-ensure-shared_storage-is-mounted-before-se.patch new file mode 100644 index 0000000..e3a107f --- /dev/null +++ b/SOURCES/0551-common-ha-ensure-shared_storage-is-mounted-before-se.patch @@ -0,0 +1,52 @@ +From fcfd40132624df5e888d53b4a8c4ce1cf7087413 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. 
KEITHLEY" +Date: Wed, 2 Jun 2021 07:40:04 -0400 +Subject: [PATCH 551/584] common-ha: ensure shared_storage is mounted before + setup (#2296) + +If gluster shared-storage isn't mounted, ganesha will fail to start + +commit a249b9020d281d0482db0aeb52e8856acd931e02 +https://github.com/gluster/glusterfs/issues/2278 +https://github.com/gluster/glusterfs/pull/2296 + +Change-Id: I6ed7044ea6b6c61b013ebe17088bfde311b109b7 +BUG: 1918018 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244592 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 491c61d..012084f 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -195,9 +195,22 @@ setup_cluster() + local servers=${3} + local unclean="" + local quorum_policy="stop" ++ local dfresult="" + + logger "setting up cluster ${name} with the following ${servers}" + ++ # check that shared_storage is mounted ++ dfresult=$(df -T ${HA_VOL_MNT}) ++ if [[ -z "${dfresult}" ]]; then ++ logger "gluster shared_storage is not mounted, exiting..." ++ exit 1 ++ fi ++ ++ if [[ "${dfresult}" != *"fuse.glusterfs"* ]]; then ++ logger "gluster shared_storage is not mounted, exiting..." ++ exit 1 ++ fi ++ + # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers} + pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} + if [ $? -ne 0 ]; then +-- +1.8.3.1 + diff --git a/SOURCES/0552-cluster-afr-Change-default-self-heal-window-size-to-.patch b/SOURCES/0552-cluster-afr-Change-default-self-heal-window-size-to-.patch new file mode 100644 index 0000000..41b94cd --- /dev/null +++ b/SOURCES/0552-cluster-afr-Change-default-self-heal-window-size-to-.patch @@ -0,0 +1,67 @@ +From e9e1b0bc6e2deaf44190636ab6826065ed3c0392 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar Karampuri +Date: Wed, 3 Feb 2021 18:10:40 +0530 +Subject: [PATCH 552/584] cluster/afr: Change default self-heal-window-size to + 1MB (#2068) + +At the moment self-heal-window-size is 128KB. This leads to healing data +in 128KB chunks. With the growth of data and the avg file sizes +nowadays, 1MB seems like a better default. + +Upstream patch details: +> https://github.com/gluster/glusterfs/pull/2111 +> Change-Id: I70c42c83b16c7adb53d6b5762969e878477efb5c +> Fixes: #2067 +> Signed-off-by: Pranith Kumar K + +BUG: 1946171 +Change-Id: Icd6a5c02ca16a1a6095f7bc10feed8ddc2505f41 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244557 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-self-heal-data.c | 6 ++++++ + xlators/cluster/afr/src/afr.c | 6 +++--- + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index b97c66b..156cb18 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -337,6 +337,12 @@ afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, + } + + block = 128 * 1024 * priv->data_self_heal_window_size; ++ if (HAS_HOLES((&replies[source].poststat))) { ++ /*Reduce the possibility of data-block allocations in case of files ++ * with holes. 
Correct way to fix it would be to use seek fop while ++ * healing data*/ ++ block = 128 * 1024; ++ } + + type = afr_data_self_heal_type_get(priv, healed_sinks, source, replies); + +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 33fe4d8..0956e5a 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -910,12 +910,12 @@ struct volume_options options[] = { + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 1024, +- .default_value = "1", ++ .default_value = "8", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"replicate"}, +- .description = "Maximum number blocks per file for which self-heal " +- "process would be applied simultaneously."}, ++ .description = "Maximum number of 128KB blocks per file for which " ++ "self-heal process would be applied simultaneously."}, + {.key = {"metadata-self-heal"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", +-- +1.8.3.1 + diff --git a/SOURCES/0553-cluster-ec-Change-self-heal-window-size-to-4MiB-by-d.patch b/SOURCES/0553-cluster-ec-Change-self-heal-window-size-to-4MiB-by-d.patch new file mode 100644 index 0000000..2144845 --- /dev/null +++ b/SOURCES/0553-cluster-ec-Change-self-heal-window-size-to-4MiB-by-d.patch @@ -0,0 +1,46 @@ +From 1fa01865eb9bf6a1113669c262fc526ef11f61f2 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Sat, 6 Feb 2021 01:53:28 +0100 +Subject: [PATCH 553/584] cluster/ec: Change self-heal-window-size to 4MiB by + default (#2071) + +The current block size used for self-heal by default is 128 KiB. This +requires a significant amount of management requests for a very small +portion of data healed. + +With this patch the block size is increased to 4 MiB. For a standard +EC volume configuration of 4+2, this means that each healed block of +a file will update 1 MiB on each brick. + +Upstream patch details: +> https://github.com/gluster/glusterfs/pull/2071 +> Change-Id: Ifeec4a2d54988017d038085720513c121b03445b +> Updates: #2067 +> Signed-off-by: Xavi Hernandez + +BUG: 1946171 +Change-Id: I9e3eed2d83c9de54242e6161b2e3951c2f6f8000 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244558 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 4118c3b..a930089 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -1644,7 +1644,7 @@ struct volume_options options[] = { + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 1024, +- .default_value = "1", ++ .default_value = "32", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"disperse"}, +-- +1.8.3.1 + diff --git a/SOURCES/0554-dht-fix-rebalance-of-sparse-files.patch b/SOURCES/0554-dht-fix-rebalance-of-sparse-files.patch new file mode 100644 index 0000000..935303b --- /dev/null +++ b/SOURCES/0554-dht-fix-rebalance-of-sparse-files.patch @@ -0,0 +1,245 @@ +From 2cb90b7798fa469f2d7d938ae88733eb1962d63d Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 9 Apr 2021 18:13:30 +0200 +Subject: [PATCH 554/584] dht: fix rebalance of sparse files + +Current implementation of rebalance for sparse files has a bug that, +in some cases, causes a read of 0 bytes from the source subvolume. 
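+
+For background, data segments in a sparse file can be located using the
+SEEK_DATA/SEEK_HOLE semantics of lseek(2). A minimal userspace sketch of
+that contract, with copy_segment() as a hypothetical placeholder for the
+migration loop (the patch itself uses gluster's syncop_seek()):
+
+    #define _GNU_SOURCE
+    #include <unistd.h>
+
+    int copy_segment(int fd, off_t start, off_t len); /* placeholder */
+
+    static int walk_data_segments(int fd)
+    {
+        off_t data, hole = 0;
+
+        /* Find the next data segment; lseek() fails with ENXIO at EOF. */
+        while ((data = lseek(fd, hole, SEEK_DATA)) >= 0) {
+            /* Every data segment is followed by a hole (EOF counts as
+             * one), so 'hole - data' is always strictly positive. */
+            hole = lseek(fd, data, SEEK_HOLE);
+            if (hole < 0)
+                return -1;
+            if (copy_segment(fd, data, hole - data) < 0)
+                return -1; /* never issues a 0-byte read */
+        }
+        return 0;
+    }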
+Posix xlator doesn't allow 0 byte reads and fails them with EINVAL, +which causes rebalance to abort the migration. + +This patch implements a more robust way of finding data segments in +a sparse file that avoids 0 byte reads, allowing the file to be +migrated successfully. + +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2318 +> Fixes: #2317 +> Change-Id: Iff168dda2fb0f2edf716b21eb04cc2cc8ac3915c +> Signed-off-by: Xavi Hernandez + +BUG: 1957641 +Change-Id: Iff168dda2fb0f2edf716b21eb04cc2cc8ac3915c +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244551 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/distribute/issue-2317.t | 29 ++++++++ + tests/volume.rc | 4 ++ + xlators/cluster/dht/src/dht-rebalance.c | 116 +++++++++++++++++--------------- + 3 files changed, 93 insertions(+), 56 deletions(-) + create mode 100755 tests/bugs/distribute/issue-2317.t + +diff --git a/tests/bugs/distribute/issue-2317.t b/tests/bugs/distribute/issue-2317.t +new file mode 100755 +index 0000000..e29d003 +--- /dev/null ++++ b/tests/bugs/distribute/issue-2317.t +@@ -0,0 +1,29 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++TESTS_EXPECTED_IN_LOOP=126 ++ ++cleanup ++ ++TEST glusterd ++TEST ${CLI} volume create ${V0} replica 3 ${H0}:/$B0/${V0}_{0..2} ++TEST ${CLI} volume start ${V0} ++ ++TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0} ++ ++# Create several files to make sure that at least some of them should be ++# migrated by rebalance. ++for i in {0..63}; do ++ TEST dd if=/dev/urandom of=${M0}/file.${i} bs=4k count=1 ++ TEST dd if=/dev/urandom of=${M0}/file.${i} bs=4k count=1 seek=128 ++done ++ ++TEST ${CLI} volume add-brick ${V0} ${H0}:${B0}/${V0}_{3..5} ++TEST ${CLI} volume rebalance ${V0} start force ++EXPECT_WITHIN ${REBALANCE_TIMEOUT} "completed" rebalance_status_field "${V0}" ++ ++EXPECT "^0$" rebalance_failed_field "${V0}" ++ ++cleanup +diff --git a/tests/volume.rc b/tests/volume.rc +index 9a002d9..f5dd0b1 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -75,6 +75,10 @@ function rebalance_status_field { + $CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p + } + ++function rebalance_failed_field { ++ $CLI volume rebalance $1 status | awk '{print $5}' | sed -n 3p ++} ++ + function fix-layout_status_field { + #The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4. + #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00'). +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 072896d..eab7558 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1024,6 +1024,46 @@ out: + return ret; + } + ++static int32_t ++dht_rebalance_sparse_segment(xlator_t *subvol, fd_t *fd, off_t *offset, ++ size_t *size) ++{ ++ off_t hole; ++ int32_t ret; ++ ++ do { ++ ret = syncop_seek(subvol, fd, *offset, GF_SEEK_DATA, NULL, offset); ++ if (ret >= 0) { ++ /* Starting at the offset of the last data segment, find the ++ * next hole. After a data segment there should always be a ++ * hole, since EOF is considered a hole. */ ++ ret = syncop_seek(subvol, fd, *offset, GF_SEEK_HOLE, NULL, &hole); ++ } ++ ++ if (ret < 0) { ++ if (ret == -ENXIO) { ++ /* This can happen if there are no more data segments (i.e. 
++ * the offset is at EOF), or there was a data segment but the ++ * file has been truncated to a smaller size between both ++ * seek requests. In both cases we are done. The file doesn't ++ * contain more data. */ ++ ret = 0; ++ } ++ return ret; ++ } ++ ++ /* It could happen that at the same offset we detected data in the ++ * first seek, there could be a hole in the second seek if user is ++ * modifying the file concurrently. In this case we need to find a ++ * new data segment to migrate. */ ++ } while (hole <= *offset); ++ ++ /* Calculate the total size of the current data block */ ++ *size = hole - *offset; ++ ++ return 1; ++} ++ + static int + __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, +@@ -1032,8 +1072,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + int ret = 0; + int count = 0; + off_t offset = 0; +- off_t data_offset = 0; +- off_t hole_offset = 0; + struct iovec *vector = NULL; + struct iobref *iobref = NULL; + uint64_t total = 0; +@@ -1048,71 +1086,36 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + while (total < ia_size) { + /* This is a regular file - read it sequentially */ + if (!hole_exists) { +- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) +- ? DHT_REBALANCE_BLKSIZE +- : (ia_size - total)); ++ data_block_size = ia_size - total; + } else { + /* This is a sparse file - read only the data segments in the file + */ + + /* If the previous data block is fully copied, find the next data +- * segment +- * starting at the offset of the last read and written byte, */ ++ * segment starting at the offset of the last read and written ++ * byte. */ + if (data_block_size <= 0) { +- ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL, +- &data_offset); +- if (ret) { +- if (ret == -ENXIO) +- ret = 0; /* No more data segments */ +- else +- *fop_errno = -ret; /* Error occurred */ +- ++ ret = dht_rebalance_sparse_segment(from, src, &offset, ++ &data_block_size); ++ if (ret <= 0) { ++ *fop_errno = -ret; + break; + } +- +- /* If the position of the current data segment is greater than +- * the position of the next hole, find the next hole in order to +- * calculate the length of the new data segment */ +- if (data_offset > hole_offset) { +- /* Starting at the offset of the last data segment, find the +- * next hole */ +- ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE, +- NULL, &hole_offset); +- if (ret) { +- /* If an error occurred here it's a real error because +- * if the seek for a data segment was successful then +- * necessarily another hole must exist (EOF is a hole) +- */ +- *fop_errno = -ret; +- break; +- } +- +- /* Calculate the total size of the current data block */ +- data_block_size = hole_offset - data_offset; +- } +- } else { +- /* There is still data in the current segment, move the +- * data_offset to the position of the last written byte */ +- data_offset = offset; + } +- +- /* Calculate how much data needs to be read and written. If the data +- * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and +- * write DHT_REBALANCE_BLKSIZE data length and the rest in the +- * next iteration(s) */ +- read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE) +- ? 
DHT_REBALANCE_BLKSIZE +- : data_block_size); +- +- /* Calculate the remaining size of the data block - maybe there's no +- * need to seek for data in the next iteration */ +- data_block_size -= read_size; +- +- /* Set offset to the offset of the data segment so read and write +- * will have the correct position */ +- offset = data_offset; + } + ++ /* Calculate how much data needs to be read and written. If the data ++ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and ++ * write DHT_REBALANCE_BLKSIZE data length and the rest in the ++ * next iteration(s) */ ++ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE) ++ ? DHT_REBALANCE_BLKSIZE ++ : data_block_size); ++ ++ /* Calculate the remaining size of the data block - maybe there's no ++ * need to seek for data in the next iteration */ ++ data_block_size -= read_size; ++ + ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count, + &iobref, NULL, NULL, NULL); + +@@ -1177,6 +1180,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + iobref = NULL; + vector = NULL; + } ++ + if (iobref) + iobref_unref(iobref); + GF_FREE(vector); +-- +1.8.3.1 + diff --git a/SOURCES/0555-geo-rep-Improve-handling-of-gfid-mismatches.patch b/SOURCES/0555-geo-rep-Improve-handling-of-gfid-mismatches.patch new file mode 100644 index 0000000..85b19e0 --- /dev/null +++ b/SOURCES/0555-geo-rep-Improve-handling-of-gfid-mismatches.patch @@ -0,0 +1,79 @@ +From f2d3866e617d25ea62cda01afddc81ef0db3356e Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Tue, 4 May 2021 22:39:03 +0200 +Subject: [PATCH 555/584] geo-rep: Improve handling of gfid mismatches + +In some circumstances geo-replication can detect mismatching gfids +between primary and secondary. These entries are fixed in an iterative +way, assuming that after a fix, a previously failing entry could +succeed. + +Previous code was trying to fix them in a loop that can be executed +up to 10 times. If some entry cannot be fixed after 10 attempts, it's +discarded. These fixes are very slow, so trying to do them many times +causes geo-replication to get out of sync. + +To minimize the number of iterations done, this patch checks if the +number of entries and failures remains constant after each iteration. +If they are constant, it means that nothing else can be fixed, so it +makes no sense to do more iterations. This reduces the number of +iterations to 2 or 3 in most of the cases, improving geo-replication +performance. 
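+
+The stopping rule, as a sketch in C-like notation (retry_batch() is a
+hypothetical stand-in for the entry-op retry; MAX_OE_RETRIES is the
+existing retry cap):
+
+    int prev_entries = -1, prev_failures = -1;
+
+    while (num_failures > 0 && count++ < MAX_OE_RETRIES) {
+        retry_batch(&num_entries, &num_failures);
+        if (num_entries == prev_entries && num_failures == prev_failures)
+            break; /* no progress since last pass: nothing else is fixable */
+        prev_entries = num_entries;
+        prev_failures = num_failures;
+    }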
+ +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2389 +> Fixes: #2388 +> Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54 +> Signed-off-by: Xavi Hernandez + +BUG: 1957191 +Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244550 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/master.py | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 98637e7..aef9373 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -1224,9 +1224,11 @@ class GMasterChangelogMixin(GMasterCommon): + + if gconf.get("gfid-conflict-resolution"): + count = 0 ++ num_entries = len(entries) ++ num_failures = len(failures) + if failures: + logging.info(lf('Entry ops failed with gfid mismatch', +- count=len(failures))) ++ count=num_failures)) + while failures and count < self.MAX_OE_RETRIES: + count += 1 + self.handle_entry_failures(failures, entries) +@@ -1237,6 +1239,20 @@ class GMasterChangelogMixin(GMasterCommon): + "gfid mismatch") + break + ++ # If this iteration has not removed any entry or reduced ++ # the number of failures compared to the previous one, we ++ # don't need to keep iterating because we'll get the same ++ # result in all other attempts. ++ if ((num_entries == len(entries)) and ++ (num_failures == len(failures))): ++ logging.info(lf("No more gfid mismatches can be fixed", ++ entries=num_entries, ++ failures=num_failures)) ++ break ++ ++ num_entries = len(entries) ++ num_failures = len(failures) ++ + self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY') + self.status.dec_value("entry", len(entries)) + +-- +1.8.3.1 + diff --git a/SOURCES/0556-dht-don-t-ignore-xdata-in-fgetxattr.patch b/SOURCES/0556-dht-don-t-ignore-xdata-in-fgetxattr.patch new file mode 100644 index 0000000..0cf3545 --- /dev/null +++ b/SOURCES/0556-dht-don-t-ignore-xdata-in-fgetxattr.patch @@ -0,0 +1,52 @@ +From a7f6ad0c617a36414c8232cb692471703923b16d Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Tue, 19 Jan 2021 18:03:33 +0100 +Subject: [PATCH 556/584] dht: don't ignore xdata in fgetxattr + +DHT was passing NULL for xdata in fgetxattr() request, ignoring any +data sent by upper xlators. + +This patch fixes the issue by sending the received xdata to lower +xlators, as it's currently done for getxattr(). 
+ +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2020 +> Fixes: #1991 +> Change-Id: If3d3f1f2ce6215f3b1acc46480e133cb4294eaec +> Signed-off-by: Xavi Hernandez + +BUG: 1919132 +Change-Id: If3d3f1f2ce6215f3b1acc46480e133cb4294eaec +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244538 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 7425c1a..0773092 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -5262,7 +5262,7 @@ dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + + if (!ret && key && local->mds_subvol && dht_match_xattr(key)) { + STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol, +- local->mds_subvol->fops->fgetxattr, fd, key, NULL); ++ local->mds_subvol->fops->fgetxattr, fd, key, xdata); + + return 0; + } +@@ -5274,7 +5274,7 @@ dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr, fd, +- key, NULL); ++ key, xdata); + } + return 0; + +-- +1.8.3.1 + diff --git a/SOURCES/0557-cluster-dht-Fix-stack-overflow-in-readdir-p.patch b/SOURCES/0557-cluster-dht-Fix-stack-overflow-in-readdir-p.patch new file mode 100644 index 0000000..2add6cb --- /dev/null +++ b/SOURCES/0557-cluster-dht-Fix-stack-overflow-in-readdir-p.patch @@ -0,0 +1,306 @@ +From ba57b043db1e19196cf860baeeeb1acfc9985cd2 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 24 Feb 2021 15:04:23 +0100 +Subject: [PATCH 557/584] cluster/dht: Fix stack overflow in readdir(p) + +When parallel-readdir is enabled, readdir(p) requests sent by DHT can be +immediately processed and answered in the same thread before the call to +STACK_WIND_COOKIE() completes. + +This means that the readdir(p) cbk is processed synchronously. In some +cases it may decide to send another readdir(p) request, which causes a +recursive call. + +When some special conditions happen and the directories are big, it's +possible that the number of nested calls is so high that the process +crashes because of a stack overflow. + +This patch fixes this by not allowing nested readdir(p) calls. When a +nested call is detected, it's queued instead of sending it. The queued +request is processed when the current call finishes by the top level +stack function. 
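+
+Reduced to its core, the queueing pattern looks like this (a sketch, not
+the literal DHT code; wind_one() stands in for the STACK_WIND_COOKIE
+call, and the uatomic_* helpers are the liburcu ones used by the patch):
+
+    if (uatomic_add_return(&local->queue, 1) == 1) {
+        /* We are the only winder: anyone arriving while we are busy
+         * just bumps the counter, and we run their request here too. */
+        do {
+            wind_one(local); /* may complete synchronously and re-queue */
+        } while (uatomic_sub_return(&local->queue, 1) > 0);
+    }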
+ +Backport of 3 patches: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2170 +> Fixes: #2169 +> Change-Id: Id763a8a51fb3c3314588ec7c162f649babf33099 +> Signed-off-by: Xavi Hernandez + +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2202 +> Updates: #2169 +> Change-Id: I97e73c0aae74fc5d80c975f56f2f7a64e3e1ae95 +> Signed-off-by: Xavi Hernandez + +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2242 +> Fixes: #2239 +> Change-Id: I6b2e48e87c85de27fad67a12d97abd91fa27c0c1 +> Signed-off-by: Pranith Kumar K + +BUG: 1798897 +Change-Id: Id763a8a51fb3c3314588ec7c162f649babf33099 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244549 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/distribute/issue-2169.t | 33 +++++++++ + xlators/cluster/dht/src/dht-common.c | 134 ++++++++++++++++++++++++++++++++--- + xlators/cluster/dht/src/dht-common.h | 5 ++ + 3 files changed, 162 insertions(+), 10 deletions(-) + create mode 100755 tests/bugs/distribute/issue-2169.t + +diff --git a/tests/bugs/distribute/issue-2169.t b/tests/bugs/distribute/issue-2169.t +new file mode 100755 +index 0000000..91fa72a +--- /dev/null ++++ b/tests/bugs/distribute/issue-2169.t +@@ -0,0 +1,33 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup ++ ++TEST glusterd ++TEST ${CLI} volume create ${V0} ${H0}:/$B0/${V0}_0 ++TEST ${CLI} volume set ${V0} readdir-ahead on ++TEST ${CLI} volume set ${V0} parallel-readdir on ++TEST ${CLI} volume start ${V0} ++ ++TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0} ++ ++TEST mkdir -p ${M0}/d/d.{000..999} ++ ++EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0} ++ ++TEST ${CLI} volume add-brick ${V0} ${H0}:${B0}/${V0}_{1..7} ++ ++TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0} ++ ++ls -l ${M0}/d/ | wc -l ++ ++EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0} ++TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0} ++ ++ls -l ${M0}/d/ | wc -l ++ ++TEST ls ${M0}/d ++ ++cleanup +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 0773092..ce0fbbf 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -24,8 +24,15 @@ + #include + #include + ++#include ++ + int run_defrag = 0; + ++static int ++dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, gf_dirent_t *entries, ++ dict_t *xdata); ++ + int + dht_link2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); + +@@ -6681,6 +6688,94 @@ out: + return; + } + ++/* Execute a READDIR request if no other request is in progress. Otherwise ++ * queue it to be executed when the current one finishes. ++ * ++ * When parallel-readdir is enabled and directory contents are cached, the ++ * callback of a readdirp will be called before returning from STACK_WIND. ++ * If the returned contents are not useful for DHT, and the buffer is not ++ * yet full, a nested readdirp request will be sent. This means that there ++ * will be many recursive calls. In the worst case there might be a stack ++ * overflow. ++ * ++ * To avoid this, we only wind a request if no other request is being wound. ++ * If there's another request, we simple store the values for the next call. ++ * When the thread processing the current wind completes it, it will take ++ * the new arguments and send the request from the top level stack. 
*/ ++static void ++dht_queue_readdir(call_frame_t *frame, xlator_t *xl, off_t offset, ++ fop_readdir_cbk_t cbk) ++{ ++ dht_local_t *local; ++ int32_t queue; ++ xlator_t *this = NULL; ++ ++ local = frame->local; ++ this = frame->this; ++ ++ local->queue_xl = xl; ++ local->queue_offset = offset; ++ ++ if (uatomic_add_return(&local->queue, 1) == 1) { ++ /* If we are here it means that we are the first one to send a ++ * readdir request. Any attempt to send more readdir requests will ++ * find local->queue > 1, so it won't do anything. The needed data ++ * to send the request has been stored into local->queue_*. ++ * ++ * Note: this works because we will only have 1 additional request ++ * at most (the one called by the cbk function) while we are ++ * processing another readdir. */ ++ do { ++ STACK_WIND_COOKIE(frame, cbk, local->queue_xl, local->queue_xl, ++ local->queue_xl->fops->readdir, local->fd, ++ local->size, local->queue_offset, local->xattr); ++ ++ /* If a new readdirp request has been added before returning ++ * from winding, we process it. */ ++ } while ((queue = uatomic_sub_return(&local->queue, 1)) > 0); ++ ++ if (queue < 0) { ++ /* A negative value means that an unwind has been called before ++ * returning from the previous wind. This means that 'local' is ++ * not needed anymore and must be destroyed. */ ++ dht_local_wipe(this, local); ++ } ++ } ++} ++ ++/* Execute a READDIRP request if no other request is in progress. Otherwise ++ * queue it to be executed when the current one finishes. */ ++static void ++dht_queue_readdirp(call_frame_t *frame, xlator_t *xl, off_t offset, ++ fop_readdirp_cbk_t cbk) ++{ ++ dht_local_t *local; ++ int32_t queue; ++ xlator_t *this = NULL; ++ ++ local = frame->local; ++ this = frame->this; ++ ++ local->queue_xl = xl; ++ local->queue_offset = offset; ++ ++ /* Check dht_queue_readdir() comments for an explanation of this. */ ++ if (uatomic_add_return(&local->queue, 1) == 1) { ++ do { ++ STACK_WIND_COOKIE(frame, cbk, local->queue_xl, local->queue_xl, ++ local->queue_xl->fops->readdirp, local->fd, ++ local->size, local->queue_offset, local->xattr); ++ } while ((queue = uatomic_sub_return(&local->queue, 1)) > 0); ++ ++ if (queue < 0) { ++ /* A negative value means that an unwind has been called before ++ * returning from the previous wind. This means that 'local' is ++ * not needed anymore and must be destroyed. */ ++ dht_local_wipe(this, local); ++ } ++ } ++} ++ + /* Posix returns op_errno = ENOENT to indicate that there are no more + * entries + */ +@@ -6950,9 +7045,8 @@ done: + } + } + +- STACK_WIND_COOKIE(frame, dht_readdirp_cbk, next_subvol, next_subvol, +- next_subvol->fops->readdirp, local->fd, local->size, +- next_offset, local->xattr); ++ dht_queue_readdirp(frame, next_subvol, next_offset, dht_readdirp_cbk); ++ + return 0; + } + +@@ -6970,6 +7064,17 @@ unwind: + if (prev != dht_last_up_subvol(this)) + op_errno = 0; + ++ /* If we are inside a recursive call (or not inside a recursive call but ++ * the cbk is completed before the wind returns), local->queue will be 1. ++ * In this case we cannot destroy 'local' because it will be needed by ++ * the caller of STACK_WIND. In this case, we decrease the value to let ++ * the caller know that the operation has terminated and it must destroy ++ * 'local'. If local->queue 0, we can destroy it here because there are ++ * no other users. 
*/ ++ if (uatomic_sub_return(&local->queue, 1) >= 0) { ++ frame->local = NULL; ++ } ++ + DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL); + + gf_dirent_free(&entries); +@@ -7071,9 +7176,8 @@ done: + goto unwind; + } + +- STACK_WIND_COOKIE(frame, dht_readdir_cbk, next_subvol, next_subvol, +- next_subvol->fops->readdir, local->fd, local->size, +- next_offset, NULL); ++ dht_queue_readdir(frame, next_subvol, next_offset, dht_readdir_cbk); ++ + return 0; + } + +@@ -7089,6 +7193,17 @@ unwind: + if (prev != dht_last_up_subvol(this)) + op_errno = 0; + ++ /* If we are inside a recursive call (or not inside a recursive call but ++ * the cbk is completed before the wind returns), local->queue will be 1. ++ * In this case we cannot destroy 'local' because it will be needed by ++ * the caller of STACK_WIND. In this case, we decrease the value to let ++ * the caller know that the operation has terminated and it must destroy ++ * 'local'. If local->queue 0, we can destroy it here because there are ++ * no other users. */ ++ if (uatomic_sub_return(&local->queue, 1) >= 0) { ++ frame->local = NULL; ++ } ++ + if (!skip_hashed_check) { + DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL); + gf_dirent_free(&entries); +@@ -7096,6 +7211,7 @@ unwind: + } else { + DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, orig_entries, NULL); + } ++ + return 0; + } + +@@ -7172,11 +7288,9 @@ dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + } + } + +- STACK_WIND_COOKIE(frame, dht_readdirp_cbk, xvol, xvol, +- xvol->fops->readdirp, fd, size, yoff, local->xattr); ++ dht_queue_readdirp(frame, xvol, yoff, dht_readdirp_cbk); + } else { +- STACK_WIND_COOKIE(frame, dht_readdir_cbk, xvol, xvol, +- xvol->fops->readdir, fd, size, yoff, local->xattr); ++ dht_queue_readdir(frame, xvol, yoff, dht_readdir_cbk); + } + + return 0; +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 92f1b89..132b3b3 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -369,6 +369,11 @@ struct dht_local { + + dht_dir_transaction_t lock[2], *current; + ++ /* for nested readdirs */ ++ xlator_t *queue_xl; ++ off_t queue_offset; ++ int32_t queue; ++ + /* inodelks during filerename for backward compatibility */ + dht_lock_t **rename_inodelk_backward_compatible; + int rename_inodelk_bc_count; +-- +1.8.3.1 + diff --git a/SOURCES/0558-afr-fix-directory-entry-count.patch b/SOURCES/0558-afr-fix-directory-entry-count.patch new file mode 100644 index 0000000..4134f77 --- /dev/null +++ b/SOURCES/0558-afr-fix-directory-entry-count.patch @@ -0,0 +1,238 @@ +From 9bf6986f8ea3edd9de3d2629404f7ab11c1597de Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Tue, 9 Mar 2021 00:24:07 +0100 +Subject: [PATCH 558/584] afr: fix directory entry count + +AFR may hide some existing entries from a directory when reading it +because they are generated internally for private management. However +the returned number of entries from readdir() function is not updated +accordingly. So it may return a number higher than the real entries +present in the gf_dirent list. + +This may cause unexpected behavior of clients, including gfapi which +incorrectly assumes that there was an entry when the list was actually +empty. + +This patch also makes the check in gfapi more robust to avoid similar +issues that could appear in the future. 
+ +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2233 +> Fixes: #2232 +> Change-Id: I81ba3699248a53ebb0ee4e6e6231a4301436f763 +> Signed-off-by: Xavi Hernandez + +BUG: 1927411 +Change-Id: I81ba3699248a53ebb0ee4e6e6231a4301436f763 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244535 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-fops.c | 3 +- + tests/bugs/replicate/issue-2232.c | 85 ++++++++++++++++++++++++++++++++++ + tests/bugs/replicate/issue-2232.t | 34 ++++++++++++++ + xlators/cluster/afr/src/afr-dir-read.c | 11 +++-- + 4 files changed, 129 insertions(+), 4 deletions(-) + create mode 100644 tests/bugs/replicate/issue-2232.c + create mode 100644 tests/bugs/replicate/issue-2232.t + +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index 6dc3b66..821d250 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -3748,8 +3748,9 @@ glfd_entry_refresh(struct glfs_fd *glfd, int plus) + errno = 0; + } + +- if (ret > 0) ++ if ((ret > 0) && !list_empty(&glfd->entries)) { + glfd->next = list_entry(glfd->entries.next, gf_dirent_t, list); ++ } + + gf_dirent_free(&old); + out: +diff --git a/tests/bugs/replicate/issue-2232.c b/tests/bugs/replicate/issue-2232.c +new file mode 100644 +index 0000000..df547c2 +--- /dev/null ++++ b/tests/bugs/replicate/issue-2232.c +@@ -0,0 +1,85 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int main(int argc, char **argv) ++{ ++ char log[128]; ++ struct dirent entry; ++ struct dirent *ent; ++ glfs_xreaddirp_stat_t *xstat; ++ int ret, flags; ++ ++ if (argc != 3) { ++ fprintf(stderr, "Syntax: %s \n", argv[0]); ++ exit(1); ++ } ++ char *hostname = argv[1]; ++ char *volname = argv[2]; ++ ++ glfs_t *fs = glfs_new(volname); ++ if (!fs) { ++ fprintf(stderr, "glfs_new() failed\n"); ++ exit(1); ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_set_volfile_server() failed\n"); ++ return ret; ++ } ++ ++ sprintf(log, "/tmp/logs-%d.log", getpid()); ++ ret = glfs_set_logging(fs, log, 9); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_set_logging() failed\n"); ++ return ret; ++ } ++ ++ ret = glfs_init(fs); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_init() failed\n"); ++ return ret; ++ } ++ ++ glfs_fd_t *fd = glfs_opendir(fs, "/"); ++ if (fd == NULL) { ++ fprintf(stderr, "glfs_opendir() failed\n"); ++ return 1; ++ } ++ ++ flags = GFAPI_XREADDIRP_STAT | GFAPI_XREADDIRP_HANDLE; ++ xstat = NULL; ++ while ((ret = glfs_xreaddirplus_r(fd, flags, &xstat, &entry, &ent)) > 0) { ++ if (xstat != NULL) { ++ glfs_free(xstat); ++ } ++ if ((strcmp(ent->d_name, ".") == 0) || ++ (strcmp(ent->d_name, "..") == 0)) { ++ xstat = NULL; ++ continue; ++ } ++ if ((xstat == NULL) || ((ret & GFAPI_XREADDIRP_HANDLE) == 0)) { ++ fprintf(stderr, "glfs_xreaddirplus_r() failed: %s\n", ++ strerror(errno)); ++ return 1; ++ } ++ ++ xstat = NULL; ++ } ++ ++ if (ret < 0) { ++ fprintf(stderr, "glfs_xreaddirplus_r() failed\n"); ++ return ret; ++ } ++ ++ glfs_close(fd); ++ ++ glfs_fini(fs); ++ ++ return ret; ++} +diff --git a/tests/bugs/replicate/issue-2232.t b/tests/bugs/replicate/issue-2232.t +new file mode 100644 +index 0000000..66a41e0 +--- /dev/null ++++ b/tests/bugs/replicate/issue-2232.t +@@ -0,0 +1,34 @@ ++#!/bin/bash ++ ++. $(dirname "${0}")/../../include.rc ++. 
$(dirname "${0}")/../../volume.rc ++ ++cleanup; ++TEST gcc $(dirname "${0}")/issue-2232.c -o $(dirname "${0}")/issue-2232 -lgfapi ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create ${V0} replica 3 ${H0}:${B0}/${V0}{0..2} ++ ++# Create a fake .glusterfs-anonymous-inode-... entry ++ANONINO=".glusterfs-anonymous-inode-aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" ++TEST mkdir ${B0}/${V0}{0..2}/${ANONINO} ++gfid="$(uuidgen)" ++hex="0x$(echo "${gfid}" | tr -d '-')" ++TEST assign_gfid "${hex}" "${B0}/${V0}0/${ANONINO}" ++TEST assign_gfid "${hex}" "${B0}/${V0}1/${ANONINO}" ++TEST assign_gfid "${hex}" "${B0}/${V0}2/${ANONINO}" ++TEST mkdir -p "${B0}/${V0}0/.glusterfs/${gfid:0:2}/${gfid:2:2}" ++TEST mkdir -p "${B0}/${V0}1/.glusterfs/${gfid:0:2}/${gfid:2:2}" ++TEST mkdir -p "${B0}/${V0}2/.glusterfs/${gfid:0:2}/${gfid:2:2}" ++TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}0/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}" ++TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}1/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}" ++TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}2/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}" ++ ++TEST $CLI volume start ${V0} ++ ++TEST $(dirname "${0}")/issue-2232 ${H0} ${V0} ++ ++TEST rm -f $(dirname $0)/issue-2232 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c +index d64b6a9..a98f8df 100644 +--- a/xlators/cluster/afr/src/afr-dir-read.c ++++ b/xlators/cluster/afr/src/afr-dir-read.c +@@ -157,7 +157,7 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) + return 0; + } + +-static void ++static int32_t + afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + int subvol, gf_dirent_t *entries, fd_t *fd) + { +@@ -168,6 +168,7 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + afr_private_t *priv = NULL; + gf_boolean_t need_heal = _gf_false; + gf_boolean_t validate_subvol = _gf_false; ++ int32_t count = 0; + + this = THIS; + priv = this->private; +@@ -184,6 +185,7 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + + list_del_init(&entry->list); + list_add_tail(&entry->list, &entries->list); ++ count++; + + if (!validate_subvol) + continue; +@@ -197,6 +199,8 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + } + } + } ++ ++ return count; + } + + int32_t +@@ -222,8 +226,9 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + } + + if (op_ret >= 0) +- afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, +- &entries, local->fd); ++ op_ret = afr_readdir_transform_entries(frame, subvol_entries, ++ (long)cookie, &entries, ++ local->fd); + + AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); + +-- +1.8.3.1 + diff --git a/SOURCES/0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch b/SOURCES/0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch new file mode 100644 index 0000000..91add36 --- /dev/null +++ b/SOURCES/0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch @@ -0,0 +1,163 @@ +From 2b6e6c234dffa72c9f2af747908b1e1f29080698 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 25 Mar 2021 11:52:13 +0530 +Subject: [PATCH 559/584] afr: make fsync post-op aware of inodelk count + (#2273) + +Problem: +Since commit bd540db1e, eager-locking was enabled for fsync. 
But on +certain VM workloads wit sharding enabled, shard xlator keeps sending +fsync on the base shard. This can cause blocked inodelks from other +clients (including shd) to time out due to call bail. + +Fix: +Make afr fsync aware of inodelk count and not delay post-op + unlock +when inodelk count > 1, just like writev. + +Code is restructured so that any fd based AFR_DATA_TRANSACTION can be made +aware by setting GLUSTERFS_INODELK_DOM_COUNT in xdata request. + +Note: We do not know yet why VMs go in to paused state because of the +blocked inodelks but this patch should be a first step in reducing the +occurence. + +Upstream patch details: +> https://github.com/gluster/glusterfs/pull/2273/ +> Updates: #2198 +> Change-Id: Ib91ebdd3101d590c326e69c829cf9335003e260b +> Signed-off-by: Ravishankar N + +BUG: 1943467 +Change-Id: Id407ca54007e3bbb206a1d9431ebaf89a2167f74 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244516 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-inode-write.c | 40 ++++++++++++++++++------------- + xlators/features/locks/src/posix.c | 1 + + 2 files changed, 24 insertions(+), 17 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c +index df82b6e..962a7b1 100644 +--- a/xlators/cluster/afr/src/afr-inode-write.c ++++ b/xlators/cluster/afr/src/afr-inode-write.c +@@ -42,6 +42,7 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this) + struct iatt *stbuf = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; ++ afr_lock_t *lock = NULL; + afr_read_subvol_args_t args = { + 0, + }; +@@ -50,6 +51,12 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this) + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, local->inode, out); + ++ if (local->update_num_inodelks && ++ local->transaction.type == AFR_DATA_TRANSACTION) { ++ lock = &local->inode_ctx->lock[local->transaction.type]; ++ lock->num_inodelks = local->num_inodelks; ++ } ++ + /*This code needs to stay till DHT sends fops on linked + * inodes*/ + if (!inode_is_linked(local->inode)) { +@@ -134,6 +141,7 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index, + { + afr_local_t *local = NULL; + afr_private_t *priv = NULL; ++ int num_inodelks = 0; + + local = frame->local; + priv = this->private; +@@ -146,8 +154,16 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index, + + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; +- if (xdata) ++ if (xdata) { + local->replies[child_index].xdata = dict_ref(xdata); ++ if (dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT, ++ &num_inodelks) == 0) { ++ if (num_inodelks > local->num_inodelks) { ++ local->num_inodelks = num_inodelks; ++ local->update_num_inodelks = _gf_true; ++ } ++ } ++ } + + if (op_ret >= 0) { + if (prebuf) +@@ -284,7 +300,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index, + afr_local_t *local = frame->local; + uint32_t open_fd_count = 0; + uint32_t write_is_append = 0; +- int32_t num_inodelks = 0; + + LOCK(&frame->lock); + { +@@ -306,15 +321,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index, + local->open_fd_count = open_fd_count; + local->update_open_fd_count = _gf_true; + } +- +- ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT, +- &num_inodelks); +- if (ret < 0) +- goto unlock; +- if (num_inodelks 
> local->num_inodelks) { +- local->num_inodelks = num_inodelks; +- local->update_num_inodelks = _gf_true; +- } + } + unlock: + UNLOCK(&frame->lock); +@@ -324,7 +330,6 @@ void + afr_process_post_writev(call_frame_t *frame, xlator_t *this) + { + afr_local_t *local = NULL; +- afr_lock_t *lock = NULL; + + local = frame->local; + +@@ -343,11 +348,6 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this) + + if (local->update_open_fd_count) + local->inode_ctx->open_fd_count = local->open_fd_count; +- if (local->update_num_inodelks && +- local->transaction.type == AFR_DATA_TRANSACTION) { +- lock = &local->inode_ctx->lock[local->transaction.type]; +- lock->num_inodelks = local->num_inodelks; +- } + } + + int +@@ -2516,6 +2516,12 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + if (!local->xdata_req) + goto out; + ++ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT, ++ this->name)) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ + local->fd = fd_ref(fd); + ret = afr_set_inode_local(this, local, fd->inode); + if (ret) +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index cdd1ff7..22ef5b8 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -4943,6 +4943,7 @@ struct xlator_fops fops = { + .rchecksum = pl_rchecksum, + .statfs = pl_statfs, + .fsyncdir = pl_fsyncdir, ++ .fsync = pl_fsync, + .readdir = pl_readdir, + .symlink = pl_symlink, + .link = pl_link, +-- +1.8.3.1 + diff --git a/SOURCES/0560-posix-Avoid-dict_del-logs-in-posix_is_layout_stale-w.patch b/SOURCES/0560-posix-Avoid-dict_del-logs-in-posix_is_layout_stale-w.patch new file mode 100644 index 0000000..cccac36 --- /dev/null +++ b/SOURCES/0560-posix-Avoid-dict_del-logs-in-posix_is_layout_stale-w.patch @@ -0,0 +1,73 @@ +From e56605d5808b41335026a5470fa10f5e5b5389f3 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 6 Apr 2020 21:58:03 +0530 +Subject: [PATCH 560/584] posix: Avoid dict_del logs in posix_is_layout_stale + while key is NULL + +Problem: The key "GF_PREOP_PARENT_KEY" has been populated by dht and + for non-distribute volume like 1x3 key is not populated so + posix_is_layout stale throw a message while a file is created + +Solution: To avoid a log put a condition before delete a key + +Upstream patch details: +> https://review.gluster.org/#/c/glusterfs/+/24297/ +> Change-Id: I813ee7960633e7f9f5e9ad2f42f288053d9eb71f +> Fixes: #1150 +> Signed-off-by: Mohit Agrawal + +BUG: 1942816 +Change-Id: I746a2619989265f3bc9bb648c4b8e4bbefaedc56 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244925 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/glusterd/brick-mux-validation.t | 4 ++-- + xlators/storage/posix/src/posix-helpers.c | 5 +++-- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/tests/bugs/glusterd/brick-mux-validation.t b/tests/bugs/glusterd/brick-mux-validation.t +index 03a4768..61b0455 100644 +--- a/tests/bugs/glusterd/brick-mux-validation.t ++++ b/tests/bugs/glusterd/brick-mux-validation.t +@@ -24,7 +24,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3} + TEST $CLI volume start $V0 + + EXPECT 1 count_brick_processes +-EXPECT 1 count_brick_pids ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count + + pkill gluster +@@ -101,4 +101,4 @@ TEST $CLI_IGNORE_PARTITION volume reset-brick $V1 $H0:$B0/${V1}1 $H0:$B0/${V1}1 + 
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+ EXPECT 1 count_brick_processes
+
+-cleanup;
+\ No newline at end of file
++cleanup;
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 110d383..16351d8 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -3596,13 +3596,14 @@ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
+ op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
+ if (xattr_name == NULL) {
+ op_ret = 0;
+- goto out;
++ return is_stale;
+ }
+
+ arg_data = dict_get(xdata, xattr_name);
+ if (!arg_data) {
+ op_ret = 0;
+- goto out;
++ dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
++ return is_stale;
+ }
+
+ size = sys_lgetxattr(par_path, xattr_name, value_buf,
+--
+1.8.3.1
+
diff --git a/SOURCES/0561-cluster-ec-Inform-failure-when-some-bricks-are-unava.patch b/SOURCES/0561-cluster-ec-Inform-failure-when-some-bricks-are-unava.patch
new file mode 100644
index 0000000..4f191cc
--- /dev/null
+++ b/SOURCES/0561-cluster-ec-Inform-failure-when-some-bricks-are-unava.patch
@@ -0,0 +1,202 @@
+From 488a5aa4932842334e2749224e9c39f8b6fd379c Mon Sep 17 00:00:00 2001
+From: Ashish Pandey
+Date: Wed, 20 May 2020 11:30:17 +0530
+Subject: [PATCH 561/584] cluster/ec: Inform failure when some bricks are
+ unavailable.
+
+Provide proper information about failure when a fop
+fails on some of the bricks.
+Also provide information about the parent fop and
+the map of the bricks on which it is failing.
+
+Upstream patch details:
+>Change-Id: If812739617df65cd146c8e667fbacff653717248
+>updates #1434
+>Signed-off-by: Ashish Pandey
+>https://review.gluster.org/#/c/glusterfs/+/24858/
+
+Change-Id: I3549d637e7345f05f21ac1c0e8106973c69d1be9
+BUG: 1908635
+Signed-off-by: Ashish Pandey
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244926
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/cluster/ec/src/ec-common.c | 76 +++++++++++++++++++++++---------------
+ xlators/cluster/ec/src/ec.c | 14 ++++++-
+ 2 files changed, 58 insertions(+), 32 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
+index e3f8769..a9624d8 100644
+--- a/xlators/cluster/ec/src/ec-common.c
++++ b/xlators/cluster/ec/src/ec-common.c
+@@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
+ }
+ }
+
+- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+- "remaining=%s, good=%s, bad=%s, %s)",
+- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+- ec->nodes),
+- ec_msg_str(fop));
++ gf_msg(
++ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
++ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
++ "remaining=%s, good=%s, bad=%s,"
++ "(Least significant bit represents first client/brick of subvol), %s)",
++ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
++ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
++ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
++ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
++ ec_bin(str4, sizeof(str4), fop->good, 
ec->nodes), ++ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), ++ ec->nodes), ++ ec_msg_str(fop)); + if (fop->use_fd) { + if (fop->fd != NULL) { + ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, +@@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop) + loc_t *loc2 = NULL; + char gfid1[64] = {0}; + char gfid2[64] = {0}; ++ ec_fop_data_t *parent = fop->parent; + + if (fop->errstr) + return fop->errstr; +- + if (!fop->use_fd) { + loc1 = &fop->loc[0]; + loc2 = &fop->loc[1]; +@@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop) + if (fop->id == GF_FOP_RENAME) { + gf_asprintf(&fop->errstr, + "FOP : '%s' failed on '%s' and '%s' with gfids " +- "%s and %s respectively", ++ "%s and %s respectively. Parent FOP: %s", + ec_fop_name(fop->id), loc1->path, loc2->path, + uuid_utoa_r(loc1->gfid, gfid1), +- uuid_utoa_r(loc2->gfid, gfid2)); ++ uuid_utoa_r(loc2->gfid, gfid2), ++ parent ? ec_fop_name(parent->id) : "No Parent"); + } else { +- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s", +- ec_fop_name(fop->id), loc1->path, +- uuid_utoa_r(loc1->gfid, gfid1)); ++ gf_asprintf( ++ &fop->errstr, ++ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s", ++ ec_fop_name(fop->id), loc1->path, ++ uuid_utoa_r(loc1->gfid, gfid1), ++ parent ? ec_fop_name(parent->id) : "No Parent"); + } + } else { +- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s", +- ec_fop_name(fop->id), +- uuid_utoa_r(fop->fd->inode->gfid, gfid1)); ++ gf_asprintf( ++ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s", ++ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1), ++ parent ? ec_fop_name(parent->id) : "No Parent"); + } + return fop->errstr; + } + ++static void ++ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need, ++ int32_t loglevel) ++{ ++ ec_t *ec = fop->xl->private; ++ char str1[32], str2[32], str3[32]; ++ ++ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT, ++ "Insufficient available children for this request: " ++ "Have : %d, Need : %u : Child UP : %s " ++ "Mask: %s, Healing : %s : %s ", ++ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), ++ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), ++ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes), ++ ec_msg_str(fop)); ++} ++ + static int32_t + ec_child_select(ec_fop_data_t *fop) + { +@@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop) + ec_trace("SELECT", fop, ""); + + if ((num < fop->minimum) && (num < ec->fragments)) { +- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, +- "Insufficient available children " +- "for this request (have %d, need " +- "%d). %s", +- num, fop->minimum, ec_msg_str(fop)); ++ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR); + return 0; + } + +@@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop) + (fop->locks[0].update[EC_DATA_TXN] || + fop->locks[0].update[EC_METADATA_TXN])) { + if (ec->quorum_count && (num < ec->quorum_count)) { +- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, +- "Insufficient available children " +- "for this request (have %d, need " +- "%d). 
%s", +- num, ec->quorum_count, ec_msg_str(fop)); ++ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR); + return 0; + } + } +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index a930089..047cdd8 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec) + void + ec_up(xlator_t *this, ec_t *ec) + { ++ char str1[32], str2[32]; ++ + if (ec->timer != NULL) { + gf_timer_call_cancel(this->ctx, ec->timer); + ec->timer = NULL; + } + + ec->up = 1; +- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP"); ++ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, ++ "Going UP : Child UP = %s Child Notify = %s", ++ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), ++ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes)); + + gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name); + } +@@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec) + void + ec_down(xlator_t *this, ec_t *ec) + { ++ char str1[32], str2[32]; ++ + if (ec->timer != NULL) { + gf_timer_call_cancel(this->ctx, ec->timer); + ec->timer = NULL; + } + + ec->up = 0; +- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN"); ++ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, ++ "Going DOWN : Child UP = %s Child Notify = %s", ++ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), ++ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes)); + + gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name); + } +-- +1.8.3.1 + diff --git a/SOURCES/0562-shard.c-Fix-formatting.patch b/SOURCES/0562-shard.c-Fix-formatting.patch new file mode 100644 index 0000000..14fbed6 --- /dev/null +++ b/SOURCES/0562-shard.c-Fix-formatting.patch @@ -0,0 +1,12513 @@ +From ea96fcd832de0b49f0e050f535d22a500da1503a Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Thu, 3 Jun 2021 13:14:04 +0200 +Subject: [PATCH 562/584] shard.c: Fix formatting + +A previous downstream change [1] had changed the formatting of the +entire xlators/features/shard/src/shard.c. This patch reapplies the +correct formatting. No other changes have been made. 
+ +[1] https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/185716/ + +BUG: 1925425 +Change-Id: Ie655ddaaa26aa884878e66bc0d9ce1f021f6a85f +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244956 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/shard/src/shard.c | 11701 ++++++++++++++++++----------------- + 1 file changed, 6084 insertions(+), 5617 deletions(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 099b062..c5cc224 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -16,5813 +16,6226 @@ + #include + #include + +-static gf_boolean_t __is_shard_dir(uuid_t gfid) { +- shard_priv_t *priv = THIS->private; ++static gf_boolean_t ++__is_shard_dir(uuid_t gfid) ++{ ++ shard_priv_t *priv = THIS->private; + +- if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) +- return _gf_true; ++ if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) { +- if (frame->root->pid == GF_CLIENT_PID_GSYNCD && +- (__is_shard_dir(loc->pargfid) || +- (loc->parent && __is_shard_dir(loc->parent->gfid)))) +- return _gf_true; ++static gf_boolean_t ++__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) ++{ ++ if (frame->root->pid == GF_CLIENT_PID_GSYNCD && ++ (__is_shard_dir(loc->pargfid) || ++ (loc->parent && __is_shard_dir(loc->parent->gfid)))) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) { +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void ++shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) ++{ ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(buf, len, "%s.%d", gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(buf, len, "%s.%d", gfid_str, block_num); + } + +-void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, +- size_t len) { +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void ++shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) ++{ ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); + } + +-int __shard_inode_ctx_get(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t **ctx) { +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx_p = NULL; ++int ++__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) ++{ ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx_p = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret == 0) { +- *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; +- return ret; +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret == 0) { ++ *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ return ret; ++ } + +- ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); +- if (!ctx_p) +- return ret; ++ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); ++ if (!ctx_p) ++ return ret; + +- INIT_LIST_HEAD(&ctx_p->ilist); +- INIT_LIST_HEAD(&ctx_p->to_fsync_list); ++ 
INIT_LIST_HEAD(&ctx_p->ilist); ++ INIT_LIST_HEAD(&ctx_p->to_fsync_list); + +- ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); +- if (ret < 0) { +- GF_FREE(ctx_p); +- return ret; +- } ++ ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); ++ if (ret < 0) { ++ GF_FREE(ctx_p); ++ return ret; ++ } + +- *ctx = ctx_p; ++ *ctx = ctx_p; + +- return ret; ++ return ret; + } + +-int shard_inode_ctx_get(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t **ctx) { +- int ret = 0; ++int ++shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) ++{ ++ int ret = 0; + +- LOCK(&inode->lock); +- { ret = __shard_inode_ctx_get(inode, this, ctx); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_get(inode, this, ctx); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) { +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int ++__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) ++{ ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if (valid & SHARD_MASK_BLOCK_SIZE) +- ctx->block_size = block_size; ++ if (valid & SHARD_MASK_BLOCK_SIZE) ++ ctx->block_size = block_size; + +- if (valid & SHARD_MASK_PROT) +- ctx->stat.ia_prot = stbuf->ia_prot; ++ if (valid & SHARD_MASK_PROT) ++ ctx->stat.ia_prot = stbuf->ia_prot; + +- if (valid & SHARD_MASK_NLINK) +- ctx->stat.ia_nlink = stbuf->ia_nlink; ++ if (valid & SHARD_MASK_NLINK) ++ ctx->stat.ia_nlink = stbuf->ia_nlink; + +- if (valid & SHARD_MASK_UID) +- ctx->stat.ia_uid = stbuf->ia_uid; ++ if (valid & SHARD_MASK_UID) ++ ctx->stat.ia_uid = stbuf->ia_uid; + +- if (valid & SHARD_MASK_GID) +- ctx->stat.ia_gid = stbuf->ia_gid; ++ if (valid & SHARD_MASK_GID) ++ ctx->stat.ia_gid = stbuf->ia_gid; + +- if (valid & SHARD_MASK_SIZE) +- ctx->stat.ia_size = stbuf->ia_size; ++ if (valid & SHARD_MASK_SIZE) ++ ctx->stat.ia_size = stbuf->ia_size; + +- if (valid & SHARD_MASK_BLOCKS) +- ctx->stat.ia_blocks = stbuf->ia_blocks; ++ if (valid & SHARD_MASK_BLOCKS) ++ ctx->stat.ia_blocks = stbuf->ia_blocks; + +- if (valid & SHARD_MASK_TIMES) { +- SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, +- stbuf->ia_mtime, stbuf->ia_mtime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, +- stbuf->ia_ctime, stbuf->ia_ctime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, +- stbuf->ia_atime, stbuf->ia_atime_nsec); +- } ++ if (valid & SHARD_MASK_TIMES) { ++ SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, ++ stbuf->ia_mtime, stbuf->ia_mtime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, ++ stbuf->ia_ctime, stbuf->ia_ctime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, ++ stbuf->ia_atime, stbuf->ia_atime_nsec); ++ } + +- if (valid & SHARD_MASK_OTHERS) { +- ctx->stat.ia_ino = stbuf->ia_ino; +- gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); +- ctx->stat.ia_dev = stbuf->ia_dev; +- ctx->stat.ia_type = stbuf->ia_type; +- ctx->stat.ia_rdev = stbuf->ia_rdev; +- ctx->stat.ia_blksize = stbuf->ia_blksize; +- } ++ if (valid & SHARD_MASK_OTHERS) { ++ ctx->stat.ia_ino = stbuf->ia_ino; ++ gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); ++ ctx->stat.ia_dev = stbuf->ia_dev; ++ 
ctx->stat.ia_type = stbuf->ia_type; ++ ctx->stat.ia_rdev = stbuf->ia_rdev; ++ ctx->stat.ia_blksize = stbuf->ia_blksize; ++ } + +- if (valid & SHARD_MASK_REFRESH_RESET) +- ctx->refresh = _gf_false; ++ if (valid & SHARD_MASK_REFRESH_RESET) ++ ctx->refresh = _gf_false; + +- return 0; ++ return 0; + } + +-int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) { +- int ret = -1; ++int ++shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) ++{ ++ int ret = -1; + +- LOCK(&inode->lock); +- { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int ++__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) ++{ ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- ctx->refresh = _gf_true; ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } +-int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { +- int ret = -1; ++int ++shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) ++{ ++ int ret = -1; + +- LOCK(&inode->lock); +- { ret = __shard_inode_ctx_set_refresh_flag(inode, this); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_set_refresh_flag(inode, this); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int ++__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) ++{ ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- ctx->refreshed = _gf_true; +- return 0; ++ ctx->refreshed = _gf_true; ++ return 0; + } + +-int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { +- int ret = -1; ++int ++shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) ++{ ++ int ret = -1; + +- LOCK(&inode->lock); +- { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) { +- int ret = -1; +- shard_inode_ctx_t *base_ictx = NULL; +- shard_inode_ctx_t *shard_ictx = NULL; ++int ++__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) ++{ ++ int ret = -1; ++ shard_inode_ctx_t *base_ictx = NULL; ++ shard_inode_ctx_t *shard_ictx = NULL; + +- ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (ret) ++ return ret; + +- ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); ++ if (ret) 
++ return ret; + +- if (shard_ictx->fsync_needed) { +- shard_ictx->fsync_needed++; +- return 1; +- } ++ if (shard_ictx->fsync_needed) { ++ shard_ictx->fsync_needed++; ++ return 1; ++ } + +- list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); +- shard_ictx->inode = shard_inode; +- shard_ictx->fsync_needed++; +- base_ictx->fsync_count++; +- shard_ictx->base_inode = base_inode; ++ list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); ++ shard_ictx->inode = shard_inode; ++ shard_ictx->fsync_needed++; ++ base_ictx->fsync_count++; ++ shard_ictx->base_inode = base_inode; + +- return 0; ++ return 0; + } + +-int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) { +- int ret = -1; ++int ++shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) ++{ ++ int ret = -1; + +- /* This ref acts as a refkeepr on the base inode. We +- * need to keep this inode alive as it holds the head +- * of the to_fsync_list. +- */ +- inode_ref(base_inode); +- inode_ref(shard_inode); ++ /* This ref acts as a refkeepr on the base inode. We ++ * need to keep this inode alive as it holds the head ++ * of the to_fsync_list. ++ */ ++ inode_ref(base_inode); ++ inode_ref(shard_inode); + +- LOCK(&base_inode->lock); +- LOCK(&shard_inode->lock); +- { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&base_inode->lock); ++ LOCK(&base_inode->lock); ++ LOCK(&shard_inode->lock); ++ { ++ ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, ++ shard_inode); ++ } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&base_inode->lock); + +- /* Unref the base inode corresponding to the ref above, if the shard is +- * found to be already part of the fsync list. +- */ +- if (ret != 0) { +- inode_unref(base_inode); +- inode_unref(shard_inode); +- } +- return ret; ++ /* Unref the base inode corresponding to the ref above, if the shard is ++ * found to be already part of the fsync list. ++ */ ++ if (ret != 0) { ++ inode_unref(base_inode); ++ inode_unref(shard_inode); ++ } ++ return ret; + } + +-gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++gf_boolean_t ++__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) ++{ ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- /* If inode ctx get fails, better to err on the side of caution and +- * try again? Unless the failure is due to mem-allocation. +- */ +- if (ret) +- return _gf_true; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ /* If inode ctx get fails, better to err on the side of caution and ++ * try again? Unless the failure is due to mem-allocation. 
++ */ ++ if (ret) ++ return _gf_true; + +- return !ctx->refreshed; ++ return !ctx->refreshed; + } + +-gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { +- gf_boolean_t flag = _gf_false; ++gf_boolean_t ++shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) ++{ ++ gf_boolean_t flag = _gf_false; + +- LOCK(&inode->lock); +- { flag = __shard_inode_ctx_needs_lookup(inode, this); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ flag = __shard_inode_ctx_needs_lookup(inode, this); ++ } ++ UNLOCK(&inode->lock); + +- return flag; ++ return flag; + } +-int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, +- struct iatt *stbuf) { +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int ++__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) ++{ ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if ((stbuf->ia_size != ctx->stat.ia_size) || +- (stbuf->ia_blocks != ctx->stat.ia_blocks)) +- ctx->refresh = _gf_true; ++ if ((stbuf->ia_size != ctx->stat.ia_size) || ++ (stbuf->ia_blocks != ctx->stat.ia_blocks)) ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, +- struct iatt *stbuf) { +- int ret = -1; ++int ++shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) ++{ ++ int ret = -1; + +- LOCK(&inode->lock); +- { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_invalidate(inode, this, stbuf); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int __shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) { +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int ++__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) ++{ ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *block_size = ctx->block_size; ++ *block_size = ctx->block_size; + +- return 0; ++ return 0; + } + +-int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) { +- int ret = -1; ++int ++shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) ++{ ++ int ret = -1; + +- LOCK(&inode->lock); +- { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_get_block_size(inode, this, block_size); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) { +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int ++__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) ++{ ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- 
ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *fsync_count = ctx->fsync_needed; ++ *fsync_count = ctx->fsync_needed; + +- return 0; ++ return 0; + } + +-int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) { +- int ret = -1; ++int ++shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) ++{ ++ int ret = -1; + +- LOCK(&inode->lock); +- { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } +-int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) { +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int ++__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) ++{ ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); +- return 0; ++ memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); ++ return 0; + } + +-int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) { +- int ret = -1; ++int ++shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) ++{ ++ int ret = -1; + +- LOCK(&inode->lock); +- { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_get_all(inode, this, ctx_out); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) { +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int ++__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) ++{ ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- if (ctx->refresh == _gf_false) +- *buf = ctx->stat; +- else +- *need_refresh = _gf_true; ++ if (ctx->refresh == _gf_false) ++ *buf = ctx->stat; ++ else ++ *need_refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) { +- int ret = -1; ++int ++shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) ++{ ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = +- __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, ++ need_refresh); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-void shard_local_wipe(shard_local_t *local) { +- 
int i = 0; +- int count = 0; ++void ++shard_local_wipe(shard_local_t *local) ++{ ++ int i = 0; ++ int count = 0; ++ ++ count = local->num_blocks; ++ ++ syncbarrier_destroy(&local->barrier); ++ loc_wipe(&local->loc); ++ loc_wipe(&local->dot_shard_loc); ++ loc_wipe(&local->dot_shard_rm_loc); ++ loc_wipe(&local->loc2); ++ loc_wipe(&local->tmp_loc); ++ loc_wipe(&local->int_inodelk.loc); ++ loc_wipe(&local->int_entrylk.loc); ++ loc_wipe(&local->newloc); ++ ++ if (local->name) ++ GF_FREE(local->name); ++ ++ if (local->int_entrylk.basename) ++ GF_FREE(local->int_entrylk.basename); ++ if (local->fd) ++ fd_unref(local->fd); + +- count = local->num_blocks; ++ if (local->xattr_req) ++ dict_unref(local->xattr_req); ++ if (local->xattr_rsp) ++ dict_unref(local->xattr_rsp); + +- syncbarrier_destroy(&local->barrier); +- loc_wipe(&local->loc); +- loc_wipe(&local->dot_shard_loc); +- loc_wipe(&local->dot_shard_rm_loc); +- loc_wipe(&local->loc2); +- loc_wipe(&local->tmp_loc); +- loc_wipe(&local->int_inodelk.loc); +- loc_wipe(&local->int_entrylk.loc); +- loc_wipe(&local->newloc); ++ for (i = 0; i < count; i++) { ++ if (!local->inode_list) ++ break; + +- if (local->name) +- GF_FREE(local->name); ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } + +- if (local->int_entrylk.basename) +- GF_FREE(local->int_entrylk.basename); +- if (local->fd) +- fd_unref(local->fd); ++ GF_FREE(local->inode_list); + +- if (local->xattr_req) +- dict_unref(local->xattr_req); +- if (local->xattr_rsp) +- dict_unref(local->xattr_rsp); ++ GF_FREE(local->vector); ++ if (local->iobref) ++ iobref_unref(local->iobref); ++ if (local->list_inited) ++ gf_dirent_free(&local->entries_head); ++ if (local->inodelk_frame) ++ SHARD_STACK_DESTROY(local->inodelk_frame); ++ if (local->entrylk_frame) ++ SHARD_STACK_DESTROY(local->entrylk_frame); ++} + +- for (i = 0; i < count; i++) { +- if (!local->inode_list) +- break; +- +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- +- GF_FREE(local->inode_list); +- +- GF_FREE(local->vector); +- if (local->iobref) +- iobref_unref(local->iobref); +- if (local->list_inited) +- gf_dirent_free(&local->entries_head); +- if (local->inodelk_frame) +- SHARD_STACK_DESTROY(local->inodelk_frame); +- if (local->entrylk_frame) +- SHARD_STACK_DESTROY(local->entrylk_frame); +-} +- +-int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) { +- int ret = -1; +- void *size_attr = NULL; +- uint64_t size_array[4]; +- +- ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, +- SHARD_MSG_INTERNAL_XATTR_MISSING, +- "Failed to " +- "get " GF_XATTR_SHARD_FILE_SIZE " for %s", +- uuid_utoa(stbuf->ia_gfid)); +- return ret; +- } ++int ++shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) ++{ ++ int ret = -1; ++ void *size_attr = NULL; ++ uint64_t size_array[4]; ++ ++ ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_INTERNAL_XATTR_MISSING, ++ "Failed to " ++ "get " GF_XATTR_SHARD_FILE_SIZE " for %s", ++ uuid_utoa(stbuf->ia_gfid)); ++ return ret; ++ } + +- memcpy(size_array, size_attr, sizeof(size_array)); ++ memcpy(size_array, size_attr, sizeof(size_array)); + +- stbuf->ia_size = ntoh64(size_array[0]); +- stbuf->ia_blocks = ntoh64(size_array[2]); ++ stbuf->ia_size = ntoh64(size_array[0]); ++ stbuf->ia_blocks = ntoh64(size_array[2]); + +- return 0; ++ return 0; + } + +-int 
shard_call_count_return(call_frame_t *frame) { +- int call_count = 0; +- shard_local_t *local = NULL; ++int ++shard_call_count_return(call_frame_t *frame) ++{ ++ int call_count = 0; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- LOCK(&frame->lock); +- { call_count = --local->call_count; } +- UNLOCK(&frame->lock); ++ LOCK(&frame->lock); ++ { ++ call_count = --local->call_count; ++ } ++ UNLOCK(&frame->lock); + +- return call_count; ++ return call_count; + } + +-static char *shard_internal_dir_string(shard_internal_dir_type_t type) { +- char *str = NULL; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- str = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- str = GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- return str; ++static char * ++shard_internal_dir_string(shard_internal_dir_type_t type) ++{ ++ char *str = NULL; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ str = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ str = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ return str; + } + +-static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) { +- int ret = -1; +- char *bname = NULL; +- inode_t *parent = NULL; +- loc_t *internal_dir_loc = NULL; +- shard_priv_t *priv = NULL; ++static int ++shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) ++{ ++ int ret = -1; ++ char *bname = NULL; ++ inode_t *parent = NULL; ++ loc_t *internal_dir_loc = NULL; ++ shard_priv_t *priv = NULL; + +- priv = this->private; +- if (!local) +- return -1; ++ priv = this->private; ++ if (!local) ++ return -1; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ internal_dir_loc = &local->dot_shard_loc; ++ bname = GF_SHARD_DIR; ++ parent = inode_ref(this->itable->root); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ internal_dir_loc = &local->dot_shard_rm_loc; ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ parent = inode_ref(priv->dot_shard_inode); ++ break; ++ default: ++ break; ++ } + +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- internal_dir_loc = &local->dot_shard_loc; +- bname = GF_SHARD_DIR; +- parent = inode_ref(this->itable->root); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- internal_dir_loc = &local->dot_shard_rm_loc; +- bname = GF_SHARD_REMOVE_ME_DIR; +- parent = inode_ref(priv->dot_shard_inode); +- break; +- default: +- break; +- } +- +- internal_dir_loc->inode = inode_new(this->itable); +- internal_dir_loc->parent = parent; +- ret = inode_path(internal_dir_loc->parent, bname, +- (char **)&internal_dir_loc->path); +- if (ret < 0 || !(internal_dir_loc->inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", bname); +- goto out; +- } +- +- internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); +- if (internal_dir_loc->name) +- internal_dir_loc->name++; +- +- ret = 0; +-out: +- return ret; +-} +- +-inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, +- inode_t *base_inode, int block_num, +- uuid_t gfid) { +- char block_bname[256] = { +- 0, +- }; +- inode_t *lru_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *lru_inode_ctx = NULL; +- shard_inode_ctx_t *lru_base_inode_ctx = NULL; +- inode_t *fsync_inode = NULL; +- inode_t *lru_base_inode = NULL; +- gf_boolean_t do_fsync = 
_gf_false; +- +- priv = this->private; +- +- shard_inode_ctx_get(linked_inode, this, &ctx); +- +- if (list_empty(&ctx->ilist)) { +- if (priv->inode_count + 1 <= priv->lru_limit) { +- /* If this inode was linked here for the first time (indicated +- * by empty list), and if there is still space in the priv list, +- * add this ctx to the tail of the list. +- */ +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- priv->inode_count++; +- ctx->base_inode = inode_ref(base_inode); +- } else { +- /*If on the other hand there is no available slot for this inode +- * in the list, delete the lru inode from the head of the list, +- * unlink it. And in its place add this new inode into the list. +- */ +- lru_inode_ctx = +- list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist); +- GF_ASSERT(lru_inode_ctx->block_num > 0); +- lru_base_inode = lru_inode_ctx->base_inode; +- list_del_init(&lru_inode_ctx->ilist); +- lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid); +- /* If the lru inode was part of the pending-fsync list, +- * the base inode needs to be unref'd, the lru inode +- * deleted from fsync list and fsync'd in a new frame, +- * and then unlinked in memory and forgotten. +- */ +- if (!lru_base_inode) +- goto after_fsync_check; +- LOCK(&lru_base_inode->lock); +- LOCK(&lru_inode->lock); +- { +- if (!list_empty(&lru_inode_ctx->to_fsync_list)) { +- list_del_init(&lru_inode_ctx->to_fsync_list); +- lru_inode_ctx->fsync_needed = 0; +- do_fsync = _gf_true; +- __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx); +- lru_base_inode_ctx->fsync_count--; +- } +- } +- UNLOCK(&lru_inode->lock); +- UNLOCK(&lru_base_inode->lock); +- +- after_fsync_check: +- if (!do_fsync) { +- shard_make_block_bname(lru_inode_ctx->block_num, +- lru_inode_ctx->base_gfid, block_bname, +- sizeof(block_bname)); +- /* The following unref corresponds to the ref held at +- * the time the shard was added to the lru list. +- */ +- inode_unref(lru_inode); +- inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); +- inode_forget(lru_inode, 0); +- } else { +- /* The following unref corresponds to the ref +- * held when the shard was added to fsync list. +- */ +- inode_unref(lru_inode); +- fsync_inode = lru_inode; +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- } +- /* The following unref corresponds to the ref +- * held by inode_find() above. +- */ +- inode_unref(lru_inode); +- +- /* The following unref corresponds to the ref held on the base shard +- * at the time of adding shard inode to lru list +- */ +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- ctx->base_inode = inode_ref(base_inode); +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- } +- } else { +- /* If this is not the first time this inode is being operated on, move +- * it to the most recently used end of the list. 
+- */ +- list_move_tail(&ctx->ilist, &priv->ilist_head); +- } +- return fsync_inode; +-} +- +-int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, +- int32_t op_ret, int32_t op_errno) { +- switch (fop) { +- case GF_FOP_LOOKUP: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_STAT: +- SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSTAT: +- SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_TRUNCATE: +- SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_FTRUNCATE: +- SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_MKNOD: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, +- NULL); +- break; +- case GF_FOP_LINK: +- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, +- NULL); +- break; +- case GF_FOP_CREATE: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_UNLINK: +- SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_RENAME: +- SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_READ: +- SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FSYNC: +- SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_REMOVEXATTR: +- SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FREMOVEXATTR: +- SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FGETXATTR: +- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_GETXATTR: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSETXATTR: +- SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETXATTR: +- SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETATTR: +- SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_FSETATTR: +- SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL); +- break; +- case GF_FOP_SEEK: +- SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} +- +-int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, +- call_frame_t *frame, +- int32_t op_ret) { +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (fop) { +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, 
local->xattr_rsp); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} +- +-int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, struct iatt *prebuf, +- struct iatt *postbuf, dict_t *xdata) { +- char block_bname[256] = { +- 0, +- }; +- fd_t *anon_fd = cookie; +- inode_t *shard_inode = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- +- if (anon_fd == NULL || op_ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, +- "fsync failed on shard"); +- goto out; +- } +- shard_inode = anon_fd->inode; +- +- LOCK(&priv->lock); +- LOCK(&shard_inode->lock); +- { +- __shard_inode_ctx_get(shard_inode, this, &ctx); +- if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { +- shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname, +- sizeof(block_bname)); +- inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); +- /* The following unref corresponds to the ref held by +- * inode_link() at the time the shard was created or +- * looked up +- */ +- inode_unref(shard_inode); +- inode_forget(shard_inode, 0); +- } +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&priv->lock); ++ internal_dir_loc->inode = inode_new(this->itable); ++ internal_dir_loc->parent = parent; ++ ret = inode_path(internal_dir_loc->parent, bname, ++ (char **)&internal_dir_loc->path); ++ if (ret < 0 || !(internal_dir_loc->inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", bname); ++ goto out; ++ } ++ ++ internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); ++ if (internal_dir_loc->name) ++ internal_dir_loc->name++; + ++ ret = 0; + out: +- if (anon_fd) +- fd_unref(anon_fd); +- STACK_DESTROY(frame->root); +- return 0; ++ return ret; + } + +-int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) { +- fd_t *anon_fd = NULL; +- call_frame_t *fsync_frame = NULL; +- +- fsync_frame = create_frame(this, this->ctx->pool); +- if (!fsync_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to fsync shard"); +- return -1; +- } +- +- anon_fd = fd_anonymous(inode); +- if (!anon_fd) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create anon fd to" +- " fsync shard"); +- STACK_DESTROY(fsync_frame->root); +- return -1; +- } +- +- STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, +- 1, NULL); +- return 0; +-} +- +-int shard_common_resolve_shards( +- call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler) { +- int i = -1; +- uint32_t shard_idx_iter = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *res_inode = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- local->call_count = 0; +- shard_idx_iter = local->first_block; +- res_inode = 
local->resolver_base_inode; +- if (res_inode) +- gf_uuid_copy(gfid, res_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- if ((local->op_ret < 0) || (local->resolve_not)) +- goto out; +- +- while (shard_idx_iter <= local->last_block) { +- i++; +- if (shard_idx_iter == 0) { +- local->inode_list[i] = inode_ref(res_inode); +- shard_idx_iter++; +- continue; +- } +- +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- inode = NULL; +- inode = inode_resolve(this->itable, path); +- if (inode) { +- gf_msg_debug(this->name, 0, "Shard %d already " +- "present. gfid=%s. Saving inode for future.", +- shard_idx_iter, uuid_utoa(inode->gfid)); +- local->inode_list[i] = inode; +- /* Let the ref on the inodes that are already present +- * in inode table still be held so that they don't get +- * forgotten by the time the fop reaches the actual +- * write stage. +- */ +- LOCK(&priv->lock); +- { +- fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode, +- shard_idx_iter, gfid); +- } +- UNLOCK(&priv->lock); +- shard_idx_iter++; +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +- continue; ++inode_t * ++__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, ++ inode_t *base_inode, int block_num, ++ uuid_t gfid) ++{ ++ char block_bname[256] = { ++ 0, ++ }; ++ inode_t *lru_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *lru_inode_ctx = NULL; ++ shard_inode_ctx_t *lru_base_inode_ctx = NULL; ++ inode_t *fsync_inode = NULL; ++ inode_t *lru_base_inode = NULL; ++ gf_boolean_t do_fsync = _gf_false; ++ ++ priv = this->private; ++ ++ shard_inode_ctx_get(linked_inode, this, &ctx); ++ ++ if (list_empty(&ctx->ilist)) { ++ if (priv->inode_count + 1 <= priv->lru_limit) { ++ /* If this inode was linked here for the first time (indicated ++ * by empty list), and if there is still space in the priv list, ++ * add this ctx to the tail of the list. ++ */ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ priv->inode_count++; ++ ctx->base_inode = inode_ref(base_inode); ++ } else { ++ /*If on the other hand there is no available slot for this inode ++ * in the list, delete the lru inode from the head of the list, ++ * unlink it. And in its place add this new inode into the list. ++ */ ++ lru_inode_ctx = list_first_entry(&priv->ilist_head, ++ shard_inode_ctx_t, ilist); ++ GF_ASSERT(lru_inode_ctx->block_num > 0); ++ lru_base_inode = lru_inode_ctx->base_inode; ++ list_del_init(&lru_inode_ctx->ilist); ++ lru_inode = inode_find(linked_inode->table, ++ lru_inode_ctx->stat.ia_gfid); ++ /* If the lru inode was part of the pending-fsync list, ++ * the base inode needs to be unref'd, the lru inode ++ * deleted from fsync list and fsync'd in a new frame, ++ * and then unlinked in memory and forgotten. 
++ */ ++ if (!lru_base_inode) ++ goto after_fsync_check; ++ LOCK(&lru_base_inode->lock); ++ LOCK(&lru_inode->lock); ++ { ++ if (!list_empty(&lru_inode_ctx->to_fsync_list)) { ++ list_del_init(&lru_inode_ctx->to_fsync_list); ++ lru_inode_ctx->fsync_needed = 0; ++ do_fsync = _gf_true; ++ __shard_inode_ctx_get(lru_base_inode, this, ++ &lru_base_inode_ctx); ++ lru_base_inode_ctx->fsync_count--; ++ } ++ } ++ UNLOCK(&lru_inode->lock); ++ UNLOCK(&lru_base_inode->lock); ++ ++ after_fsync_check: ++ if (!do_fsync) { ++ shard_make_block_bname(lru_inode_ctx->block_num, ++ lru_inode_ctx->base_gfid, block_bname, ++ sizeof(block_bname)); ++ /* The following unref corresponds to the ref held at ++ * the time the shard was added to the lru list. ++ */ ++ inode_unref(lru_inode); ++ inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); ++ inode_forget(lru_inode, 0); ++ } else { ++ /* The following unref corresponds to the ref ++ * held when the shard was added to fsync list. ++ */ ++ inode_unref(lru_inode); ++ fsync_inode = lru_inode; ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ } ++ /* The following unref corresponds to the ref ++ * held by inode_find() above. ++ */ ++ inode_unref(lru_inode); ++ ++ /* The following unref corresponds to the ref held on the base shard ++ * at the time of adding shard inode to lru list ++ */ ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ ctx->base_inode = inode_ref(base_inode); ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ } + } else { +- local->call_count++; +- shard_idx_iter++; ++ /* If this is not the first time this inode is being operated on, move ++ * it to the most recently used end of the list. 
++ */ ++ list_move_tail(&ctx->ilist, &priv->ilist_head); + } +- } +-out: +- post_res_handler(frame, this); +- return 0; ++ return fsync_inode; + } + +-int shard_update_file_size_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- dict_t *dict, dict_t *xdata) { +- inode_t *inode = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if ((local->fd) && (local->fd->inode)) +- inode = local->fd->inode; +- else if (local->loc.inode) +- inode = local->loc.inode; +- +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size" +- " xattr failed on %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } +- +- if (shard_modify_size_and_block_count(&local->postbuf, dict)) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +-err: +- local->post_update_size_handler(frame, this); +- return 0; ++int ++shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, ++ int32_t op_ret, int32_t op_errno) ++{ ++ switch (fop) { ++ case GF_FOP_LOOKUP: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_STAT: ++ SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSTAT: ++ SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_TRUNCATE: ++ SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_FTRUNCATE: ++ SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_MKNOD: ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_LINK: ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_CREATE: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, ++ NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_UNLINK: ++ SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_RENAME: ++ SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, ++ NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_READ: ++ SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_FSYNC: ++ SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_REMOVEXATTR: ++ SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FREMOVEXATTR: ++ SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FGETXATTR: ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_GETXATTR: ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSETXATTR: ++ SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETXATTR: ++ SHARD_STACK_UNWIND(setxattr, frame, op_ret, 
op_errno, NULL); ++ break; ++ case GF_FOP_SETATTR: ++ SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_FSETATTR: ++ SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_SEEK: ++ SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; + } + +-int shard_set_size_attrs(int64_t size, int64_t block_count, +- int64_t **size_attr_p) { +- int ret = -1; +- int64_t *size_attr = NULL; ++int ++shard_common_inode_write_success_unwind(glusterfs_fop_t fop, ++ call_frame_t *frame, int32_t op_ret) ++{ ++ shard_local_t *local = NULL; + +- if (!size_attr_p) +- goto out; ++ local = frame->local; + +- size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); +- if (!size_attr) +- goto out; ++ switch (fop) { ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} + +- size_attr[0] = hton64(size); +- /* As sharding evolves, it _may_ be necessary to embed more pieces of +- * information within the same xattr. So allocating slots for them in +- * advance. For now, only bytes 0-63 and 128-191 which would make up the +- * current size and block count respectively of the file are valid. 
+- */ +- size_attr[2] = hton64(block_count); ++int ++shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) ++{ ++ char block_bname[256] = { ++ 0, ++ }; ++ fd_t *anon_fd = cookie; ++ inode_t *shard_inode = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; + +- *size_attr_p = size_attr; ++ priv = this->private; + +- ret = 0; +-out: +- return ret; +-} ++ if (anon_fd == NULL || op_ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, ++ "fsync failed on shard"); ++ goto out; ++ } ++ shard_inode = anon_fd->inode; + +-int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, +- loc_t *loc, +- shard_post_update_size_fop_handler_t handler) { +- int ret = -1; +- int64_t *size_attr = NULL; +- int64_t delta_blocks = 0; +- inode_t *inode = NULL; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; ++ LOCK(&priv->lock); ++ LOCK(&shard_inode->lock); ++ { ++ __shard_inode_ctx_get(shard_inode, this, &ctx); ++ if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { ++ shard_make_block_bname(ctx->block_num, shard_inode->gfid, ++ block_bname, sizeof(block_bname)); ++ inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); ++ /* The following unref corresponds to the ref held by ++ * inode_link() at the time the shard was created or ++ * looked up ++ */ ++ inode_unref(shard_inode); ++ inode_forget(shard_inode, 0); ++ } ++ } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&priv->lock); + +- local = frame->local; +- local->post_update_size_handler = handler; ++out: ++ if (anon_fd) ++ fd_unref(anon_fd); ++ STACK_DESTROY(frame->root); ++ return 0; ++} + +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; +- } +- +- if (fd) +- inode = fd->inode; +- else +- inode = loc->inode; +- +- /* If both size and block count have not changed, then skip the xattrop. +- */ +- delta_blocks = GF_ATOMIC_GET(local->delta_blocks); +- if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { +- goto out; +- } +- +- ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks, +- &size_attr); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, +- "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; +- } +- +- ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict. 
gfid=%s", GF_XATTR_SHARD_FILE_SIZE, +- uuid_utoa(inode->gfid)); +- GF_FREE(size_attr); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; +- } ++int ++shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) ++{ ++ fd_t *anon_fd = NULL; ++ call_frame_t *fsync_frame = NULL; ++ ++ fsync_frame = create_frame(this, this->ctx->pool); ++ if (!fsync_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to fsync shard"); ++ return -1; ++ } + +- if (fd) +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64, +- xattr_req, NULL); +- else +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64, +- xattr_req, NULL); ++ anon_fd = fd_anonymous(inode); ++ if (!anon_fd) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create anon fd to" ++ " fsync shard"); ++ STACK_DESTROY(fsync_frame->root); ++ return -1; ++ } + +- dict_unref(xattr_req); +- return 0; ++ STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, ++ anon_fd, 1, NULL); ++ return 0; ++} + +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; +-} +- +-static inode_t *shard_link_internal_dir_inode(shard_local_t *local, +- inode_t *inode, struct iatt *buf, +- shard_internal_dir_type_t type) { +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- char *bname = NULL; +- inode_t **priv_inode = NULL; +- inode_t *parent = NULL; +- +- priv = THIS->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- bname = GF_SHARD_DIR; +- priv_inode = &priv->dot_shard_inode; +- parent = inode->table->root; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- bname = GF_SHARD_REMOVE_ME_DIR; +- priv_inode = &priv->dot_shard_rm_inode; +- parent = priv->dot_shard_inode; +- break; +- default: +- break; +- } +- +- linked_inode = inode_link(inode, parent, bname, buf); +- inode_lookup(linked_inode); +- *priv_inode = linked_inode; +- return linked_inode; +-} +- +-int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) { +- shard_local_t *local = NULL; +- inode_t *linked_inode = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; +- +- local = frame->local; +- +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } +- +- /* To-Do: Fix refcount increment per call to +- * shard_link_internal_dir_inode(). 
+- */ +- linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- shard_inode_ctx_mark_dir_refreshed(linked_inode, this); +-out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; +-} +- +-int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_internal_dir_type_t type) { +- loc_t loc = { +- 0, +- }; +- inode_t *inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; +- +- local = frame->local; +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- break; +- default: +- break; +- } +- +- inode = inode_find(this->itable, gfid); +- +- if (!shard_inode_ctx_needs_lookup(inode, this)) { +- local->op_ret = 0; +- goto out; +- } ++int ++shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler) ++{ ++ int i = -1; ++ uint32_t shard_idx_iter = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *res_inode = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- /* Plain assignment because the ref is already taken above through +- * call to inode_find() +- */ +- loc.inode = inode; +- gf_uuid_copy(loc.gfid, gfid); ++ priv = this->private; ++ local = frame->local; ++ local->call_count = 0; ++ shard_idx_iter = local->first_block; ++ res_inode = local->resolver_base_inode; ++ if (res_inode) ++ gf_uuid_copy(gfid, res_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); + +- STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, +- NULL); +- loc_wipe(&loc); ++ if ((local->op_ret < 0) || (local->resolve_not)) ++ goto out; + +- return 0; ++ while (shard_idx_iter <= local->last_block) { ++ i++; ++ if (shard_idx_iter == 0) { ++ local->inode_list[i] = inode_ref(res_inode); ++ shard_idx_iter++; ++ continue; ++ } + ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ inode = NULL; ++ inode = inode_resolve(this->itable, path); ++ if (inode) { ++ gf_msg_debug(this->name, 0, ++ "Shard %d already " ++ "present. gfid=%s. Saving inode for future.", ++ shard_idx_iter, uuid_utoa(inode->gfid)); ++ local->inode_list[i] = inode; ++ /* Let the ref on the inodes that are already present ++ * in inode table still be held so that they don't get ++ * forgotten by the time the fop reaches the actual ++ * write stage. 
++ */ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list( ++ inode, this, res_inode, shard_idx_iter, gfid); ++ } ++ UNLOCK(&priv->lock); ++ shard_idx_iter++; ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++ continue; ++ } else { ++ local->call_count++; ++ shard_idx_iter++; ++ } ++ } + out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ post_res_handler(frame, this); ++ return 0; + } + +-int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) { +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++int ++shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } +- +- if (!IA_ISDIR(buf->ia_type)) { +- gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, +- "%s already exists and " +- "is not a directory. Please remove it from all bricks " +- "and try again", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = EIO; +- goto unwind; +- } +- +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; ++ if ((local->fd) && (local->fd->inode)) ++ inode = local->fd->inode; ++ else if (local->loc.inode) ++ inode = local->loc.inode; + +-unwind: +- local->post_res_handler(frame, this); +- return 0; +-} +- +-int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler, +- shard_internal_dir_type_t type) { +- int ret = -1; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- local->post_res_handler = post_res_handler; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } +- +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid of %s into dict", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } else { +- free_gfid = _gf_false; +- } +- +- STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, +- xattr_req); +- +- 
dict_unref(xattr_req); +- return 0; +- +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- if (free_gfid) +- GF_FREE(gfid); +- post_res_handler(frame, this); +- return 0; +-} +- +-static void shard_inode_ctx_update(inode_t *inode, xlator_t *this, +- dict_t *xdata, struct iatt *buf) { +- int ret = 0; +- uint64_t size = 0; +- void *bsize = NULL; +- +- if (shard_inode_ctx_get_block_size(inode, this, &size)) { +- /* Fresh lookup */ +- ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (!ret) +- size = ntoh64(*((uint64_t *)bsize)); +- /* If the file is sharded, set its block size, otherwise just +- * set 0. +- */ +- +- shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); +- } +- /* If the file is sharded, also set the remaining attributes, +- * except for ia_size and ia_blocks. +- */ +- if (size) { +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- (void)shard_inode_ctx_invalidate(inode, this, buf); +- } +-} +- +-int shard_delete_shards(void *opaque); +- +-int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); +- +-int shard_start_background_deletion(xlator_t *this) { +- int ret = 0; +- gf_boolean_t i_cleanup = _gf_true; +- shard_priv_t *priv = NULL; +- call_frame_t *cleanup_frame = NULL; +- +- priv = this->private; +- +- LOCK(&priv->lock); +- { +- switch (priv->bg_del_state) { +- case SHARD_BG_DELETION_NONE: +- i_cleanup = _gf_true; +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- break; +- case SHARD_BG_DELETION_LAUNCHING: +- i_cleanup = _gf_false; +- break; +- case SHARD_BG_DELETION_IN_PROGRESS: +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- i_cleanup = _gf_false; +- break; +- default: +- break; +- } +- } +- UNLOCK(&priv->lock); +- if (!i_cleanup) +- return 0; +- +- cleanup_frame = create_frame(this, this->ctx->pool); +- if (!cleanup_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "new frame to delete shards"); +- ret = -ENOMEM; +- goto err; +- } +- +- set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); +- +- ret = synctask_new(this->ctx->env, shard_delete_shards, +- shard_delete_shards_cbk, cleanup_frame, cleanup_frame); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED, +- "failed to create task to do background " +- "cleanup of shards"); +- STACK_DESTROY(cleanup_frame->root); +- goto err; +- } +- return 0; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_UPDATE_FILE_SIZE_FAILED, ++ "Update to file size" ++ " xattr failed on %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } + ++ if (shard_modify_size_and_block_count(&local->postbuf, dict)) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } + err: +- LOCK(&priv->lock); +- { priv->bg_del_state = SHARD_BG_DELETION_NONE; } +- UNLOCK(&priv->lock); +- return ret; ++ local->post_update_size_handler(frame, this); ++ return 0; + } + +-int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, struct iatt *postparent) { +- int ret = -1; +- shard_priv_t *priv = NULL; +- gf_boolean_t i_start_cleanup = _gf_false; +- +- priv = this->private; +- +- if (op_ret < 0) +- goto unwind; +- +- if (IA_ISDIR(buf->ia_type)) +- goto unwind; +- +- /* Also, if the file is sharded, get the file size and block cnt xattr, +- * and store them in the stbuf 
appropriately. +- */ +- +- if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(buf, xdata); +- +- /* If this was a fresh lookup, there are two possibilities: +- * 1) If the file is sharded (indicated by the presence of block size +- * xattr), store this block size, along with rdev and mode in its +- * inode ctx. +- * 2) If the file is not sharded, store size along with rdev and mode +- * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is +- * already initialised to all zeroes, nothing more needs to be done. +- */ ++int ++shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p) ++{ ++ int ret = -1; ++ int64_t *size_attr = NULL; + +- (void)shard_inode_ctx_update(inode, this, xdata, buf); ++ if (!size_attr_p) ++ goto out; + +- LOCK(&priv->lock); +- { +- if (priv->first_lookup_done == _gf_false) { +- priv->first_lookup_done = _gf_true; +- i_start_cleanup = _gf_true; +- } +- } +- UNLOCK(&priv->lock); ++ size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); ++ if (!size_attr) ++ goto out; + +- if (!i_start_cleanup) +- goto unwind; ++ size_attr[0] = hton64(size); ++ /* As sharding evolves, it _may_ be necessary to embed more pieces of ++ * information within the same xattr. So allocating slots for them in ++ * advance. For now, only bytes 0-63 and 128-191 which would make up the ++ * current size and block count respectively of the file are valid. ++ */ ++ size_attr[2] = hton64(block_count); + +- ret = shard_start_background_deletion(this); +- if (ret < 0) { +- LOCK(&priv->lock); +- { priv->first_lookup_done = _gf_false; } +- UNLOCK(&priv->lock); +- } ++ *size_attr_p = size_attr; + +-unwind: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, +- postparent); +- return 0; ++ ret = 0; ++out: ++ return ret; + } + +-int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, +- dict_t *xattr_req) { +- int ret = -1; +- int32_t op_errno = ENOMEM; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- this->itable = loc->inode->table; +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && +- (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { +- SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); +- } ++int ++shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ loc_t *loc, shard_post_update_size_fop_handler_t handler) ++{ ++ int ret = -1; ++ int64_t *size_attr = NULL; ++ int64_t delta_blocks = 0; ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = frame->local; ++ local->post_update_size_handler = handler; + +- frame->local = local; ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } + +- loc_copy(&local->loc, loc); ++ if (fd) ++ inode = fd->inode; ++ else ++ inode = loc->inode; + +- local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ /* If both size and block count have not changed, then skip the xattrop. 
++ */ ++ delta_blocks = GF_ATOMIC_GET(local->delta_blocks); ++ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { ++ goto out; ++ } + +- if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ ret = shard_set_size_attrs(local->delta_size + local->hole_size, ++ delta_blocks, &size_attr); + if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict" +- " value: key:%s for path %s", +- GF_XATTR_SHARD_BLOCK_SIZE, loc->path); +- goto err; ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, ++ "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; + } +- } + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); ++ ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); + if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s for path %s.", +- GF_XATTR_SHARD_FILE_SIZE, loc->path); +- goto err; ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict. gfid=%s", ++ GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid)); ++ GF_FREE(size_attr); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; + } +- } + +- if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) +- dict_del(xattr_req, GF_CONTENT_KEY); ++ if (fd) ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fxattrop, fd, ++ GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); ++ else ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, loc, ++ GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); + +- STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); +- return 0; +-} ++ dict_unref(xattr_req); ++ return 0; + +-int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) { +- int ret = -1; +- int32_t mask = SHARD_INODE_WRITE_MASK; +- shard_local_t *local = NULL; +- shard_inode_ctx_t ctx = { +- 0, +- }; +- +- local = frame->local; +- +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" +- " failed : %s", +- loc_gfid_utoa(&(local->loc))); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++out: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; ++} + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- +- if (shard_inode_ctx_get_all(inode, this, &ctx)) +- mask = SHARD_ALL_MASK; +- +- ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, +- (mask | SHARD_MASK_REFRESH_RESET)); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, +- "Failed to set inode" +- " write params into inode ctx for %s", +- uuid_utoa(buf->ia_gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unwind; +- } ++static inode_t * ++shard_link_internal_dir_inode(shard_local_t *local, inode_t 
*inode, ++ struct iatt *buf, shard_internal_dir_type_t type) ++{ ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ char *bname = NULL; ++ inode_t **priv_inode = NULL; ++ inode_t *parent = NULL; ++ ++ priv = THIS->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ bname = GF_SHARD_DIR; ++ priv_inode = &priv->dot_shard_inode; ++ parent = inode->table->root; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ priv_inode = &priv->dot_shard_rm_inode; ++ parent = priv->dot_shard_inode; ++ break; ++ default: ++ break; ++ } + +-unwind: +- local->handler(frame, this); +- return 0; +-} +- +-int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, +- shard_post_fop_handler_t handler) { +- int ret = -1; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; +- gf_boolean_t need_refresh = _gf_false; +- +- local = frame->local; +- local->handler = handler; +- +- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, +- &need_refresh); +- /* By this time, inode ctx should have been created either in create, +- * mknod, readdirp or lookup. If not it is a bug! +- */ +- if ((ret == 0) && (need_refresh == _gf_false)) { +- gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" +- "Serving prebuf off the inode ctx cache", +- uuid_utoa(loc->gfid)); +- goto out; +- } +- +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; +- } ++ linked_inode = inode_link(inode, parent, bname, buf); ++ inode_lookup(linked_inode); ++ *priv_inode = linked_inode; ++ return linked_inode; ++} + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++int ++shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ inode_t *inode, struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ shard_local_t *local = NULL; ++ inode_t *linked_inode = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + +- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ local = frame->local; + +- dict_unref(xattr_req); +- return 0; ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } + ++ /* To-Do: Fix refcount increment per call to ++ * shard_link_internal_dir_inode(). 
++ */ ++ linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ shard_inode_ctx_mark_dir_refreshed(linked_inode, this); + out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; + } + +-int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; ++int ++shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_internal_dir_type_t type) ++{ ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; + +- local = frame->local; ++ local = frame->local; ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ break; ++ default: ++ break; ++ } + +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ inode = inode_find(this->itable, gfid); + +- SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, +- &local->prebuf, local->xattr_rsp); +- return 0; +-} ++ if (!shard_inode_ctx_needs_lookup(inode, this)) { ++ local->op_ret = 0; ++ goto out; ++ } + +-int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; ++ /* Plain assignment because the ref is already taken above through ++ * call to inode_find() ++ */ ++ loc.inode = inode; ++ gf_uuid_copy(loc.gfid, gfid); + +- local = frame->local; ++ STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, ++ NULL); ++ loc_wipe(&loc); + +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ return 0; + +- SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, +- &local->prebuf, local->xattr_rsp); +- return 0; ++out: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; + } + +-int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- dict_t *xdata) { +- inode_t *inode = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; ++int ++shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, +- "stat failed: %s", local->fd ? 
uuid_utoa(local->fd->inode->gfid) +- : uuid_utoa((local->loc.inode)->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ local = frame->local; + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- local->xattr_rsp = dict_ref(xdata); ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- if (local->loc.inode) +- inode = local->loc.inode; +- else +- inode = local->fd->inode; ++ if (!IA_ISDIR(buf->ia_type)) { ++ gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, ++ "%s already exists and " ++ "is not a directory. Please remove it from all bricks " ++ "and try again", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = EIO; ++ goto unwind; ++ } + +- shard_inode_ctx_invalidate(inode, this, &local->prebuf); ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); ++ } else { ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; + + unwind: +- local->handler(frame, this); +- return 0; ++ local->post_res_handler(frame, this); ++ return 0; + } + +-int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int ++shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler, ++ shard_internal_dir_type_t type) ++{ ++ int ret = -1; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; + +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++ local = frame->local; ++ priv = this->private; ++ local->post_res_handler = post_res_handler; + +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } + +- frame->local = local; ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid of %s into dict", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ 
local->op_errno = ENOMEM; ++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } + +- local->handler = shard_post_stat_handler; +- loc_copy(&local->loc, loc); +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, ++ xattr_req); + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ dict_unref(xattr_req); ++ return 0; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); +- return 0; + err: +- shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ if (free_gfid) ++ GF_FREE(gfid); ++ post_res_handler(frame, this); ++ return 0; + } + +-int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++static void ++shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata, ++ struct iatt *buf) ++{ ++ int ret = 0; ++ uint64_t size = 0; ++ void *bsize = NULL; ++ ++ if (shard_inode_ctx_get_block_size(inode, this, &size)) { ++ /* Fresh lookup */ ++ ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (!ret) ++ size = ntoh64(*((uint64_t *)bsize)); ++ /* If the file is sharded, set its block size, otherwise just ++ * set 0. ++ */ ++ ++ shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); ++ } ++ /* If the file is sharded, also set the remaining attributes, ++ * except for ia_size and ia_blocks. ++ */ ++ if (size) { ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ (void)shard_inode_ctx_invalidate(inode, this, buf); ++ } ++} + +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++int ++shard_delete_shards(void *opaque); + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++int ++shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++int ++shard_start_background_deletion(xlator_t *this) ++{ ++ int ret = 0; ++ gf_boolean_t i_cleanup = _gf_true; ++ shard_priv_t *priv = NULL; ++ call_frame_t *cleanup_frame = NULL; + +- if (!this->itable) +- this->itable = fd->inode->table; ++ priv = this->private; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ LOCK(&priv->lock); ++ { ++ switch (priv->bg_del_state) { ++ case SHARD_BG_DELETION_NONE: ++ i_cleanup = _gf_true; ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ break; ++ case SHARD_BG_DELETION_LAUNCHING: ++ i_cleanup = _gf_false; ++ break; ++ case SHARD_BG_DELETION_IN_PROGRESS: ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ i_cleanup = _gf_false; ++ break; ++ default: ++ break; ++ } ++ } ++ UNLOCK(&priv->lock); ++ if (!i_cleanup) ++ return 0; + +- frame->local = local; ++ cleanup_frame = create_frame(this, this->ctx->pool); ++ if (!cleanup_frame) { ++ 
gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ "new frame to delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } + +- local->handler = shard_post_fstat_handler; +- local->fd = fd_ref(fd); +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ ret = synctask_new(this->ctx->env, shard_delete_shards, ++ shard_delete_shards_cbk, cleanup_frame, cleanup_frame); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "failed to create task to do background " ++ "cleanup of shards"); ++ STACK_DESTROY(cleanup_frame->root); ++ goto err; ++ } ++ return 0; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); +- return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); +- return 0; ++ LOCK(&priv->lock); ++ { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ } ++ UNLOCK(&priv->lock); ++ return ret; + } + +-int shard_post_update_size_truncate_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; ++int ++shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, struct iatt *postparent) ++{ ++ int ret = -1; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t i_start_cleanup = _gf_false; + +- local = frame->local; ++ priv = this->private; + +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); +- return 0; +-} ++ if (op_ret < 0) ++ goto unwind; + +-int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, struct iatt *prebuf, +- struct iatt *postbuf, dict_t *xdata) { +- inode_t *inode = NULL; +- int64_t delta_blocks = 0; +- shard_local_t *local = NULL; ++ if (IA_ISDIR(buf->ia_type)) ++ goto unwind; + +- local = frame->local; ++ /* Also, if the file is sharded, get the file size and block cnt xattr, ++ * and store them in the stbuf appropriately. ++ */ + +- SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(buf, xdata); ++ ++ /* If this was a fresh lookup, there are two possibilities: ++ * 1) If the file is sharded (indicated by the presence of block size ++ * xattr), store this block size, along with rdev and mode in its ++ * inode ctx. ++ * 2) If the file is not sharded, store size along with rdev and mode ++ * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is ++ * already initialised to all zeroes, nothing more needs to be done. ++ */ + +- inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last" +- " shard failed : %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } +- +- local->postbuf.ia_size = local->offset; +- /* Let the delta be negative. 
We want xattrop to do subtraction */ +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, +- postbuf->ia_blocks - prebuf->ia_blocks); +- GF_ASSERT(delta_blocks <= 0); +- local->postbuf.ia_blocks += delta_blocks; +- local->hole_size = 0; +- +- shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, +- inode_t *inode) { +- size_t last_shard_size_after = 0; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- /* A NULL inode could be due to the fact that the last shard which +- * needs to be truncated does not exist due to it lying in a hole +- * region. So the only thing left to do in that case would be an +- * update to file size xattr. +- */ +- if (!inode) { +- gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent in backend:%" PRIu64 +- " of gfid: %s. Directly proceeding to update file size", +- local->first_block, uuid_utoa(local->loc.inode->gfid)); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } ++ (void)shard_inode_ctx_update(inode, this, xdata, buf); + +- SHARD_SET_ROOT_FS_ID(frame, local); ++ LOCK(&priv->lock); ++ { ++ if (priv->first_lookup_done == _gf_false) { ++ priv->first_lookup_done = _gf_true; ++ i_start_cleanup = _gf_true; ++ } ++ } ++ UNLOCK(&priv->lock); + +- loc.inode = inode_ref(inode); +- gf_uuid_copy(loc.gfid, inode->gfid); ++ if (!i_start_cleanup) ++ goto unwind; + +- last_shard_size_after = (local->offset % local->block_size); ++ ret = shard_start_background_deletion(this); ++ if (ret < 0) { ++ LOCK(&priv->lock); ++ { ++ priv->first_lookup_done = _gf_false; ++ } ++ UNLOCK(&priv->lock); ++ } + +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, +- NULL); +- loc_wipe(&loc); +- return 0; ++unwind: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, ++ postparent); ++ return 0; + } + +-void shard_unlink_block_inode(shard_local_t *local, int shard_block_num); ++int ++shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) ++{ ++ int ret = -1; ++ int32_t op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +-int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) { +- int ret = 0; +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uint64_t block_count = 0; +- shard_local_t *local = NULL; ++ this->itable = loc->inode->table; ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && ++ (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { ++ SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); ++ } + +- local = frame->local; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); +- if (!ret) { +- GF_ATOMIC_SUB(local->delta_blocks, block_count); +- } else { +- /* dict_get failed possibly due to a heterogeneous 
cluster? */ +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get key %s from dict during truncate of gfid %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- } +- return 0; +-} +- +-int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) { +- int i = 1; +- int ret = -1; +- int call_count = 0; +- uint32_t cur_block = 0; +- uint32_t last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xdata_req = NULL; +- +- local = frame->local; +- priv = this->private; +- +- cur_block = local->first_block + 1; +- last_block = local->last_block; +- +- /* Determine call count */ +- for (i = 1; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- call_count++; +- } +- +- if (!call_count) { +- /* Call count = 0 implies that all of the shards that need to be +- * unlinked do not exist. So shard xlator would now proceed to +- * do the final truncate + size updates. +- */ +- gf_msg_debug(this->name, 0, "Shards to be unlinked as part of " +- "truncate absent in backend: %s. Directly " +- "proceeding to update file size", +- uuid_utoa(inode->gfid)); +- local->postbuf.ia_size = local->offset; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->hole_size = 0; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } ++ frame->local = local; + +- local->call_count = call_count; +- i = 1; +- xdata_req = dict_new(); +- if (!xdata_req) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } +- ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict during truncate of %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- dict_unref(xdata_req); +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } ++ loc_copy(&local->loc, loc); + +- SHARD_SET_ROOT_FS_ID(frame, local); +- while (cur_block <= last_block) { +- if (!local->inode_list[i]) { +- cur_block++; +- i++; +- continue; +- } +- if (wind_failed) { +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, +- NULL, NULL, NULL); +- goto next; +- } ++ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s. 
Base file gfid = %s", +- bname, uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, +- NULL, NULL, NULL); +- goto next; ++ if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict" ++ " value: key:%s for path %s", ++ GF_XATTR_SHARD_BLOCK_SIZE, loc->path); ++ goto err; ++ } + } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[i]); + +- STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc, +- 0, xdata_req); +- loc_wipe(&loc); +- next: +- i++; +- cur_block++; +- if (!--call_count) +- break; +- } +- dict_unref(xdata_req); +- return 0; +-} +- +-int shard_truncate_do(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, ++ 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s for path %s.", ++ GF_XATTR_SHARD_FILE_SIZE, loc->path); ++ goto err; ++ } ++ } + +- local = frame->local; ++ if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) ++ dict_del(xattr_req, GF_CONTENT_KEY); + +- if (local->num_blocks == 1) { +- /* This means that there are no shards to be unlinked. +- * The fop boils down to truncating the last shard, updating +- * the size and unwinding. +- */ +- shard_truncate_last_shard(frame, this, local->inode_list[0]); ++ STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); + return 0; +- } else { +- shard_truncate_htol(frame, this, local->loc.inode); +- } +- return 0; + } + +-int shard_post_lookup_shards_truncate_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; ++int ++shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ int ret = -1; ++ int32_t mask = SHARD_INODE_WRITE_MASK; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t ctx = { ++ 0, ++ }; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- shard_truncate_do(frame, this); +- return 0; +-} +- +-void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, +- struct iatt *buf) { +- int list_index = 0; +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *linked_inode = NULL; +- xlator_t *this = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- inode_t *base_inode = NULL; +- +- this = THIS; +- priv = this->private; +- if (local->loc.inode) { +- gf_uuid_copy(gfid, local->loc.inode->gfid); +- base_inode = local->loc.inode; +- } else if (local->resolver_base_inode) { +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- base_inode = local->resolver_base_inode; +- } else { +- gf_uuid_copy(gfid, local->base_gfid); +- 
} +- +- shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); +- +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); +- inode_lookup(linked_inode); +- list_index = block_num - local->first_block; +- local->inode_list[list_index] = linked_inode; +- +- LOCK(&priv->lock); +- { +- fsync_inode = __shard_update_shards_inode_list(linked_inode, this, +- base_inode, block_num, gfid); +- } +- UNLOCK(&priv->lock); +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +-} +- +-int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) { +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uuid_t gfid = { +- 0, +- }; +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (local->resolver_base_inode) +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- if (op_ret < 0) { +- /* Ignore absence of shards in the backend in truncate fop. */ +- switch (local->fop) { +- case GF_FOP_TRUNCATE: +- case GF_FOP_FTRUNCATE: +- case GF_FOP_RENAME: +- case GF_FOP_UNLINK: +- if (op_errno == ENOENT) +- goto done; +- break; +- case GF_FOP_WRITE: +- case GF_FOP_READ: +- case GF_FOP_ZEROFILL: +- case GF_FOP_DISCARD: +- case GF_FOP_FALLOCATE: +- if ((!local->first_lookup_done) && (op_errno == ENOENT)) { +- LOCK(&frame->lock); +- { local->create_count++; } +- UNLOCK(&frame->lock); +- goto done; +- } +- break; +- default: +- break; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_BASE_FILE_LOOKUP_FAILED, ++ "Lookup on base file" ++ " failed : %s", ++ loc_gfid_utoa(&(local->loc))); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; + } + +- /* else */ +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED, +- "Lookup on shard %d " +- "failed. 
Base file gfid = %s", +- shard_block_num, uuid_utoa(gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- +- shard_link_block_inode(local, shard_block_num, inode, buf); +- +-done: +- if (local->lookup_shards_barriered) { +- syncbarrier_wake(&local->barrier); +- return 0; +- } else { +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- if (!local->first_lookup_done) +- local->first_lookup_done = _gf_true; +- local->pls_fop_handler(frame, this); ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; + } +- } +- return 0; +-} + +-dict_t *shard_create_gfid_dict(dict_t *dict) { +- int ret = 0; +- dict_t *new = NULL; +- unsigned char *gfid = NULL; ++ if (shard_inode_ctx_get_all(inode, this, &ctx)) ++ mask = SHARD_ALL_MASK; + +- new = dict_copy_with_ref(dict, NULL); +- if (!new) +- return NULL; ++ ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, ++ (mask | SHARD_MASK_REFRESH_RESET)); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, ++ "Failed to set inode" ++ " write params into inode ctx for %s", ++ uuid_utoa(buf->ia_gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto unwind; ++ } + +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); +- if (!gfid) { +- ret = -1; +- goto out; +- } ++unwind: ++ local->handler(frame, this); ++ return 0; ++} + +- gf_uuid_generate(gfid); ++int ++shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ shard_post_fop_handler_t handler) ++{ ++ int ret = -1; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; ++ gf_boolean_t need_refresh = _gf_false; + +- ret = dict_set_gfuuid(new, "gfid-req", gfid, false); ++ local = frame->local; ++ local->handler = handler; + +-out: +- if (ret) { +- dict_unref(new); +- new = NULL; +- GF_FREE(gfid); +- } +- +- return new; +-} +- +-int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, +- inode_t *inode, +- shard_post_lookup_shards_fop_handler_t handler) { +- int i = 0; +- int ret = 0; +- int count = 0; +- int call_count = 0; +- int32_t shard_idx_iter = 0; +- int last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- gf_boolean_t wind_failed = _gf_false; +- dict_t *xattr_req = NULL; +- +- priv = this->private; +- local = frame->local; +- count = call_count = local->call_count; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- local->pls_fop_handler = handler; +- if (local->lookup_shards_barriered) +- local->barrier.waitfor = local->call_count; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- i++; +- shard_idx_iter++; +- continue; +- } +- +- if (wind_failed) { +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 
0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL); +- goto next; ++ ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, ++ &need_refresh); ++ /* By this time, inode ctx should have been created either in create, ++ * mknod, readdirp or lookup. If not it is a bug! ++ */ ++ if ((ret == 0) && (need_refresh == _gf_false)) { ++ gf_msg_debug(this->name, 0, ++ "Skipping lookup on base file: %s" ++ "Serving prebuf off the inode ctx cache", ++ uuid_utoa(loc->gfid)); ++ goto out; + } + +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); ++ xattr_req = dict_new(); + if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- loc_wipe(&loc); +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL); +- goto next; +- } +- +- STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- next: +- shard_idx_iter++; +- i++; +- +- if (!--call_count) +- break; +- } +- if (local->lookup_shards_barriered) { +- syncbarrier_wait(&local->barrier, count); +- local->pls_fop_handler(frame, this); +- } +- return 0; +-} +- +-int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- if (local->op_errno == ENOENT) { +- /* If lookup on /.shard fails with ENOENT, it means that +- * the file was 0-byte in size but truncated sometime in +- * the past to a higher size which is reflected in the +- * size xattr, and now being truncated to a lower size. +- * In this case, the only thing that needs to be done is +- * to update the size xattr of the file and unwind. +- */ +- local->first_block = local->last_block = 0; +- local->num_blocks = 1; +- local->call_count = 0; +- local->op_ret = 0; +- local->postbuf.ia_size = local->offset; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } else { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; + } +- } + +- if (!local->call_count) +- shard_truncate_do(frame, this); +- else +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_truncate_handler); +- +- return 0; +-} +- +-int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { +- int ret = 0; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- /* First participant block here is the lowest numbered block that would +- * hold the last byte of the file post successful truncation. +- * Last participant block is the block that contains the last byte in +- * the current state of the file. +- * If (first block == last_block): +- * then that means that the file only needs truncation of the +- * first (or last since both are same) block. 
+- * Else +- * if (new_size % block_size == 0) +- * then that means there is no truncate to be done with +- * only shards from first_block + 1 through the last +- * block needing to be unlinked. +- * else +- * both truncate of the first block and unlink of the +- * remaining shards until end of file is required. +- */ +- local->first_block = +- (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1, +- local->block_size); +- local->last_block = +- get_highest_block(0, local->prebuf.ia_size, local->block_size); +- +- local->num_blocks = local->last_block - local->first_block + 1; +- GF_ASSERT(local->num_blocks > 0); +- local->resolver_base_inode = +- (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; +- +- if ((local->first_block == 0) && (local->num_blocks == 1)) { +- if (local->fop == GF_FOP_TRUNCATE) +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset, +- local->xattr_req); +- else +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset, +- local->xattr_req); +- return 0; +- } ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); + +- local->inode_list = +- GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = +- shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_truncate_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ dict_unref(xattr_req); ++ return 0; + +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++out: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; + } + +-int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; +- struct iatt tmp_stbuf = { +- 0, +- }; +- +- local = frame->local; ++int ++shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ local = frame->local; + +- local->postbuf = tmp_stbuf = local->prebuf; ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +- if (local->prebuf.ia_size == local->offset) { +- /* If the file size is same as requested size, unwind the call +- * immediately. +- */ +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf, +- NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- } else if (local->offset > local->prebuf.ia_size) { +- /* If the truncate is from a lower to a higher size, set the +- * new size xattr and unwind. 
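 * [Editor's note, added for clarity; not part of the patch] In other
 * words, expanding truncates never allocate shards: the gap is
 * recorded via hole_size and the size xattr, and the missing ranges
 * are materialized only if later writes land on them.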
+- */ +- local->hole_size = local->offset - local->prebuf.ia_size; +- local->delta_size = 0; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->postbuf.ia_size = local->offset; +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- } else { +- /* ... else +- * i. unlink all shards that need to be unlinked. +- * ii. truncate the last of the shards. +- * iii. update the new size using setxattr. +- * and unwind the fop. +- */ +- local->hole_size = 0; +- local->delta_size = (local->offset - local->prebuf.ia_size); +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_truncate_begin(frame, this); +- } +- return 0; ++ SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; + } + +-/* TO-DO: +- * Fix updates to size and block count with racing write(s) and truncate(s). +- */ ++int ++shard_post_stat_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +-int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, +- off_t offset, dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++ local = frame->local; + +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); ++ SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); + return 0; +- } +- +- if (!this->itable) +- this->itable = loc->inode->table; ++} + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- loc_copy(&local->loc, loc); +- local->offset = offset; +- local->block_size = block_size; +- local->fop = GF_FOP_TRUNCATE; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->resolver_base_inode = loc->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- return 0; ++int ++shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ dict_t *xdata) ++{ ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; + +-err: +- shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = fd->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->offset = offset; +- local->block_size = block_size; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FTRUNCATE; +- +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); +- local->resolver_base_inode = fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); +- return 0; +-} ++ local = frame->local; + +-int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) { +- int ret = -1; +- shard_local_t *local = NULL; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, ++ "stat failed: %s", ++ local->fd ? 
uuid_utoa(local->fd->inode->gfid) ++ : uuid_utoa((local->loc.inode)->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- local = frame->local; ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ local->xattr_rsp = dict_ref(xdata); + +- if (op_ret == -1) +- goto unwind; ++ if (local->loc.inode) ++ inode = local->loc.inode; ++ else ++ inode = local->fd->inode; + +- ret = +- shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); ++ shard_inode_ctx_invalidate(inode, this, &local->prebuf); + + unwind: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); +- +- return 0; ++ local->handler(frame, this); ++ return 0; + } + +-int shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, +- dev_t rdev, mode_t umask, dict_t *xdata) { +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; ++int ++shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); ++ return 0; ++ } + +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- frame->local = local; +- local->block_size = priv->block_size; +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); +- } ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); ++ return 0; ++ } + +- STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); +- return 0; +-} ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +-int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) { +- shard_local_t *local = NULL; ++ frame->local = local; + +- local = frame->local; +- if (op_ret < 0) +- goto err; ++ local->handler = shard_post_stat_handler; ++ loc_copy(&local->loc, loc); ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES); +- buf->ia_size = local->prebuf.ia_size; +- buf->ia_blocks = local->prebuf.ia_blocks; ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); + +- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); +- return 0; ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); ++ return 0; + } + +-int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL, +- NULL, NULL, NULL); +- return 0; +- } ++int ++shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, +- local->xattr_req); +- return 0; +-} ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, +- loc_t *newloc, dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +- if (!block_size) { +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, +- oldloc, newloc, xdata); +- return 0; +- } ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- if (!this->itable) +- this->itable = oldloc->inode->table; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ frame->local = local; + +- frame->local = local; ++ local->handler = shard_post_fstat_handler; ++ local->fd = fd_ref(fd); ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_link_handler); +- return 0; ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); ++ return 0; + } + +-int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); +- +-int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; ++int ++shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->resolver_base_inode) +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); + else +- gf_uuid_copy(gfid, local->base_gfid); +- +- if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { +- gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, +- "failed to delete shards of %s", uuid_utoa(gfid)); ++ SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); + return 0; +- } +- local->op_ret = 0; +- local->op_errno = 0; +- +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- return 0; + } + +-int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; +- +- local = frame->local; +- local->lookup_shards_barriered = _gf_true; +- +- if (!local->call_count) +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- else +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_unlink_handler); +- return 0; +-} +- +-void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) { +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *base_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- int unref_base_inode = 0; +- int unref_shard_inode = 0; +- +- this = THIS; +- priv = this->private; +- +- inode = local->inode_list[shard_block_num - local->first_block]; +- shard_inode_ctx_get(inode, this, &ctx); +- base_inode = ctx->base_inode; +- if (base_inode) +- gf_uuid_copy(gfid, base_inode->gfid); +- else +- gf_uuid_copy(gfid, ctx->base_gfid); +- shard_make_block_bname(shard_block_num, gfid, block_bname, +- sizeof(block_bname)); +- +- LOCK(&priv->lock); +- if (base_inode) +- LOCK(&base_inode->lock); +- LOCK(&inode->lock); +- { +- __shard_inode_ctx_get(inode, this, &ctx); +- if (!list_empty(&ctx->ilist)) { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- unref_base_inode++; +- unref_shard_inode++; +- GF_ASSERT(priv->inode_count >= 0); +- } +- if (ctx->fsync_needed) { +- unref_base_inode++; +- unref_shard_inode++; +- list_del_init(&ctx->to_fsync_list); +- if (base_inode) { +- __shard_inode_ctx_get(base_inode, this, &base_ictx); +- base_ictx->fsync_count--; +- } +- } +- } +- UNLOCK(&inode->lock); +- if (base_inode) +- 
UNLOCK(&base_inode->lock); ++int ++shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) ++{ ++ inode_t *inode = NULL; ++ int64_t delta_blocks = 0; ++ shard_local_t *local = NULL; + +- inode_unlink(inode, priv->dot_shard_inode, block_bname); +- inode_ref_reduce_by_n(inode, unref_shard_inode); +- inode_forget(inode, 0); ++ local = frame->local; + +- if (base_inode && unref_base_inode) +- inode_ref_reduce_by_n(base_inode, unref_base_inode); +- UNLOCK(&priv->lock); +-} ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + +-int shard_rename_cbk(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; ++ inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode ++ : local->fd->inode; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, ++ "truncate on last" ++ " shard failed : %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } + +- local = frame->local; ++ local->postbuf.ia_size = local->offset; ++ /* Let the delta be negative. We want xattrop to do subtraction */ ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, ++ postbuf->ia_blocks - prebuf->ia_blocks); ++ GF_ASSERT(delta_blocks <= 0); ++ local->postbuf.ia_blocks += delta_blocks; ++ local->hole_size = 0; + +- SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->preoldparent, +- &local->postoldparent, &local->prenewparent, +- &local->postnewparent, local->xattr_rsp); +- return 0; ++ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } + +-int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = frame->local; ++int ++shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) ++{ ++ size_t last_shard_size_after = 0; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; + +- SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, +- &local->preoldparent, &local->postoldparent, +- local->xattr_rsp); +- return 0; +-} ++ local = frame->local; + +-int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) { +- int shard_block_num = (long)cookie; +- shard_local_t *local = NULL; ++ /* A NULL inode could be due to the fact that the last shard which ++ * needs to be truncated does not exist due to it lying in a hole ++ * region. So the only thing left to do in that case would be an ++ * update to file size xattr. ++ */ ++ if (!inode) { ++ gf_msg_debug(this->name, 0, ++ "Last shard to be truncated absent in backend:%" PRIu64 ++ " of gfid: %s. 
Directly proceeding to update file size", ++ local->first_block, uuid_utoa(local->loc.inode->gfid)); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- local = frame->local; ++ SHARD_SET_ROOT_FS_ID(frame, local); + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, inode->gfid); + +- shard_unlink_block_inode(local, shard_block_num); +-done: +- syncbarrier_wake(&local->barrier); +- return 0; +-} +- +-int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, +- inode_t *inode) { +- int i = 0; +- int ret = -1; +- int count = 0; +- uint32_t cur_block = 0; +- uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ +- char *bname = NULL; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- count++; +- } +- +- if (!count) { +- /* callcount = 0 implies that all of the shards that need to be +- * unlinked are non-existent (in other words the file is full of +- * holes). +- */ +- gf_msg_debug(this->name, 0, "All shards that need to be " +- "unlinked are non-existent: %s", +- uuid_utoa(gfid)); ++ last_shard_size_after = (local->offset % local->block_size); ++ ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, ++ NULL); ++ loc_wipe(&loc); + return 0; +- } ++} + +- SHARD_SET_ROOT_FS_ID(frame, local); +- local->barrier.waitfor = count; +- cur_block = cur_block_idx + local->first_block; ++void ++shard_unlink_block_inode(shard_local_t *local, int shard_block_num); + +- while (cur_block_idx < local->num_blocks) { +- if (!local->inode_list[cur_block_idx]) +- goto next; ++int ++shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) ++{ ++ int ret = 0; ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uint64_t block_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; + +- if (wind_failed) { +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } ++ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); ++ if (!ret) { ++ GF_ATOMIC_SUB(local->delta_blocks, block_count); ++ } else { ++ /* dict_get failed possibly due to a heterogeneous cluster? 
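         * [Editor's note, added; not part of the patch] That is, a
         * brick that does not honor the GF_GET_FILE_BLOCK_COUNT key set
         * in xdata_req by shard_truncate_htol(); when the key is absent
         * from the reply, delta_blocks simply loses that shard's
         * contribution to the block-count update.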
*/ ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get key %s from dict during truncate of gfid %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); + } + +- shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); + } ++ return 0; ++} + +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[cur_block_idx]); ++int ++shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) ++{ ++ int i = 1; ++ int ret = -1; ++ int call_count = 0; ++ uint32_t cur_block = 0; ++ uint32_t last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xdata_req = NULL; + +- STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, +- local->xattr_req); +- loc_wipe(&loc); +- next: +- cur_block++; +- cur_block_idx++; +- } +- syncbarrier_wait(&local->barrier, count); +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- return 0; +-} +- +-int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, +- int now, int first_block, +- gf_dirent_t *entry) { +- int i = 0; +- int ret = 0; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; +- +- local = cleanup_frame->local; +- +- local->inode_list = GF_CALLOC(now, sizeof(inode_t *), gf_shard_mt_inode_list); +- if (!local->inode_list) +- return -ENOMEM; +- +- local->first_block = first_block; +- local->last_block = first_block + now - 1; +- local->num_blocks = now; +- gf_uuid_parse(entry->d_name, gfid); +- gf_uuid_copy(local->base_gfid, gfid); +- local->resolver_base_inode = inode_find(this->itable, gfid); +- local->call_count = 0; +- ret = syncbarrier_init(&local->barrier); +- if (ret) { +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- return -errno; +- } +- shard_common_resolve_shards(cleanup_frame, this, +- shard_post_resolve_unlink_handler); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- if (local->op_ret) +- ret = -local->op_errno; +- syncbarrier_destroy(&local->barrier); +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- STACK_RESET(cleanup_frame->root); +- return ret; +-} +- +-int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) { +- int ret = 0; +- int shard_count = 0; 
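  /* [Editor's note, added; not part of the patch] This function reads
   * the block-size and file-size xattrs off the marker entry, derives
   * the shard count, and deletes the shards in batches of
   * local->deletion_rate. The count computed below excludes block 0
   * (the base file): size / block_size - 1, plus one more when the
   * size is not block-aligned; e.g. an 18 MiB file with 4 MiB shards
   * yields shards .1 through .4.
   */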
+- int first_block = 0; +- int now = 0; +- uint64_t size = 0; +- uint64_t block_size = 0; +- uint64_t size_array[4] = { +- 0, +- }; +- void *bsize = NULL; +- void *size_attr = NULL; +- dict_t *xattr_rsp = NULL; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = cleanup_frame->local; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset dict"); +- ret = -ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- ret = -ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, +- &xattr_rsp); +- if (ret) +- goto err; +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- goto err; +- } +- block_size = ntoh64(*((uint64_t *)bsize)); +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- goto err; +- } +- +- memcpy(size_array, size_attr, sizeof(size_array)); +- size = ntoh64(size_array[0]); +- +- shard_count = (size / block_size) - 1; +- if (shard_count < 0) { +- gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond " +- "its shard-block-size. Nothing to delete. " +- "Returning", +- entry->d_name); +- /* File size < shard-block-size, so nothing to delete */ +- ret = 0; +- goto delete_marker; +- } +- if ((size % block_size) > 0) +- shard_count++; +- +- if (shard_count == 0) { +- gf_msg_debug(this->name, 0, "Size of %s is exactly equal to " +- "its shard-block-size. Nothing to delete. " +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- gf_msg_debug(this->name, 0, +- "base file = %s, " +- "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", " +- "shard_count=%d", +- entry->d_name, block_size, size, shard_count); +- +- /* Perform a gfid-based lookup to see if gfid corresponding to marker +- * file's base name exists. +- */ +- loc_wipe(&loc); +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- gf_uuid_parse(entry->d_name, loc.gfid); +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (!ret) { +- gf_msg_debug(this->name, 0, "Base shard corresponding to gfid " +- "%s is present. Skipping shard deletion. 
" +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } ++ local = frame->local; ++ priv = this->private; + +- first_block = 1; ++ cur_block = local->first_block + 1; ++ last_block = local->last_block; + +- while (shard_count) { +- if (shard_count < local->deletion_rate) { +- now = shard_count; +- shard_count = 0; +- } else { +- now = local->deletion_rate; +- shard_count -= local->deletion_rate; ++ /* Determine call count */ ++ for (i = 1; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ call_count++; + } + +- gf_msg_debug(this->name, 0, "deleting %d shards starting from " +- "block %d of gfid %s", +- now, first_block, entry->d_name); +- ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block, +- entry); +- if (ret) +- goto err; +- first_block += now; +- } ++ if (!call_count) { ++ /* Call count = 0 implies that all of the shards that need to be ++ * unlinked do not exist. So shard xlator would now proceed to ++ * do the final truncate + size updates. ++ */ ++ gf_msg_debug(this->name, 0, ++ "Shards to be unlinked as part of " ++ "truncate absent in backend: %s. Directly " ++ "proceeding to update file size", ++ uuid_utoa(inode->gfid)); ++ local->postbuf.ia_size = local->offset; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->hole_size = 0; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +-delete_marker: +- loc_wipe(&loc); +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to delete %s " +- "from /%s", +- entry->d_name, GF_SHARD_REMOVE_ME_DIR); +-err: +- if (xattr_rsp) +- dict_unref(xattr_rsp); +- loc_wipe(&loc); +- return ret; +-} +- +-int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) { +- int ret = -1; +- loc_t loc = { +- 0, +- }; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- loc.inode = inode_ref(priv->dot_shard_rm_inode); +- +- ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); +- if (ret < 0) { +- if (ret == -EAGAIN) { +- ret = 0; +- } +- goto out; +- } +- { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } +- syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); +-out: +- loc_wipe(&loc); +- return ret; +-} +- +-int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) { +- int ret = 0; +- char *bname = NULL; +- loc_t *loc = NULL; +- shard_priv_t *priv = NULL; +- uuid_t gfid = { +- 0, +- }; +- struct iatt stbuf = { +- 0, +- }; +- +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- loc = 
&local->dot_shard_loc; +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- bname = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- loc = &local->dot_shard_rm_loc; +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- bname = GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- +- loc->inode = inode_find(this->itable, gfid); +- if (!loc->inode) { +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; +- ret = dict_reset(local->xattr_req); ++ local->call_count = call_count; ++ i = 1; ++ xdata_req = dict_new(); ++ if (!xdata_req) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } ++ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); + if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset " +- "dict"); +- ret = -ENOMEM; +- goto err; +- } +- ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); +- ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req, +- NULL); +- if (ret < 0) { +- if (ret != -ENOENT) +- gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED, +- "Lookup on %s failed, exiting", bname); +- goto err; +- } else { +- shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict during truncate of %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ dict_unref(xdata_req); ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } +- } +- ret = 0; +-err: +- return ret; +-} +- +-int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, +- gf_dirent_t *entry) { +- int ret = 0; +- loc_t loc = { +- 0, +- }; +- +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- loc.parent = inode_ref(local->fd->inode); +- +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (ret < 0) { +- goto err; +- } +- entry->inode = inode_ref(loc.inode); +- ret = 0; +-err: +- loc_wipe(&loc); +- return ret; +-} +- +-int shard_delete_shards(void *opaque) { +- int ret = 0; +- off_t offset = 0; +- loc_t loc = { +- 0, +- }; +- inode_t *link_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- gf_dirent_t entries; +- gf_dirent_t *entry = NULL; +- call_frame_t *cleanup_frame = NULL; +- gf_boolean_t done = _gf_false; +- +- this = THIS; +- priv = this->private; +- INIT_LIST_HEAD(&entries.list); +- +- cleanup_frame = opaque; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create local to " +- "delete shards"); +- ret = -ENOMEM; +- goto err; +- } +- cleanup_frame->local = local; +- local->fop = GF_FOP_UNLINK; +- +- local->xattr_req = dict_new(); +- if (!local->xattr_req) { +- ret = -ENOMEM; +- goto err; +- } +- local->deletion_rate = priv->deletion_rate; +- +- ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, ".shard absent. Nothing to" +- " delete. 
Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } + +- ret = shard_resolve_internal_dir(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, ".remove_me absent. " +- "Nothing to delete. Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); +- if (!local->fd) { +- ret = -ENOMEM; +- goto err; +- } +- +- for (;;) { +- offset = 0; ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ while (cur_block <= last_block) { ++ if (!local->inode_list[i]) { ++ cur_block++; ++ i++; ++ continue; ++ } ++ if (wind_failed) { ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s. Base file gfid = %s", ++ bname, uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[i]); ++ ++ STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, ++ (void *)(long)cur_block, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); ++ loc_wipe(&loc); ++ next: ++ i++; ++ cur_block++; ++ if (!--call_count) ++ break; ++ } ++ dict_unref(xdata_req); ++ return 0; ++} ++ ++int ++shard_truncate_do(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->num_blocks == 1) { ++ /* This means that there are no shards to be unlinked. ++ * The fop boils down to truncating the last shard, updating ++ * the size and unwinding. 
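         * [Editor's note, added; not part of the patch] inode_list[0]
         * below is the sole participant shard resolved earlier; it may
         * be NULL when that shard lies in a hole, a case
         * shard_truncate_last_shard() handles by skipping straight to
         * the file-size update.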
++ */ ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); ++ return 0; ++ } else { ++ shard_truncate_htol(frame, this, local->loc.inode); ++ } ++ return 0; ++} ++ ++int ++shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ shard_truncate_do(frame, this); ++ return 0; ++} ++ ++void ++shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, ++ struct iatt *buf) ++{ ++ int list_index = 0; ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *linked_inode = NULL; ++ xlator_t *this = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ inode_t *base_inode = NULL; ++ ++ this = THIS; ++ priv = this->private; ++ if (local->loc.inode) { ++ gf_uuid_copy(gfid, local->loc.inode->gfid); ++ base_inode = local->loc.inode; ++ } else if (local->resolver_base_inode) { ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ base_inode = local->resolver_base_inode; ++ } else { ++ gf_uuid_copy(gfid, local->base_gfid); ++ } ++ ++ shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); ++ ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); ++ inode_lookup(linked_inode); ++ list_index = block_num - local->first_block; ++ local->inode_list[list_index] = linked_inode; ++ + LOCK(&priv->lock); + { +- if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { +- priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; +- } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- done = _gf_true; +- } ++ fsync_inode = __shard_update_shards_inode_list( ++ linked_inode, this, base_inode, block_num, gfid); + } + UNLOCK(&priv->lock); +- if (done) +- break; +- while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, +- &entries, local->xattr_req, NULL))) { +- if (ret > 0) +- ret = 0; +- list_for_each_entry(entry, &entries.list, list) { +- offset = entry->d_off; ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++} ++ ++int ++shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ inode_t *inode, struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uuid_t gfid = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ if (local->resolver_base_inode) ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if (op_ret < 0) { ++ /* Ignore absence of shards in the backend in truncate fop. 
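         * [Editor's note, added; not part of the patch] Per the switch
         * below: for truncate, ftruncate, rename and unlink, ENOENT
         * just means the shard lies in a hole and can be skipped; on
         * the first lookup pass of write, read, zerofill, discard and
         * fallocate, the missing shard is instead counted in
         * create_count so that it can be created afterwards.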
*/ ++ switch (local->fop) { ++ case GF_FOP_TRUNCATE: ++ case GF_FOP_FTRUNCATE: ++ case GF_FOP_RENAME: ++ case GF_FOP_UNLINK: ++ if (op_errno == ENOENT) ++ goto done; ++ break; ++ case GF_FOP_WRITE: ++ case GF_FOP_READ: ++ case GF_FOP_ZEROFILL: ++ case GF_FOP_DISCARD: ++ case GF_FOP_FALLOCATE: ++ if ((!local->first_lookup_done) && (op_errno == ENOENT)) { ++ LOCK(&frame->lock); ++ { ++ local->create_count++; ++ } ++ UNLOCK(&frame->lock); ++ goto done; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ /* else */ ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_LOOKUP_SHARD_FAILED, ++ "Lookup on shard %d " ++ "failed. Base file gfid = %s", ++ shard_block_num, uuid_utoa(gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } ++ ++ shard_link_block_inode(local, shard_block_num, inode, buf); ++ ++done: ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wake(&local->barrier); ++ return 0; ++ } else { ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ if (!local->first_lookup_done) ++ local->first_lookup_done = _gf_true; ++ local->pls_fop_handler(frame, this); ++ } ++ } ++ return 0; ++} ++ ++dict_t * ++shard_create_gfid_dict(dict_t *dict) ++{ ++ int ret = 0; ++ dict_t *new = NULL; ++ unsigned char *gfid = NULL; ++ ++ new = dict_copy_with_ref(dict, NULL); ++ if (!new) ++ return NULL; ++ ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); ++ if (!gfid) { ++ ret = -1; ++ goto out; ++ } ++ ++ gf_uuid_generate(gfid); ++ ++ ret = dict_set_gfuuid(new, "gfid-req", gfid, false); + +- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) +- continue; ++out: ++ if (ret) { ++ dict_unref(new); ++ new = NULL; ++ GF_FREE(gfid); ++ } ++ ++ return new; ++} ++ ++int ++shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, ++ shard_post_lookup_shards_fop_handler_t handler) ++{ ++ int i = 0; ++ int ret = 0; ++ int count = 0; ++ int call_count = 0; ++ int32_t shard_idx_iter = 0; ++ int last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ dict_t *xattr_req = NULL; + +- if (!entry->inode) { +- ret = shard_lookup_marker_entry(this, local, entry); +- if (ret < 0) ++ priv = this->private; ++ local = frame->local; ++ count = call_count = local->call_count; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ local->pls_fop_handler = handler; ++ if (local->lookup_shards_barriered) ++ local->barrier.waitfor = local->call_count; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ i++; ++ shard_idx_iter++; + continue; + } +- link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name, +- &entry->d_stat); + +- gf_msg_debug(this->name, 0, "Initiating deletion of " +- "shards of gfid %s", +- entry->d_name); +- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, +- link_inode); +- inode_unlink(link_inode, local->fd->inode, entry->d_name); +- inode_unref(link_inode); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to clean up shards of gfid %s", entry->d_name); +- continue; ++ if (wind_failed) { ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, ++ this, -1, ENOMEM, NULL, NULL, NULL, ++ NULL); 
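            /* [Editor's note, added; not part of the patch] Once
             * wind_failed is set by an earlier allocation failure, each
             * remaining shard is completed by invoking the lookup
             * callback directly with ENOMEM instead of winding a real
             * lookup to the child xlator, so the per-shard call
             * accounting still drains to zero. */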
++ goto next; + } +- gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED, +- "Deleted " +- "shards of gfid=%s from backend", +- entry->d_name); +- } +- gf_dirent_free(&entries); +- if (ret) +- break; +- } +- } +- ret = 0; +- loc_wipe(&loc); +- return ret; ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, ++ this, -1, ENOMEM, NULL, NULL, NULL, ++ NULL); ++ goto next; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, ++ this, -1, ENOMEM, NULL, NULL, NULL, ++ NULL); ++ goto next; ++ } ++ ++ STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ next: ++ shard_idx_iter++; ++ i++; ++ ++ if (!--call_count) ++ break; ++ } ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wait(&local->barrier, count); ++ local->pls_fop_handler(frame, this); ++ } ++ return 0; ++} ++ ++int ++shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ if (local->op_errno == ENOENT) { ++ /* If lookup on /.shard fails with ENOENT, it means that ++ * the file was 0-byte in size but truncated sometime in ++ * the past to a higher size which is reflected in the ++ * size xattr, and now being truncated to a lower size. ++ * In this case, the only thing that needs to be done is ++ * to update the size xattr of the file and unwind. ++ */ ++ local->first_block = local->last_block = 0; ++ local->num_blocks = 1; ++ local->call_count = 0; ++ local->op_ret = 0; ++ local->postbuf.ia_size = local->offset; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } else { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ } ++ ++ if (!local->call_count) ++ shard_truncate_do(frame, this); ++ else ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_truncate_handler); ++ ++ return 0; ++} ++ ++int ++shard_truncate_begin(call_frame_t *frame, xlator_t *this) ++{ ++ int ret = 0; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ /* First participant block here is the lowest numbered block that would ++ * hold the last byte of the file post successful truncation. ++ * Last participant block is the block that contains the last byte in ++ * the current state of the file. 
++ * If (first block == last_block): ++ * then that means that the file only needs truncation of the ++ * first (or last since both are same) block. ++ * Else ++ * if (new_size % block_size == 0) ++ * then that means there is no truncate to be done with ++ * only shards from first_block + 1 through the last ++ * block needing to be unlinked. ++ * else ++ * both truncate of the first block and unlink of the ++ * remaining shards until end of file is required. ++ */ ++ local->first_block = (local->offset == 0) ++ ? 0 ++ : get_lowest_block(local->offset - 1, ++ local->block_size); ++ local->last_block = get_highest_block(0, local->prebuf.ia_size, ++ local->block_size); ++ ++ local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); ++ local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) ++ ? local->loc.inode ++ : local->fd->inode; ++ ++ if ((local->first_block == 0) && (local->num_blocks == 1)) { ++ if (local->fop == GF_FOP_TRUNCATE) ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &local->loc, ++ local->offset, local->xattr_req); ++ else ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, local->fd, ++ local->offset, local->xattr_req); ++ return 0; ++ } ++ ++ local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), ++ gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; ++ ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = shard_init_internal_dir_loc(this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, ++ shard_post_resolve_truncate_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_post_resolve_truncate_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; + + err: +- LOCK(&priv->lock); +- { priv->bg_del_state = SHARD_BG_DELETION_NONE; } +- UNLOCK(&priv->lock); +- loc_wipe(&loc); +- return ret; +-} +- +-int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) { +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. 
Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) { +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->inodelk_frame; +- lk_local = lk_frame->local; +- local->inodelk_frame = NULL; +- loc = &local->int_inodelk.loc; +- lock = &lk_local->int_inodelk; +- lock->flock.l_type = F_UNLCK; +- +- STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, +- &lock->flock, NULL); +- local->int_inodelk.acquired_lock = _gf_false; +- return 0; +-} +- +-int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata); +-int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) { +- int ret = 0; +- loc_t *dst_loc = NULL; +- loc_t tmp_loc = { +- 0, +- }; +- shard_local_t *local = frame->local; +- +- if (local->dst_block_size) { +- tmp_loc.parent = inode_ref(local->loc2.parent); +- ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on pargfid=%s bname=%s", +- uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- tmp_loc.name = strrchr(tmp_loc.path, '/'); +- if (tmp_loc.name) +- tmp_loc.name++; +- dst_loc = &tmp_loc; +- } else { +- dst_loc = &local->loc2; +- } +- +- /* To-Do: Request open-fd count on dst base file */ +- STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, +- local->xattr_req); +- loc_wipe(&tmp_loc); +- return 0; +-err: +- loc_wipe(&tmp_loc); +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int shard_unlink_base_file(call_frame_t *frame, xlator_t *this); +- +-int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, dict_t *dict, +- dict_t *xdata) { +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Xattrop on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, +- local->newloc.name); +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); +- return 0; +-} +- +-int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) { +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- dict_t *xdata = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- xdata = dict_new(); +- if (!xdata) +- goto err; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- 
SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, +- local->prebuf.ia_size, 0, err); +- STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->xattrop, &local->newloc, +- GF_XATTROP_GET_AND_SET, xdata, NULL); +- dict_unref(xdata); +- return 0; +-err: +- if (xdata) +- dict_unref(xdata); +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) { +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Lookup on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- linked_inode = +- inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- shard_set_size_attrs_on_marker_file(frame, this); +- return 0; ++int ++shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ struct iatt tmp_stbuf = { ++ 0, ++ }; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ local->postbuf = tmp_stbuf = local->prebuf; ++ ++ if (local->prebuf.ia_size == local->offset) { ++ /* If the file size is same as requested size, unwind the call ++ * immediately. ++ */ ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, ++ &local->postbuf, NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, ++ &local->postbuf, NULL); ++ } else if (local->offset > local->prebuf.ia_size) { ++ /* If the truncate is from a lower to a higher size, set the ++ * new size xattr and unwind. ++ */ ++ local->hole_size = local->offset - local->prebuf.ia_size; ++ local->delta_size = 0; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->postbuf.ia_size = local->offset; ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ } else { ++ /* ... else ++ * i. unlink all shards that need to be unlinked. ++ * ii. truncate the last of the shards. ++ * iii. update the new size using setxattr. ++ * and unwind the fop. ++ */ ++ local->hole_size = 0; ++ local->delta_size = (local->offset - local->prebuf.ia_size); ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_truncate_begin(frame, this); ++ } ++ return 0; ++} ++ ++/* TO-DO: ++ * Fix updates to size and block count with racing write(s) and truncate(s). 
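 * [Editor's illustration, added; not part of the original comment]
 * Worked example of the participant-block arithmetic in
 * shard_truncate_begin() above: with a 4 MiB shard-block-size, an
 * 18 MiB file truncated to 6 MiB gives
 *     first_block = get_lowest_block(6 MiB - 1, 4 MiB) = 1
 *     last_block  = get_highest_block(0, 18 MiB, 4 MiB) = 4
 * so shards 2 through 4 are unlinked by shard_truncate_htol() and
 * shard 1 is truncated to 6 MiB % 4 MiB = 2 MiB by
 * shard_truncate_last_shard().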
++ */ ++ ++int ++shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = loc->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ loc_copy(&local->loc, loc); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->fop = GF_FOP_TRUNCATE; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->resolver_base_inode = loc->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; ++ + err: +- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); ++ return 0; + } + +-int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) { +- int op_errno = ENOMEM; +- dict_t *xattr_req = NULL; +- shard_local_t *local = NULL; ++int ++shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FTRUNCATE; ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->resolver_base_inode = fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret == -1) ++ goto unwind; ++ ++ ret = shard_inode_ctx_set(inode, this, buf, local->block_size, ++ SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); ++ ++unwind: ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); ++ ++ return 0; ++} ++ ++int ++shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, ++ dev_t rdev, mode_t umask, dict_t *xdata) ++{ ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ local->block_size = priv->block_size; ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } ++ ++ STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int32_t ++shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ if (op_ret < 0) ++ goto err; ++ ++ shard_inode_ctx_set(inode, this, buf, 0, ++ SHARD_MASK_NLINK | SHARD_MASK_TIMES); ++ buf->ia_size = local->prebuf.ia_size; ++ buf->ia_blocks = local->prebuf.ia_blocks; ++ ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); ++ return 0; ++} ++ ++int ++shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, ++ NULL, NULL, NULL, NULL); ++ return 0; ++ } ++ ++ STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, ++ local->xattr_req); ++ return 0; ++} ++ ++int32_t ++shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } ++ ++ if 
(!block_size) { ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, ++ oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = oldloc->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_link_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); ++ ++int ++shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = frame->local; ++ ++ if (local->resolver_base_inode) ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { ++ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, ++ "failed to delete shards of %s", uuid_utoa(gfid)); ++ return 0; ++ } ++ local->op_ret = 0; ++ local->op_errno = 0; ++ ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ return 0; ++} ++ ++int ++shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ local->lookup_shards_barriered = _gf_true; ++ ++ if (!local->call_count) ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ else ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_unlink_handler); ++ return 0; ++} ++ ++void ++shard_unlink_block_inode(shard_local_t *local, int shard_block_num) ++{ ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *base_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ int unref_base_inode = 0; ++ int unref_shard_inode = 0; ++ ++ this = THIS; ++ priv = this->private; ++ ++ inode = local->inode_list[shard_block_num - local->first_block]; ++ shard_inode_ctx_get(inode, this, &ctx); ++ base_inode = ctx->base_inode; ++ if (base_inode) ++ gf_uuid_copy(gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, ctx->base_gfid); ++ shard_make_block_bname(shard_block_num, gfid, block_bname, ++ sizeof(block_bname)); ++ ++ LOCK(&priv->lock); ++ if (base_inode) ++ LOCK(&base_inode->lock); ++ LOCK(&inode->lock); ++ { ++ __shard_inode_ctx_get(inode, this, &ctx); ++ if (!list_empty(&ctx->ilist)) { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; ++ unref_base_inode++; ++ unref_shard_inode++; ++ GF_ASSERT(priv->inode_count >= 0); ++ } ++ if (ctx->fsync_needed) { ++ unref_base_inode++; ++ unref_shard_inode++; ++ list_del_init(&ctx->to_fsync_list); ++ if (base_inode) { ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ base_ictx->fsync_count--; ++ } ++ } ++ } ++ UNLOCK(&inode->lock); ++ if (base_inode) ++ UNLOCK(&base_inode->lock); ++ ++ inode_unlink(inode, priv->dot_shard_inode, block_bname); ++ inode_ref_reduce_by_n(inode, unref_shard_inode); ++ inode_forget(inode, 0); ++ ++ if (base_inode && unref_base_inode) ++ inode_ref_reduce_by_n(base_inode, unref_base_inode); ++ UNLOCK(&priv->lock); ++} ++ ++int 
++shard_rename_cbk(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->preoldparent, ++ &local->postoldparent, &local->prenewparent, ++ &local->postnewparent, local->xattr_rsp); ++ return 0; ++} ++ ++int32_t ++shard_unlink_cbk(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = frame->local; ++ ++ SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preoldparent, &local->postoldparent, ++ local->xattr_rsp); ++ return 0; ++} ++ ++int ++shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) ++{ ++ int shard_block_num = (long)cookie; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } ++ ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ syncbarrier_wake(&local->barrier); ++ return 0; ++} ++ ++int ++shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) ++{ ++ int i = 0; ++ int ret = -1; ++ int count = 0; ++ uint32_t cur_block = 0; ++ uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ ++ char *bname = NULL; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ count++; ++ } ++ ++ if (!count) { ++ /* callcount = 0 implies that all of the shards that need to be ++ * unlinked are non-existent (in other words the file is full of ++ * holes). 
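A zero count here short-circuits the whole unlink fan-out, as the comment above explains. A minimal sketch of that pre-count, assuming, as in the patch, that inode_list[] holds NULL for blocks that were never created (present_shards is an illustrative name, not a function in this patch):

/* Holes were never written, so they have no shard file to unlink. */
static int
present_shards(void *inode_list[], int num_blocks)
{
    int i;
    int count = 0;

    for (i = 0; i < num_blocks; i++)
        if (inode_list[i]) /* NULL entry == hole, nothing on disk */
            count++;
    return count; /* 0 => nothing to wind; return without unlinking */
}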
++ */ ++ gf_msg_debug(this->name, 0, ++ "All shards that need to be " ++ "unlinked are non-existent: %s", ++ uuid_utoa(gfid)); ++ return 0; ++ } ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ local->barrier.waitfor = count; ++ cur_block = cur_block_idx + local->first_block; ++ ++ while (cur_block_idx < local->num_blocks) { ++ if (!local->inode_list[cur_block_idx]) ++ goto next; ++ ++ if (wind_failed) { ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[cur_block_idx]); ++ ++ STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, ++ (void *)(long)cur_block, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, ++ local->xattr_req); ++ loc_wipe(&loc); ++ next: ++ cur_block++; ++ cur_block_idx++; ++ } ++ syncbarrier_wait(&local->barrier, count); ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ return 0; ++} ++ ++int ++shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, ++ int now, int first_block, gf_dirent_t *entry) ++{ ++ int i = 0; ++ int ret = 0; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = cleanup_frame->local; ++ ++ local->inode_list = GF_CALLOC(now, sizeof(inode_t *), ++ gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ return -ENOMEM; ++ ++ local->first_block = first_block; ++ local->last_block = first_block + now - 1; ++ local->num_blocks = now; ++ gf_uuid_parse(entry->d_name, gfid); ++ gf_uuid_copy(local->base_gfid, gfid); ++ local->resolver_base_inode = inode_find(this->itable, gfid); ++ local->call_count = 0; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) { ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ return -errno; ++ } ++ shard_common_resolve_shards(cleanup_frame, this, ++ shard_post_resolve_unlink_handler); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ if (local->op_ret) ++ ret = -local->op_errno; ++ syncbarrier_destroy(&local->barrier); ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ STACK_RESET(cleanup_frame->root); ++ return ret; ++} ++ ++int ++__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) ++{ ++ int ret = 0; ++ int shard_count = 0; ++ int first_block = 0; ++ int now = 0; ++ uint64_t size = 0; ++ uint64_t block_size = 0; ++ uint64_t size_array[4] = { ++ 0, ++ }; ++ void *bsize = NULL; ++ void *size_attr = NULL; ++ dict_t *xattr_rsp = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = 
cleanup_frame->local; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, ++ &xattr_rsp); ++ if (ret) ++ goto err; ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); ++ goto err; ++ } ++ block_size = ntoh64(*((uint64_t *)bsize)); ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ goto err; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ size = ntoh64(size_array[0]); ++ ++ shard_count = (size / block_size) - 1; ++ if (shard_count < 0) { ++ gf_msg_debug(this->name, 0, ++ "Size of %s hasn't grown beyond " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", ++ entry->d_name); ++ /* File size < shard-block-size, so nothing to delete */ ++ ret = 0; ++ goto delete_marker; ++ } ++ if ((size % block_size) > 0) ++ shard_count++; ++ ++ if (shard_count == 0) { ++ gf_msg_debug(this->name, 0, ++ "Size of %s is exactly equal to " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } ++ gf_msg_debug(this->name, 0, ++ "base file = %s, " ++ "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ++ ", " ++ "shard_count=%d", ++ entry->d_name, block_size, size, shard_count); ++ ++ /* Perform a gfid-based lookup to see if gfid corresponding to marker ++ * file's base name exists. ++ */ ++ loc_wipe(&loc); ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ gf_uuid_parse(entry->d_name, loc.gfid); ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (!ret) { ++ gf_msg_debug(this->name, 0, ++ "Base shard corresponding to gfid " ++ "%s is present. Skipping shard deletion. 
" ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } ++ ++ first_block = 1; ++ ++ while (shard_count) { ++ if (shard_count < local->deletion_rate) { ++ now = shard_count; ++ shard_count = 0; ++ } else { ++ now = local->deletion_rate; ++ shard_count -= local->deletion_rate; ++ } ++ ++ gf_msg_debug(this->name, 0, ++ "deleting %d shards starting from " ++ "block %d of gfid %s", ++ now, first_block, entry->d_name); ++ ret = shard_regulated_shards_deletion(cleanup_frame, this, now, ++ first_block, entry); ++ if (ret) ++ goto err; ++ first_block += now; ++ } ++ ++delete_marker: ++ loc_wipe(&loc); ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to delete %s " ++ "from /%s", ++ entry->d_name, GF_SHARD_REMOVE_ME_DIR); ++err: ++ if (xattr_rsp) ++ dict_unref(xattr_rsp); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int ++shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) ++{ ++ int ret = -1; ++ loc_t loc = { ++ 0, ++ }; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ loc.inode = inode_ref(priv->dot_shard_rm_inode); ++ ++ ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); ++ if (ret < 0) { ++ if (ret == -EAGAIN) { ++ ret = 0; ++ } ++ goto out; ++ } ++ { ++ ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); ++ } ++ syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); ++out: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int ++shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) ++{ ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int ++shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) ++{ ++ int ret = 0; ++ char *bname = NULL; ++ loc_t *loc = NULL; ++ shard_priv_t *priv = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ struct iatt stbuf = { ++ 0, ++ }; ++ ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ loc = &local->dot_shard_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ bname = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ loc = &local->dot_shard_rm_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ ++ loc->inode = inode_find(this->itable, gfid); ++ if (!loc->inode) { ++ ret = shard_init_internal_dir_loc(this, local, type); ++ if (ret) ++ goto err; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset " ++ "dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); ++ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, ++ local->xattr_req, NULL); ++ if (ret < 0) { ++ if (ret != -ENOENT) ++ gf_msg(this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Lookup on %s failed, exiting", bname); ++ goto 
err; ++ } else { ++ shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); ++ } ++ } ++ ret = 0; ++err: ++ return ret; ++} ++ ++int ++shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, ++ gf_dirent_t *entry) ++{ ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.parent = inode_ref(local->fd->inode); ++ ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (ret < 0) { ++ goto err; ++ } ++ entry->inode = inode_ref(loc.inode); ++ ret = 0; ++err: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int ++shard_delete_shards(void *opaque) ++{ ++ int ret = 0; ++ off_t offset = 0; ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *link_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ gf_dirent_t entries; ++ gf_dirent_t *entry = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ gf_boolean_t done = _gf_false; ++ ++ this = THIS; ++ priv = this->private; ++ INIT_LIST_HEAD(&entries.list); ++ ++ cleanup_frame = opaque; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create local to " ++ "delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ cleanup_frame->local = local; ++ local->fop = GF_FOP_UNLINK; ++ ++ local->xattr_req = dict_new(); ++ if (!local->xattr_req) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ local->deletion_rate = priv->deletion_rate; ++ ++ ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ++ ".shard absent. Nothing to" ++ " delete. Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } ++ ++ ret = shard_resolve_internal_dir(this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ++ ".remove_me absent. " ++ "Nothing to delete. 
Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } ++ ++ local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); ++ if (!local->fd) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ for (;;) { ++ offset = 0; ++ LOCK(&priv->lock); ++ { ++ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { ++ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; ++ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ done = _gf_true; ++ } ++ } ++ UNLOCK(&priv->lock); ++ if (done) ++ break; ++ while ( ++ (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, ++ &entries, local->xattr_req, NULL))) { ++ if (ret > 0) ++ ret = 0; ++ list_for_each_entry(entry, &entries.list, list) ++ { ++ offset = entry->d_off; ++ ++ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) ++ continue; ++ ++ if (!entry->inode) { ++ ret = shard_lookup_marker_entry(this, local, entry); ++ if (ret < 0) ++ continue; ++ } ++ link_inode = inode_link(entry->inode, local->fd->inode, ++ entry->d_name, &entry->d_stat); ++ ++ gf_msg_debug(this->name, 0, ++ "Initiating deletion of " ++ "shards of gfid %s", ++ entry->d_name); ++ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, ++ link_inode); ++ inode_unlink(link_inode, local->fd->inode, entry->d_name); ++ inode_unref(link_inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to clean up shards of gfid %s", ++ entry->d_name); ++ continue; ++ } ++ gf_msg(this->name, GF_LOG_INFO, 0, ++ SHARD_MSG_SHARD_DELETION_COMPLETED, ++ "Deleted " ++ "shards of gfid=%s from backend", ++ entry->d_name); ++ } ++ gf_dirent_free(&entries); ++ if (ret) ++ break; ++ } ++ } ++ ret = 0; ++ loc_wipe(&loc); ++ return ret; ++ ++err: ++ LOCK(&priv->lock); ++ { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ } ++ UNLOCK(&priv->lock); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int ++shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int ++shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) ++{ ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->inodelk_frame; ++ lk_local = lk_frame->local; ++ local->inodelk_frame = NULL; ++ loc = &local->int_inodelk.loc; ++ lock = &lk_local->int_inodelk; ++ lock->flock.l_type = F_UNLCK; ++ ++ STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, ++ &lock->flock, NULL); ++ local->int_inodelk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int ++shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata); ++int ++shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) ++{ ++ int ret = 0; ++ loc_t *dst_loc = NULL; ++ loc_t tmp_loc = { ++ 0, ++ }; ++ shard_local_t *local = frame->local; ++ ++ if (local->dst_block_size) { ++ tmp_loc.parent = inode_ref(local->loc2.parent); ++ ret = inode_path(tmp_loc.parent, local->loc2.name, ++ (char **)&tmp_loc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on pargfid=%s bname=%s", ++ uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ tmp_loc.name = strrchr(tmp_loc.path, '/'); ++ if (tmp_loc.name) ++ tmp_loc.name++; ++ dst_loc = &tmp_loc; ++ } else { ++ dst_loc = &local->loc2; ++ } ++ ++ /* To-Do: Request open-fd count on dst base file */ ++ STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, ++ local->xattr_req); ++ loc_wipe(&tmp_loc); ++ return 0; ++err: ++ loc_wipe(&tmp_loc); ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int ++shard_unlink_base_file(call_frame_t *frame, xlator_t *this); ++ ++int ++shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Xattrop on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, ++ local->newloc.name); ++ ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; ++} ++ ++int ++shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) ++{ ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ dict_t *xdata = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ xdata = dict_new(); ++ if (!xdata) ++ goto err; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = 
local->dst_block_size; ++ SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, ++ &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); ++ dict_unref(xdata); ++ return 0; ++err: ++ if (xdata) ++ dict_unref(xdata); ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; ++} ++ ++int ++shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Lookup on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ linked_inode = inode_link(inode, priv->dot_shard_rm_inode, ++ local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; ++ shard_set_size_attrs_on_marker_file(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; ++} ++ ++int ++shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) ++{ ++ int op_errno = ENOMEM; ++ dict_t *xattr_req = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; ++ ++ STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); ++ dict_unref(xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; ++} ++ ++int ++shard_create_marker_file_under_remove_me_cbk( ++ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (op_ret < 0) { ++ if ((op_errno != EEXIST) && (op_errno != ENODATA)) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Marker file creation " ++ "failed while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } else { ++ shard_lookup_marker_file(frame, this); ++ return 0; ++ } ++ } ++ ++ linked_inode = inode_link(inode, priv->dot_shard_rm_inode, ++ local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++} ++ ++int ++shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, ++ loc_t *loc) ++{ ++ int ret = 0; ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ char g1[64] = { ++ 0, ++ }; ++ char g2[64] = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = 
NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; ++ ++ local->newloc.inode = inode_new(this->itable); ++ local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), ++ (char **)&local->newloc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on " ++ "pargfid=%s bname=%s", ++ uuid_utoa_r(priv->dot_shard_rm_gfid, g1), ++ uuid_utoa_r(loc->inode->gfid, g2)); ++ goto err; ++ } ++ local->newloc.name = strrchr(local->newloc.path, '/'); ++ if (local->newloc.name) ++ local->newloc.name++; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ ++ SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ ++ STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, ++ &local->newloc, 0, 0, 0644, xattr_req); ++ dict_unref(xattr_req); ++ return 0; ++ ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, ++ NULL, NULL, NULL, NULL, NULL); ++ return 0; ++} ++ ++int ++shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); ++ ++int ++shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) ++{ ++ int ret = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } else { ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ local->preoldparent = *preparent; ++ local->postoldparent = *postparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } ++ ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ } ++ ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ ++ shard_unlink_cbk(frame, this); ++ return 0; ++} ++ ++int ++shard_unlink_base_file(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = frame->local; ++ ++ /* To-Do: Request open-fd count on base file */ ++ STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, ++ local->xattr_req); ++ return 0; ++} ++ ++int ++shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int ++shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) ++{ ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_entrylk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->entrylk_frame; ++ lk_local = lk_frame->local; ++ local->entrylk_frame = NULL; ++ lock = &lk_local->int_entrylk; ++ loc = &lock->loc; ++ ++ STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, loc, ++ lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, ++ NULL); ++ local->int_entrylk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int ++shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_create_marker_file_under_remove_me(frame, this, ++ &local->int_inodelk.loc); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-entrylk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int ++shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, ++ op_errno); ++ return 0; ++ } ++ main_local->int_entrylk.acquired_lock = _gf_true; ++ shard_post_entrylk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int ++shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, ++ uuid_t gfid) ++{ ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_local_t *entrylk_local = NULL; ++ shard_entrylk_t *int_entrylk = NULL; ++ call_frame_t *entrylk_frame = NULL; ++ ++ local = frame->local; ++ entrylk_frame = create_frame(this, this->ctx->pool); ++ if (!entrylk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock marker file"); ++ goto err; ++ } ++ ++ entrylk_local = mem_get0(this->local_pool); ++ if (!entrylk_local) { ++ STACK_DESTROY(entrylk_frame->root); ++ goto err; ++ } ++ ++ entrylk_frame->local = entrylk_local; ++ entrylk_local->main_frame = frame; ++ int_entrylk = &entrylk_local->int_entrylk; ++ ++ int_entrylk->loc.inode = inode_ref(inode); ++ set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); ++ local->entrylk_frame = entrylk_frame; ++ gf_uuid_unparse(gfid, gfid_str); ++ int_entrylk->basename = gf_strdup(gfid_str); ++ ++ STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, ++ int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, 
local->op_errno); ++ return 0; ++ } ++ ++ if (local->prebuf.ia_nlink > 1) { ++ gf_msg_debug(this->name, 0, ++ "link count on %s > 1:%d, " ++ "performing rename()/unlink()", ++ local->int_inodelk.loc.path, local->prebuf.ia_nlink); ++ if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ else if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ } else { ++ gf_msg_debug(this->name, 0, ++ "link count on %s = 1, creating " ++ "file under .remove_me", ++ local->int_inodelk.loc.path); ++ local->cleanup_required = _gf_true; ++ shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, ++ local->prebuf.ia_gfid); ++ } ++ return 0; ++} ++ ++int ++shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_lookup_base_file(frame, this, &local->int_inodelk.loc, ++ shard_post_lookup_base_shard_rm_handler); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-inodelk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int ++shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, ++ op_errno); ++ return 0; ++ } ++ main_local->int_inodelk.acquired_lock = _gf_true; ++ shard_post_inodelk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int ++shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) ++{ ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *int_inodelk = NULL; ++ ++ local = frame->local; ++ lk_frame = create_frame(this, this->ctx->pool); ++ if (!lk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock base shard"); ++ goto err; ++ } ++ lk_local = mem_get0(this->local_pool); ++ if (!lk_local) { ++ STACK_DESTROY(lk_frame->root); ++ goto err; ++ } ++ ++ lk_frame->local = lk_local; ++ lk_local->main_frame = frame; ++ int_inodelk = &lk_local->int_inodelk; ++ ++ int_inodelk->flock.l_len = 0; ++ int_inodelk->flock.l_start = 0; ++ int_inodelk->domain = this->name; ++ int_inodelk->flock.l_type = F_WRLCK; ++ loc_copy(&local->int_inodelk.loc, loc); ++ set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); ++ local->inodelk_frame = lk_frame; ++ ++ STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, ++ &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) ++{ ++ loc_t *loc = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++ } ++ if (local->fop == GF_FOP_UNLINK) ++ loc = &local->loc; ++ else if (local->fop == GF_FOP_RENAME) ++ loc = &local->loc2; ++ shard_acquire_inodelk(frame, this, loc); ++ return 
0; ++} ++ ++int ++shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type); ++int ++shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++ } ++ shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ return 0; ++} ++ ++void ++shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) ++{ ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ local->dot_shard_rm_loc.inode = inode_find(this->itable, ++ priv->dot_shard_rm_gfid); ++ if (!local->dot_shard_rm_loc.inode) { ++ local->dot_shard_loc.inode = inode_find(this->itable, ++ priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_pre_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ } else { ++ local->post_res_handler = shard_post_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ } ++} ++ ++int ++shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ loc_copy(&local->loc, loc); ++ local->xflag = xflag; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ local->block_size = block_size; ++ local->resolver_base_inode = loc->inode; ++ local->fop = GF_FOP_UNLINK; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ ++ local->resolve_not = _gf_true; ++ shard_begin_rm_resolution(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_rename_cbk(frame, this); ++ return 0; ++} ++ ++int ++shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata) ++{ ++ int ret = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ /* Set ctx->refresh to TRUE to force a lookup on disk when ++ * shard_lookup_base_file() is called next to refresh the hard link ++ * count in ctx. Note that this is applicable only to the case where ++ * the rename dst is already existent and sharded. 
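The hard-link count being refreshed here is what shard_post_lookup_base_shard_rm_handler() branched on earlier in this patch: more than one link means the inode survives the fop and the shards must stay, while exactly one link means a marker file is created under .shard/.remove_me so the shards can be deleted in the background. A minimal sketch of that predicate (needs_remove_me_marker is an illustrative name):

#include <stdbool.h>
#include <stdint.h>

/* True when this unlink/rename drops the last hard link on the base
 * file, i.e. when shard cleanup must be queued via .remove_me. */
static bool
needs_remove_me_marker(uint32_t ia_nlink)
{
    return ia_nlink <= 1;
}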
++ */ ++ if ((local->dst_block_size) && (!local->cleanup_required)) ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ ++ local->prebuf = *buf; ++ local->preoldparent = *preoldparent; ++ local->postoldparent = *postoldparent; ++ local->prenewparent = *prenewparent; ++ local->postnewparent = *postnewparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ ++ if (local->dst_block_size) { ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ } ++ ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ goto err; ++ } ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } ++ ++ /* Now the base file of src, if sharded, is looked up to gather ia_size ++ * and ia_blocks.*/ ++ if (local->block_size) { ++ local->tmp_loc.inode = inode_new(this->itable); ++ gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); ++ shard_lookup_base_file(frame, this, &local->tmp_loc, ++ shard_post_rename_lookup_handler); ++ } else { ++ shard_rename_cbk(frame, this); ++ } ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int ++shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ /* Save dst base file attributes into postbuf so the information is not ++ * lost when it is overwritten after lookup on base file of src in ++ * shard_lookup_base_file_cbk(). ++ */ ++ local->postbuf = local->prebuf; ++ shard_rename_src_base_file(frame, this); ++ return 0; ++} ++ ++int ++shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ uint64_t dst_block_size = 0; ++ shard_local_t *local = NULL; ++ ++ if (IA_ISDIR(oldloc->inode->ia_type)) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } ++ ++ if (newloc->inode) ++ ret = shard_inode_ctx_get_block_size(newloc->inode, this, ++ &dst_block_size); ++ ++ /* The following stack_wind covers the case where: ++ * a. the src file is not sharded and dst doesn't exist, OR ++ * b. the src and dst both exist but are not sharded. ++ */ ++ if (((!block_size) && (!dst_block_size)) || ++ frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->resolver_base_inode = newloc->inode; ++ local->fop = GF_FOP_RENAME; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ local->block_size = block_size; ++ local->dst_block_size = dst_block_size; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ local->resolve_not = _gf_true; ++ ++ /* The following if-block covers the case where the dst file exists ++ * and is sharded. ++ */ ++ if (local->dst_block_size) { ++ shard_begin_rm_resolution(frame, this); ++ } else { ++ /* The following block covers the case where the dst either doesn't ++ * exist or is NOT sharded but the src is sharded. In this case, shard ++ * xlator would go ahead and rename src to dst. Once done, it would also ++ * lookup the base shard of src to get the ia_size and ia_blocks xattr ++ * values. ++ */ ++ shard_rename_src_base_file(frame, this); ++ } ++ return 0; ++ ++err: ++ shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret == -1) ++ goto unwind; ++ ++ ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, ++ SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); ++ ++unwind: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ return 0; ++} ++ ++int ++shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) ++{ ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ local->block_size = priv->block_size; ++ ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } ++ ++ STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, ++ xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) ++{ ++ /* To-Do: Handle open with O_TRUNC under locks */ ++ SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); ++ return 0; ++} ++ ++int ++shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ fd_t *fd, dict_t *xdata) ++{ ++ STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); ++ return 0; ++} ++ ++int ++shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iovec *vector, ++ int32_t count, struct iatt *stbuf, struct iobref *iobref, ++ dict_t *xdata) ++{ ++ int i = 0; ++ int call_count = 0; ++ void *address = NULL; ++ uint64_t block_num = 0; ++ off_t off = 0; ++ struct iovec vec = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ fd_t *anon_fd = cookie; ++ shard_inode_ctx_t *ctx = NULL; ++ ++ local = frame->local; ++ ++ /* If shard has already seen a failure here before, there is no point ++ * in aggregating subsequent reads, so just go to out. 
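That early return on a previously recorded failure is one half of the fan-in pattern these callbacks share: several STACK_WINDs feed a single local, the first error wins, and whichever reply arrives last unwinds. A minimal standalone sketch of the pattern, assuming a plain pthread mutex in place of the frame lock (fanin_reply and struct fanin are illustrative names):

#include <pthread.h>

struct fanin {
    pthread_mutex_t lock; /* caller initializes */
    int pending;          /* replies still expected */
    int op_ret;           /* first error wins; >= 0 accumulates bytes */
    int op_errno;
};

/* Record one reply; returns how many replies remain outstanding. */
static int
fanin_reply(struct fanin *f, int op_ret, int op_errno)
{
    int left;

    pthread_mutex_lock(&f->lock);
    if (op_ret < 0 && f->op_ret >= 0) {
        f->op_ret = op_ret; /* keep only the first failure */
        f->op_errno = op_errno;
    } else if (f->op_ret >= 0) {
        f->op_ret += op_ret; /* aggregate bytes read so far */
    }
    left = --f->pending;
    pthread_mutex_unlock(&f->lock);
    return left; /* 0 => this was the last reply: unwind now */
}

In the patch itself shard_call_count_return() plays the role of the decrement, and SHARD_UNSET_ROOT_FS_ID plus the unwind run only in the call_count == 0 arm.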
++ */ ++ if (local->op_ret < 0) ++ goto out; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ if (local->op_ret >= 0) ++ local->op_ret += op_ret; ++ ++ shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ block_num = ctx->block_num; ++ ++ if (block_num == local->first_block) { ++ address = local->iobuf->ptr; ++ } else { ++ /* else ++ * address to start writing to = beginning of buffer + ++ * number of bytes until end of first block + ++ * + block_size times number of blocks ++ * between the current block and the first ++ */ ++ address = (char *)local->iobuf->ptr + ++ (local->block_size - (local->offset % local->block_size)) + ++ ((block_num - local->first_block - 1) * local->block_size); ++ } ++ ++ for (i = 0; i < count; i++) { ++ address = (char *)address + off; ++ memcpy(address, vector[i].iov_base, vector[i].iov_len); ++ off += vector[i].iov_len; ++ } ++ ++out: ++ if (anon_fd) ++ fd_unref(anon_fd); ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ vec.iov_base = local->iobuf->ptr; ++ if (local->offset + local->req_size > local->prebuf.ia_size) ++ local->total_size = local->prebuf.ia_size - local->offset; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, ++ &vec, 1, &local->prebuf, local->iobref, ++ local->xattr_rsp); ++ return 0; ++ } ++ } ++ ++ return 0; ++} ++ ++int ++shard_readv_do(call_frame_t *frame, xlator_t *this) ++{ ++ int i = 0; ++ int call_count = 0; ++ int last_block = 0; ++ int cur_block = 0; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ size_t read_size = 0; ++ size_t remaining_size = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ cur_block = local->first_block; ++ last_block = local->last_block; ++ remaining_size = local->total_size; ++ local->call_count = call_count = local->num_blocks; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (fd->flags & O_DIRECT) ++ local->flags = O_DIRECT; ++ ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, ++ 0, NULL, NULL, NULL); ++ goto next; ++ } + +- local = frame->local; ++ shard_offset = orig_offset % local->block_size; ++ read_size = local->block_size - shard_offset; ++ if (read_size > remaining_size) ++ read_size = remaining_size; ++ ++ remaining_size -= read_size; ++ ++ if (cur_block == 0) { ++ anon_fd = fd_ref(fd); ++ } else { ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ++ ENOMEM, NULL, 0, NULL, NULL, NULL); ++ goto next; ++ } ++ } + +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; ++ STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, anon_fd, read_size, ++ shard_offset, local->flags, local->xattr_req); + +- STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); +- 
dict_unref(xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; ++ orig_offset += read_size; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; + } + +-int shard_create_marker_file_under_remove_me_cbk( +- call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) { +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (op_ret < 0) { +- if ((op_errno != EEXIST) && (op_errno != ENODATA)) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Marker file creation " +- "failed while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } else { +- shard_lookup_marker_file(frame, this); +- return 0; ++int ++shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ int shard_block_num = (long)cookie; ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ if (op_errno == EEXIST) { ++ LOCK(&frame->lock); ++ { ++ local->eexist_count++; ++ } ++ UNLOCK(&frame->lock); ++ } else { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, 0, ++ "mknod of shard %d " ++ "failed: %s", ++ shard_block_num, strerror(op_errno)); ++ goto done; + } +- } + +- linked_inode = +- inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +-} +- +-int shard_create_marker_file_under_remove_me(call_frame_t *frame, +- xlator_t *this, loc_t *loc) { +- int ret = 0; +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- char g1[64] = { +- 0, +- }; +- char g2[64] = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- local->newloc.inode = inode_new(this->itable); +- local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), +- (char **)&local->newloc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on " +- "pargfid=%s bname=%s", +- uuid_utoa_r(priv->dot_shard_rm_gfid, g1), +- uuid_utoa_r(loc->inode->gfid, g2)); +- goto err; +- } +- local->newloc.name = strrchr(local->newloc.path, '/'); +- if (local->newloc.name) +- local->newloc.name++; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- +- SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, 
+- local->prebuf.ia_size, 0, err); +- +- STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc, +- 0, 0, 0644, xattr_req); +- dict_unref(xattr_req); +- return 0; ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local->create_count = 0; ++ local->post_mknod_handler(frame, this); ++ } + +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, +- NULL, NULL, NULL, NULL, NULL); +- return 0; ++ return 0; + } + +-int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); +- +-int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) { +- int ret = 0; +- shard_local_t *local = NULL; ++int ++shard_common_resume_mknod(call_frame_t *frame, xlator_t *this, ++ shard_post_mknod_fop_handler_t post_mknod_handler) ++{ ++ int i = 0; ++ int shard_idx_iter = 0; ++ int last_block = 0; ++ int ret = 0; ++ int call_count = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ mode_t mode = 0; ++ char *bname = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t ctx_tmp = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ fd_t *fd = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; + +- local = frame->local; ++ local = frame->local; ++ priv = this->private; ++ fd = local->fd; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ call_count = local->call_count = local->create_count; ++ local->post_mknod_handler = post_mknod_handler; + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); +- local->preoldparent = *preparent; +- local->postoldparent = *postparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- if (local->cleanup_required) +- shard_start_background_deletion(this); +- } ++ SHARD_SET_ROOT_FS_ID(frame, local); + +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; ++ ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get inode " ++ "ctx for %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; + } +- } ++ mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); + +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- +- shard_unlink_cbk(frame, this); +- return 0; +-} +- +-int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = frame->local; +- +- /* To-Do: Request open-fd count on base file */ +- STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, +- local->xattr_req); +- return 0; +-} +- +-int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) { +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. 
Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) { +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_entrylk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->entrylk_frame; +- lk_local = lk_frame->local; +- local->entrylk_frame = NULL; +- lock = &lk_local->int_entrylk; +- loc = &lock->loc; +- +- STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, loc, +- lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, +- NULL); +- local->int_entrylk.acquired_lock = _gf_false; +- return 0; +-} +- +-int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- shard_create_marker_file_under_remove_me(frame, this, +- &local->int_inodelk.loc); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-entrylk handler not defined. This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) { +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); +- return 0; +- } +- main_local->int_entrylk.acquired_lock = _gf_true; +- shard_post_entrylk_fop_handler(main_frame, this); +- return 0; +-} +- +-int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, +- uuid_t gfid) { +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_local_t *entrylk_local = NULL; +- shard_entrylk_t *int_entrylk = NULL; +- call_frame_t *entrylk_frame = NULL; +- +- local = frame->local; +- entrylk_frame = create_frame(this, this->ctx->pool); +- if (!entrylk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock marker file"); +- goto err; +- } +- +- entrylk_local = mem_get0(this->local_pool); +- if (!entrylk_local) { +- STACK_DESTROY(entrylk_frame->root); +- goto err; +- } +- +- entrylk_frame->local = entrylk_local; +- entrylk_local->main_frame = frame; +- int_entrylk = &entrylk_local->int_entrylk; +- +- int_entrylk->loc.inode = inode_ref(inode); +- set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); +- local->entrylk_frame = entrylk_frame; +- gf_uuid_unparse(gfid, gfid_str); +- int_entrylk->basename = gf_strdup(gfid_str); +- +- STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, +- int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ shard_idx_iter++; ++ i++; ++ continue; ++ } + +-int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; ++ if (wind_failed) { ++ 
shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; ++ } + +- priv = this->private; +- local = frame->local; ++ shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, ++ sizeof(path)); ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ "on %s, base file gfid = %s", ++ bname, uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, &loc, mode, ++ ctx_tmp.stat.ia_rdev, 0, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ ++ next: ++ shard_idx_iter++; ++ i++; ++ if (!--call_count) ++ break; ++ } + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; +- } +- +- if (local->prebuf.ia_nlink > 1) { +- gf_msg_debug(this->name, 0, "link count on %s > 1:%d, " +- "performing rename()/unlink()", +- local->int_inodelk.loc.path, local->prebuf.ia_nlink); +- if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- else if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- } else { +- gf_msg_debug(this->name, 0, "link count on %s = 1, creating " +- "file under .remove_me", +- local->int_inodelk.loc.path); +- local->cleanup_required = _gf_true; +- shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, +- local->prebuf.ia_gfid); +- } +- return 0; +-} +- +-int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- shard_lookup_base_file(frame, this, &local->int_inodelk.loc, +- shard_post_lookup_base_shard_rm_handler); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-inodelk handler not defined. 
This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) { +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); +- return 0; +- } +- main_local->int_inodelk.acquired_lock = _gf_true; +- shard_post_inodelk_fop_handler(main_frame, this); +- return 0; +-} +- +-int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) { +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *int_inodelk = NULL; +- +- local = frame->local; +- lk_frame = create_frame(this, this->ctx->pool); +- if (!lk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock base shard"); +- goto err; +- } +- lk_local = mem_get0(this->local_pool); +- if (!lk_local) { +- STACK_DESTROY(lk_frame->root); +- goto err; +- } +- +- lk_frame->local = lk_local; +- lk_local->main_frame = frame; +- int_inodelk = &lk_local->int_inodelk; +- +- int_inodelk->flock.l_len = 0; +- int_inodelk->flock.l_start = 0; +- int_inodelk->domain = this->name; +- int_inodelk->flock.l_type = F_WRLCK; +- loc_copy(&local->int_inodelk.loc, loc); +- set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); +- local->inodelk_frame = lk_frame; +- +- STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, +- &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); +- return 0; + err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ /* ++ * This block is for handling failure in shard_inode_ctx_get_all(). ++ * Failures in the while-loop are handled within the loop. 
++ */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ post_mknod_handler(frame, this); ++ return 0; + } + +-int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { +- loc_t *loc = NULL; +- shard_local_t *local = NULL; ++int ++shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); + +- local = frame->local; ++int ++shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- if (local->fop == GF_FOP_UNLINK) +- loc = &local->loc; +- else if (local->fop == GF_FOP_RENAME) +- loc = &local->loc2; +- shard_acquire_inodelk(frame, this, loc); +- return 0; +-} ++ local = frame->local; + +-int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type); +-int shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- local = frame->local; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; +- } +- shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- return 0; + } + +-void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) { +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; ++int ++shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- priv = this->private; +- local = frame->local; ++ local = frame->local; + +- local->dot_shard_rm_loc.inode = +- inode_find(this->itable, priv->dot_shard_rm_gfid); +- if (!local->dot_shard_rm_loc.inode) { +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_pre_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- } else { +- local->post_res_handler = shard_post_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- } +-} +- +-int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, +- dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, loc); +- local->xflag = xflag; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- local->block_size = block_size; +- local->resolver_base_inode = loc->inode; +- local->fop = GF_FOP_UNLINK; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- +- local->resolve_not = _gf_true; +- shard_begin_rm_resolution(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); +- return 0; +-} ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +-int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) { +- shard_rename_cbk(frame, this); +- return 0; ++ if (!local->eexist_count) { ++ shard_readv_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_readv_handler); ++ } ++ return 0; + } + +-int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata) { +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; ++int ++shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } +- /* Set ctx->refresh to TRUE to force a lookup on disk when +- * shard_lookup_base_file() is called next to refresh the hard link +- * count in ctx. Note that this is applicable only to the case where +- * the rename dst is already existent and sharded. +- */ +- if ((local->dst_block_size) && (!local->cleanup_required)) +- shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); +- +- local->prebuf = *buf; +- local->preoldparent = *preoldparent; +- local->postoldparent = *postoldparent; +- local->prenewparent = *prenewparent; +- local->postnewparent = *postnewparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); ++ local = frame->local; + +- if (local->dst_block_size) { +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } ++ if (local->op_ret < 0) { ++ if (local->op_errno != ENOENT) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } else { ++ struct iovec vec = { ++ 0, ++ }; ++ ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, ++ &local->prebuf, local->iobref, NULL); ++ return 0; ++ } + } + +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- goto err; +- } +- if (local->cleanup_required) +- shard_start_background_deletion(this); +- } +- +- /* Now the base file of src, if sharded, is looked up to gather ia_size +- * and ia_blocks.*/ +- if (local->block_size) { +- local->tmp_loc.inode = inode_new(this->itable); +- gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); +- shard_lookup_base_file(frame, this, &local->tmp_loc, +- shard_post_rename_lookup_handler); +- } else { +- shard_rename_cbk(frame, this); +- } +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int shard_post_lookup_dst_base_file_handler(call_frame_t *frame, +- 
xlator_t *this) { +- shard_local_t *local = NULL; +- +- local = frame->local; ++ if (local->call_count) { ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); + return 0; +- } +- +- /* Save dst base file attributes into postbuf so the information is not +- * lost when it is overwritten after lookup on base file of src in +- * shard_lookup_base_file_cbk(). +- */ +- local->postbuf = local->prebuf; +- shard_rename_src_base_file(frame, this); +- return 0; +-} +- +-int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, +- loc_t *newloc, dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- uint64_t dst_block_size = 0; +- shard_local_t *local = NULL; +- +- if (IA_ISDIR(oldloc->inode->ia_type)) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (newloc->inode) +- ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size); +- +- /* The following stack_wind covers the case where: +- * a. the src file is not sharded and dst doesn't exist, OR +- * b. the src and dst both exist but are not sharded. +- */ +- if (((!block_size) && (!dst_block_size)) || +- frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->resolver_base_inode = newloc->inode; +- local->fop = GF_FOP_RENAME; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- local->block_size = block_size; +- local->dst_block_size = dst_block_size; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- local->resolve_not = _gf_true; +- +- /* The following if-block covers the case where the dst file exists +- * and is sharded. +- */ +- if (local->dst_block_size) { +- shard_begin_rm_resolution(frame, this); +- } else { +- /* The following block covers the case where the dst either doesn't +- * exist or is NOT sharded but the src is sharded. In this case, shard +- * xlator would go ahead and rename src to dst. Once done, it would also +- * lookup the base shard of src to get the ia_size and ia_blocks xattr +- * values. 
+- */ +- shard_rename_src_base_file(frame, this); +- } +- return 0; +- +-err: +- shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); +- return 0; + } + +-int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, +- struct iatt *stbuf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) { +- int ret = -1; +- shard_local_t *local = NULL; ++int ++shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) ++{ ++ int ret = 0; ++ struct iobuf *iobuf = NULL; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; + +- local = frame->local; ++ priv = this->private; ++ local = frame->local; + +- if (op_ret == -1) +- goto unwind; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); ++ if (local->offset >= local->prebuf.ia_size) { ++ /* If the read is being performed past the end of the file, ++ * unwind the FOP with 0 bytes read as status. ++ */ ++ struct iovec vec = { ++ 0, ++ }; + +-unwind: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, +- preparent, postparent, xdata); +- return 0; +-} ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); ++ if (!iobuf) ++ goto err; + +-int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; ++ vec.iov_base = iobuf->ptr; ++ vec.iov_len = 0; ++ local->iobref = iobref_new(); ++ iobref_add(local->iobref, iobuf); ++ iobuf_unref(iobuf); + +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, ++ local->iobref, NULL); ++ return 0; ++ } + +- frame->local = local; +- local->block_size = priv->block_size; ++ local->first_block = get_lowest_block(local->offset, local->block_size); + +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); +- } ++ local->total_size = local->req_size; + +- STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, +- xdata); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { +- /* To-Do: Handle open with O_TRUNC under locks */ +- SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); +- return 0; +-} +- +-int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- fd_t *fd, dict_t *xdata) { +- STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- return 0; +-} +- +-int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iovec *vector, +- int32_t count, struct iatt *stbuf, struct iobref *iobref, +- dict_t *xdata) { +- int i = 0; +- int call_count = 0; +- void *address = NULL; +- uint64_t block_num = 0; +- off_t off = 0; +- struct iovec vec = { +- 0, +- }; +- 
shard_local_t *local = NULL; +- fd_t *anon_fd = cookie; +- shard_inode_ctx_t *ctx = NULL; +- +- local = frame->local; +- +- /* If shard has already seen a failure here before, there is no point +- * in aggregating subsequent reads, so just go to out. +- */ +- if (local->op_ret < 0) +- goto out; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } ++ local->last_block = get_highest_block(local->offset, local->total_size, ++ local->block_size); + +- if (local->op_ret >= 0) +- local->op_ret += op_ret; ++ local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); ++ local->resolver_base_inode = local->loc.inode; + +- shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- block_num = ctx->block_num; +- +- if (block_num == local->first_block) { +- address = local->iobuf->ptr; +- } else { +- /* else +- * address to start writing to = beginning of buffer + +- * number of bytes until end of first block + +- * + block_size times number of blocks +- * between the current block and the first +- */ +- address = (char *)local->iobuf->ptr + +- (local->block_size - (local->offset % local->block_size)) + +- ((block_num - local->first_block - 1) * local->block_size); +- } ++ local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), ++ gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- for (i = 0; i < count; i++) { +- address = (char *)address + off; +- memcpy(address, vector[i].iov_base, vector[i].iov_len); +- off += vector[i].iov_len; +- } ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); ++ if (!iobuf) ++ goto err; + +-out: +- if (anon_fd) +- fd_unref(anon_fd); +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- } else { +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- vec.iov_base = local->iobuf->ptr; +- if (local->offset + local->req_size > local->prebuf.ia_size) +- local->total_size = local->prebuf.ia_size - local->offset; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1, +- &local->prebuf, local->iobref, local->xattr_rsp); +- return 0; +- } +- } +- +- return 0; +-} +- +-int shard_readv_do(call_frame_t *frame, xlator_t *this) { +- int i = 0; +- int call_count = 0; +- int last_block = 0; +- int cur_block = 0; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- size_t read_size = 0; +- size_t remaining_size = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = local->offset; +- cur_block = local->first_block; +- last_block = local->last_block; +- remaining_size = local->total_size; +- local->call_count = call_count = local->num_blocks; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (fd->flags & O_DIRECT) +- local->flags = O_DIRECT; +- +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0, +- NULL, NULL, NULL); +- goto next; +- } +- +- shard_offset = orig_offset % local->block_size; +- read_size = local->block_size - shard_offset; +- if (read_size > remaining_size) +- read_size = remaining_size; +- +- remaining_size -= read_size; +- +- if (cur_block == 0) { +- anon_fd = 
fd_ref(fd); +- } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL, +- 0, NULL, NULL, NULL); +- goto next; +- } ++ local->iobref = iobref_new(); ++ if (!local->iobref) { ++ iobuf_unref(iobuf); ++ goto err; + } + +- STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, anon_fd, read_size, +- shard_offset, local->flags, local->xattr_req); ++ if (iobref_add(local->iobref, iobuf) != 0) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- orig_offset += read_size; +- next: +- cur_block++; +- i++; +- call_count--; +- } +- return 0; +-} ++ memset(iobuf->ptr, 0, local->total_size); ++ iobuf_unref(iobuf); ++ local->iobuf = iobuf; + +-int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) { +- int shard_block_num = (long)cookie; +- int call_count = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- if (op_errno == EEXIST) { +- LOCK(&frame->lock); +- { local->eexist_count++; } +- UNLOCK(&frame->lock); ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = shard_init_internal_dir_loc(this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } else { +- local->op_ret = op_ret; +- local->op_errno = op_errno; ++ local->post_res_handler = shard_post_resolve_readv_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } +- gf_msg_debug(this->name, 0, "mknod of shard %d " +- "failed: %s", +- shard_block_num, strerror(op_errno)); +- goto done; +- } +- +- shard_link_block_inode(local, shard_block_num, inode, buf); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ return 0; ++} + +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- local->create_count = 0; +- local->post_mknod_handler(frame, this); +- } +- +- return 0; +-} +- +-int shard_common_resume_mknod( +- call_frame_t *frame, xlator_t *this, +- shard_post_mknod_fop_handler_t post_mknod_handler) { +- int i = 0; +- int shard_idx_iter = 0; +- int last_block = 0; +- int ret = 0; +- int call_count = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- mode_t mode = 0; +- char *bname = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t ctx_tmp = { +- 0, +- }; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- fd_t *fd = NULL; +- loc_t loc = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- +- local = frame->local; +- priv = this->private; +- fd = local->fd; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- call_count = local->call_count = local->create_count; +- local->post_mknod_handler = post_mknod_handler; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get inode " +- "ctx for %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- mode = 
st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); ++int ++shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata) ++{ ++ int ret = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- shard_idx_iter++; +- i++; +- continue; ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; + } + +- if (wind_failed) { +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, +- ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ /* block_size = 0 means that the file was created before ++ * sharding was enabled on the volume. ++ */ ++ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, ++ xdata); ++ return 0; + } + +- shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, +- sizeof(path)); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, +- ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- "on %s, base file gfid = %s", +- bname, uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- loc_wipe(&loc); +- dict_unref(xattr_req); +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, +- ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; ++ frame->local = local; + +- STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, &loc, mode, +- ctx_tmp.stat.ia_rdev, 0, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->offset = offset; ++ local->req_size = size; ++ local->flags = flags; ++ local->fop = GF_FOP_READ; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- next: +- shard_idx_iter++; +- i++; +- if (!--call_count) +- break; +- } ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + +- return 0; ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_readv_handler); ++ return 0; + err: +- /* +- * This block is for handling failure in shard_inode_ctx_get_all(). +- * Failures in the while-loop are handled within the loop. 
+- */ +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- post_mknod_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ return 0; + } + +-int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); +- +-int shard_post_lookup_shards_readv_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; ++int ++shard_common_inode_write_post_update_size_handler(call_frame_t *frame, ++ xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_common_inode_write_success_unwind(local->fop, frame, ++ local->written_size); ++ } + return 0; +- } +- +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } +- +- return 0; + } + +-int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; ++static gf_boolean_t ++shard_is_appending_write(shard_local_t *local) ++{ ++ if (local->fop != GF_FOP_WRITE) ++ return _gf_false; ++ if (local->flags & O_APPEND) ++ return _gf_true; ++ if (local->fd->flags & O_APPEND) ++ return _gf_true; ++ return _gf_false; ++} + +- local = frame->local; ++int ++__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) ++{ ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- if (!local->eexist_count) { +- shard_readv_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_readv_handler); +- } +- return 0; +-} ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +-int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; ++ if (shard_is_appending_write(local)) { ++ local->delta_size = local->total_size; ++ } else if (local->offset + local->total_size > ctx->stat.ia_size) { ++ local->delta_size = (local->offset + local->total_size) - ++ ctx->stat.ia_size; ++ } else { ++ local->delta_size = 0; ++ } ++ ctx->stat.ia_size += (local->delta_size); ++ local->postbuf = ctx->stat; + +- local = frame->local; ++ return 0; ++} + +- if (local->op_ret < 0) { +- if (local->op_errno != ENOENT) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } else { +- struct iovec vec = { +- 0, +- }; ++int ++shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) ++{ ++ int ret = -1; + +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, +- &local->prebuf, local->iobref, NULL); +- return 0; ++ LOCK(&inode->lock); ++ { ++ ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); + } +- } ++ UNLOCK(&inode->lock); + +- if (local->call_count) { +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_readv_handler); +- } else 
{ +- shard_readv_do(frame, this); +- } +- +- return 0; ++ return ret; + } + +-int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { +- int ret = 0; +- struct iobuf *iobuf = NULL; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; ++int ++shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *pre, ++ struct iatt *post, dict_t *xdata) ++{ ++ int call_count = 0; ++ fd_t *anon_fd = cookie; ++ shard_local_t *local = NULL; ++ glusterfs_fop_t fop = 0; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ local = frame->local; ++ fop = local->fop; + +- if (local->offset >= local->prebuf.ia_size) { +- /* If the read is being performed past the end of the file, +- * unwind the FOP with 0 bytes read as status. +- */ +- struct iovec vec = { +- 0, +- }; ++ LOCK(&frame->lock); ++ { ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } else { ++ local->written_size += op_ret; ++ GF_ATOMIC_ADD(local->delta_blocks, ++ post->ia_blocks - pre->ia_blocks); ++ local->delta_size += (post->ia_size - pre->ia_size); ++ shard_inode_ctx_set(local->fd->inode, this, post, 0, ++ SHARD_MASK_TIMES); ++ if (local->fd->inode != anon_fd->inode) ++ shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, ++ anon_fd->inode); ++ } ++ } ++ UNLOCK(&frame->lock); + +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); +- if (!iobuf) +- goto err; ++ if (anon_fd) ++ fd_unref(anon_fd); + +- vec.iov_base = iobuf->ptr; +- vec.iov_len = 0; +- local->iobref = iobref_new(); +- iobref_add(local->iobref, iobuf); +- iobuf_unref(iobuf); ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(fop, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); ++ local->hole_size = 0; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ shard_update_file_size( ++ frame, this, local->fd, NULL, ++ shard_common_inode_write_post_update_size_handler); ++ } ++ } + +- SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, +- local->iobref, NULL); + return 0; +- } ++} + +- local->first_block = get_lowest_block(local->offset, local->block_size); ++int ++shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vec, int count, off_t shard_offset, ++ size_t size) ++{ ++ shard_local_t *local = NULL; + +- local->total_size = local->req_size; ++ local = frame->local; + +- local->last_block = +- get_highest_block(local->offset, local->total_size, local->block_size); ++ switch (local->fop) { ++ case GF_FOP_WRITE: ++ STACK_WIND_COOKIE( ++ frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset, ++ local->flags, local->iobref, local->xattr_req); ++ break; ++ case GF_FOP_FALLOCATE: ++ STACK_WIND_COOKIE( ++ frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fallocate, fd, local->flags, ++ shard_offset, size, local->xattr_req); ++ break; ++ case GF_FOP_ZEROFILL: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->zerofill, fd, ++ shard_offset, size, local->xattr_req); ++ break; ++ 
case GF_FOP_DISCARD: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->discard, fd, ++ shard_offset, size, local->xattr_req); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", local->fop); ++ break; ++ } ++ return 0; ++} + +- local->num_blocks = local->last_block - local->first_block + 1; +- GF_ASSERT(local->num_blocks > 0); +- local->resolver_base_inode = local->loc.inode; ++int ++shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) ++{ ++ int i = 0; ++ int count = 0; ++ int call_count = 0; ++ int last_block = 0; ++ uint32_t cur_block = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ struct iovec *vec = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ gf_boolean_t odirect = _gf_false; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ off_t vec_offset = 0; ++ size_t remaining_size = 0; ++ size_t shard_write_size = 0; + +- local->inode_list = +- GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ remaining_size = local->total_size; ++ cur_block = local->first_block; ++ local->call_count = call_count = local->num_blocks; ++ last_block = local->last_block; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC ++ " into " ++ "dict: %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ local->call_count = 1; ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ return 0; ++ } + +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); +- if (!iobuf) +- goto err; ++ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) ++ odirect = _gf_true; + +- local->iobref = iobref_new(); +- if (!local->iobref) { +- iobuf_unref(iobuf); +- goto err; +- } ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } + +- if (iobref_add(local->iobref, iobuf) != 0) { +- iobuf_unref(iobuf); +- goto err; +- } ++ shard_offset = orig_offset % local->block_size; ++ shard_write_size = local->block_size - shard_offset; ++ if (shard_write_size > remaining_size) ++ shard_write_size = remaining_size; ++ ++ remaining_size -= shard_write_size; ++ ++ if (local->fop == GF_FOP_WRITE) { ++ count = iov_subset(local->vector, local->count, vec_offset, ++ vec_offset + shard_write_size, NULL); ++ ++ vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); ++ if (!vec) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ GF_FREE(vec); ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, ++ -1, ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } ++ count = iov_subset(local->vector, local->count, vec_offset, ++ vec_offset + shard_write_size, vec); ++ } + +- memset(iobuf->ptr, 0, local->total_size); +- iobuf_unref(iobuf); +- local->iobuf = iobuf; ++ if (cur_block == 0) { ++ anon_fd = fd_ref(fd); ++ } else { ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ GF_FREE(vec); ++ 
shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, ++ this, -1, ENOMEM, NULL, NULL, ++ NULL); ++ goto next; ++ } ++ ++ if (local->fop == GF_FOP_WRITE) { ++ if (odirect) ++ local->flags = O_DIRECT; ++ else ++ local->flags = GF_ANON_FD_FLAGS; ++ } ++ } + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = +- shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_readv_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); +- return 0; +-} +- +-int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, uint32_t flags, dict_t *xdata) { +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. +- */ +- STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = fd->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->offset = offset; +- local->req_size = size; +- local->flags = flags; +- local->fop = GF_FOP_READ; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_readv_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); +- return 0; ++ shard_common_inode_write_wind(frame, this, anon_fd, vec, count, ++ shard_offset, shard_write_size); ++ if (vec) ++ vec_offset += shard_write_size; ++ orig_offset += shard_write_size; ++ GF_FREE(vec); ++ vec = NULL; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; + } + +-int shard_common_inode_write_post_update_size_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; +- +- local = frame->local; ++int ++shard_common_inode_write_post_mknod_handler(call_frame_t *frame, ++ xlator_t *this); + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_common_inode_write_success_unwind(local->fop, frame, +- local->written_size); +- } +- return 0; +-} ++int ++shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, ++ xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +-static gf_boolean_t shard_is_appending_write(shard_local_t *local) { +- if (local->fop != GF_FOP_WRITE) +- return _gf_false; +- if (local->flags & O_APPEND) +- return _gf_true; +- if (local->fd->flags & O_APPEND) +- return _gf_true; +- return _gf_false; +-} ++ local = frame->local; + +-int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) { +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, ++ shard_common_inode_write_post_mknod_handler); ++ } else { ++ shard_common_inode_write_do(frame, this); ++ } + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ return 0; ++} + +- if (shard_is_appending_write(local)) { +- local->delta_size = local->total_size; +- } else if (local->offset + local->total_size > ctx->stat.ia_size) { +- local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size; +- } else { +- local->delta_size = 0; +- } +- ctx->stat.ia_size += (local->delta_size); +- local->postbuf = ctx->stat; ++int ++shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- return 0; +-} ++ local = frame->local; + +-int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) { +- int ret = -1; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- LOCK(&inode->lock); +- { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); } +- UNLOCK(&inode->lock); ++ if (!local->eexist_count) { ++ shard_common_inode_write_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards( ++ frame, this, local->loc.inode, ++ shard_common_inode_write_post_lookup_shards_handler); ++ } + +- return ret; ++ return 0; + } + +-int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, struct iatt *pre, +- 
struct iatt *post, dict_t *xdata) { +- int call_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- glusterfs_fop_t fop = 0; ++int ++shard_common_inode_write_post_resolve_handler(call_frame_t *frame, ++ xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- local = frame->local; +- fop = local->fop; ++ local = frame->local; + +- LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->written_size += op_ret; +- GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks); +- local->delta_size += (post->ia_size - pre->ia_size); +- shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES); +- if (local->fd->inode != anon_fd->inode) +- shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, +- anon_fd->inode); +- } +- } +- UNLOCK(&frame->lock); +- +- if (anon_fd) +- fd_unref(anon_fd); +- +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { +- shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno); ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ if (local->call_count) { ++ shard_common_lookup_shards( ++ frame, this, local->resolver_base_inode, ++ shard_common_inode_write_post_lookup_shards_handler); + } else { +- shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); +- local->hole_size = 0; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- shard_update_file_size(frame, this, local->fd, NULL, +- shard_common_inode_write_post_update_size_handler); ++ shard_common_inode_write_do(frame, this); + } +- } + +- return 0; ++ return 0; + } + +-int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vec, int count, +- off_t shard_offset, size_t size) { +- shard_local_t *local = NULL; ++int ++shard_common_inode_write_post_lookup_handler(call_frame_t *frame, ++ xlator_t *this) ++{ ++ shard_local_t *local = frame->local; ++ shard_priv_t *priv = this->private; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- local = frame->local; ++ local->postbuf = local->prebuf; ++ ++ /*Adjust offset to EOF so that correct shard is chosen for append*/ ++ if (shard_is_appending_write(local)) ++ local->offset = local->prebuf.ia_size; ++ ++ local->first_block = get_lowest_block(local->offset, local->block_size); ++ local->last_block = get_highest_block(local->offset, local->total_size, ++ local->block_size); ++ local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); ++ local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), ++ gf_shard_mt_inode_list); ++ if (!local->inode_list) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + +- switch (local->fop) { +- case GF_FOP_WRITE: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, +- vec, count, shard_offset, local->flags, local->iobref, +- local->xattr_req); +- break; +- case GF_FOP_FALLOCATE: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, +- local->flags, shard_offset, size, local->xattr_req); +- break; +- case GF_FOP_ZEROFILL: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), 
FIRST_CHILD(this)->fops->zerofill, fd, +- shard_offset, size, local->xattr_req); +- break; +- case GF_FOP_DISCARD: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, +- shard_offset, size, local->xattr_req); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", local->fop); +- break; +- } +- return 0; +-} +- +-int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) { +- int i = 0; +- int count = 0; +- int call_count = 0; +- int last_block = 0; +- uint32_t cur_block = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- struct iovec *vec = NULL; +- gf_boolean_t wind_failed = _gf_false; +- gf_boolean_t odirect = _gf_false; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- off_t vec_offset = 0; +- size_t remaining_size = 0; +- size_t shard_write_size = 0; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = local->offset; +- remaining_size = local->total_size; +- cur_block = local->first_block; +- local->call_count = call_count = local->num_blocks; +- last_block = local->last_block; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into " +- "dict: %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- local->call_count = 1; +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, +- NULL, NULL, NULL); +- return 0; +- } ++ gf_msg_trace(this->name, 0, ++ "%s: gfid=%s first_block=%" PRIu64 ++ " " ++ "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64 ++ " total_size=%zu flags=%" PRId32 "", ++ gf_fop_list[local->fop], ++ uuid_utoa(local->resolver_base_inode->gfid), ++ local->first_block, local->last_block, local->num_blocks, ++ local->offset, local->total_size, local->flags); + +- if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) +- odirect = _gf_true; ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, +- NULL, NULL, NULL); +- goto next; ++ if (!local->dot_shard_loc.inode) { ++ /*change handler*/ ++ shard_mkdir_internal_dir(frame, this, ++ shard_common_inode_write_post_resolve_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ /*change handler*/ ++ local->post_res_handler = shard_common_inode_write_post_resolve_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } ++ return 0; ++} + +- shard_offset = orig_offset % local->block_size; +- shard_write_size = local->block_size - shard_offset; +- if (shard_write_size > remaining_size) +- shard_write_size = remaining_size; ++int ++shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + +- remaining_size -= shard_write_size; ++ local = frame->local; + +- if (local->fop == GF_FOP_WRITE) { +- count = iov_subset(local->vector, local->count, vec_offset, +- vec_offset + shard_write_size, NULL); ++ 
SHARD_UNSET_ROOT_FS_ID(frame, local); + +- vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); +- if (!vec) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- GF_FREE(vec); +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- count = iov_subset(local->vector, local->count, vec_offset, +- vec_offset + shard_write_size, vec); ++ if (op_ret == -1) { ++ if (op_errno != EEXIST) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } else { ++ gf_msg_debug(this->name, 0, ++ "mkdir on %s failed " ++ "with EEXIST. Attempting lookup now", ++ shard_internal_dir_string(type)); ++ shard_lookup_internal_dir(frame, this, local->post_res_handler, ++ type); ++ return 0; ++ } + } + +- if (cur_block == 0) { +- anon_fd = fd_ref(fd); ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); + } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- GF_FREE(vec); +- shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- if (local->fop == GF_FOP_WRITE) { +- if (odirect) +- local->flags = O_DIRECT; +- else +- local->flags = GF_ANON_FD_FLAGS; +- } +- } +- +- shard_common_inode_write_wind(frame, this, anon_fd, vec, count, +- shard_offset, shard_write_size); +- if (vec) +- vec_offset += shard_write_size; +- orig_offset += shard_write_size; +- GF_FREE(vec); +- vec = NULL; +- next: +- cur_block++; +- i++; +- call_count--; +- } +- return 0; ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; ++unwind: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; + } + +-int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, +- xlator_t *this); ++int ++shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type) ++{ ++ int ret = -1; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xattr_req = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; + +-int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; ++ local = frame->local; ++ priv = this->private; + +- local = frame->local; ++ local->post_res_handler = handler; ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } + +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, +- shard_common_inode_write_post_mknod_handler); +- } else { +- shard_common_inode_write_do(frame, this); +- } ++ xattr_req = dict_new(); ++ if (!xattr_req) ++ goto err; + +- return 0; +-} ++ ret = shard_init_internal_dir_loc(this, local, 
type); ++ if (ret) ++ goto err; + +-int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid-req for %s", ++ shard_internal_dir_string(type)); ++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } + +- local = frame->local; ++ SHARD_SET_ROOT_FS_ID(frame, local); + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); ++ STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, ++ 0755, 0, xattr_req); ++ dict_unref(xattr_req); + return 0; +- } + +- if (!local->eexist_count) { +- shard_common_inode_write_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards( +- frame, this, local->loc.inode, +- shard_common_inode_write_post_lookup_shards_handler); +- } +- +- return 0; ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ if (free_gfid) ++ GF_FREE(gfid); ++ handler(frame, this); ++ return 0; + } + +-int shard_common_inode_write_post_resolve_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); ++int ++shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ /* To-Do: Wind flush on all shards of the file */ ++ SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); + return 0; +- } +- +- if (local->call_count) { +- shard_common_lookup_shards( +- frame, this, local->resolver_base_inode, +- shard_common_inode_write_post_lookup_shards_handler); +- } else { +- shard_common_inode_write_do(frame, this); +- } +- +- return 0; + } + +-int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, +- xlator_t *this) { +- shard_local_t *local = frame->local; +- shard_priv_t *priv = this->private; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); ++int ++shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) ++{ ++ STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; +- } +- +- local->postbuf = local->prebuf; ++} + +- /*Adjust offset to EOF so that correct shard is chosen for append*/ +- if (shard_is_appending_write(local)) +- local->offset = local->prebuf.ia_size; ++int ++__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) ++{ ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- local->first_block = get_lowest_block(local->offset, local->block_size); +- local->last_block = +- get_highest_block(local->offset, local->total_size, local->block_size); +- local->num_blocks = local->last_block - local->first_block + 1; +- GF_ASSERT(local->num_blocks > 0); +- local->inode_list = +- GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); +- if (!local->inode_list) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- gf_msg_trace( +- this->name, 0, "%s: gfid=%s 
first_block=%" PRIu64 " " +- "last_block=%" PRIu64 " num_blocks=%" PRIu64 +- " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", +- gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), +- local->first_block, local->last_block, local->num_blocks, local->offset, +- local->total_size, local->flags); ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ local->postbuf.ia_ctime = ctx->stat.ia_ctime; ++ local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; ++ local->postbuf.ia_atime = ctx->stat.ia_atime; ++ local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; ++ local->postbuf.ia_mtime = ctx->stat.ia_mtime; ++ local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; + +- if (!local->dot_shard_loc.inode) { +- /*change handler*/ +- shard_mkdir_internal_dir(frame, this, +- shard_common_inode_write_post_resolve_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- /*change handler*/ +- local->post_res_handler = shard_common_inode_write_post_resolve_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ return 0; + } + +-int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) { +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; +- +- local = frame->local; +- +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- +- if (op_ret == -1) { +- if (op_errno != EEXIST) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } else { +- gf_msg_debug(this->name, 0, "mkdir on %s failed " +- "with EEXIST. 
Attempting lookup now", +- shard_internal_dir_string(type)); +- shard_lookup_internal_dir(frame, this, local->post_res_handler, type); +- return 0; +- } +- } +- +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; +-unwind: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; +-} +- +-int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type) { +- int ret = -1; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xattr_req = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- +- local->post_res_handler = handler; +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } +- +- xattr_req = dict_new(); +- if (!xattr_req) +- goto err; +- +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; +- +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid-req for %s", shard_internal_dir_string(type)); +- goto err; +- } else { +- free_gfid = _gf_false; +- } +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, +- 0755, 0, xattr_req); +- dict_unref(xattr_req); +- return 0; ++int ++shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) ++{ ++ int ret = 0; + +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- if (free_gfid) +- GF_FREE(gfid); +- handler(frame, this); +- return 0; +-} ++ LOCK(&inode->lock); ++ { ++ ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); ++ } ++ UNLOCK(&inode->lock); + +-int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) { +- /* To-Do: Wind flush on all shards of the file */ +- SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); +- return 0; ++ return ret; + } + +-int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { +- STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->flush, fd, xdata); +- return 0; +-} ++int ++shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) ++{ ++ int call_count = 0; ++ uint64_t fsync_count = 0; ++ fd_t *anon_fd = cookie; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ inode_t *base_inode = NULL; ++ gf_boolean_t unref_shard_inode = _gf_false; ++ ++ local = frame->local; ++ base_inode = local->fd->inode; + +-int 
__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) { +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++ if (local->op_ret < 0) ++ goto out; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ LOCK(&frame->lock); ++ { ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ UNLOCK(&frame->lock); ++ goto out; ++ } ++ shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, ++ SHARD_MASK_TIMES); ++ } ++ UNLOCK(&frame->lock); ++ fd_ctx_get(anon_fd, this, &fsync_count); ++out: ++ if (anon_fd && (base_inode != anon_fd->inode)) { ++ LOCK(&base_inode->lock); ++ LOCK(&anon_fd->inode->lock); ++ { ++ __shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (op_ret == 0) ++ ctx->fsync_needed -= fsync_count; ++ GF_ASSERT(ctx->fsync_needed >= 0); ++ if (ctx->fsync_needed != 0) { ++ list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); ++ base_ictx->fsync_count++; ++ } else { ++ unref_shard_inode = _gf_true; ++ } ++ } ++ UNLOCK(&anon_fd->inode->lock); ++ UNLOCK(&base_inode->lock); ++ } + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ if (unref_shard_inode) ++ inode_unref(anon_fd->inode); ++ if (anon_fd) ++ fd_unref(anon_fd); + +- local->postbuf.ia_ctime = ctx->stat.ia_ctime; +- local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; +- local->postbuf.ia_atime = ctx->stat.ia_atime; +- local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; +- local->postbuf.ia_mtime = ctx->stat.ia_mtime; +- local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; ++ call_count = shard_call_count_return(frame); ++ if (call_count != 0) ++ return 0; + +- return 0; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_get_timestamps_from_inode_ctx(local, base_inode, this); ++ SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } ++ return 0; + } + +-int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) { +- int ret = 0; ++int ++shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) ++{ ++ int ret = 0; ++ int call_count = 0; ++ int fsync_count = 0; ++ fd_t *anon_fd = NULL; ++ inode_t *base_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *iter = NULL; ++ struct list_head copy = { ++ 0, ++ }; ++ shard_inode_ctx_t *tmp = NULL; + +- LOCK(&inode->lock); +- { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); } +- UNLOCK(&inode->lock); ++ local = frame->local; ++ base_inode = local->fd->inode; ++ local->postbuf = local->prebuf; ++ INIT_LIST_HEAD(©); + +- return ret; +-} ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +-int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) { +- int call_count = 0; +- uint64_t fsync_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- inode_t *base_inode = NULL; +- gf_boolean_t unref_shard_inode = _gf_false; +- +- local = frame->local; +- base_inode = local->fd->inode; +- +- if (local->op_ret < 0) +- goto out; +- +- 
LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- UNLOCK(&frame->lock); +- goto out; +- } +- shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES); +- } +- UNLOCK(&frame->lock); +- fd_ctx_get(anon_fd, this, &fsync_count); +-out: +- if (anon_fd && (base_inode != anon_fd->inode)) { + LOCK(&base_inode->lock); +- LOCK(&anon_fd->inode->lock); + { +- __shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- __shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (op_ret == 0) +- ctx->fsync_needed -= fsync_count; +- GF_ASSERT(ctx->fsync_needed >= 0); +- if (ctx->fsync_needed != 0) { +- list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); +- base_ictx->fsync_count++; +- } else { +- unref_shard_inode = _gf_true; +- } +- } +- UNLOCK(&anon_fd->inode->lock); ++ __shard_inode_ctx_get(base_inode, this, &ctx); ++ list_splice_init(&ctx->to_fsync_list, ©); ++ call_count = ctx->fsync_count; ++ ctx->fsync_count = 0; ++ } + UNLOCK(&base_inode->lock); +- } +- +- if (unref_shard_inode) +- inode_unref(anon_fd->inode); +- if (anon_fd) +- fd_unref(anon_fd); +- +- call_count = shard_call_count_return(frame); +- if (call_count != 0) +- return 0; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_get_timestamps_from_inode_ctx(local, base_inode, this); +- SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } +- return 0; +-} +- +-int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) { +- int ret = 0; +- int call_count = 0; +- int fsync_count = 0; +- fd_t *anon_fd = NULL; +- inode_t *base_inode = NULL; +- shard_local_t *local = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *iter = NULL; +- struct list_head copy = { +- 0, +- }; +- shard_inode_ctx_t *tmp = NULL; +- +- local = frame->local; +- base_inode = local->fd->inode; +- local->postbuf = local->prebuf; +- INIT_LIST_HEAD(©); +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- LOCK(&base_inode->lock); +- { +- __shard_inode_ctx_get(base_inode, this, &ctx); +- list_splice_init(&ctx->to_fsync_list, ©); +- call_count = ctx->fsync_count; +- ctx->fsync_count = 0; +- } +- UNLOCK(&base_inode->lock); +- +- local->call_count = ++call_count; +- +- /* Send fsync() on the base shard first */ +- anon_fd = fd_ref(local->fd); +- STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, +- local->xattr_req); +- call_count--; +- anon_fd = NULL; +- +- list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) { +- list_del_init(&iter->to_fsync_list); +- fsync_count = 0; +- shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); +- GF_ASSERT(fsync_count > 0); +- anon_fd = fd_anonymous(iter->inode); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "anon fd to fsync shard"); +- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, +- NULL, NULL, NULL); +- continue; +- } ++ local->call_count = ++call_count; + +- ret = fd_ctx_set(anon_fd, this, fsync_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, +- "Failed to set fd " +- "ctx for shard inode gfid=%s", +- 
uuid_utoa(iter->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, +- NULL, NULL, NULL); +- continue; +- } ++ /* Send fsync() on the base shard first */ ++ anon_fd = fd_ref(local->fd); + STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, + local->xattr_req); + call_count--; +- } ++ anon_fd = NULL; + +- return 0; ++ list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) ++ { ++ list_del_init(&iter->to_fsync_list); ++ fsync_count = 0; ++ shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); ++ GF_ASSERT(fsync_count > 0); ++ anon_fd = fd_anonymous(iter->inode); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, ++ SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ "anon fd to fsync shard"); ++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ continue; ++ } ++ ++ ret = fd_ctx_set(anon_fd, this, fsync_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, ++ "Failed to set fd " ++ "ctx for shard inode gfid=%s", ++ uuid_utoa(iter->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ continue; ++ } ++ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, ++ anon_fd, local->datasync, local->xattr_req); ++ call_count--; ++ } ++ ++ return 0; + } + +-int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, +- dict_t *xdata) { +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int ++shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, ++ dict_t *xdata) ++{ ++ int ret = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); +- return 0; +- } ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); ++ return 0; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->fd = fd_ref(fd); +- local->fop = GF_FOP_FSYNC; +- local->datasync = datasync; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local->fd = fd_ref(fd); ++ local->fop = GF_FOP_FSYNC; ++ local->datasync = datasync; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_fsync_handler); +- return 0; ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_fsync_handler); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); ++ return 0; + } + +-int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, +- gf_dirent_t *orig_entries, dict_t *xdata) { +- gf_dirent_t *entry = NULL; +- gf_dirent_t *tmp = NULL; +- shard_local_t *local = NULL; ++int ++shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, gf_dirent_t *orig_entries, ++ dict_t *xdata) ++{ ++ gf_dirent_t *entry = NULL; ++ gf_dirent_t *tmp = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret < 0) +- goto unwind; ++ if (op_ret < 0) ++ goto unwind; + +- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { +- list_del_init(&entry->list); +- list_add_tail(&entry->list, &local->entries_head.list); ++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) ++ { ++ list_del_init(&entry->list); ++ list_add_tail(&entry->list, &local->entries_head.list); + +- if (!entry->dict) +- continue; ++ if (!entry->dict) ++ continue; + +- if (IA_ISDIR(entry->d_stat.ia_type)) +- continue; ++ if (IA_ISDIR(entry->d_stat.ia_type)) ++ continue; + +- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) +- shard_modify_size_and_block_count(&entry->d_stat, entry->dict); +- if (!entry->inode) +- continue; ++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) ++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict); ++ if (!entry->inode) ++ continue; + +- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); +- } +- local->op_ret += op_ret; ++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); ++ } ++ local->op_ret += op_ret; + + unwind: +- if (local->fop == GF_FOP_READDIR) +- SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, +- &local->entries_head, xdata); +- else +- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, +- xdata); +- return 0; ++ if (local->fop == GF_FOP_READDIR) ++ SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, ++ &local->entries_head, xdata); ++ else ++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, ++ &local->entries_head, xdata); ++ return 0; + } + +-int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- gf_dirent_t *orig_entries, dict_t *xdata) { +- fd_t *fd = NULL; +- gf_dirent_t *entry = NULL; +- gf_dirent_t *tmp = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t last_entry = _gf_false; ++int32_t ++shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, ++ dict_t *xdata) ++{ ++ fd_t *fd = NULL; ++ 
gf_dirent_t *entry = NULL; ++ gf_dirent_t *tmp = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t last_entry = _gf_false; + +- local = frame->local; +- fd = local->fd; ++ local = frame->local; ++ fd = local->fd; + +- if (op_ret < 0) +- goto unwind; ++ if (op_ret < 0) ++ goto unwind; + +- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { +- if (last_entry) +- last_entry = _gf_false; ++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) ++ { ++ if (last_entry) ++ last_entry = _gf_false; ++ ++ if (__is_root_gfid(fd->inode->gfid) && ++ !(strcmp(entry->d_name, GF_SHARD_DIR))) { ++ local->offset = entry->d_off; ++ op_ret--; ++ last_entry = _gf_true; ++ continue; ++ } + +- if (__is_root_gfid(fd->inode->gfid) && +- !(strcmp(entry->d_name, GF_SHARD_DIR))) { +- local->offset = entry->d_off; +- op_ret--; +- last_entry = _gf_true; +- continue; +- } ++ list_del_init(&entry->list); ++ list_add_tail(&entry->list, &local->entries_head.list); + +- list_del_init(&entry->list); +- list_add_tail(&entry->list, &local->entries_head.list); ++ if (!entry->dict) ++ continue; + +- if (!entry->dict) +- continue; ++ if (IA_ISDIR(entry->d_stat.ia_type)) ++ continue; + +- if (IA_ISDIR(entry->d_stat.ia_type)) +- continue; ++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + +- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(&entry->d_stat, entry->dict); ++ if (!entry->inode) ++ continue; + +- if (!entry->inode) +- continue; ++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); ++ } + +- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); +- } ++ local->op_ret = op_ret; + +- local->op_ret = op_ret; ++ if (last_entry) { ++ if (local->fop == GF_FOP_READDIR) ++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, ++ local->fd, local->readdir_size, local->offset, ++ local->xattr_req); ++ else ++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, ++ local->fd, local->readdir_size, local->offset, ++ local->xattr_req); ++ return 0; ++ } + +- if (last_entry) { ++unwind: + if (local->fop == GF_FOP_READDIR) +- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdir, local->fd, +- local->readdir_size, local->offset, local->xattr_req); ++ SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, ++ &local->entries_head, xdata); + else +- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdirp, local->fd, +- local->readdir_size, local->offset, local->xattr_req); ++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, ++ &local->entries_head, xdata); + return 0; +- } ++} + +-unwind: +- if (local->fop == GF_FOP_READDIR) +- SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head, +- xdata); +- else +- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, +- xdata); +- return 0; +-} +- +-int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, int whichop, dict_t *xdata) { +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- goto err; +- } +- +- frame->local = local; +- +- local->fd = fd_ref(fd); +- local->fop 
= whichop; +- local->readdir_size = size; +- INIT_LIST_HEAD(&local->entries_head.list); +- local->list_inited = _gf_true; +- +- if (whichop == GF_FOP_READDIR) { +- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); +- } else { +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_log(this->name, GF_LOG_WARNING, +- "Failed to set " +- "dict value: key:%s, directory gfid=%s", +- GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); +- goto err; ++int ++shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, int whichop, dict_t *xdata) ++{ ++ int ret = 0; ++ shard_local_t *local = NULL; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ goto err; + } + +- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, +- local->xattr_req); +- } ++ frame->local = local; ++ ++ local->fd = fd_ref(fd); ++ local->fop = whichop; ++ local->readdir_size = size; ++ INIT_LIST_HEAD(&local->entries_head.list); ++ local->list_inited = _gf_true; ++ ++ if (whichop == GF_FOP_READDIR) { ++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); ++ } else { ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_log(this->name, GF_LOG_WARNING, ++ "Failed to set " ++ "dict value: key:%s, directory gfid=%s", ++ GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, ++ local->xattr_req); ++ } + +- return 0; ++ return 0; + + err: +- STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); +- return 0; ++ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); ++ return 0; + } + +-int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, +- size_t size, off_t offset, dict_t *xdata) { +- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); +- return 0; ++int32_t ++shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, dict_t *xdata) ++{ ++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); ++ return 0; + } + +-int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, +- size_t size, off_t offset, dict_t *xdata) { +- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); +- return 0; ++int32_t ++shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, dict_t *xdata) ++{ ++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); ++ return 0; + } + + int32_t +@@ -6037,77 +6450,86 @@ shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + return 0; + } + +-int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) { +- if (op_ret < 0) +- goto unwind; ++int32_t ++shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ 
if (op_ret < 0) ++ goto unwind; + +- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); ++ } + + unwind: +- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); +- return 0; ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); ++ return 0; + } + +-int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- const char *name, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t ++shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, ++ dict_t *xdata) ++{ ++ int op_errno = EINVAL; + +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && +- (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { +- op_errno = ENODATA; +- goto out; +- } ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && ++ (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { ++ op_errno = ENODATA; ++ goto out; ++ } + +- STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); +- return 0; ++ STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) { +- if (op_ret < 0) +- goto unwind; ++int32_t ++shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ if (op_ret < 0) ++ goto unwind; + +- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); ++ } + + unwind: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); +- return 0; ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); ++ return 0; + } + +-int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- const char *name, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t ++shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) ++{ ++ int op_errno = EINVAL; + +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && +- (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { +- op_errno = ENODATA; +- goto out; +- } ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && ++ (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { ++ op_errno = ENODATA; ++ goto out; ++ } + +- STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); +- return 0; ++ STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); ++ 
return 0; + } + +-int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, dict_t *xdata) { ++int32_t ++shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ + int ret = -1; + shard_local_t *local = NULL; + +@@ -6141,8 +6563,9 @@ err: + return 0; + } + +-int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame, +- xlator_t *this) { ++int32_t ++shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this) ++{ + shard_local_t *local = NULL; + + local = frame->local; +@@ -6164,9 +6587,11 @@ int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame, + return 0; + } + +-int32_t shard_common_set_xattr(call_frame_t *frame, xlator_t *this, +- glusterfs_fop_t fop, loc_t *loc, fd_t *fd, +- dict_t *dict, int32_t flags, dict_t *xdata) { ++int32_t ++shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, ++ loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags, ++ dict_t *xdata) ++{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; +@@ -6249,489 +6674,531 @@ err: + return 0; + } + +-int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- dict_t *dict, int32_t flags, dict_t *xdata) { ++int32_t ++shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, ++ int32_t flags, dict_t *xdata) ++{ + shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, + xdata); + return 0; + } + +-int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- dict_t *dict, int32_t flags, dict_t *xdata) { ++int32_t ++shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, ++ int32_t flags, dict_t *xdata) ++{ + shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, + xdata); + return 0; + } + +-int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) { +- shard_local_t *local = NULL; ++int ++shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->fop == GF_FOP_SETATTR) { +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, +- SHARD_LOOKUP_MASK); +- SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } else if (local->fop == GF_FOP_FSETATTR) { +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, +- SHARD_LOOKUP_MASK); +- SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } ++ if (local->fop == GF_FOP_SETATTR) { ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, ++ SHARD_LOOKUP_MASK); ++ SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } else if (local->fop == GF_FOP_FSETATTR) { ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, ++ SHARD_LOOKUP_MASK); ++ SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } + +- return 0; ++ return 0; + } + +-int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) { +- 
shard_local_t *local = NULL; ++int ++shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) ++{ ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- local->prebuf = *prebuf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- local->postbuf = *postbuf; +- local->postbuf.ia_size = local->prebuf.ia_size; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ local->prebuf = *prebuf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ local->postbuf = *postbuf; ++ local->postbuf.ia_size = local->prebuf.ia_size; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; + + unwind: +- local->handler(frame, this); +- return 0; ++ local->handler(frame, this); ++ return 0; + } + +-int shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- struct iatt *stbuf, int32_t valid, dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int ++shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); +- return 0; +- } ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { ++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ return 0; ++ } + +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); +- return 0; +- } ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->handler = shard_post_setattr_handler; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_SETATTR; +- loc_copy(&local->loc, loc); ++ local->handler = shard_post_setattr_handler; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_SETATTR; ++ loc_copy(&local->loc, loc); + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); + +- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, +- local->xattr_req); +- return 0; ++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, ++ local->xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); ++ return 0; + } + +-int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iatt *stbuf, int32_t valid, dict_t *xdata) { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int ++shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); +- return 0; +- } ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ return 0; ++ } + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); +- return 0; +- } ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ return 0; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->handler = shard_post_setattr_handler; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FSETATTR; +- local->fd = fd_ref(fd); ++ local->handler = shard_post_setattr_handler; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FSETATTR; ++ local->fd = fd_ref(fd); + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + +- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, +- local->xattr_req); +- return 0; ++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, ++ local->xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); +- return 0; +-} +- +-int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, +- glusterfs_fop_t fop, fd_t *fd, +- struct iovec *vector, int32_t count, +- off_t offset, uint32_t flags, size_t len, +- struct iobref *iobref, dict_t *xdata) { +- int ret = 0; +- int i = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto out; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. +- */ +- switch (fop) { +- case GF_FOP_WRITE: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, +- fd, vector, count, offset, flags, iobref, xdata); +- break; +- case GF_FOP_FALLOCATE: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fallocate, fd, flags, offset, +- len, xdata); +- break; +- case GF_FOP_ZEROFILL: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->zerofill, fd, offset, len, +- xdata); +- break; +- case GF_FOP_DISCARD: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +- } +- +- if (!this->itable) +- this->itable = fd->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto out; +- +- frame->local = local; +- +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto out; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto out; +- +- if (vector) { +- local->vector = iov_dup(vector, count); +- if (!local->vector) +- goto out; +- for (i = 0; i < count; i++) +- local->total_size += vector[i].iov_len; +- local->count = count; +- } else { +- local->total_size = len; +- } +- +- local->fop = fop; +- local->offset = offset; +- local->flags = flags; +- if (iobref) +- local->iobref = iobref_ref(iobref); +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->resolver_base_inode = local->fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_common_inode_write_post_lookup_handler); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, ++ glusterfs_fop_t fop, fd_t *fd, ++ struct iovec *vector, int32_t count, ++ off_t offset, uint32_t flags, size_t len, ++ struct iobref *iobref, dict_t *xdata) ++{ ++ int ret = 0; ++ int i = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto out; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ /* block_size = 0 means that the file was created before ++ * sharding was enabled on the volume. ++ */ ++ switch (fop) { ++ case GF_FOP_WRITE: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->writev, fd, vector, ++ count, offset, flags, iobref, xdata); ++ break; ++ case GF_FOP_FALLOCATE: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fallocate, fd, flags, ++ offset, len, xdata); ++ break; ++ case GF_FOP_ZEROFILL: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->zerofill, fd, offset, ++ len, xdata); ++ break; ++ case GF_FOP_DISCARD: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->discard, fd, offset, ++ len, xdata); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto out; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto out; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto out; ++ ++ if (vector) { ++ local->vector = iov_dup(vector, count); ++ if (!local->vector) ++ goto out; ++ for (i = 0; i < count; i++) ++ local->total_size += vector[i].iov_len; ++ local->count = count; ++ } else { ++ local->total_size = len; ++ } ++ ++ local->fop = fop; ++ local->offset = offset; ++ local->flags = flags; ++ if (iobref) ++ local->iobref = iobref_ref(iobref); ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->resolver_base_inode = local->fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_common_inode_write_post_lookup_handler); ++ return 0; + out: +- shard_common_failure_unwind(fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(fop, frame, -1, ENOMEM); ++ return 0; + } + +-int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vector, int32_t count, off_t offset, +- uint32_t flags, struct iobref *iobref, dict_t *xdata) { +- shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, +- offset, flags, 0, iobref, xdata); +- return 0; ++int ++shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vector, int32_t count, off_t offset, uint32_t flags, ++ struct iobref *iobref, dict_t *xdata) ++{ ++ shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, ++ offset, flags, 0, iobref, xdata); ++ return 0; + } + +-int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, +- int32_t keep_size, off_t offset, size_t len, +- dict_t *xdata) { +- if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && +- (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) +- goto out; ++int ++shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ int32_t keep_size, off_t offset, size_t len, dict_t *xdata) ++{ ++ if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && ++ (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) ++ goto out; + +- shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, +- offset, keep_size, len, NULL, xdata); +- return 0; ++ shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, ++ offset, keep_size, len, NULL, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); ++ return 0; + } + +-int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- off_t len, dict_t *xdata) { +- shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int ++shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ off_t len, dict_t *xdata) ++{ ++ shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; + } + +-int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- size_t len, dict_t *xdata) { +- shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int ++shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ size_t len, dict_t *xdata) ++{ ++ shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; 
+ } + +-int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata) { +- /* TBD */ +- gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, +- "seek called on %s.", uuid_utoa(fd->inode->gfid)); +- shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); +- return 0; ++int32_t ++shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ gf_seek_what_t what, dict_t *xdata) ++{ ++ /* TBD */ ++ gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, ++ "seek called on %s.", uuid_utoa(fd->inode->gfid)); ++ shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); ++ return 0; + } + +-int32_t mem_acct_init(xlator_t *this) { +- int ret = -1; ++int32_t ++mem_acct_init(xlator_t *this) ++{ ++ int ret = -1; + +- if (!this) +- return ret; ++ if (!this) ++ return ret; + +- ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); ++ ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); + +- if (ret != 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, +- "Memory accounting init" +- "failed"); +- return ret; +- } ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, ++ "Memory accounting init" ++ "failed"); ++ return ret; ++ } + +- return ret; ++ return ret; + } + +-int init(xlator_t *this) { +- int ret = -1; +- shard_priv_t *priv = NULL; ++int ++init(xlator_t *this) ++{ ++ int ret = -1; ++ shard_priv_t *priv = NULL; ++ ++ if (!this) { ++ gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, ++ "this is NULL. init() failed"); ++ return -1; ++ } + +- if (!this) { +- gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, +- "this is NULL. init() failed"); +- return -1; +- } +- +- if (!this->parents) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "Dangling volume. Check volfile"); +- goto out; +- } +- +- if (!this->children || this->children->next) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "shard not configured with exactly one sub-volume. " +- "Check volfile"); +- goto out; +- } +- +- priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); +- if (!priv) +- goto out; +- +- GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); +- +- GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); +- +- GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); +- +- this->local_pool = mem_pool_new(shard_local_t, 128); +- if (!this->local_pool) { +- ret = -1; +- goto out; +- } +- gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); +- gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); +- +- this->private = priv; +- LOCK_INIT(&priv->lock); +- INIT_LIST_HEAD(&priv->ilist_head); +- ret = 0; ++ if (!this->parents) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "Dangling volume. Check volfile"); ++ goto out; ++ } ++ ++ if (!this->children || this->children->next) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "shard not configured with exactly one sub-volume. 
" ++ "Check volfile"); ++ goto out; ++ } ++ ++ priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); ++ if (!priv) ++ goto out; ++ ++ GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); ++ ++ GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); ++ ++ GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); ++ ++ this->local_pool = mem_pool_new(shard_local_t, 128); ++ if (!this->local_pool) { ++ ret = -1; ++ goto out; ++ } ++ gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); ++ gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); ++ ++ this->private = priv; ++ LOCK_INIT(&priv->lock); ++ INIT_LIST_HEAD(&priv->ilist_head); ++ ret = 0; + out: +- if (ret) { +- GF_FREE(priv); +- mem_pool_destroy(this->local_pool); +- } ++ if (ret) { ++ GF_FREE(priv); ++ mem_pool_destroy(this->local_pool); ++ } + +- return ret; ++ return ret; + } + +-void fini(xlator_t *this) { +- shard_priv_t *priv = NULL; ++void ++fini(xlator_t *this) ++{ ++ shard_priv_t *priv = NULL; + +- GF_VALIDATE_OR_GOTO("shard", this, out); ++ GF_VALIDATE_OR_GOTO("shard", this, out); + +- mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + +- priv = this->private; +- if (!priv) +- goto out; ++ priv = this->private; ++ if (!priv) ++ goto out; + +- this->private = NULL; +- LOCK_DESTROY(&priv->lock); +- GF_FREE(priv); ++ this->private = NULL; ++ LOCK_DESTROY(&priv->lock); ++ GF_FREE(priv); + + out: +- return; ++ return; + } + +-int reconfigure(xlator_t *this, dict_t *options) { +- int ret = -1; +- shard_priv_t *priv = NULL; ++int ++reconfigure(xlator_t *this, dict_t *options) ++{ ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- priv = this->private; ++ priv = this->private; + +- GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); ++ GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); + +- GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32, +- out); +- ret = 0; ++ GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, ++ uint32, out); ++ ret = 0; + + out: +- return ret; ++ return ret; + } + +-int shard_forget(xlator_t *this, inode_t *inode) { +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++int ++shard_forget(xlator_t *this, inode_t *inode) ++{ ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; + +- priv = this->private; +- if (!priv) +- return 0; ++ priv = this->private; ++ if (!priv) ++ return 0; + +- inode_ctx_del(inode, this, &ctx_uint); +- if (!ctx_uint) +- return 0; ++ inode_ctx_del(inode, this, &ctx_uint); ++ if (!ctx_uint) ++ return 0; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- /* When LRU limit reaches inode will be forcefully removed from the +- * table, inode needs to be removed from LRU of shard as well. +- */ +- if (!list_empty(&ctx->ilist)) { +- LOCK(&priv->lock); +- { +- list_del_init(&ctx->ilist); +- priv->inode_count--; ++ /* When LRU limit reaches inode will be forcefully removed from the ++ * table, inode needs to be removed from LRU of shard as well. 
++ */ ++ if (!list_empty(&ctx->ilist)) { ++ LOCK(&priv->lock); ++ { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; ++ } ++ UNLOCK(&priv->lock); + } +- UNLOCK(&priv->lock); +- } +- GF_FREE(ctx); ++ GF_FREE(ctx); + +- return 0; ++ return 0; + } + +-int shard_release(xlator_t *this, fd_t *fd) { +- /* TBD */ +- return 0; ++int ++shard_release(xlator_t *this, fd_t *fd) ++{ ++ /* TBD */ ++ return 0; + } + +-int shard_priv_dump(xlator_t *this) { +- shard_priv_t *priv = NULL; +- char key_prefix[GF_DUMP_MAX_BUF_LEN] = { +- 0, +- }; +- char *str = NULL; ++int ++shard_priv_dump(xlator_t *this) ++{ ++ shard_priv_t *priv = NULL; ++ char key_prefix[GF_DUMP_MAX_BUF_LEN] = { ++ 0, ++ }; ++ char *str = NULL; + +- priv = this->private; ++ priv = this->private; + +- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section("%s", key_prefix); +- str = gf_uint64_2human_readable(priv->block_size); +- gf_proc_dump_write("shard-block-size", "%s", str); +- gf_proc_dump_write("inode-count", "%d", priv->inode_count); +- gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); +- gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); ++ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); ++ gf_proc_dump_add_section("%s", key_prefix); ++ str = gf_uint64_2human_readable(priv->block_size); ++ gf_proc_dump_write("shard-block-size", "%s", str); ++ gf_proc_dump_write("inode-count", "%d", priv->inode_count); ++ gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); ++ gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); + +- GF_FREE(str); ++ GF_FREE(str); + +- return 0; ++ return 0; + } + +-int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; } ++int ++shard_releasedir(xlator_t *this, fd_t *fd) ++{ ++ return 0; ++} + + struct xlator_fops fops = { + .lookup = shard_lookup, +-- +1.8.3.1 + diff --git a/SOURCES/0563-features-shard-Use-fd-lookup-post-file-open.patch b/SOURCES/0563-features-shard-Use-fd-lookup-post-file-open.patch new file mode 100644 index 0000000..c680f92 --- /dev/null +++ b/SOURCES/0563-features-shard-Use-fd-lookup-post-file-open.patch @@ -0,0 +1,318 @@ +From a19fa252942938a308ffa655fca3814d0660c6e2 Mon Sep 17 00:00:00 2001 +From: Vinayakswami Hariharmath +Date: Wed, 3 Jun 2020 18:58:56 +0530 +Subject: [PATCH 563/584] features/shard: Use fd lookup post file open + +Issue: +When a process has the open fd and the same file is +unlinked in middle of the operations, then file based +lookup fails with ENOENT or stale file + +Solution: +When the file already open and fd is available, use fstat +to get the file attributes + +Backport of: +> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24528/ +> Change-Id: I0e83aee9f11b616dcfe13769ebfcda6742e4e0f4 +> Fixes: #1281 +> Signed-off-by: Vinayakswami Hariharmath + +BUG: 1925425 +Change-Id: I0e83aee9f11b616dcfe13769ebfcda6742e4e0f4 +Signed-off-by: Vinayakswami Hariharmath +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244957 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/shard/issue-1281.t | 34 +++++++++++ + xlators/features/shard/src/shard.c | 119 +++++++++++++++++++++++-------------- + 2 files changed, 110 insertions(+), 43 deletions(-) + create mode 100644 tests/bugs/shard/issue-1281.t + +diff --git a/tests/bugs/shard/issue-1281.t b/tests/bugs/shard/issue-1281.t +new file mode 100644 +index 0000000..9704caa +--- /dev/null ++++ b/tests/bugs/shard/issue-1281.t +@@ -0,0 +1,34 
@@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++#Open a file and store descriptor in fd = 5 ++exec 5>$M0/foo ++ ++#Unlink the same file which is opened in prev step ++TEST unlink $M0/foo ++ ++#Write something on the file using the open fd = 5 ++echo "issue-1281" >&5 ++ ++#Write on the descriptor should be succesful ++EXPECT 0 echo $? ++ ++#Close the fd = 5 ++exec 5>&- ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index c5cc224..2ba4528 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1653,26 +1653,24 @@ err: + } + + int +-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) ++shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this, ++ inode_t *inode, int32_t op_ret, ++ int32_t op_errno, struct iatt *buf, ++ dict_t *xdata) + { + int ret = -1; + int32_t mask = SHARD_INODE_WRITE_MASK; +- shard_local_t *local = NULL; ++ shard_local_t *local = frame->local; + shard_inode_ctx_t ctx = { + 0, + }; + +- local = frame->local; +- + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_BASE_FILE_LOOKUP_FAILED, + "Lookup on base file" + " failed : %s", +- loc_gfid_utoa(&(local->loc))); ++ uuid_utoa(inode->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; +@@ -1706,18 +1704,57 @@ unwind: + } + + int +-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, +- shard_post_fop_handler_t handler) ++shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ dict_t *xdata) ++{ ++ shard_local_t *local = frame->local; ++ ++ shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret, ++ op_errno, buf, xdata); ++ return 0; ++} ++ ++int ++shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ /* In case of op_ret < 0, inode passed to this function will be NULL ++ ex: in case of op_errno = ENOENT. So refer prefilled inode data ++ which is part of local. ++ Note: Reassigning/overriding the inode passed to this cbk with inode ++ which is part of *struct shard_local_t* won't cause any issue as ++ both inodes have same reference/address as of the inode passed */ ++ inode = ((shard_local_t *)frame->local)->loc.inode; ++ ++ shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno, ++ buf, xdata); ++ return 0; ++} ++ ++/* This function decides whether to make file based lookup or ++ * fd based lookup (fstat) depending on the 3rd and 4th arg. ++ * If fd != NULL and loc == NULL then call is for fstat ++ * If fd == NULL and loc != NULL then call is for file based ++ * lookup. Please pass args based on the requirement. 
++ */ ++int ++shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ fd_t *fd, shard_post_fop_handler_t handler) + { + int ret = -1; ++ inode_t *inode = NULL; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; + gf_boolean_t need_refresh = _gf_false; + + local = frame->local; + local->handler = handler; ++ inode = fd ? fd->inode : loc->inode; + +- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, ++ ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf, + &need_refresh); + /* By this time, inode ctx should have been created either in create, + * mknod, readdirp or lookup. If not it is a bug! +@@ -1726,7 +1763,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_msg_debug(this->name, 0, + "Skipping lookup on base file: %s" + "Serving prebuf off the inode ctx cache", +- uuid_utoa(loc->gfid)); ++ uuid_utoa(inode->gfid)); + goto out; + } + +@@ -1737,10 +1774,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + goto out; + } + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out); + +- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ if (fd) ++ STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xattr_req); ++ else ++ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + + dict_unref(xattr_req); + return 0; +@@ -2718,8 +2759,8 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + local->resolver_base_inode = loc->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); ++ shard_refresh_base_file(frame, this, &local->loc, NULL, ++ shard_post_lookup_truncate_handler); + return 0; + + err: +@@ -2774,8 +2815,8 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + local->resolver_base_inode = fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); ++ shard_refresh_base_file(frame, this, NULL, fd, ++ shard_post_lookup_truncate_handler); + return 0; + err: + shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); +@@ -2919,8 +2960,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + if (!local->xattr_req) + goto err; + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_link_handler); ++ shard_refresh_base_file(frame, this, &local->loc, NULL, ++ shard_post_lookup_link_handler); + return 0; + err: + shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); +@@ -4249,8 +4290,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: +- shard_lookup_base_file(frame, this, &local->int_inodelk.loc, +- shard_post_lookup_base_shard_rm_handler); ++ shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL, ++ shard_post_lookup_base_shard_rm_handler); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +@@ -4505,8 +4546,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + if (local->block_size) { + local->tmp_loc.inode = inode_new(this->itable); + 
gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); +- shard_lookup_base_file(frame, this, &local->tmp_loc, +- shard_post_rename_lookup_handler); ++ shard_refresh_base_file(frame, this, &local->tmp_loc, NULL, ++ shard_post_rename_lookup_handler); + } else { + shard_rename_cbk(frame, this); + } +@@ -5242,8 +5283,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_readv_handler); ++ shard_refresh_base_file(frame, this, NULL, fd, ++ shard_post_lookup_readv_handler); + return 0; + err: + shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); +@@ -6046,8 +6087,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_fsync_handler); ++ shard_refresh_base_file(frame, this, NULL, fd, ++ shard_post_lookup_fsync_handler); + return 0; + err: + shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); +@@ -6420,12 +6461,8 @@ shard_common_remove_xattr(call_frame_t *frame, xlator_t *this, + if (xdata) + local->xattr_req = dict_ref(xdata); + +- /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is +- * on an fd. This comes under a generic class of bugs in shard tracked by +- * bz #1782428. +- */ +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_remove_xattr_handler); ++ shard_refresh_base_file(frame, this, loc, fd, ++ shard_post_lookup_remove_xattr_handler); + return 0; + err: + shard_common_failure_unwind(fop, frame, -1, op_errno); +@@ -6662,12 +6699,8 @@ shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, + if (xdata) + local->xattr_rsp = dict_ref(xdata); + +- /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is +- * on an fd. This comes under a generic class of bugs in shard tracked by +- * bz #1782428. +- */ +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_set_xattr_handler); ++ shard_refresh_base_file(frame, this, loc, fd, ++ shard_post_lookup_set_xattr_handler); + return 0; + err: + shard_common_failure_unwind(fop, frame, -1, op_errno); +@@ -6951,8 +6984,8 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_common_inode_write_post_lookup_handler); ++ shard_refresh_base_file(frame, this, NULL, fd, ++ shard_common_inode_write_post_lookup_handler); + return 0; + out: + shard_common_failure_unwind(fop, frame, -1, ENOMEM); +-- +1.8.3.1 + diff --git a/SOURCES/0564-store.c-glusterd-store.c-remove-sys_stat-calls.patch b/SOURCES/0564-store.c-glusterd-store.c-remove-sys_stat-calls.patch new file mode 100644 index 0000000..35cda2e --- /dev/null +++ b/SOURCES/0564-store.c-glusterd-store.c-remove-sys_stat-calls.patch @@ -0,0 +1,215 @@ +From a7a56c079df2eb0253efdd53e1538656c0ce9095 Mon Sep 17 00:00:00 2001 +From: Yaniv Kaul +Date: Mon, 25 Nov 2019 15:37:46 +0200 +Subject: [PATCH 564/584] store.c/glusterd-store.c: remove sys_stat calls + +Instead of querying for the file size and allocating a char array +according to its size, let's just use a fixed size. 
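+
+A rough sketch of the change (simplified from the
+gf_store_read_and_tokenize() callers below; error handling omitted):
+
+    /* before: stat the file and allocate a buffer on every call */
+    struct stat st;
+    char *scan_str = NULL;
+
+    sys_fstat(handle->fd, &st);
+    scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char);
+    fgets(scan_str, st.st_size + 1, file);
+    GF_FREE(scan_str);
+
+    /* after: a fixed-size stack buffer, no stat and no allocation */
+    char str[8192];
+
+    fgets(str, 8192, file);
+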
+Those calls are not really needed, and are either expensive or +cached anyway. Since we do dynamic allocation/free, let's just use +a fixed array instead. + +I'll see if there are other sys_stat() calls that are not really +useful and try to eliminate them in separate patches. + +Backport of: +> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/23752/ +> Change-Id: I76b40e78a52ab38f613fc0cdef4be60e6253bf20 +> updates: bz#1193929 +> Signed-off-by: Yaniv Kaul + +BUG: 1925425 +Change-Id: I76b40e78a52ab38f613fc0cdef4be60e6253bf20 +Signed-off-by: Yaniv Kaul +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244958 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/store.h | 4 +- + libglusterfs/src/store.c | 71 ++++-------------------------- + xlators/mgmt/glusterd/src/glusterd-store.c | 5 +-- + 3 files changed, 12 insertions(+), 68 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h +index 3b3a24c..f63bd05 100644 +--- a/libglusterfs/src/glusterfs/store.h ++++ b/libglusterfs/src/glusterfs/store.h +@@ -59,8 +59,8 @@ int32_t + gf_store_unlink_tmppath(gf_store_handle_t *shandle); + + int +-gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key, +- char **iter_val, gf_store_op_errno_t *store_errno); ++gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val, ++ gf_store_op_errno_t *store_errno); + + int32_t + gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value); +diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c +index cdf0aea..fa3649b 100644 +--- a/libglusterfs/src/store.c ++++ b/libglusterfs/src/store.c +@@ -184,8 +184,8 @@ out: + } + + int +-gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key, +- char **iter_val, gf_store_op_errno_t *store_errno) ++gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val, ++ gf_store_op_errno_t *store_errno) + { + int32_t ret = -1; + char *savetok = NULL; +@@ -193,15 +193,15 @@ gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key, + char *value = NULL; + char *temp = NULL; + size_t str_len = 0; ++ char str[8192]; + + GF_ASSERT(file); +- GF_ASSERT(str); + GF_ASSERT(iter_key); + GF_ASSERT(iter_val); + GF_ASSERT(store_errno); + + retry: +- temp = fgets(str, size, file); ++ temp = fgets(str, 8192, file); + if (temp == NULL || feof(file)) { + ret = -1; + *store_errno = GD_STORE_EOF; +@@ -241,13 +241,8 @@ int32_t + gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value) + { + int32_t ret = -1; +- char *scan_str = NULL; + char *iter_key = NULL; + char *iter_val = NULL; +- char *free_str = NULL; +- struct stat st = { +- 0, +- }; + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT(handle); +@@ -279,32 +274,9 @@ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value) + } else { + fseek(handle->read, 0, SEEK_SET); + } +- ret = sys_fstat(handle->fd, &st); +- if (ret < 0) { +- gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, +- "stat on file %s failed", handle->path); +- ret = -1; +- store_errno = GD_STORE_STAT_FAILED; +- goto out; +- } +- +- /* "st.st_size + 1" is used as we are fetching each +- * line of a file using fgets, fgets will append "\0" +- * to the end of the string +- */ +- scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char); +- +- if (scan_str == NULL) { +- ret = -1; +- store_errno = GD_STORE_ENOMEM; +- goto out; +- } 
+- +- free_str = scan_str; +- + do { +- ret = gf_store_read_and_tokenize(handle->read, scan_str, st.st_size + 1, +- &iter_key, &iter_val, &store_errno); ++ ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val, ++ &store_errno); + if (ret < 0) { + gf_msg_trace("", 0, + "error while reading key '%s': " +@@ -334,8 +306,6 @@ out: + sys_close(handle->fd); + } + +- GF_FREE(free_str); +- + return ret; + } + +@@ -561,40 +531,16 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value, + gf_store_op_errno_t *op_errno) + { + int32_t ret = -1; +- char *scan_str = NULL; + char *iter_key = NULL; + char *iter_val = NULL; +- struct stat st = { +- 0, +- }; + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT(iter); + GF_ASSERT(key); + GF_ASSERT(value); + +- ret = sys_stat(iter->filepath, &st); +- if (ret < 0) { +- gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED, +- "stat on file failed"); +- ret = -1; +- store_errno = GD_STORE_STAT_FAILED; +- goto out; +- } +- +- /* "st.st_size + 1" is used as we are fetching each +- * line of a file using fgets, fgets will append "\0" +- * to the end of the string +- */ +- scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char); +- if (!scan_str) { +- ret = -1; +- store_errno = GD_STORE_ENOMEM; +- goto out; +- } +- +- ret = gf_store_read_and_tokenize(iter->file, scan_str, st.st_size + 1, +- &iter_key, &iter_val, &store_errno); ++ ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val, ++ &store_errno); + if (ret < 0) { + goto out; + } +@@ -619,7 +565,6 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value, + ret = 0; + + out: +- GF_FREE(scan_str); + if (ret) { + GF_FREE(*key); + GF_FREE(*value); +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 4fa8116..da63c03 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -4092,7 +4092,6 @@ out: + int32_t + glusterd_store_retrieve_missed_snaps_list(xlator_t *this) + { +- char buf[PATH_MAX] = ""; + char path[PATH_MAX] = ""; + char *snap_vol_id = NULL; + char *missed_node_info = NULL; +@@ -4129,8 +4128,8 @@ glusterd_store_retrieve_missed_snaps_list(xlator_t *this) + } + + do { +- ret = gf_store_read_and_tokenize( +- fp, buf, sizeof(buf), &missed_node_info, &value, &store_errno); ++ ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value, ++ &store_errno); + if (ret) { + if (store_errno == GD_STORE_EOF) { + gf_msg_debug(this->name, 0, "EOF for missed_snap_list"); +-- +1.8.3.1 + diff --git a/SOURCES/0565-libglusterfs-coverity-pointer-to-local-outside-the-s.patch b/SOURCES/0565-libglusterfs-coverity-pointer-to-local-outside-the-s.patch new file mode 100644 index 0000000..5e91703 --- /dev/null +++ b/SOURCES/0565-libglusterfs-coverity-pointer-to-local-outside-the-s.patch @@ -0,0 +1,124 @@ +From d491843640658e91a77f15647cefd1c00422c731 Mon Sep 17 00:00:00 2001 +From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com> +Date: Wed, 21 Oct 2020 16:14:29 +0530 +Subject: [PATCH 565/584] libglusterfs/coverity: pointer to local outside the + scope + +issue: gf_store_read_and_tokenize() returns the address +of the locally referred string. + +fix: pass the buf to gf_store_read_and_tokenize() and +use it for tokenize. 
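+
+The shape of the bug, reduced to a minimal (hypothetical) example:
+
+    static int
+    get_key(char **iter_key)
+    {
+        char str[8192];          /* storage local to this frame */
+        char *savetok = NULL;
+
+        fgets(str, sizeof(str), stdin);
+        /* pointer into str escapes the scope here */
+        *iter_key = strtok_r(str, "=", &savetok);
+        return 0;
+    }
+
+After the fix the caller owns the buffer and passes it down, so the
+key/value tokens handed back still point into live storage.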
+ +CID: 1430143 + +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/1675 +> Updates: #1060 +> Change-Id: Ifc346540c263f58f4014ba2ba8c1d491c20ac609 +> Signed-off-by: Vinayakswami Hariharmath + +BUG: 1925425 +Change-Id: Ifc346540c263f58f4014ba2ba8c1d491c20ac609 +Signed-off-by: Vinayakswami Hariharmath +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244959 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/store.h | 3 ++- + libglusterfs/src/store.c | 13 ++++++++----- + xlators/mgmt/glusterd/src/glusterd-store.c | 3 ++- + 3 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h +index f63bd05..68a20ad 100644 +--- a/libglusterfs/src/glusterfs/store.h ++++ b/libglusterfs/src/glusterfs/store.h +@@ -60,7 +60,8 @@ gf_store_unlink_tmppath(gf_store_handle_t *shandle); + + int + gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val, +- gf_store_op_errno_t *store_errno); ++ gf_store_op_errno_t *store_errno, char *str, ++ size_t buf_size); + + int32_t + gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value); +diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c +index fa3649b..3af627a 100644 +--- a/libglusterfs/src/store.c ++++ b/libglusterfs/src/store.c +@@ -185,7 +185,8 @@ out: + + int + gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val, +- gf_store_op_errno_t *store_errno) ++ gf_store_op_errno_t *store_errno, char *str, ++ size_t buf_size) + { + int32_t ret = -1; + char *savetok = NULL; +@@ -193,7 +194,6 @@ gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val, + char *value = NULL; + char *temp = NULL; + size_t str_len = 0; +- char str[8192]; + + GF_ASSERT(file); + GF_ASSERT(iter_key); +@@ -201,7 +201,7 @@ gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val, + GF_ASSERT(store_errno); + + retry: +- temp = fgets(str, 8192, file); ++ temp = fgets(str, buf_size, file); + if (temp == NULL || feof(file)) { + ret = -1; + *store_errno = GD_STORE_EOF; +@@ -275,8 +275,9 @@ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value) + fseek(handle->read, 0, SEEK_SET); + } + do { ++ char buf[8192]; + ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val, +- &store_errno); ++ &store_errno, buf, 8192); + if (ret < 0) { + gf_msg_trace("", 0, + "error while reading key '%s': " +@@ -533,6 +534,8 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value, + int32_t ret = -1; + char *iter_key = NULL; + char *iter_val = NULL; ++ char buf[8192]; ++ + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT(iter); +@@ -540,7 +543,7 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value, + GF_ASSERT(value); + + ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val, +- &store_errno); ++ &store_errno, buf, 8192); + if (ret < 0) { + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index da63c03..a8651d8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -4128,8 +4128,9 @@ glusterd_store_retrieve_missed_snaps_list(xlator_t *this) + } + + do { ++ char buf[8192]; + ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value, +- &store_errno); ++ &store_errno, buf, 8192); + if (ret) { + if (store_errno == GD_STORE_EOF) 
{ + gf_msg_debug(this->name, 0, "EOF for missed_snap_list"); +-- +1.8.3.1 + diff --git a/SOURCES/0566-enahancement-debug-Option-to-generate-core-dump-with.patch b/SOURCES/0566-enahancement-debug-Option-to-generate-core-dump-with.patch new file mode 100644 index 0000000..548271e --- /dev/null +++ b/SOURCES/0566-enahancement-debug-Option-to-generate-core-dump-with.patch @@ -0,0 +1,236 @@ +From e66ab728426e147bf4fc594109137ebfb1f2dda6 Mon Sep 17 00:00:00 2001 +From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com> +Date: Mon, 23 Nov 2020 08:09:44 +0530 +Subject: [PATCH 566/584] enahancement/debug: Option to generate core dump + without killing the process + +Comments and idea proposed by: Xavi Hernandez(jahernan@redhat.com): + +On production systems sometimes we see a log message saying that an assertion +has failed. But it's hard to track why it failed without additional information +(on debug builds, a GF_ASSERT() generates a core dump and kills the process, +so it can be used to debug the issue, but many times we are only able to +reproduce assertion failures on production systems, where GF_ASSERT() only logs +a message and continues). + +In other cases we may have a core dump caused by a bug, but the core dump doesn't +necessarily happen when the bug has happened. Sometimes the crash happens so much +later that the causes that triggered the bug are lost. In these cases we can add +more assertions to the places that touch the potential candidates to cause the bug, +but the only thing we'll get is a log message, which may not be enough. + +One solution would be to always generate a core dump in case of assertion failure, +but this was already discussed and it was decided that it was too drastic. If a +core dump was really needed, a new macro was created to do so: GF_ABORT(), +but GF_ASSERT() would continue to not kill the process on production systems. + +I'm proposing to modify GF_ASSERT() on production builds so that it conditionally +triggers a signal when a debugger is attached. When this happens, the debugger +will generate a core dump and continue the process as if nothing had happened. +If there's no debugger attached, GF_ASSERT() will behave as always. + +The idea I have is to use SIGCONT to do that. This signal is harmless, so we can +unmask it (we currently mask all unneeded signals) and raise it inside a GF_ASSERT() +when some global variable is set to true. + +To produce the core dump, run the script under extras/debug/gfcore.py on other +terminal. gdb breaks and produces coredump when GF_ASSERT is hit. 
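+
+The runtime side of this is deliberately tiny (sketch of the helper
+added below; the full patch also unmasks SIGCONT in the thread signal
+mask and calls the helper from the GF_ASSERT() macro):
+
+    gf_boolean_t gf_signal_on_assert = false;
+
+    void
+    gf_assert(void)
+    {
+        /* no-op unless a debugger flipped the flag to true */
+        if (gf_signal_on_assert) {
+            raise(SIGCONT);
+        }
+    }
+
+gfcore.py then just attaches gdb, sets gf_signal_on_assert = 1, waits
+for SIGCONT, runs gcore to write the core file and lets the process
+continue.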
+ +The script is copied from #1810 which is written by Xavi Hernandez(jahernan@redhat.com) + +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/1814 +> Fixes: #1810 +> Change-Id: I6566ca2cae15501d8835c36f56be4c6950cb2a53 +> Signed-off-by: Vinayakswami Hariharmath + +BUG: 1927640 +Change-Id: I6566ca2cae15501d8835c36f56be4c6950cb2a53 +Signed-off-by: Vinayakswami Hariharmath +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244960 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/debug/gfcore.py | 77 +++++++++++++++++++++++++++++++ + libglusterfs/src/common-utils.c | 11 +++++ + libglusterfs/src/glusterfs/common-utils.h | 10 +++- + libglusterfs/src/libglusterfs.sym | 16 +++++++ + 4 files changed, 112 insertions(+), 2 deletions(-) + create mode 100755 extras/debug/gfcore.py + +diff --git a/extras/debug/gfcore.py b/extras/debug/gfcore.py +new file mode 100755 +index 0000000..9f097f0 +--- /dev/null ++++ b/extras/debug/gfcore.py +@@ -0,0 +1,77 @@ ++#!/usr/bin/env python3 ++ ++def launch(): ++ if len(sys.argv) < 3: ++ sys.stderr.write("Syntax: {} []\n".format(os.path.basename(sys.argv[0]))) ++ sys.exit(1) ++ ++ pid = int(sys.argv[1]) ++ count = int(sys.argv[2]) ++ base = os.getcwd() ++ if len(sys.argv) > 3: ++ base = sys.argv[3] ++ base = os.path.realpath(base) ++ ++ subprocess.run([ ++ "gdb", "-batch", ++ "-p", str(pid), ++ "-ex", "py arg_count = {}".format(count), ++ "-ex", "py arg_dir = '{}'".format(base), ++ "-x", __file__ ++ ]) ++ ++class GFCore(object): ++ def __init__(self, count, base): ++ self.count = count ++ self.base = base ++ gdb.execute('set pagination off') ++ gdb.execute('set gf_signal_on_assert = 1') ++ gdb.events.stop.connect(self.gf_stop) ++ ++ self.cont() ++ ++ def cont(self, quit = False): ++ if not(quit) and (self.count > 0): ++ gdb.execute('continue') ++ else: ++ gdb.execute('set gf_signal_on_assert = 0') ++ gdb.execute('quit') ++ ++ def gf_stop(self, event): ++ quit = False ++ ++ if isinstance(event, gdb.SignalEvent): ++ if event.stop_signal == 'SIGCONT': ++ now = datetime.utcnow().isoformat() ++ pid = gdb.selected_inferior().pid ++ name = "{}/gfcore.{}.{}".format(self.base, pid, now) ++ print("Generating coredump '{}'".format(name)) ++ gdb.execute('gcore {}'.format(name)) ++ self.count -= 1 ++ ++ elif event.stop_signal == 'SIGINT': ++ print("SIGINT received. Exiting") ++ quit = True ++ ++ else: ++ print("Ignoring signal {}".format(event.stop_signal)) ++ else: ++ print("Unexpected event {}".format(type(event))) ++ ++ self.cont(quit) ++ ++# Module 'gdb' is not available when running outside gdb. 
++try: ++ import gdb ++ from datetime import datetime ++ ++ GFCore(arg_count, arg_dir) ++except ModuleNotFoundError: ++ import sys ++ import os ++ import subprocess ++ ++ try: ++ launch() ++ except KeyboardInterrupt: ++ pass +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index 70d5d21..d351b93 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -77,9 +77,19 @@ char *vol_type_str[] = { + "Distributed-Disperse", + }; + ++gf_boolean_t gf_signal_on_assert = false; ++ + typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size); + typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size); + ++void gf_assert(void) ++{ ++ if (gf_signal_on_assert) { ++ raise(SIGCONT); ++ } ++ ++} ++ + void + gf_xxh64_wrapper(const unsigned char *data, size_t const len, + unsigned long long const seed, char *xxh64) +@@ -4021,6 +4031,7 @@ gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr, + sigdelset(&set, SIGSYS); + sigdelset(&set, SIGFPE); + sigdelset(&set, SIGABRT); ++ sigdelset(&set, SIGCONT); + + pthread_sigmask(SIG_BLOCK, &set, &old); + +diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h +index f0a0a41..604afd0 100644 +--- a/libglusterfs/src/glusterfs/common-utils.h ++++ b/libglusterfs/src/glusterfs/common-utils.h +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #ifndef ffsll + #define ffsll(x) __builtin_ffsll(x) +@@ -431,14 +432,19 @@ BIT_VALUE(unsigned char *array, unsigned int index) + #define GF_FILE_CONTENT_REQUESTED(_xattr_req, _content_limit) \ + (dict_get_uint64(_xattr_req, "glusterfs.content", _content_limit) == 0) + ++void gf_assert(void); ++ + #ifdef DEBUG + #define GF_ASSERT(x) assert(x); + #else + #define GF_ASSERT(x) \ + do { \ +- if (!(x)) { \ ++ if (caa_unlikely(!(x))) { \ ++ gf_assert(); \ + gf_msg_callingfn("", GF_LOG_ERROR, 0, LG_MSG_ASSERTION_FAILED, \ +- "Assertion failed: " #x); \ ++ "Assertion failed: To attach gdb and coredump," \ ++ " Run the script under " \ ++ "\"glusterfs/extras/debug/gfcore.py\""); \ + } \ + } while (0) + #endif +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 0a0862e..9072afa 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1167,3 +1167,19 @@ gf_changelog_register_generic + gf_gfid_generate_from_xxh64 + find_xlator_option_in_cmd_args_t + gf_d_type_from_ia_type ++glusterfs_graph_fini ++glusterfs_process_svc_attach_volfp ++glusterfs_mux_volfile_reconfigure ++glusterfs_process_svc_detach ++mgmt_is_multiplexed_daemon ++xlator_is_cleanup_starting ++gf_nanosleep ++gf_syncfs ++graph_total_client_xlator ++get_xattrs_to_heal ++gf_latency_statedump_and_reset ++gf_latency_new ++gf_latency_reset ++gf_latency_update ++gf_frame_latency_update ++gf_assert +-- +1.8.3.1 + diff --git a/SOURCES/0567-inode-create-inode-outside-locked-region.patch b/SOURCES/0567-inode-create-inode-outside-locked-region.patch new file mode 100644 index 0000000..23d51c4 --- /dev/null +++ b/SOURCES/0567-inode-create-inode-outside-locked-region.patch @@ -0,0 +1,86 @@ +From 5c81d813c8b1f494d31d54c1ab09a3f0153ebfd4 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Sat, 9 Feb 2019 13:13:47 +0530 +Subject: [PATCH 567/584] inode: create inode outside locked region + +Only linking of inode to the table, and inserting it in +a list needs to be in locked region. 
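+
+Schematically (condensed from the inode_new() change below):
+
+    /* before: allocation happened inside the critical section */
+    pthread_mutex_lock(&table->lock);
+    inode = __inode_create(table);   /* allocates and links */
+    if (inode)
+        __inode_ref(inode, false);
+    pthread_mutex_unlock(&table->lock);
+
+    /* after: allocate first, take the lock only to link and ref */
+    inode = inode_create(table);     /* no lock held */
+    if (inode) {
+        pthread_mutex_lock(&table->lock);
+        list_add(&inode->list, &table->lru);
+        table->lru_size++;
+        __inode_ref(inode, false);
+        pthread_mutex_unlock(&table->lock);
+    }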
+ +Backport of: +> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/22183/ +> Updates: bz#1670031 +> Change-Id: I6ea7e956b80cf2765c2233d761909c4bf9c7253c +> Signed-off-by: Amar Tumballi + +BUG: 1927640 +Change-Id: I6ea7e956b80cf2765c2233d761909c4bf9c7253c +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244961 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/inode.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 98f8ea6..46db04f 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -620,7 +620,7 @@ out: + } + + static inode_t * +-__inode_create(inode_table_t *table) ++inode_create(inode_table_t *table) + { + inode_t *newi = NULL; + +@@ -647,11 +647,7 @@ __inode_create(inode_table_t *table) + goto out; + } + +- list_add(&newi->list, &table->lru); +- table->lru_size++; +- + out: +- + return newi; + } + +@@ -668,14 +664,16 @@ inode_new(inode_table_t *table) + return NULL; + } + +- pthread_mutex_lock(&table->lock); +- { +- inode = __inode_create(table); +- if (inode != NULL) { ++ inode = inode_create(table); ++ if (inode) { ++ pthread_mutex_lock(&table->lock); ++ { ++ list_add(&inode->list, &table->lru); ++ table->lru_size++; + __inode_ref(inode, false); + } ++ pthread_mutex_unlock(&table->lock); + } +- pthread_mutex_unlock(&table->lock); + + return inode; + } +@@ -1613,7 +1611,10 @@ __inode_table_init_root(inode_table_t *table) + if (!table) + return; + +- root = __inode_create(table); ++ root = inode_create(table); ++ ++ list_add(&root->list, &table->lru); ++ table->lru_size++; + + iatt.ia_gfid[15] = 1; + iatt.ia_ino = 1; +-- +1.8.3.1 + diff --git a/SOURCES/0568-core-tcmu-runner-process-continuous-growing-logs-lru.patch b/SOURCES/0568-core-tcmu-runner-process-continuous-growing-logs-lru.patch new file mode 100644 index 0000000..22c6790 --- /dev/null +++ b/SOURCES/0568-core-tcmu-runner-process-continuous-growing-logs-lru.patch @@ -0,0 +1,131 @@ +From 2640ee56201d320b838909f95608abe07e3ff9b0 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Tue, 24 Nov 2020 15:29:58 +0530 +Subject: [PATCH 568/584] core: tcmu-runner process continuous growing logs + lru_size showing -1 + +* core: tcmu-runner process continuous growing logs lru_size showing -1 + +At the time of calling inode_table_prune it checks if current lru_size +is greater than lru_limit but lru_list is empty it throws a log message +"Empty inode lru list found but with (%d) lru_size".As per code reading +it seems lru_size is out of sync with the actual number of inodes in +lru_list. Due to throwing continuous error messages entire disk is +getting full and the user has to restart the tcmu-runner process to use +the volumes.The log message was introduce by a patch +https://review.gluster.org/#/c/glusterfs/+/15087/. + +Solution: Introduce a flag in_lru_list to take decision about inode is + being part of lru_list or not. 
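+
+The flag keeps the counter and the actual list membership in
+lock-step (sketch; the patch applies this pairing at every lru add
+and remove site in inode.c):
+
+    /* adding an inode to the lru list */
+    GF_ASSERT(!inode->in_lru_list);
+    list_add(&inode->list, &table->lru);
+    table->lru_size++;
+    inode->in_lru_list = _gf_true;
+
+    /* removing it again */
+    GF_ASSERT(table->lru_size > 0);
+    GF_ASSERT(inode->in_lru_list);
+    table->lru_size--;
+    inode->in_lru_list = _gf_false;
+
+A path that decrements lru_size for an inode that was never on the
+list now trips a GF_ASSERT instead of silently driving the counter
+negative.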
+ +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/1776 +> Fixes: #1775 +> Change-Id: I4b836bebf4b5db65fbf88ff41c6c88f4a7ac55c1 +> Signed-off-by: Mohit Agrawal + +BUG: 1927640 +Change-Id: I4b836bebf4b5db65fbf88ff41c6c88f4a7ac55c1 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244962 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/inode.h | 1 + + libglusterfs/src/inode.c | 14 ++++++++++++++ + 2 files changed, 15 insertions(+) + +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index 62c093d..17d0340 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -110,6 +110,7 @@ struct _inode { + struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ + bool in_invalidate_list; /* Set if inode is in table invalidate list */ + bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ ++ bool in_lru_list; /* Set if inode is in table lru list */ + }; + + #define UUID0_STR "00000000-0000-0000-0000-000000000000" +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 46db04f..8e91197 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -417,8 +417,10 @@ __inode_passivate(inode_t *inode) + dentry_t *dentry = NULL; + dentry_t *t = NULL; + ++ GF_ASSERT(!inode->in_lru_list); + list_move_tail(&inode->list, &inode->table->lru); + inode->table->lru_size++; ++ inode->in_lru_list = _gf_true; + + list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) + { +@@ -531,7 +533,10 @@ __inode_ref(inode_t *inode, bool is_invalidate) + inode->in_invalidate_list = false; + inode->table->invalidate_size--; + } else { ++ GF_ASSERT(inode->table->lru_size > 0); ++ GF_ASSERT(inode->in_lru_list); + inode->table->lru_size--; ++ inode->in_lru_list = _gf_false; + } + if (is_invalidate) { + inode->in_invalidate_list = true; +@@ -670,6 +675,8 @@ inode_new(inode_table_t *table) + { + list_add(&inode->list, &table->lru); + table->lru_size++; ++ GF_ASSERT(!inode->in_lru_list); ++ inode->in_lru_list = _gf_true; + __inode_ref(inode, false); + } + pthread_mutex_unlock(&table->lock); +@@ -1533,6 +1540,7 @@ inode_table_prune(inode_table_t *table) + lru_size = table->lru_size; + while (lru_size > (table->lru_limit)) { + if (list_empty(&table->lru)) { ++ GF_ASSERT(0); + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, + LG_MSG_INVALID_INODE_LIST, + "Empty inode lru list found" +@@ -1543,6 +1551,7 @@ inode_table_prune(inode_table_t *table) + + lru_size--; + entry = list_entry(table->lru.next, inode_t, list); ++ GF_ASSERT(entry->in_lru_list); + /* The logic of invalidation is required only if invalidator_fn + is present */ + if (table->invalidator_fn) { +@@ -1560,6 +1569,7 @@ inode_table_prune(inode_table_t *table) + } + + table->lru_size--; ++ entry->in_lru_list = _gf_false; + __inode_retire(entry); + ret++; + } +@@ -1615,6 +1625,7 @@ __inode_table_init_root(inode_table_t *table) + + list_add(&root->list, &table->lru); + table->lru_size++; ++ root->in_lru_list = _gf_true; + + iatt.ia_gfid[15] = 1; + iatt.ia_ino = 1; +@@ -1873,8 +1884,11 @@ inode_table_destroy(inode_table_t *inode_table) + while (!list_empty(&inode_table->lru)) { + trav = list_first_entry(&inode_table->lru, inode_t, list); + inode_forget_atomic(trav, 0); ++ GF_ASSERT(inode_table->lru_size > 0); ++ GF_ASSERT(trav->in_lru_list); + __inode_retire(trav); + inode_table->lru_size--; ++ 
trav->in_lru_list = _gf_false; + } + + /* Same logic for invalidate list */ +-- +1.8.3.1 + diff --git a/SOURCES/0569-features-shard-optimization-over-shard-lookup-in-cas.patch b/SOURCES/0569-features-shard-optimization-over-shard-lookup-in-cas.patch new file mode 100644 index 0000000..fff8223 --- /dev/null +++ b/SOURCES/0569-features-shard-optimization-over-shard-lookup-in-cas.patch @@ -0,0 +1,200 @@ +From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001 +From: Vinayakswami Hariharmath +Date: Thu, 6 Aug 2020 14:39:59 +0530 +Subject: [PATCH 569/584] features/shard: optimization over shard lookup in + case of prealloc + +Assume that we are preallocating a VM of size 1TB with a shard +block size of 64MB then there will be ~16k shards. + +This creation happens in 2 steps shard_fallocate() path i.e + +1. lookup for the shards if any already present and +2. mknod over those shards do not exist. + +But in case of fresh creation, we dont have to lookup for all +shards which are not present as the the file size will be 0. +Through this, we can save lookup on all shards which are not +present. This optimization is quite useful in the case of +preallocating big vm. + +Also if the file is already present and the call is to +extend it to bigger size then we need not to lookup for non- +existent shards. Just lookup preexisting shards, populate +the inodes and issue mknod on extended size. + +Backport of: +> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/ +> Fixes: #1425 +> Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880 +> Signed-off-by: Vinayakswami Hariharmath + +BUG: 1925425 +Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880 +Signed-off-by: Vinayakswami Hariharmath +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/shard/issue-1425.t | 45 +++++++++++++++++++++++++++++++++++++ + xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------ + 2 files changed, 84 insertions(+), 7 deletions(-) + create mode 100644 tests/bugs/shard/issue-1425.t + +diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t +new file mode 100644 +index 0000000..bbe82c0 +--- /dev/null ++++ b/tests/bugs/shard/issue-1425.t +@@ -0,0 +1,45 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++FILE_COUNT_TIME=5 ++ ++function get_file_count { ++ ls $1* | wc -l ++} ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}0 ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume start $V0 ++TEST $CLI volume profile $V0 start ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST fallocate -l 20M $M0/foo ++gfid_new=$(get_gfid_string $M0/foo) ++ ++# Check for the base shard ++TEST stat $M0/foo ++TEST stat $B0/${V0}0/foo ++ ++# There should be 4 associated shards ++EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new ++ ++# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch ++EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'` ++ ++# Delete the base shard and check shards get cleaned up ++TEST unlink $M0/foo ++ ++TEST ! stat $M0/foo ++TEST ! 
stat $B0/${V0}0/foo ++ ++# There should be no shards now ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 2ba4528..a6ad1b8 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) + } + + int ++shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, ++ xlator_t *this); ++ ++int + shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t post_res_handler) + { +@@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; ++ uint64_t resolve_count = 0; + + priv = this->private; + local = frame->local; + local->call_count = 0; + shard_idx_iter = local->first_block; + res_inode = local->resolver_base_inode; ++ ++ if ((local->op_ret < 0) || (local->resolve_not)) ++ goto out; ++ ++ /* If this prealloc FOP is for fresh file creation, then the size of the ++ * file will be 0. Then there will be no shards associated with this file. ++ * So we can skip the lookup process for the shards which do not exists ++ * and directly issue mknod to crete shards. ++ * ++ * In case the prealloc fop is to extend the preallocated file to bigger ++ * size then just lookup and populate inodes of existing shards and ++ * update the create count ++ */ ++ if (local->fop == GF_FOP_FALLOCATE) { ++ if (!local->prebuf.ia_size) { ++ local->inode_list[0] = inode_ref(res_inode); ++ local->create_count = local->last_block; ++ shard_common_inode_write_post_lookup_shards_handler(frame, this); ++ return 0; ++ } ++ if (local->prebuf.ia_size < local->total_size) ++ local->create_count = local->last_block - ++ ((local->prebuf.ia_size - 1) / ++ local->block_size); ++ } ++ ++ resolve_count = local->last_block - local->create_count; ++ + if (res_inode) + gf_uuid_copy(gfid, res_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + +- if ((local->op_ret < 0) || (local->resolve_not)) +- goto out; +- +- while (shard_idx_iter <= local->last_block) { ++ while (shard_idx_iter <= resolve_count) { + i++; + if (shard_idx_iter == 0) { + local->inode_list[i] = inode_ref(res_inode); +@@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, + int count = 0; + int call_count = 0; + int32_t shard_idx_iter = 0; +- int last_block = 0; ++ int lookup_count = 0; + char path[PATH_MAX] = { + 0, + }; +@@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, + local = frame->local; + count = call_count = local->call_count; + shard_idx_iter = local->first_block; +- last_block = local->last_block; ++ lookup_count = local->last_block - local->create_count; + local->pls_fop_handler = handler; + if (local->lookup_shards_barriered) + local->barrier.waitfor = local->call_count; +@@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, + else + gf_uuid_copy(gfid, local->base_gfid); + +- while (shard_idx_iter <= last_block) { ++ while (shard_idx_iter <= lookup_count) { + if (local->inode_list[i]) { + i++; + shard_idx_iter++; +@@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame, + shard_common_lookup_shards( + frame, this, local->resolver_base_inode, + 
shard_common_inode_write_post_lookup_shards_handler); ++ } else if (local->create_count) { ++ shard_common_inode_write_post_lookup_shards_handler(frame, this); + } else { + shard_common_inode_write_do(frame, this); + } +-- +1.8.3.1 + diff --git a/SOURCES/0570-features-shard-avoid-repeatative-calls-to-gf_uuid_un.patch b/SOURCES/0570-features-shard-avoid-repeatative-calls-to-gf_uuid_un.patch new file mode 100644 index 0000000..4d87bcb --- /dev/null +++ b/SOURCES/0570-features-shard-avoid-repeatative-calls-to-gf_uuid_un.patch @@ -0,0 +1,340 @@ +From 1a8b001a121ada4d3d338b52b312896f1790f2bb Mon Sep 17 00:00:00 2001 +From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com> +Date: Mon, 11 Jan 2021 12:34:55 +0530 +Subject: [PATCH 570/584] features/shard: avoid repeatative calls to + gf_uuid_unparse() + +The issue is shard_make_block_abspath() calls gf_uuid_unparse() +every time while constructing shard path. The gfid can be parsed +and saved once and passed while constructing the path. Thus +we can avoid calling gf_uuid_unparse(). + +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/1689 +> Fixes: #1423 +> Change-Id: Ia26fbd5f09e812bbad9e5715242f14143c013c9c +> Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com + +BUG: 1925425 +Change-Id: Ia26fbd5f09e812bbad9e5715242f14143c013c9c +Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244964 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/shard/issue-1425.t | 9 ++- + xlators/features/shard/src/shard.c | 119 ++++++++++++++++++------------------- + 2 files changed, 65 insertions(+), 63 deletions(-) + +diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t +index bbe82c0..8b77705 100644 +--- a/tests/bugs/shard/issue-1425.t ++++ b/tests/bugs/shard/issue-1425.t +@@ -21,7 +21,13 @@ TEST $CLI volume profile $V0 start + + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 + ++$CLI volume profile $V0 info clear ++ + TEST fallocate -l 20M $M0/foo ++ ++# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch ++EXPECT "5" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'` ++ + gfid_new=$(get_gfid_string $M0/foo) + + # Check for the base shard +@@ -31,9 +37,6 @@ TEST stat $B0/${V0}0/foo + # There should be 4 associated shards + EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new + +-# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch +-EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'` +- + # Delete the base shard and check shards get cleaned up + TEST unlink $M0/foo + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index a6ad1b8..d1d7d7a 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -16,6 +16,8 @@ + #include + #include + ++#define SHARD_PATH_MAX (sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE + 16) ++ + static gf_boolean_t + __is_shard_dir(uuid_t gfid) + { +@@ -49,15 +51,19 @@ shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) + snprintf(buf, len, "%s.%d", gfid_str, block_num); + } + +-void +-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) ++static int ++shard_make_base_path(char *path, uuid_t gfid) + { +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, 
+- }; ++ strcpy(path, "/" GF_SHARD_DIR "/"); ++ uuid_utoa_r(gfid, path + sizeof(GF_SHARD_DIR) + 1); ++ return (sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE); ++} + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); ++static inline void ++shard_append_index(char *path, int path_size, int prefix_len, ++ int shard_idx_iter) ++{ ++ snprintf(path + prefix_len, path_size - prefix_len, ".%d", shard_idx_iter); + } + + int +@@ -1004,9 +1010,8 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, + { + int i = -1; + uint32_t shard_idx_iter = 0; +- char path[PATH_MAX] = { +- 0, +- }; ++ int prefix_len = 0; ++ char path[SHARD_PATH_MAX]; + uuid_t gfid = { + 0, + }; +@@ -1055,6 +1060,9 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, + else + gf_uuid_copy(gfid, local->base_gfid); + ++ /* Build base shard path before appending index of the shard */ ++ prefix_len = shard_make_base_path(path, gfid); ++ + while (shard_idx_iter <= resolve_count) { + i++; + if (shard_idx_iter == 0) { +@@ -1062,16 +1070,13 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, + shard_idx_iter++; + continue; + } +- +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- ++ shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter); + inode = NULL; + inode = inode_resolve(this->itable, path); + if (inode) { + gf_msg_debug(this->name, 0, +- "Shard %d already " +- "present. gfid=%s. Saving inode for future.", +- shard_idx_iter, uuid_utoa(inode->gfid)); ++ "Shard %s already present. Saving inode for future.", ++ path); + local->inode_list[i] = inode; + /* Let the ref on the inodes that are already present + * in inode table still be held so that they don't get +@@ -2153,9 +2158,8 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) + int call_count = 0; + uint32_t cur_block = 0; + uint32_t last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; ++ int prefix_len = 0; ++ char path[SHARD_PATH_MAX]; + char *bname = NULL; + loc_t loc = { + 0, +@@ -2216,6 +2220,10 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) + return 0; + } + ++ /* Build base shard path before appending index of the shard */ ++ prefix_len = shard_make_base_path(path, inode->gfid); ++ bname = path + sizeof(GF_SHARD_DIR) + 1; ++ + SHARD_SET_ROOT_FS_ID(frame, local); + while (cur_block <= last_block) { + if (!local->inode_list[i]) { +@@ -2229,15 +2237,12 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) + goto next; + } + +- shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; ++ shard_append_index(path, SHARD_PATH_MAX, prefix_len, cur_block); + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s. 
Base file gfid = %s", +- bname, uuid_utoa(inode->gfid)); ++ "Inode path failed on %s.", bname); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); +@@ -2465,13 +2470,8 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, + int call_count = 0; + int32_t shard_idx_iter = 0; + int lookup_count = 0; +- char path[PATH_MAX] = { +- 0, +- }; ++ char path[SHARD_PATH_MAX]; + char *bname = NULL; +- uuid_t gfid = { +- 0, +- }; + loc_t loc = { + 0, + }; +@@ -2489,10 +2489,16 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, + if (local->lookup_shards_barriered) + local->barrier.waitfor = local->call_count; + ++ /* Build base shard path before appending index of the shard */ ++ strcpy(path, "/" GF_SHARD_DIR "/"); ++ + if (inode) +- gf_uuid_copy(gfid, inode->gfid); ++ uuid_utoa_r(inode->gfid, path + sizeof(GF_SHARD_DIR) + 1); + else +- gf_uuid_copy(gfid, local->base_gfid); ++ uuid_utoa_r(local->base_gfid, path + sizeof(GF_SHARD_DIR) + 1); ++ ++ int prefix_len = sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE; ++ bname = path + sizeof(GF_SHARD_DIR) + 1; + + while (shard_idx_iter <= lookup_count) { + if (local->inode_list[i]) { +@@ -2508,18 +2514,14 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, + goto next; + } + +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- bname = strrchr(path, '/') + 1; ++ shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter); + loc.inode = inode_new(this->itable); + loc.parent = inode_ref(priv->dot_shard_inode); + gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0 || !(loc.inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); ++ "Inode path failed on %s", bname); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); +@@ -3168,12 +3170,7 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) + uint32_t cur_block = 0; + uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ + char *bname = NULL; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; ++ char path[SHARD_PATH_MAX]; + loc_t loc = { + 0, + }; +@@ -3184,10 +3181,16 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) + priv = this->private; + local = frame->local; + ++ /* Build base shard path before appending index of the shard */ ++ strcpy(path, "/" GF_SHARD_DIR "/"); ++ + if (inode) +- gf_uuid_copy(gfid, inode->gfid); ++ uuid_utoa_r(inode->gfid, path + sizeof(GF_SHARD_DIR) + 1); + else +- gf_uuid_copy(gfid, local->base_gfid); ++ uuid_utoa_r(local->base_gfid, path + sizeof(GF_SHARD_DIR) + 1); ++ ++ int prefix_len = sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE; ++ bname = path + sizeof(GF_SHARD_DIR) + 1; + + for (i = 0; i < local->num_blocks; i++) { + if (!local->inode_list[i]) +@@ -3203,7 +3206,7 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) + gf_msg_debug(this->name, 0, + "All shards that need to be " + "unlinked are non-existent: %s", +- uuid_utoa(gfid)); ++ path); + return 0; + } + +@@ -3221,15 +3224,12 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) + goto next; + } + +- shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; ++ shard_append_index(path, SHARD_PATH_MAX, prefix_len, cur_block); + 
loc.parent = inode_ref(priv->dot_shard_inode);
+ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+- "Inode path failed"
+- " on %s, base file gfid = %s",
+- bname, uuid_utoa(gfid));
++ "Inode path failed on %s", bname);
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ loc_wipe(&loc);
+@@ -4971,9 +4971,8 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+ int last_block = 0;
+ int ret = 0;
+ int call_count = 0;
+- char path[PATH_MAX] = {
+- 0,
+- };
++ int prefix_len = 0;
++ char path[SHARD_PATH_MAX];
+ mode_t mode = 0;
+ char *bname = NULL;
+ shard_priv_t *priv = NULL;
+@@ -4996,6 +4995,10 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+ call_count = local->call_count = local->create_count;
+ local->post_mknod_handler = post_mknod_handler;
+
++ /* Build base shard path before appending index of the shard */
++ prefix_len = shard_make_base_path(path, fd->inode->gfid);
++ bname = path + sizeof(GF_SHARD_DIR) + 1;
++
+ SHARD_SET_ROOT_FS_ID(frame, local);
+
+ ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
+@@ -5022,10 +5025,7 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+ -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+ goto next;
+ }
+-
+- shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
+- sizeof(path));
+-
++ shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter);
+ xattr_req = shard_create_gfid_dict(local->xattr_req);
+ if (!xattr_req) {
+ local->op_ret = -1;
+@@ -5036,7 +5036,6 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+ goto next;
+ }
+
+- bname = strrchr(path, '/') + 1;
+ loc.inode = inode_new(this->itable);
+ loc.parent = inode_ref(priv->dot_shard_inode);
+ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+--
+1.8.3.1
+
diff --git a/SOURCES/0571-NetBSD-build-fixes.patch b/SOURCES/0571-NetBSD-build-fixes.patch
new file mode 100644
index 0000000..8a6d4a4
--- /dev/null
+++ b/SOURCES/0571-NetBSD-build-fixes.patch
@@ -0,0 +1,98 @@
+From 2c0d11bb406e50fb515abf0c5a4006e1b362ac8e Mon Sep 17 00:00:00 2001
+From: Emmanuel Dreyfus
+Date: Tue, 30 Jun 2020 16:42:36 +0200
+Subject: [PATCH 571/584] NetBSD build fixes
+
+- Make sure -largp is used at link time
+- PTHREAD_MUTEX_ADAPTIVE_NP is not available, use PTHREAD_MUTEX_DEFAULT instead
+- Avoid non-POSIX [[ ]] in scripts
+- Do not check whether lock.spinlock is NULL, since it is not a pointer
+ (it is not a pointer on Linux either)
+
+Backport of:
+> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24648/
+> Change-Id: I5e04a7c552d24f8a473c2b837828d1bddfa7e128
+> Fixes: #1347
+> Type: Bug
+> Signed-off-by: Emmanuel Dreyfus
+
+BUG: 1925425
+Change-Id: I5e04a7c552d24f8a473c2b837828d1bddfa7e128
+Signed-off-by: Emmanuel Dreyfus
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245040
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ configure.ac | 3 +++
+ rpc/rpc-lib/src/rpcsvc.c | 4 ++++
+ tools/gfind_missing_files/gfind_missing_files.sh | 2 +-
+ xlators/performance/write-behind/src/write-behind.c | 4 ++--
+ 4 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index 327733e..6138a59 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -998,6 +998,9 @@ case $host_os in
+ CFLAGS="${CFLAGS} -isystem /usr/local/include"
+ ARGP_LDADD=-largp
+ ;;
++ *netbsd*)
++ ARGP_LDADD=-largp
++ ;;
+ esac
+ dnl argp-standalone does not provide a pkg-config file
+ AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
+diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
+index 3f184bf..b031d93 100644
+--- a/rpc/rpc-lib/src/rpcsvc.c
++++ b/rpc/rpc-lib/src/rpcsvc.c
+@@ -46,6 +46,10 @@
+ #include "xdr-rpcclnt.h"
+ #include
+
++#ifndef PTHREAD_MUTEX_ADAPTIVE_NP
++#define PTHREAD_MUTEX_ADAPTIVE_NP PTHREAD_MUTEX_DEFAULT
++#endif
++
+ struct rpcsvc_program gluster_dump_prog;
+
+ #define rpcsvc_alloc_request(svc, request) \
+diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
+index f42fe7b..e7aaa0b 100644
+--- a/tools/gfind_missing_files/gfind_missing_files.sh
++++ b/tools/gfind_missing_files/gfind_missing_files.sh
+@@ -61,7 +61,7 @@ mount_slave()
+
+ parse_cli()
+ {
+- if [[ $# -ne 4 ]]; then
++ if [ "$#" -ne 4 ]; then
+ echo "Usage: gfind_missing_files "
+ exit 1
+ else
+diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
+index 31ab723..76d257f 100644
+--- a/xlators/performance/write-behind/src/write-behind.c
++++ b/xlators/performance/write-behind/src/write-behind.c
+@@ -2490,7 +2490,7 @@ wb_mark_readdirp_start(xlator_t *this, inode_t *directory)
+
+ wb_directory_inode = wb_inode_create(this, directory);
+
+- if (!wb_directory_inode || !wb_directory_inode->lock.spinlock)
++ if (!wb_directory_inode)
+ return;
+
+ LOCK(&wb_directory_inode->lock);
+@@ -2510,7 +2510,7 @@ wb_mark_readdirp_end(xlator_t *this, inode_t *directory)
+
+ wb_directory_inode = wb_inode_ctx_get(this, directory);
+
+- if (!wb_directory_inode || !wb_directory_inode->lock.spinlock)
++ if (!wb_directory_inode)
+ return;
+
+ LOCK(&wb_directory_inode->lock);
+--
+1.8.3.1
+
diff --git a/SOURCES/0572-locks-remove-unused-conditional-switch-to-spin_lock-.patch b/SOURCES/0572-locks-remove-unused-conditional-switch-to-spin_lock-.patch
new file mode 100644
index 0000000..1447916
--- /dev/null
+++ b/SOURCES/0572-locks-remove-unused-conditional-switch-to-spin_lock-.patch
@@ -0,0 +1,183 @@
+From 1491b33007e84be0a0a74354e89deca8a21ed198 Mon Sep 17 00:00:00 2001
+From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
+Date: Tue, 19 Jan 2021 15:39:35 +0530
+Subject: [PATCH 572/584] locks: remove unused conditional switch to spin_lock
+ code
+
+The use of spinlocks depends on the variable use_spinlocks,
+but that variable was left commented out in the code base by
+https://review.gluster.org/#/c/glusterfs/+/14763/. So there is
+no use in having conditional switching between spin_lock and
+mutex. 
Removing the dead code as part of the patch + +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2007 +> Fixes: #1996 +> Change-Id: Ib005dd86969ce33d3409164ef3e1011bb3169129 +> Signed-off-by: Vinayakswami Hariharmath + +BUG: 1925425 +Change-Id: Ib005dd86969ce33d3409164ef3e1011bb3169129 +Signed-off-by: Vinayakswami Hariharmath +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244965 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 7 ----- + libglusterfs/src/Makefile.am | 2 +- + libglusterfs/src/common-utils.c | 5 ---- + libglusterfs/src/glusterfs/locking.h | 51 ------------------------------------ + libglusterfs/src/locking.c | 27 ------------------- + 5 files changed, 1 insertion(+), 91 deletions(-) + delete mode 100644 libglusterfs/src/locking.c + +diff --git a/configure.ac b/configure.ac +index 6138a59..3d99f6a 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -585,13 +585,6 @@ AC_CHECK_HEADERS([linux/falloc.h]) + + AC_CHECK_HEADERS([linux/oom.h], AC_DEFINE(HAVE_LINUX_OOM_H, 1, [have linux/oom.h])) + +-dnl Mac OS X does not have spinlocks +-AC_CHECK_FUNC([pthread_spin_init], [have_spinlock=yes]) +-if test "x${have_spinlock}" = "xyes"; then +- AC_DEFINE(HAVE_SPINLOCK, 1, [define if found spinlock]) +-fi +-AC_SUBST(HAVE_SPINLOCK) +- + dnl some os may not have GNU defined strnlen function + AC_CHECK_FUNC([strnlen], [have_strnlen=yes]) + if test "x${have_strnlen}" = "xyes"; then +diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am +index 970f4b7..830a0c3 100644 +--- a/libglusterfs/src/Makefile.am ++++ b/libglusterfs/src/Makefile.am +@@ -35,7 +35,7 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \ + strfd.c parse-utils.c $(CONTRIBDIR)/mount/mntent.c \ + $(CONTRIBDIR)/libexecinfo/execinfo.c quota-common-utils.c rot-buffs.c \ + $(CONTRIBDIR)/timer-wheel/timer-wheel.c \ +- $(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \ ++ $(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c \ + $(CONTRIBDIR)/xxhash/xxhash.c \ + compound-fop-utils.c throttle-tbf.c monitoring.c + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index d351b93..c2dfe28 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -860,11 +860,6 @@ gf_dump_config_flags() + gf_msg_plain_nomem(GF_LOG_ALERT, "setfsid 1"); + #endif + +-/* define if found spinlock */ +-#ifdef HAVE_SPINLOCK +- gf_msg_plain_nomem(GF_LOG_ALERT, "spinlock 1"); +-#endif +- + /* Define to 1 if you have the header file. */ + #ifdef HAVE_SYS_EPOLL_H + gf_msg_plain_nomem(GF_LOG_ALERT, "epoll.h 1"); +diff --git a/libglusterfs/src/glusterfs/locking.h b/libglusterfs/src/glusterfs/locking.h +index 43cc877..63097bb 100644 +--- a/libglusterfs/src/glusterfs/locking.h ++++ b/libglusterfs/src/glusterfs/locking.h +@@ -22,55 +22,6 @@ + #define pthread_spin_init(l, v) (*l = v) + #endif + +-#if defined(HAVE_SPINLOCK) +- +-typedef union { +- pthread_spinlock_t spinlock; +- pthread_mutex_t mutex; +-} gf_lock_t; +- +-#if !defined(LOCKING_IMPL) +-extern int use_spinlocks; +- +-/* +- * Using a dispatch table would be unpleasant because we're dealing with two +- * different types. If the dispatch contains direct pointers to pthread_xx +- * or mutex_xxx then we have to hope that every possible union alternative +- * starts at the same address as the union itself. 
I'm old enough to remember +- * compilers where this was not the case (for alignment reasons) so I'm a bit +- * paranoid about that. Also, I don't like casting arguments through "void *" +- * which we'd also have to do to avoid type errors. The other alternative would +- * be to define actual functions which pick out the right union member, and put +- * those in the dispatch tables. Now we have a pointer dereference through the +- * dispatch table plus a function call, which is likely to be worse than the +- * branching here from the ?: construct. If it were a clear win it might be +- * worth the extra complexity, but for now this way seems preferable. +- */ +- +-#define LOCK_INIT(x) \ +- (use_spinlocks ? pthread_spin_init(&((x)->spinlock), 0) \ +- : pthread_mutex_init(&((x)->mutex), 0)) +- +-#define LOCK(x) \ +- (use_spinlocks ? pthread_spin_lock(&((x)->spinlock)) \ +- : pthread_mutex_lock(&((x)->mutex))) +- +-#define TRY_LOCK(x) \ +- (use_spinlocks ? pthread_spin_trylock(&((x)->spinlock)) \ +- : pthread_mutex_trylock(&((x)->mutex))) +- +-#define UNLOCK(x) \ +- (use_spinlocks ? pthread_spin_unlock(&((x)->spinlock)) \ +- : pthread_mutex_unlock(&((x)->mutex))) +- +-#define LOCK_DESTROY(x) \ +- (use_spinlocks ? pthread_spin_destroy(&((x)->spinlock)) \ +- : pthread_mutex_destroy(&((x)->mutex))) +- +-#endif +- +-#else +- + typedef pthread_mutex_t gf_lock_t; + + #define LOCK_INIT(x) pthread_mutex_init(x, 0) +@@ -79,6 +30,4 @@ typedef pthread_mutex_t gf_lock_t; + #define UNLOCK(x) pthread_mutex_unlock(x) + #define LOCK_DESTROY(x) pthread_mutex_destroy(x) + +-#endif /* HAVE_SPINLOCK */ +- + #endif /* _LOCKING_H */ +diff --git a/libglusterfs/src/locking.c b/libglusterfs/src/locking.c +deleted file mode 100644 +index 7577054..0000000 +--- a/libglusterfs/src/locking.c ++++ /dev/null +@@ -1,27 +0,0 @@ +-/* +- Copyright (c) 2015 Red Hat, Inc. +- This file is part of GlusterFS. +- +- This file is licensed to you under your choice of the GNU Lesser +- General Public License, version 3 or any later version (LGPLv3 or +- later), or the GNU General Public License, version 2 (GPLv2), in all +- cases as published by the Free Software Foundation. +-*/ +- +-#if defined(HAVE_SPINLOCK) +-/* None of this matters otherwise. */ +- +-#include +-#include +- +-#define LOCKING_IMPL +-#include "glusterfs/locking.h" +- +-int use_spinlocks = 0; +- +-static void __attribute__((constructor)) gf_lock_setup(void) +-{ +- // use_spinlocks = (sysconf(_SC_NPROCESSORS_ONLN) > 1); +-} +- +-#endif +-- +1.8.3.1 + diff --git a/SOURCES/0573-features-shard-unlink-fails-due-to-nospace-to-mknod-.patch b/SOURCES/0573-features-shard-unlink-fails-due-to-nospace-to-mknod-.patch new file mode 100644 index 0000000..3033727 --- /dev/null +++ b/SOURCES/0573-features-shard-unlink-fails-due-to-nospace-to-mknod-.patch @@ -0,0 +1,148 @@ +From 0e453ede1f248a004965d0d368e2c4beb83f2ce1 Mon Sep 17 00:00:00 2001 +From: Vinayakswami Hariharmath +Date: Mon, 25 Jan 2021 17:32:14 +0530 +Subject: [PATCH 573/584] features/shard: unlink fails due to nospace to mknod + marker file + +When we hit the max capacity of the storage space, shard_unlink() +starts failing if there is no space left on the brick to create a +marker file. + +shard_unlink() happens in below steps: + +1. create a marker file in the name of gfid of the base file under +BRICK_PATH/.shard/.remove_me +2. unlink the base file +3. 
shard_delete_shards() deletes the shards in background by +picking the entries in BRICK_PATH/.shard/.remove_me + +If a marker file creation fails then we can't really delete the +shards which eventually a problem for user who is looking to make +space by deleting unwanted data. + +Solution: +Create the marker file by marking xdata = GLUSTERFS_INTERNAL_FOP_KEY +which is considered to be internal op and allowed to create under +reserved space. + +Backport of: +> Upstream-patch: https://github.com/gluster/glusterfs/pull/2057 +> Fixes: #2038 +> Change-Id: I7facebab940f9aeee81d489df429e00ef4fb7c5d +> Signed-off-by: Vinayakswami Hariharmath + +BUG: 1891403 +Change-Id: I7facebab940f9aeee81d489df429e00ef4fb7c5d +Signed-off-by: Vinayakswami Hariharmath +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244966 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/shard/issue-2038.t | 56 ++++++++++++++++++++++++++++++++++++++ + xlators/features/shard/src/shard.c | 20 ++++++++++++++ + 2 files changed, 76 insertions(+) + create mode 100644 tests/bugs/shard/issue-2038.t + +diff --git a/tests/bugs/shard/issue-2038.t b/tests/bugs/shard/issue-2038.t +new file mode 100644 +index 0000000..fc3e7f9 +--- /dev/null ++++ b/tests/bugs/shard/issue-2038.t +@@ -0,0 +1,56 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../snapshot.rc ++ ++cleanup ++ ++FILE_COUNT_TIME=5 ++ ++function get_file_count { ++ ls $1* | wc -l ++} ++ ++TEST verify_lvm_version ++TEST glusterd ++TEST pidof glusterd ++TEST init_n_bricks 1 ++TEST setup_lvm 1 ++ ++TEST $CLI volume create $V0 $H0:$L1 ++TEST $CLI volume start $V0 ++ ++$CLI volume info ++ ++TEST $CLI volume set $V0 features.shard on ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++#Setting the size in percentage ++TEST $CLI volume set $V0 storage.reserve 40 ++ ++#wait 5s to reset disk_space_full flag ++sleep 5 ++ ++TEST touch $M0/test ++TEST unlink $M0/test ++ ++TEST dd if=/dev/zero of=$M0/a bs=80M count=1 ++TEST dd if=/dev/zero of=$M0/b bs=10M count=1 ++ ++gfid_new=$(get_gfid_string $M0/a) ++ ++# Wait 5s to update disk_space_full flag because thread check disk space ++# after every 5s ++ ++sleep 5 ++# setup_lvm create lvm partition of 150M and 40M are reserve so after ++# consuming more than 110M next unlink should not fail ++# Delete the base shard and check shards get cleaned up ++TEST unlink $M0/a ++TEST ! 
stat $M0/a ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index d1d7d7a..8d4a970 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -4078,6 +4078,16 @@ shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, + SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, + local->prebuf.ia_size, 0, err); + ++ /* Mark this as an internal operation, so that in case of disk full, ++ * the marker file will be created as part of reserve space */ ++ ret = dict_set_int32_sizen(xattr_req, GLUSTERFS_INTERNAL_FOP_KEY, 1); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key: %s on path %s", GLUSTERFS_INTERNAL_FOP_KEY, ++ local->newloc.path); ++ goto err; ++ } ++ + STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, + &local->newloc, 0, 0, 0644, xattr_req); +@@ -5843,6 +5853,16 @@ shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, + + SHARD_SET_ROOT_FS_ID(frame, local); + ++ /* Mark this as an internal operation, so that in case of disk full ++ * the internal dir will be created as part of reserve space */ ++ ret = dict_set_int32_sizen(xattr_req, GLUSTERFS_INTERNAL_FOP_KEY, 1); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key: %s on path %s", GLUSTERFS_INTERNAL_FOP_KEY, ++ loc->path); ++ goto err; ++ } ++ + STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, + 0755, 0, xattr_req); +-- +1.8.3.1 + diff --git a/SOURCES/0574-features-shard-delay-unlink-of-a-file-that-has-fd_co.patch b/SOURCES/0574-features-shard-delay-unlink-of-a-file-that-has-fd_co.patch new file mode 100644 index 0000000..810abd4 --- /dev/null +++ b/SOURCES/0574-features-shard-delay-unlink-of-a-file-that-has-fd_co.patch @@ -0,0 +1,712 @@ +From cb0d240004e6d40f8d7f30d177d5970ebc8e25fb Mon Sep 17 00:00:00 2001 +From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com> +Date: Wed, 3 Feb 2021 17:04:25 +0530 +Subject: [PATCH 574/584] features/shard: delay unlink of a file that has + fd_count > 0 + +When there are multiple processes working on a file and if any +process unlinks that file then unlink operation shouldn't harm +other processes working on it. This is a posix a compliant +behavior and this should be supported when shard feature is +enabled also. + +Problem description: +Let's consider 2 clients C1 and C2 working on a file F1 with 5 +shards on gluster mount and gluster server has 4 bricks +B1, B2, B3, B4. + +Assume that base file/shard is present on B1, 1st, 2nd shards +on B2, 3rd and 4th shards on B3 and 5th shard falls on B4 C1 +has opened the F1 in append mode and is writing to it. The +write FOP goes to 5th shard in this case. So the +inode->fd_count = 1 on B1(base file) and B4 (5th shard). + +C2 at the same time issued unlink to F1. On the server, the +base file has fd_count = 1 (since C1 has opened the file), +the base file is renamed under .glusterfs/unlink and +returned to C2. Then unlink will be sent to shards on all +bricks and shards on B2 and B3 will be deleted which have +no open reference yet. C1 starts getting errors while +accessing the remaining shards though it has open references +for the file. + +This is one such undefined behavior. 
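+
+As a rough sketch of the POSIX expectation at play here (a single
+process standing in for both C1 and C2; the mount path and file name
+are hypothetical):
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        /* C1: open the sharded file and keep the fd */
+        int fd = open("/mnt/glusterfs/F1", O_WRONLY | O_APPEND);
+        if (fd < 0)
+            return 1;
+        /* C2: unlink the same file while the fd is still open */
+        unlink("/mnt/glusterfs/F1");
+        /* POSIX: the open fd must remain usable until close() */
+        if (write(fd, "data", 4) < 0)
+            perror("write after unlink");
+        return close(fd);
+    }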
+Likewise, we will encounter many such undefined behaviors, as we
+don't have one global lock to access all shards as one. Of course,
+having such a global lock would lead to a performance hit, as it
+reduces the window for parallel access to the shards.
+
+Solution:
+The above undefined behavior can be addressed by delaying the
+unlink of a file when there are open references on it.
+File unlink happens in 2 steps.
+Step 1: the client creates a marker file under .shard/.remove_me and
+sends the unlink on the base file to the server.
+Step 2: on return from the server, the associated shards are
+cleaned up and finally the marker file is removed.
+
+In step 2, the background deletion process does a nameless
+lookup using the marker file name (the marker file is named after
+the gfid of the base file) in the .glusterfs/unlink dir. If the
+nameless lookup is successful, that means the gfid still has open
+fds and the deletion of the shards has to be delayed. If the nameless
+lookup fails, that indicates the gfid is unlinked and there are no
+open fds on that file (the gfid path is unlinked during the final
+close on the file). The shards on which deletion is delayed
+are unlinked once all open fds are closed, and this is
+done through a thread which wakes up every 10 mins.
+
+Also removed active_fd_count from the inode structure, instead
+referring to fd_count wherever active_fd_count was used.
+
+Backport of:
+> Upstream-patch: https://github.com/gluster/glusterfs/pull/1563
+> Fixes: #1358
+> Change-Id: I8985093386e26215e0b0dce294c534a66f6ca11c
+> Signed-off-by: Vinayakswami Hariharmath
+
+BUG: 1782428
+Change-Id: I8985093386e26215e0b0dce294c534a66f6ca11c
+Signed-off-by: Vinayakswami Hariharmath
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244967
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ libglusterfs/src/glusterfs/glusterfs.h | 1 +
+ tests/bugs/shard/issue-1358.t | 100 +++++++++++++
+ tests/bugs/shard/unlinks-and-renames.t | 5 +
+ xlators/features/shard/src/shard.c | 199 ++++++++++++++++++++++++-
+ xlators/features/shard/src/shard.h | 11 ++
+ xlators/storage/posix/src/posix-entry-ops.c | 36 +++++
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 64 +++++---
+ 7 files changed, 391 insertions(+), 25 deletions(-)
+ create mode 100644 tests/bugs/shard/issue-1358.t
+
+diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
+index d3400bf..4401cf6 100644
+--- a/libglusterfs/src/glusterfs/glusterfs.h
++++ b/libglusterfs/src/glusterfs/glusterfs.h
+@@ -261,6 +261,7 @@ enum gf_internal_fop_indicator {
+ #define GF_XATTROP_PURGE_INDEX "glusterfs.xattrop-purge-index"
+
+ #define GF_GFIDLESS_LOOKUP "gfidless-lookup"
++#define GF_UNLINKED_LOOKUP "unlinked-lookup"
+ /* replace-brick and pump related internal xattrs */
+ #define RB_PUMP_CMD_START "glusterfs.pump.start"
+ #define RB_PUMP_CMD_PAUSE "glusterfs.pump.pause"
+diff --git a/tests/bugs/shard/issue-1358.t b/tests/bugs/shard/issue-1358.t
+new file mode 100644
+index 0000000..1838e06
+--- /dev/null
++++ b/tests/bugs/shard/issue-1358.t
+@@ -0,0 +1,100 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. 
$(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++FILE_COUNT_TIME=5 ++ ++function get_file_count { ++ ls $1* | wc -l ++} ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++TEST dd if=/dev/urandom of=$M0/dir/foo bs=4M count=5 ++gfid_new=$(get_gfid_string $M0/dir/foo) ++ ++# Ensure its shards dir is created now. ++TEST stat $B0/${V0}0/.shard/$gfid_new.1 ++TEST stat $B0/${V0}1/.shard/$gfid_new.1 ++TEST stat $B0/${V0}0/.shard/$gfid_new.2 ++TEST stat $B0/${V0}1/.shard/$gfid_new.2 ++ ++# Open a file and store descriptor in fd = 5 ++exec 5>$M0/dir/foo ++ ++# Write something on the file using the open fd = 5 ++echo "issue-1358" >&5 ++ ++# Write on the descriptor should be succesful ++EXPECT 0 echo $? ++ ++# Unlink the same file which is opened in prev step ++TEST unlink $M0/dir/foo ++ ++# Check the base file ++TEST ! stat $M0/dir/foo ++TEST ! stat $B0/${V0}0/foo ++TEST ! stat $B0/${V0}1/foo ++ ++# Write something on the file using the open fd = 5 ++echo "issue-1281" >&5 ++ ++# Write on the descriptor should be succesful ++EXPECT 0 echo $? ++ ++# Check ".shard/.remove_me" ++EXPECT_WITHIN $FILE_COUNT_TIME 1 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new ++EXPECT_WITHIN $FILE_COUNT_TIME 1 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new ++ ++# Close the fd = 5 ++exec 5>&- ++ ++###### To see the shards deleted, wait for 10 mins or repeat the same steps i.e open a file ##### ++###### write something to it, unlink it and close it. This will wake up the thread that is ###### ++###### responsible to delete the shards ++ ++TEST touch $M0/dir/new ++exec 6>$M0/dir/new ++echo "issue-1358" >&6 ++EXPECT 0 echo $? ++TEST unlink $M0/dir/new ++exec 6>&- ++ ++# Now check the ".shard/remove_me" and the gfid will not be there ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new ++ ++# check for the absence of shards ++TEST ! stat $B0/${V0}0/.shard/$gfid_new.1 ++TEST ! stat $B0/${V0}1/.shard/$gfid_new.1 ++TEST ! stat $B0/${V0}0/.shard/$gfid_new.2 ++TEST ! stat $B0/${V0}1/.shard/$gfid_new.2 ++ ++#### Create the file with same name and check creation and deletion works fine ###### ++TEST dd if=/dev/urandom of=$M0/dir/foo bs=4M count=5 ++gfid_new=$(get_gfid_string $M0/dir/foo) ++ ++# Ensure its shards dir is created now. 
++TEST stat $B0/${V0}0/.shard/$gfid_new.1 ++TEST stat $B0/${V0}1/.shard/$gfid_new.1 ++TEST stat $B0/${V0}0/.shard/$gfid_new.2 ++TEST stat $B0/${V0}1/.shard/$gfid_new.2 ++ ++TEST unlink $M0/dir/foo ++cleanup ++ +diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t +index 990ca69..3280fcb 100644 +--- a/tests/bugs/shard/unlinks-and-renames.t ++++ b/tests/bugs/shard/unlinks-and-renames.t +@@ -24,6 +24,11 @@ TEST pidof glusterd + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} + TEST $CLI volume set $V0 features.shard on + TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.write-behind off ++ + TEST $CLI volume start $V0 + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 8d4a970..b828ff9 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1242,7 +1242,8 @@ out: + + static inode_t * + shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, +- struct iatt *buf, shard_internal_dir_type_t type) ++ xlator_t *this, struct iatt *buf, ++ shard_internal_dir_type_t type) + { + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; +@@ -1250,7 +1251,7 @@ shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, + inode_t **priv_inode = NULL; + inode_t *parent = NULL; + +- priv = THIS->private; ++ priv = this->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: +@@ -1294,7 +1295,7 @@ shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, + /* To-Do: Fix refcount increment per call to + * shard_link_internal_dir_inode(). 
+ */ +- linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ linked_inode = shard_link_internal_dir_inode(local, inode, this, buf, type); + shard_inode_ctx_mark_dir_refreshed(linked_inode, this); + out: + shard_common_resolve_shards(frame, this, local->post_res_handler); +@@ -1383,7 +1384,7 @@ shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto unwind; + } + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ link_inode = shard_link_internal_dir_inode(local, inode, this, buf, type); + if (link_inode != inode) { + shard_refresh_internal_dir(frame, this, type); + } else { +@@ -3586,7 +3587,8 @@ shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, + "Lookup on %s failed, exiting", bname); + goto err; + } else { +- shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); ++ shard_link_internal_dir_inode(local, loc->inode, this, &stbuf, ++ type); + } + } + ret = 0; +@@ -3633,6 +3635,45 @@ err: + return ret; + } + ++static int ++shard_nameless_lookup_base_file(xlator_t *this, char *gfid) ++{ ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ dict_t *xattr_req = dict_new(); ++ if (!xattr_req) { ++ ret = -1; ++ goto out; ++ } ++ ++ loc.inode = inode_new(this->itable); ++ if (loc.inode == NULL) { ++ ret = -1; ++ goto out; ++ } ++ ++ ret = gf_uuid_parse(gfid, loc.gfid); ++ if (ret < 0) ++ goto out; ++ ++ ret = dict_set_uint32(xattr_req, GF_UNLINKED_LOOKUP, 1); ++ if (ret < 0) ++ goto out; ++ ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, xattr_req, NULL); ++ if (ret < 0) ++ goto out; ++ ++out: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ loc_wipe(&loc); ++ ++ return ret; ++} ++ + int + shard_delete_shards(void *opaque) + { +@@ -3734,6 +3775,11 @@ shard_delete_shards(void *opaque) + if (ret < 0) + continue; + } ++ ++ ret = shard_nameless_lookup_base_file(this, entry->d_name); ++ if (!ret) ++ continue; ++ + link_inode = inode_link(entry->inode, local->fd->inode, + entry->d_name, &entry->d_stat); + +@@ -4105,6 +4151,9 @@ err: + int + shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); + ++static int ++shard_unlink_handler_spawn(xlator_t *this); ++ + int + shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, +@@ -4126,7 +4175,7 @@ shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + if (xdata) + local->xattr_rsp = dict_ref(xdata); + if (local->cleanup_required) +- shard_start_background_deletion(this); ++ shard_unlink_handler_spawn(this); + } + + if (local->entrylk_frame) { +@@ -5785,7 +5834,7 @@ shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + } + } + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ link_inode = shard_link_internal_dir_inode(local, inode, this, buf, type); + if (link_inode != inode) { + shard_refresh_internal_dir(frame, this, type); + } else { +@@ -7098,6 +7147,132 @@ shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + return 0; + } + ++static void ++shard_unlink_wait(shard_unlink_thread_t *ti) ++{ ++ struct timespec wait_till = { ++ 0, ++ }; ++ ++ pthread_mutex_lock(&ti->mutex); ++ { ++ /* shard_unlink_handler() runs every 10 mins of interval */ ++ wait_till.tv_sec = time(NULL) + 600; ++ ++ while (!ti->rerun) { ++ if (pthread_cond_timedwait(&ti->cond, &ti->mutex, &wait_till) == ++ ETIMEDOUT) ++ break; ++ } ++ ti->rerun = _gf_false; ++ } ++ pthread_mutex_unlock(&ti->mutex); ++} ++ ++static void * 
++shard_unlink_handler(void *data) ++{ ++ shard_unlink_thread_t *ti = data; ++ xlator_t *this = ti->this; ++ ++ THIS = this; ++ ++ while (!ti->stop) { ++ shard_start_background_deletion(this); ++ shard_unlink_wait(ti); ++ } ++ return NULL; ++} ++ ++static int ++shard_unlink_handler_spawn(xlator_t *this) ++{ ++ int ret = 0; ++ shard_priv_t *priv = this->private; ++ shard_unlink_thread_t *ti = &priv->thread_info; ++ ++ ti->this = this; ++ ++ pthread_mutex_lock(&ti->mutex); ++ { ++ if (ti->running) { ++ pthread_cond_signal(&ti->cond); ++ } else { ++ ret = gf_thread_create(&ti->thread, NULL, shard_unlink_handler, ti, ++ "shard_unlink"); ++ if (ret < 0) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "Failed to create \"shard_unlink\" thread"); ++ goto unlock; ++ } ++ ti->running = _gf_true; ++ } ++ ++ ti->rerun = _gf_true; ++ } ++unlock: ++ pthread_mutex_unlock(&ti->mutex); ++ return ret; ++} ++ ++static int ++shard_unlink_handler_init(shard_unlink_thread_t *ti) ++{ ++ int ret = 0; ++ xlator_t *this = THIS; ++ ++ ret = pthread_mutex_init(&ti->mutex, NULL); ++ if (ret) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "Failed to init mutex for \"shard_unlink\" thread"); ++ goto out; ++ } ++ ++ ret = pthread_cond_init(&ti->cond, NULL); ++ if (ret) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "Failed to init cond var for \"shard_unlink\" thread"); ++ pthread_mutex_destroy(&ti->mutex); ++ goto out; ++ } ++ ++ ti->running = _gf_false; ++ ti->rerun = _gf_false; ++ ti->stop = _gf_false; ++ ++out: ++ return -ret; ++} ++ ++static void ++shard_unlink_handler_fini(shard_unlink_thread_t *ti) ++{ ++ int ret = 0; ++ xlator_t *this = THIS; ++ if (!ti) ++ return; ++ ++ pthread_mutex_lock(&ti->mutex); ++ if (ti->running) { ++ ti->rerun = _gf_true; ++ ti->stop = _gf_true; ++ pthread_cond_signal(&ti->cond); ++ } ++ pthread_mutex_unlock(&ti->mutex); ++ ++ if (ti->running) { ++ ret = pthread_join(ti->thread, NULL); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, 0, ++ "Failed to clean up shard unlink thread."); ++ ti->running = _gf_false; ++ } ++ ti->thread = 0; ++ ++ pthread_cond_destroy(&ti->cond); ++ pthread_mutex_destroy(&ti->mutex); ++} ++ + int32_t + mem_acct_init(xlator_t *this) + { +@@ -7164,6 +7339,14 @@ init(xlator_t *this) + this->private = priv; + LOCK_INIT(&priv->lock); + INIT_LIST_HEAD(&priv->ilist_head); ++ ++ ret = shard_unlink_handler_init(&priv->thread_info); ++ if (ret) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "Failed to initialize resources for \"shard_unlink\" thread"); ++ goto out; ++ } ++ + ret = 0; + out: + if (ret) { +@@ -7188,6 +7371,8 @@ fini(xlator_t *this) + if (!priv) + goto out; + ++ shard_unlink_handler_fini(&priv->thread_info); ++ + this->private = NULL; + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 4fe181b..3dcb112 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -207,6 +207,16 @@ typedef enum { + + /* rm = "remove me" */ + ++typedef struct shard_unlink_thread { ++ pthread_mutex_t mutex; ++ pthread_cond_t cond; ++ pthread_t thread; ++ gf_boolean_t running; ++ gf_boolean_t rerun; ++ gf_boolean_t stop; ++ xlator_t *this; ++} shard_unlink_thread_t; ++ + typedef struct shard_priv { + uint64_t block_size; + uuid_t dot_shard_gfid; +@@ -220,6 +230,7 @@ typedef struct shard_priv { + shard_bg_deletion_state_t bg_del_state; + gf_boolean_t first_lookup_done; + uint64_t lru_limit; ++ shard_unlink_thread_t thread_info; + } shard_priv_t; + + typedef struct { +diff --git 
a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index b3a5381..1511e68 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -183,6 +183,11 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + struct posix_private *priv = NULL; + posix_inode_ctx_t *ctx = NULL; + int ret = 0; ++ uint32_t lookup_unlink_dir = 0; ++ char *unlink_path = NULL; ++ struct stat lstatbuf = { ++ 0, ++ }; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); +@@ -208,7 +213,36 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + op_ret = -1; + if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) { + /* nameless lookup */ ++ op_ret = op_errno = errno = 0; + MAKE_INODE_HANDLE(real_path, this, loc, &buf); ++ ++ /* The gfid will be renamed to ".glusterfs/unlink" in case ++ * there are any open fds on the file in posix_unlink path. ++ * So client can request server to do nameless lookup with ++ * xdata = GF_UNLINKED_LOOKUP in ".glusterfs/unlink" ++ * dir if a client wants to know the status of the all open fds ++ * on the unlinked file. If the file still present in the ++ * ".glusterfs/unlink" dir then it indicates there still ++ * open fds present on the file and the file is still under ++ * unlink process */ ++ if (op_ret < 0 && errno == ENOENT) { ++ ret = dict_get_uint32(xdata, GF_UNLINKED_LOOKUP, ++ &lookup_unlink_dir); ++ if (!ret && lookup_unlink_dir) { ++ op_ret = op_errno = errno = 0; ++ POSIX_GET_FILE_UNLINK_PATH(priv->base_path, loc->gfid, ++ unlink_path); ++ ret = sys_lstat(unlink_path, &lstatbuf); ++ if (ret) { ++ op_ret = -1; ++ op_errno = errno; ++ } else { ++ iatt_from_stat(&buf, &lstatbuf); ++ buf.ia_nlink = 0; ++ } ++ goto nameless_lookup_unlink_dir_out; ++ } ++ } + } else { + MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &buf); + if (!real_path || !par_path) { +@@ -328,6 +362,8 @@ out: + + if (op_ret == 0) + op_errno = 0; ++ ++nameless_lookup_unlink_dir_out: + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, + (loc) ? 
loc->inode : NULL, &buf, xattr, &postparent); + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 761e018..4c2983a 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -2504,6 +2504,39 @@ out: + return 0; + } + ++static int ++posix_unlink_renamed_file(xlator_t *this, inode_t *inode) ++{ ++ int ret = 0; ++ char *unlink_path = NULL; ++ uint64_t ctx_uint = 0; ++ posix_inode_ctx_t *ctx = NULL; ++ struct posix_private *priv = this->private; ++ ++ ret = inode_ctx_get(inode, this, &ctx_uint); ++ ++ if (ret < 0) ++ goto out; ++ ++ ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ++ if (ctx->unlink_flag == GF_UNLINK_TRUE) { ++ POSIX_GET_FILE_UNLINK_PATH(priv->base_path, inode->gfid, unlink_path); ++ if (!unlink_path) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, ++ "Failed to remove gfid :%s", uuid_utoa(inode->gfid)); ++ ret = -1; ++ } else { ++ ret = sys_unlink(unlink_path); ++ if (!ret) ++ ctx->unlink_flag = GF_UNLINK_FALSE; ++ } ++ } ++ ++out: ++ return ret; ++} ++ + int32_t + posix_release(xlator_t *this, fd_t *fd) + { +@@ -2514,6 +2547,9 @@ posix_release(xlator_t *this, fd_t *fd) + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + ++ if (fd->inode->active_fd_count == 0) ++ posix_unlink_renamed_file(this, fd->inode); ++ + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, +@@ -5881,41 +5917,33 @@ posix_forget(xlator_t *this, inode_t *inode) + uint64_t ctx_uint1 = 0; + uint64_t ctx_uint2 = 0; + posix_inode_ctx_t *ctx = NULL; +- posix_mdata_t *mdata = NULL; +- struct posix_private *priv_posix = NULL; +- +- priv_posix = (struct posix_private *)this->private; +- if (!priv_posix) +- return 0; ++ struct posix_private *priv = this->private; + + ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2); ++ ++ if (ctx_uint2) ++ GF_FREE((posix_mdata_t *)(uintptr_t)ctx_uint2); ++ + if (!ctx_uint1) +- goto check_ctx2; ++ return 0; + + ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1; + + if (ctx->unlink_flag == GF_UNLINK_TRUE) { +- POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid, +- unlink_path); ++ POSIX_GET_FILE_UNLINK_PATH(priv->base_path, inode->gfid, unlink_path); + if (!unlink_path) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, + "Failed to remove gfid :%s", uuid_utoa(inode->gfid)); + ret = -1; +- goto ctx_free; ++ } else { ++ ret = sys_unlink(unlink_path); + } +- ret = sys_unlink(unlink_path); + } +-ctx_free: ++ + pthread_mutex_destroy(&ctx->xattrop_lock); + pthread_mutex_destroy(&ctx->write_atomic_lock); + pthread_mutex_destroy(&ctx->pgfid_lock); + GF_FREE(ctx); + +-check_ctx2: +- if (ctx_uint2) { +- mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2; +- } +- +- GF_FREE(mdata); + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0575-libglusterfs-add-functions-to-calculate-time-differe.patch b/SOURCES/0575-libglusterfs-add-functions-to-calculate-time-differe.patch new file mode 100644 index 0000000..98ffc3c --- /dev/null +++ b/SOURCES/0575-libglusterfs-add-functions-to-calculate-time-differe.patch @@ -0,0 +1,160 @@ +From 59e69ae1c7ccda74a8cbf8c9b2ae37bc74cbf612 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Fri, 4 Jun 2021 10:55:37 +0530 +Subject: [PATCH 575/584] libglusterfs: add functions to calculate time + difference + +Add gf_tvdiff() and gf_tsdiff() to calculate the difference +between 'struct timeval' and 'struct timespec' values, use 
+them where appropriate. + +Upstream patch details: +> https://github.com/gluster/glusterfs/commit/ba7f24b1cedf2549394c21b3f0df1661227cefae +> Change-Id: I172be06ee84e99a1da76847c15e5ea3fbc059338 +> Signed-off-by: Dmitry Antipov +> Updates: #1002 + +BUG: 1928676 +Change-Id: I723ab9555b0f8caef108742acc2cb63d6a32eb96 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245294 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd-mgmt.c | 4 ++-- + libglusterfs/src/glusterfs/common-utils.h | 32 +++++++++++++++++++++++++++++++ + libglusterfs/src/latency.c | 3 +-- + xlators/cluster/dht/src/dht-rebalance.c | 6 ++---- + xlators/debug/io-stats/src/io-stats.c | 8 ++------ + 5 files changed, 39 insertions(+), 14 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index 61d1b21..a51dd9e 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -534,7 +534,7 @@ glusterfs_volume_top_write_perf(uint32_t blk_size, uint32_t blk_count, + } + + gettimeofday(&end, NULL); +- *time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec); ++ *time = gf_tvdiff(&begin, &end); + *throughput = total_blks / *time; + gf_log("glusterd", GF_LOG_INFO, + "Throughput %.2f Mbps time %.2f secs " +@@ -653,7 +653,7 @@ glusterfs_volume_top_read_perf(uint32_t blk_size, uint32_t blk_count, + } + + gettimeofday(&end, NULL); +- *time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec); ++ *time = gf_tvdiff(&begin, &end); + *throughput = total_blks / *time; + gf_log("glusterd", GF_LOG_INFO, + "Throughput %.2f Mbps time %.2f secs " +diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h +index 604afd0..bd48b6f 100644 +--- a/libglusterfs/src/glusterfs/common-utils.h ++++ b/libglusterfs/src/glusterfs/common-utils.h +@@ -1090,4 +1090,36 @@ find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args); + int + gf_d_type_from_ia_type(ia_type_t type); + ++/* Return delta value in microseconds. */ ++ ++static inline double ++gf_tvdiff(struct timeval *start, struct timeval *end) ++{ ++ struct timeval t; ++ ++ if (start->tv_usec > end->tv_usec) ++ t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000; ++ else ++ t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec; ++ ++ return (double)(t.tv_sec - start->tv_sec) * 1e6 + ++ (double)(t.tv_usec - start->tv_usec); ++} ++ ++/* Return delta value in nanoseconds. 
*/
++
++static inline double
++gf_tsdiff(struct timespec *start, struct timespec *end)
++{
++ struct timespec t;
++
++ if (start->tv_nsec > end->tv_nsec)
++ t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000;
++ else
++ t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec;
++
++ return (double)(t.tv_sec - start->tv_sec) * 1e9 +
++ (double)(t.tv_nsec - start->tv_nsec);
++}
++
+ #endif /* _COMMON_UTILS_H */
+diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
+index e1e6de7..ce61399 100644
+--- a/libglusterfs/src/latency.c
++++ b/libglusterfs/src/latency.c
+@@ -33,8 +33,7 @@ gf_update_latency(call_frame_t *frame)
+ if (!(begin->tv_sec && end->tv_sec))
+ goto out;
+
+- elapsed = (end->tv_sec - begin->tv_sec) * 1e9 +
+- (end->tv_nsec - begin->tv_nsec);
++ elapsed = gf_tsdiff(begin, end);
+
+ if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
+ gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index eab7558..e07dec0 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -2927,8 +2927,7 @@ gf_defrag_migrate_single_file(void *opaque)
+
+ if (defrag->stats == _gf_true) {
+ gettimeofday(&end, NULL);
+- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
+- (end.tv_usec - start.tv_usec);
++ elapsed = gf_tvdiff(&start, &end);
+ gf_log(this->name, GF_LOG_INFO,
+ "Migration of "
+ "file:%s size:%" PRIu64
+@@ -3529,8 +3528,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ }
+
+ gettimeofday(&end, NULL);
+- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
+- (end.tv_usec - dir_start.tv_usec);
++ elapsed = gf_tvdiff(&dir_start, &end);
+ gf_log(this->name, GF_LOG_INFO,
+ "Migration operation on dir %s took "
+ "%.2f secs",
+diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
+index 9b34895..8ad96fb 100644
+--- a/xlators/debug/io-stats/src/io-stats.c
++++ b/xlators/debug/io-stats/src/io-stats.c
+@@ -281,9 +281,7 @@ is_fop_latency_started(call_frame_t *frame)
+ begin = &frame->begin; \
+ end = &frame->end; \
+ \
+- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + \
+- (end->tv_nsec - begin->tv_nsec)) / \
+- 1000; \
++ elapsed = gf_tsdiff(begin, end) / 1000.0; \
+ throughput = op_ret / elapsed; \
+ \
+ conf = this->private; \
+@@ -1774,9 +1772,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame,
+ begin = &frame->begin;
+ end = &frame->end;
+
+- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 +
+- (end->tv_nsec - begin->tv_nsec)) /
+- 1000;
++ elapsed = gf_tsdiff(begin, end) / 1000.0;
+
+ update_ios_latency_stats(&conf->cumulative, elapsed, op);
+ update_ios_latency_stats(&conf->incremental, elapsed, op);
+--
+1.8.3.1
+
diff --git a/SOURCES/0576-rpcsvc-Add-latency-tracking-for-rpc-programs.patch b/SOURCES/0576-rpcsvc-Add-latency-tracking-for-rpc-programs.patch
new file mode 100644
index 0000000..6883559
--- /dev/null
+++ b/SOURCES/0576-rpcsvc-Add-latency-tracking-for-rpc-programs.patch
@@ -0,0 +1,573 @@
+From f2b9d3a089cc9ff9910da0075defe306851aca5c Mon Sep 17 00:00:00 2001
+From: Ravishankar N
+Date: Fri, 4 Jun 2021 12:27:57 +0530
+Subject: [PATCH 576/584] rpcsvc: Add latency tracking for rpc programs
+
+Added latency tracking of rpc-handling code. With this change we
+should be able to monitor the amount of time the rpc-handling code
+is consuming for each rpc call.
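+
+As a minimal sketch of the accounting this amounts to (it assumes the
+gf_latency_t helpers introduced by this series; clock_gettime() stands
+in for the internal timespec_now() wrapper used in rpcsvc.c):
+
+    #include <time.h>
+
+    static void handle_one_rpc(gf_latency_t *lat)
+    {
+        struct timespec begin, end;
+
+        clock_gettime(CLOCK_MONOTONIC, &begin);
+        /* ... the actual rpc actor would run here ... */
+        clock_gettime(CLOCK_MONOTONIC, &end);
+        /* fold the nanosecond delta into min/max/total/count */
+        gf_latency_update(lat, &begin, &end);
+    }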
+ +Upstream patch details: +> https://review.gluster.org/#/c/glusterfs/+/24955/ +> fixes: #1466 +> Change-Id: I04fc7f3b12bfa5053c0fc36885f271cb78f581cd +> Signed-off-by: Pranith Kumar K + +BUG: 1928676 +Change-Id: Ibcedddb5db3ff4906607050cf9f7ea3ebb266cc5 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245295 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez Juan +Reviewed-by: Ashish Pandey +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/latency.h | 22 +++++--- + libglusterfs/src/glusterfs/mem-types.h | 1 + + libglusterfs/src/glusterfs/stack.h | 7 +-- + libglusterfs/src/glusterfs/statedump.h | 2 + + libglusterfs/src/glusterfs/xlator.h | 2 +- + libglusterfs/src/latency.c | 93 +++++++++++++++------------------- + libglusterfs/src/libglusterfs.sym | 5 ++ + libglusterfs/src/monitoring.c | 8 +-- + libglusterfs/src/statedump.c | 38 +++++++++++++- + libglusterfs/src/xlator.c | 5 ++ + rpc/rpc-lib/src/libgfrpc.sym | 1 + + rpc/rpc-lib/src/rpcsvc.c | 72 +++++++++++++++++++++++++- + rpc/rpc-lib/src/rpcsvc.h | 5 ++ + xlators/protocol/server/src/server.c | 2 + + 14 files changed, 193 insertions(+), 70 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/latency.h b/libglusterfs/src/glusterfs/latency.h +index ed47b1f..4d601bb 100644 +--- a/libglusterfs/src/glusterfs/latency.h ++++ b/libglusterfs/src/glusterfs/latency.h +@@ -11,13 +11,23 @@ + #ifndef __LATENCY_H__ + #define __LATENCY_H__ + +-#include "glusterfs/glusterfs.h" ++#include ++#include + +-typedef struct fop_latency { +- double min; /* min time for the call (microseconds) */ +- double max; /* max time for the call (microseconds) */ +- double total; /* total time (microseconds) */ ++typedef struct _gf_latency { ++ uint64_t min; /* min time for the call (nanoseconds) */ ++ uint64_t max; /* max time for the call (nanoseconds) */ ++ uint64_t total; /* total time (nanoseconds) */ + uint64_t count; +-} fop_latency_t; ++} gf_latency_t; + ++gf_latency_t * ++gf_latency_new(size_t n); ++ ++void ++gf_latency_reset(gf_latency_t *lat); ++ ++void ++gf_latency_update(gf_latency_t *lat, struct timespec *begin, ++ struct timespec *end); + #endif /* __LATENCY_H__ */ +diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h +index 92730a9..970b9ff 100644 +--- a/libglusterfs/src/glusterfs/mem-types.h ++++ b/libglusterfs/src/glusterfs/mem-types.h +@@ -139,6 +139,7 @@ enum gf_common_mem_types_ { + gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */ + gf_common_mt_server_cmdline_t, /* used only in one location */ + gf_mt_gfdb_query_record_t, ++ gf_common_mt_latency_t, + gf_common_mt_end + }; + #endif +diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h +index bd466d8..536a330 100644 +--- a/libglusterfs/src/glusterfs/stack.h ++++ b/libglusterfs/src/glusterfs/stack.h +@@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame, + xlator_t *this, int32_t op_ret, int32_t op_errno, + ...); + ++void ++gf_frame_latency_update(call_frame_t *frame); ++ + struct call_pool { + union { + struct list_head all_frames; +@@ -149,8 +152,6 @@ struct _call_stack { + } while (0); + + struct xlator_fops; +-void +-gf_update_latency(call_frame_t *frame); + + static inline void + FRAME_DESTROY(call_frame_t *frame) +@@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame) + void *local = NULL; + + if (frame->root->ctx->measure_latency) +- gf_update_latency(frame); ++ 
gf_frame_latency_update(frame); + + list_del_init(&frame->frames); + if (frame->local) { +diff --git a/libglusterfs/src/glusterfs/statedump.h b/libglusterfs/src/glusterfs/statedump.h +index 89d04f9..ce08270 100644 +--- a/libglusterfs/src/glusterfs/statedump.h ++++ b/libglusterfs/src/glusterfs/statedump.h +@@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd); + void + gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd); + ++void ++gf_latency_statedump_and_reset(char *key, gf_latency_t *lat); + #endif /* STATEDUMP_H */ +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 273039a..ecb9fa4 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -808,7 +808,7 @@ struct _xlator { + + struct { + /* for latency measurement */ +- fop_latency_t latencies[GF_FOP_MAXVALUE]; ++ gf_latency_t latencies[GF_FOP_MAXVALUE]; + /* for latency measurement */ + fop_metrics_t metrics[GF_FOP_MAXVALUE]; + +diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c +index ce61399..ce4b0e8 100644 +--- a/libglusterfs/src/latency.c ++++ b/libglusterfs/src/latency.c +@@ -14,39 +14,34 @@ + */ + + #include "glusterfs/glusterfs.h" +-#include "glusterfs/xlator.h" +-#include "glusterfs/common-utils.h" + #include "glusterfs/statedump.h" +-#include "glusterfs/libglusterfs-messages.h" + +-void +-gf_update_latency(call_frame_t *frame) ++gf_latency_t * ++gf_latency_new(size_t n) + { +- double elapsed; +- struct timespec *begin, *end; +- +- fop_latency_t *lat; +- +- begin = &frame->begin; +- end = &frame->end; ++ int i = 0; ++ gf_latency_t *lat = NULL; + +- if (!(begin->tv_sec && end->tv_sec)) +- goto out; ++ lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t); ++ if (!lat) ++ return NULL; + +- elapsed = gf_tsdiff(begin, end); ++ for (i = 0; i < n; i++) { ++ gf_latency_reset(lat + i); ++ } ++ return lat; ++} + +- if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) { +- gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d", +- frame->op); ++void ++gf_latency_update(gf_latency_t *lat, struct timespec *begin, ++ struct timespec *end) ++{ ++ if (!(begin->tv_sec && end->tv_sec)) { ++ /*Measure latency might have been enabled/disabled during the op*/ + return; + } + +- /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros +- set it right anyways for those frames */ +- if (!frame->op) +- frame->op = frame->root->op; +- +- lat = &frame->this->stats.interval.latencies[frame->op]; ++ double elapsed = gf_tsdiff(begin, end); + + if (lat->max < elapsed) + lat->max = elapsed; +@@ -56,40 +51,34 @@ gf_update_latency(call_frame_t *frame) + + lat->total += elapsed; + lat->count++; +-out: +- return; + } + + void +-gf_proc_dump_latency_info(xlator_t *xl) ++gf_latency_reset(gf_latency_t *lat) + { +- char key_prefix[GF_DUMP_MAX_BUF_LEN]; +- char key[GF_DUMP_MAX_BUF_LEN]; +- int i; +- +- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); +- gf_proc_dump_add_section("%s", key_prefix); +- +- for (i = 0; i < GF_FOP_MAXVALUE; i++) { +- gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]); +- +- fop_latency_t *lat = &xl->stats.interval.latencies[i]; ++ if (!lat) ++ return; ++ memset(lat, 0, sizeof(*lat)); ++ lat->min = ULLONG_MAX; ++ /* make sure 'min' is set to high value, so it would be ++ properly set later */ ++} + +- /* Doesn't make sense to continue if there are no fops +- came in the given interval */ +- if (!lat->count) +- continue; ++void 
++gf_frame_latency_update(call_frame_t *frame) ++{ ++ gf_latency_t *lat; ++ /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros ++ set it right anyways for those frames */ ++ if (!frame->op) ++ frame->op = frame->root->op; + +- gf_proc_dump_write(key, "%.03f,%" PRId64 ",%.03f", +- (lat->total / lat->count), lat->count, lat->total); ++ if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) { ++ gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d", ++ frame->op); ++ return; + } + +- memset(xl->stats.interval.latencies, 0, +- sizeof(xl->stats.interval.latencies)); +- +- /* make sure 'min' is set to high value, so it would be +- properly set later */ +- for (i = 0; i < GF_FOP_MAXVALUE; i++) { +- xl->stats.interval.latencies[i].min = 0xffffffff; +- } ++ lat = &frame->this->stats.interval.latencies[frame->op]; ++ gf_latency_update(lat, &frame->begin, &frame->end); + } +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 9072afa..4f968e1 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1183,3 +1183,8 @@ gf_latency_reset + gf_latency_update + gf_frame_latency_update + gf_assert ++gf_latency_statedump_and_reset ++gf_latency_new ++gf_latency_reset ++gf_latency_update ++gf_frame_latency_update +diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c +index 6d9bfb1..20b7f52 100644 +--- a/libglusterfs/src/monitoring.c ++++ b/libglusterfs/src/monitoring.c +@@ -113,15 +113,15 @@ dump_latency_and_count(xlator_t *xl, int fd) + dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name, + gf_fop_list[index], cbk); + } +- if (xl->stats.interval.latencies[index].count != 0.0) { ++ if (xl->stats.interval.latencies[index].count != 0) { + dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name, + gf_fop_list[index], +- (xl->stats.interval.latencies[index].total / ++ (((double)xl->stats.interval.latencies[index].total) / + xl->stats.interval.latencies[index].count)); +- dprintf(fd, "%s.interval.%s.max %lf\n", xl->name, ++ dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name, + gf_fop_list[index], + xl->stats.interval.latencies[index].max); +- dprintf(fd, "%s.interval.%s.min %lf\n", xl->name, ++ dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name, + gf_fop_list[index], + xl->stats.interval.latencies[index].min); + } +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index d18b50f..4bf4cc2 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -201,6 +201,40 @@ gf_proc_dump_write(char *key, char *value, ...) 
+ return ret; + } + ++void ++gf_latency_statedump_and_reset(char *key, gf_latency_t *lat) ++{ ++ /* Doesn't make sense to continue if there are no fops ++ came in the given interval */ ++ if (!lat || !lat->count) ++ return; ++ gf_proc_dump_write(key, ++ "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64 ++ " MAX:%" PRIu64, ++ (((double)lat->total) / lat->count), lat->count, ++ lat->total, lat->min, lat->max); ++ gf_latency_reset(lat); ++} ++ ++void ++gf_proc_dump_xl_latency_info(xlator_t *xl) ++{ ++ char key_prefix[GF_DUMP_MAX_BUF_LEN]; ++ char key[GF_DUMP_MAX_BUF_LEN]; ++ int i; ++ ++ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); ++ gf_proc_dump_add_section("%s", key_prefix); ++ ++ for (i = 0; i < GF_FOP_MAXVALUE; i++) { ++ gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]); ++ ++ gf_latency_t *lat = &xl->stats.interval.latencies[i]; ++ ++ gf_latency_statedump_and_reset(key, lat); ++ } ++} ++ + static void + gf_proc_dump_xlator_mem_info(xlator_t *xl) + { +@@ -487,7 +521,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav) + return; + + if (ctx->measure_latency) +- gf_proc_dump_latency_info(trav); ++ gf_proc_dump_xl_latency_info(trav); + + gf_proc_dump_xlator_mem_info(trav); + +@@ -1024,7 +1058,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd) + { + gf_dump_strfd = strfd; + +- gf_proc_dump_latency_info(this); ++ gf_proc_dump_xl_latency_info(this); + + gf_dump_strfd = NULL; + } +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 36cc32c..b9ad411 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -246,6 +246,7 @@ xlator_dynload_apis(xlator_t *xl) + void *handle = NULL; + volume_opt_list_t *vol_opt = NULL; + xlator_api_t *xlapi = NULL; ++ int i = 0; + + handle = xl->dlhandle; + +@@ -343,6 +344,10 @@ xlator_dynload_apis(xlator_t *xl) + memcpy(xl->op_version, xlapi->op_version, + sizeof(uint32_t) * GF_MAX_RELEASES); + ++ for (i = 0; i < GF_FOP_MAXVALUE; i++) { ++ gf_latency_reset(&xl->stats.interval.latencies[i]); ++ } ++ + ret = 0; + out: + return ret; +diff --git a/rpc/rpc-lib/src/libgfrpc.sym b/rpc/rpc-lib/src/libgfrpc.sym +index f3544e3..a1757cc 100644 +--- a/rpc/rpc-lib/src/libgfrpc.sym ++++ b/rpc/rpc-lib/src/libgfrpc.sym +@@ -66,3 +66,4 @@ rpc_transport_unix_options_build + rpc_transport_unref + rpc_clnt_mgmt_pmap_signout + rpcsvc_autoscale_threads ++rpcsvc_statedump +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index b031d93..855b512 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -25,6 +25,7 @@ + #include + #include "rpc-drc.h" + #include "protocol-common.h" ++#include + + #include + #include +@@ -377,6 +378,10 @@ rpcsvc_program_actor(rpcsvc_request_t *req) + goto err; + } + ++ if (svc->xl->ctx->measure_latency) { ++ timespec_now(&req->begin); ++ } ++ + req->ownthread = program->ownthread; + req->synctask = program->synctask; + +@@ -1526,10 +1531,18 @@ rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr, + size_t hdrlen = 0; + char new_iobref = 0; + rpcsvc_drc_globals_t *drc = NULL; ++ gf_latency_t *lat = NULL; + + if ((!req) || (!req->trans)) + return -1; + ++ if (req->prog && req->begin.tv_sec) { ++ if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) { ++ timespec_now(&req->end); ++ lat = &req->prog->latencies[req->procnum]; ++ gf_latency_update(lat, &req->begin, &req->end); ++ } ++ } + trans = req->trans; + + for (i = 0; i < hdrcount; i++) { +@@ -1860,6 +1873,15 @@ 
rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr, + iobref); + } + ++void ++rpcsvc_program_destroy(rpcsvc_program_t *program) ++{ ++ if (program) { ++ GF_FREE(program->latencies); ++ GF_FREE(program); ++ } ++} ++ + int + rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) + { +@@ -1917,8 +1939,7 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) + + ret = 0; + out: +- if (prog) +- GF_FREE(prog); ++ rpcsvc_program_destroy(prog); + + if (ret == -1) { + if (program) { +@@ -2303,6 +2324,11 @@ rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program, + } + + memcpy(newprog, program, sizeof(*program)); ++ newprog->latencies = gf_latency_new(program->numactors); ++ if (!newprog->latencies) { ++ rpcsvc_program_destroy(newprog); ++ goto out; ++ } + + INIT_LIST_HEAD(&newprog->program); + pthread_mutexattr_init(&thr_attr); +@@ -3240,6 +3266,48 @@ out: + return ret; + } + ++void ++rpcsvc_program_dump(rpcsvc_program_t *prog) ++{ ++ char key_prefix[GF_DUMP_MAX_BUF_LEN]; ++ char key[GF_DUMP_MAX_BUF_LEN]; ++ int i; ++ ++ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname); ++ gf_proc_dump_add_section("%s", key_prefix); ++ ++ gf_proc_dump_build_key(key, key_prefix, "program-number"); ++ gf_proc_dump_write(key, "%d", prog->prognum); ++ ++ gf_proc_dump_build_key(key, key_prefix, "program-version"); ++ gf_proc_dump_write(key, "%d", prog->progver); ++ ++ strncat(key_prefix, ".latency", ++ sizeof(key_prefix) - strlen(key_prefix) - 1); ++ ++ for (i = 0; i < prog->numactors; i++) { ++ gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname); ++ gf_latency_statedump_and_reset(key, &prog->latencies[i]); ++ } ++} ++ ++void ++rpcsvc_statedump(rpcsvc_t *svc) ++{ ++ rpcsvc_program_t *prog = NULL; ++ int ret = 0; ++ ret = pthread_rwlock_tryrdlock(&svc->rpclock); ++ if (ret) ++ return; ++ { ++ list_for_each_entry(prog, &svc->programs, program) ++ { ++ rpcsvc_program_dump(prog); ++ } ++ } ++ pthread_rwlock_unlock(&svc->rpclock); ++} ++ + rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = { + [GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, 0, DRC_NA}, + [GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, 0, DRC_NA}, +diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h +index a51edc7..e336d00 100644 +--- a/rpc/rpc-lib/src/rpcsvc.h ++++ b/rpc/rpc-lib/src/rpcsvc.h +@@ -275,6 +275,8 @@ struct rpcsvc_request { + gf_boolean_t ownthread; + + gf_boolean_t synctask; ++ struct timespec begin; /*req handling start time*/ ++ struct timespec end; /*req handling end time*/ + }; + + #define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog)) +@@ -431,6 +433,7 @@ struct rpcsvc_program { + + /* Program specific state handed to actors */ + void *private; ++ gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/ + + /* This upcall is provided by the program during registration. 
+ * It is used to notify the program about events like connection being
+@@ -696,4 +699,6 @@ rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr);
+ 
+ extern int
+ rpcsvc_destroy(rpcsvc_t *svc);
++void
++rpcsvc_statedump(rpcsvc_t *svc);
+ #endif
+diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
+index 54d9c0f..90eb3ff 100644
+--- a/xlators/protocol/server/src/server.c
++++ b/xlators/protocol/server/src/server.c
+@@ -267,6 +267,8 @@ server_priv(xlator_t *this)
+     gf_proc_dump_build_key(key, "server", "total-bytes-write");
+     gf_proc_dump_write(key, "%" PRIu64, total_write);
+ 
++    rpcsvc_statedump(conf->rpc);
++
+     ret = 0;
+ out:
+     if (ret)
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch b/SOURCES/0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch
new file mode 100644
index 0000000..1a5d0ea
--- /dev/null
+++ b/SOURCES/0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch
@@ -0,0 +1,472 @@
+From d7665cf3249310c5faf87368f395b4e25cb86b48 Mon Sep 17 00:00:00 2001
+From: karthik-us
+Date: Thu, 15 Apr 2021 10:29:06 +0530
+Subject: [PATCH 577/584] protocol/client: don't reopen fds on which POSIX
+ locks are held after a reconnect
+
+XXXXXXXXXXXXXXXXXXX
+ IMPORTANT:
+XXXXXXXXXXXXXXXXXXX
+As a best practice, with this patch we are bumping up the op-version
+from GD_OP_VERSION_7_1 to GD_OP_VERSION_7_2 since it introduces a
+new volume option. The new option takes effect only
+after all the servers and clients are upgraded to this version.
+----------------------------------------------------------------------
+
+Bricks clean up any granted locks after a client disconnects, and
+currently these locks are not healed after a reconnect. This means
+post reconnect a competing process could be granted a lock even though
+the first process which was granted locks has not unlocked them. By not
+re-opening fds, subsequent operations on such fds will fail, forcing
+the application to close the current fd and open a new one. This way
+we prevent any silent corruption.
+
+A new option "client.strict-locks" is introduced to control this
+behaviour. This option is set to "off" by default.
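+
+A minimal usage sketch, assuming a volume named demo-vol mounted at
+/mnt/demo-vol (the volume name, mount point and fd number are
+hypothetical examples, not part of this patch):
+
+    # Enable the option on an existing volume:
+    gluster volume set demo-vol client.strict-locks on
+
+    # On a client, hold a POSIX lock on an open fd:
+    exec 5<>/mnt/demo-vol/file   # open fd 5 read-write
+    flock -x 5                   # take an exclusive lock on fd 5
+
+    # If a brick disconnects and reconnects while the lock is held,
+    # fd 5 is not re-opened and subsequent writes on it fail, so the
+    # application must close the fd and open the file again:
+    echo "data" >&5 || { exec 5>&-; exec 5<>/mnt/demo-vol/file; }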
+ +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22712/ +> Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e +> Signed-off-by: Raghavendra G +> updates: bz#1694920 + +BUG: 1689375 +Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244909 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +Reviewed-by: Ravishankar Narayanankutty +--- + libglusterfs/src/glusterfs/globals.h | 4 +- + tests/bugs/bug-1694920.t | 63 ++++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 14 ++++++ + xlators/protocol/client/src/client-handshake.c | 3 +- + xlators/protocol/client/src/client-helpers.c | 5 +- + xlators/protocol/client/src/client-lk.c | 2 +- + xlators/protocol/client/src/client-rpc-fops.c | 45 ++++++++++++++++- + xlators/protocol/client/src/client-rpc-fops_v2.c | 32 +++++++++++- + xlators/protocol/client/src/client.c | 13 +++++ + xlators/protocol/client/src/client.h | 16 ++++++ + 10 files changed, 190 insertions(+), 7 deletions(-) + create mode 100644 tests/bugs/bug-1694920.t + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 33fb023..ce2d110 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,7 +50,7 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_7_1 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_7_2 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ +@@ -140,6 +140,8 @@ + + #define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */ + ++#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */ ++ + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/tests/bugs/bug-1694920.t b/tests/bugs/bug-1694920.t +new file mode 100644 +index 0000000..5bf93c9 +--- /dev/null ++++ b/tests/bugs/bug-1694920.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++SCRIPT_TIMEOUT=300 ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../fileio.rc ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/${V0}; ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume start $V0 ++TEST $GFS -s $H0 --volfile-id=$V0 $M0; ++ ++TEST touch $M0/a ++ ++#When all bricks are up, lock and unlock should succeed ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'w' $M0/a ++TEST flock -x $fd1 ++TEST fd_close $fd1 ++ ++#When all bricks are down, lock/unlock should fail ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'w' $M0/a ++TEST $CLI volume stop $V0 ++TEST ! 
flock -x $fd1
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
++TEST fd_close $fd1
++
++#When a brick goes down and comes back up operations on fd which had locks on it should succeed by default
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'w' $M0/a
++TEST flock -x $fd1
++TEST $CLI volume stop $V0
++sleep 2
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
++TEST fd_write $fd1 "data"
++TEST fd_close $fd1
++
++#When a brick goes down and comes back up operations on fd which had locks on it should fail when client.strict-locks is on
++TEST $CLI volume set $V0 client.strict-locks on
++TEST fd1=`fd_available`
++TEST fd_open $fd1 'w' $M0/a
++TEST flock -x $fd1
++TEST $CLI volume stop $V0
++sleep 2
++TEST $CLI volume start $V0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
++TEST ! fd_write $fd1 "data"
++TEST fd_close $fd1
++
++cleanup
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index c1ca190..01f3912 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2022,6 +2022,20 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+      .value = "9",
+      .flags = VOLOPT_FLAG_CLIENT_OPT},
+ 
++    {.key = "client.strict-locks",
++     .voltype = "protocol/client",
++     .option = "strict-locks",
++     .value = "off",
++     .op_version = GD_OP_VERSION_7_2,
++     .validate_fn = validate_boolean,
++     .type = GLOBAL_DOC,
++     .description = "When set, doesn't reopen saved fds after reconnect "
++                    "if POSIX locks are held on them. Hence subsequent "
++                    "operations on these fds will fail. This is "
++                    "necessary for stricter lock compliance as bricks "
++                    "clean up any granted locks when a client "
++                    "disconnects."},
++
+     /* Server xlator options */
+     {.key = "network.tcp-window-size",
+      .voltype = "protocol/server",
+diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
+index 6b20d92..a12472b 100644
+--- a/xlators/protocol/client/src/client-handshake.c
++++ b/xlators/protocol/client/src/client-handshake.c
+@@ -910,7 +910,8 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
+     {
+         list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
+         {
+-            if (fdctx->remote_fd != -1)
++            if (fdctx->remote_fd != -1 ||
++                (!list_empty(&fdctx->lock_list) && conf->strict_locks))
+                 continue;
+ 
+             fdctx->reopen_done = client_child_up_reopen_done;
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index 53b4484..6543100 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -410,6 +410,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+ {
+     clnt_fd_ctx_t *fdctx = NULL;
+     clnt_conf_t *conf = NULL;
++    gf_boolean_t locks_held = _gf_false;
+ 
+     GF_VALIDATE_OR_GOTO(this->name, fd, out);
+     GF_VALIDATE_OR_GOTO(this->name, remote_fd, out);
+@@ -431,11 +432,13 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+             *remote_fd = -1;
+         else
+             *remote_fd = fdctx->remote_fd;
++
++        locks_held = !list_empty(&fdctx->lock_list);
+     }
+     }
+     pthread_spin_unlock(&conf->fd_lock);
+ 
+-    if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1))
++    if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held))
+         *remote_fd = 
GF_ANON_FD_NO; + + return 0; +diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c +index 679e198..c1fb055 100644 +--- a/xlators/protocol/client/src/client-lk.c ++++ b/xlators/protocol/client/src/client-lk.c +@@ -351,7 +351,7 @@ delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner) + + list_for_each_entry_safe(lock, tmp, &fdctx->lock_list, list) + { +- if (!is_same_lkowner(&lock->owner, owner)) { ++ if (is_same_lkowner(&lock->owner, owner)) { + list_del_init(&lock->list); + list_add_tail(&lock->list, &delete_list); + count++; +diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c +index 1c8b31b..3110c78 100644 +--- a/xlators/protocol/client/src/client-rpc-fops.c ++++ b/xlators/protocol/client/src/client-rpc-fops.c +@@ -22,8 +22,18 @@ int32_t + client3_getspec(call_frame_t *frame, xlator_t *this, void *data); + rpc_clnt_prog_t clnt3_3_fop_prog; + +-/* CBK */ ++int ++client_is_setlk(int32_t cmd) ++{ ++ if ((cmd == F_SETLK) || (cmd == F_SETLK64) || (cmd == F_SETLKW) || ++ (cmd == F_SETLKW64)) { ++ return 1; ++ } + ++ return 0; ++} ++ ++/* CBK */ + int + client3_3_symlink_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +@@ -816,7 +826,8 @@ client3_3_flush_cbk(struct rpc_req *req, struct iovec *iov, int count, + goto out; + } + +- if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) { ++ if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) && ++ !fd_is_anonymous(local->fd)) { + /* Delete all saved locks of the owner issuing flush */ + ret = delete_granted_locks_owner(local->fd, &local->owner); + gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d", +@@ -2388,10 +2399,12 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count, + int ret = 0; + xlator_t *this = NULL; + dict_t *xdata = NULL; ++ clnt_local_t *local = NULL; + + this = THIS; + + frame = myframe; ++ local = frame->local; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; +@@ -2412,6 +2425,18 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count, + ret = client_post_lk(this, &rsp, &lock, &xdata); + if (ret < 0) + goto out; ++ ++ /* Save the lock to the client lock cache to be able ++ to recover in the case of server reboot.*/ ++ ++ if (client_is_setlk(local->cmd)) { ++ ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner, ++ local->cmd); ++ if (ret < 0) { ++ rsp.op_ret = -1; ++ rsp.op_errno = -ret; ++ } ++ } + } + + out: +@@ -4263,8 +4288,16 @@ client3_3_flush(call_frame_t *frame, xlator_t *this, void *data) + ret = client_pre_flush(this, &req, args->fd, args->xdata); + if (ret) { + op_errno = -ret; ++ if (op_errno == EBADF) { ++ ret = delete_granted_locks_owner(local->fd, &local->owner); ++ gf_msg_trace(this->name, 0, ++ "deleting locks of owner (%s) returned %d", ++ lkowner_utoa(&local->owner), ret); ++ } ++ + goto unwind; + } ++ + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH, + client3_3_flush_cbk, NULL, + (xdrproc_t)xdr_gfs3_flush_req); +@@ -5199,8 +5232,16 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) + args->xdata); + if (ret) { + op_errno = -ret; ++ ++ if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) && ++ client_is_setlk(local->cmd)) { ++ client_add_lock_for_recovery(local->fd, args->flock, &local->owner, ++ local->cmd); ++ } ++ + goto unwind; + } ++ + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK, + client3_3_lk_cbk, NULL, + (xdrproc_t)xdr_gfs3_lk_req); +diff --git 
a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c +index 613dda8..954fc58 100644 +--- a/xlators/protocol/client/src/client-rpc-fops_v2.c ++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c +@@ -723,7 +723,8 @@ client4_0_flush_cbk(struct rpc_req *req, struct iovec *iov, int count, + goto out; + } + +- if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) { ++ if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) && ++ !fd_is_anonymous(local->fd)) { + /* Delete all saved locks of the owner issuing flush */ + ret = delete_granted_locks_owner(local->fd, &local->owner); + gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d", +@@ -2193,10 +2194,12 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count, + int ret = 0; + xlator_t *this = NULL; + dict_t *xdata = NULL; ++ clnt_local_t *local = NULL; + + this = THIS; + + frame = myframe; ++ local = frame->local; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; +@@ -2217,6 +2220,18 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count, + ret = client_post_lk_v2(this, &rsp, &lock, &xdata); + if (ret < 0) + goto out; ++ ++ /* Save the lock to the client lock cache to be able ++ to recover in the case of server reboot.*/ ++ ++ if (client_is_setlk(local->cmd)) { ++ ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner, ++ local->cmd); ++ if (ret < 0) { ++ rsp.op_ret = -1; ++ rsp.op_errno = -ret; ++ } ++ } + } + + out: +@@ -3998,6 +4013,13 @@ client4_0_flush(call_frame_t *frame, xlator_t *this, void *data) + ret = client_pre_flush_v2(this, &req, args->fd, args->xdata); + if (ret) { + op_errno = -ret; ++ if (op_errno == EBADF) { ++ ret = delete_granted_locks_owner(local->fd, &local->owner); ++ gf_msg_trace(this->name, 0, ++ "deleting locks of owner (%s) returned %d", ++ lkowner_utoa(&local->owner), ret); ++ } ++ + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH, +@@ -4771,8 +4793,16 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) + args->xdata); + if (ret) { + op_errno = -ret; ++ ++ if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) && ++ client_is_setlk(local->cmd)) { ++ client_add_lock_for_recovery(local->fd, args->flock, &local->owner, ++ local->cmd); ++ } ++ + goto unwind; + } ++ + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK, + client4_0_lk_cbk, NULL, + (xdrproc_t)xdr_gfx_lk_req); +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index ed855ca..63c90ea 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -2491,6 +2491,7 @@ build_client_config(xlator_t *this, clnt_conf_t *conf) + GF_OPTION_INIT("filter-O_DIRECT", conf->filter_o_direct, bool, out); + + GF_OPTION_INIT("send-gids", conf->send_gids, bool, out); ++ GF_OPTION_INIT("strict-locks", conf->strict_locks, bool, out); + + conf->client_id = glusterfs_leaf_position(this); + +@@ -2676,6 +2677,7 @@ reconfigure(xlator_t *this, dict_t *options) + out); + + GF_OPTION_RECONF("send-gids", conf->send_gids, options, bool, out); ++ GF_OPTION_RECONF("strict-locks", conf->strict_locks, options, bool, out); + + ret = 0; + out: +@@ -3032,6 +3034,17 @@ struct volume_options options[] = { + " power. 
Range 1-32 threads.",
+     .op_version = {GD_OP_VERSION_RHS_3_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
++    {.key = {"strict-locks"},
++     .type = GF_OPTION_TYPE_BOOL,
++     .default_value = "off",
++     .op_version = {GD_OP_VERSION_7_2},
++     .flags = OPT_FLAG_SETTABLE,
++     .description = "When set, doesn't reopen saved fds after reconnect "
++                    "if POSIX locks are held on them. Hence subsequent "
++                    "operations on these fds will fail. This is "
++                    "necessary for stricter lock compliance as bricks "
++                    "clean up any granted locks when a client "
++                    "disconnects."},
+     {.key = {NULL}},
+ };
+ 
+diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
+index f12fa61..bde3d1a 100644
+--- a/xlators/protocol/client/src/client.h
++++ b/xlators/protocol/client/src/client.h
+@@ -235,6 +235,15 @@ typedef struct clnt_conf {
+                                 * up, disconnects can be
+                                 * logged
+                                 */
++
++    gf_boolean_t strict_locks; /* When set, doesn't reopen saved fds after
++                                  reconnect if POSIX locks are held on them.
++                                  Hence subsequent operations on these fds will
++                                  fail. This is necessary for stricter lock
++                                  compliance as bricks clean up any granted
++                                  locks when a client disconnects.
++                               */
++
+ } clnt_conf_t;
+ 
+ typedef struct _client_fd_ctx {
+@@ -513,4 +522,11 @@ compound_request_cleanup_v2(gfx_compound_req *req);
+ void
+ client_compound_rsp_cleanup_v2(gfx_compound_rsp *rsp, int len);
+ 
++int
++client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
++                             gf_lkowner_t *owner, int32_t cmd);
++
++int
++client_is_setlk(int32_t cmd);
++
+ #endif /* !_CLIENT_H */
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0578-protocol-client-fallback-to-anonymous-fd-for-fsync.patch b/SOURCES/0578-protocol-client-fallback-to-anonymous-fd-for-fsync.patch
new file mode 100644
index 0000000..d5df9e2
--- /dev/null
+++ b/SOURCES/0578-protocol-client-fallback-to-anonymous-fd-for-fsync.patch
@@ -0,0 +1,46 @@
+From ffb4085b3e04878e85bf505a541203aa2ee71e9c Mon Sep 17 00:00:00 2001
+From: l17zhou
+Date: Fri, 6 Mar 2020 03:54:02 +0200
+Subject: [PATCH 578/584] protocol/client: fallback to anonymous fd for fsync
+
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24203/
+> Change-Id: I32f801206ce7fbd05aa693f44c2f140304f2e275
+> Fixes: bz#1810842
+
+BUG: 1689375
+Change-Id: I32f801206ce7fbd05aa693f44c2f140304f2e275
+Signed-off-by: karthik-us
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245538
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/protocol/client/src/client-common.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c
+index 64db98d..1417a60 100644
+--- a/xlators/protocol/client/src/client-common.c
++++ b/xlators/protocol/client/src/client-common.c
+@@ -449,7 +449,8 @@ client_pre_fsync(xlator_t *this, gfs3_fsync_req *req, fd_t *fd, int32_t flags,
+     int64_t remote_fd = -1;
+     int op_errno = 0;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
++                         out);
+ 
+     req->fd = remote_fd;
+     req->data = flags;
+@@ -2641,7 +2642,8 @@ client_pre_fsync_v2(xlator_t *this, gfx_fsync_req *req, fd_t *fd, int32_t flags,
+     int64_t remote_fd = -1;
+     int op_errno = 0;
+ 
+-    CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
++    CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
++                         out);
+ 
+     req->fd = 
remote_fd;
+     req->data = flags;
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0579-cli-changing-rebal-task-ID-to-None-in-case-status-is.patch b/SOURCES/0579-cli-changing-rebal-task-ID-to-None-in-case-status-is.patch
new file mode 100644
index 0000000..d568966
--- /dev/null
+++ b/SOURCES/0579-cli-changing-rebal-task-ID-to-None-in-case-status-is.patch
@@ -0,0 +1,168 @@
+From 96c4c3c47c914aced8864e7d178a4d57f7fced05 Mon Sep 17 00:00:00 2001
+From: Tamar Shacked
+Date: Sun, 6 Jun 2021 14:26:18 +0300
+Subject: [PATCH 579/584] cli: changing rebal task ID to "None" in case status
+ is being reset
+
+Rebalance status is being reset during replace/reset-brick operations.
+This causes 'volume status' to show rebalance as "not started".
+
+Fix:
+change rebalance-status to "reset due to (replace|reset)-brick"
+
+Backport of:
+> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/1869
+> Change-Id: Ia73a8bea3dcd8e51acf4faa6434c3cb0d09856d0
+> Signed-off-by: Tamar Shacked
+> Fixes: #1717
+
+BUG: 1889966
+
+Signed-off-by: Tamar Shacked
+Change-Id: Ia73a8bea3dcd8e51acf4faa6434c3cb0d09856d0
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245402
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ cli/src/cli-rpc-ops.c                              | 15 ++++++-
+ rpc/xdr/src/cli1-xdr.x                             |  2 +
+ tests/bugs/glusterd/reset-rebalance-state.t        | 46 ++++++++++++++++++++++
+ xlators/mgmt/glusterd/src/glusterd-replace-brick.c |  4 +-
+ xlators/mgmt/glusterd/src/glusterd-reset-brick.c   |  3 +-
+ 5 files changed, 65 insertions(+), 5 deletions(-)
+ create mode 100644 tests/bugs/glusterd/reset-rebalance-state.t
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 51b5447..4167c68 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -72,6 +72,8 @@ char *cli_vol_task_status_str[] = {"not started",
+                                    "fix-layout stopped",
+                                    "fix-layout completed",
+                                    "fix-layout failed",
++                                   "reset due to replace-brick",
++                                   "reset due to reset-brick",
+                                    "unknown"};
+ 
+ int32_t
+@@ -8357,12 +8359,21 @@ cli_print_volume_status_tasks(dict_t *dict)
+         ret = dict_get_str(dict, key, &task_id_str);
+         if (ret)
+             return;
+-        cli_out("%-20s : %-20s", "ID", task_id_str);
+ 
+         snprintf(key, sizeof(key), "task%d.status", i);
+         ret = dict_get_int32(dict, key, &status);
+-        if (ret)
++        if (ret) {
++            cli_out("%-20s : %-20s", "ID", task_id_str);
+             return;
++        }
++
++        if (!strcmp(op, "Rebalance") &&
++            (status == GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC ||
++             status == GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC)) {
++            task_id_str = "None";
++        }
++
++        cli_out("%-20s : %-20s", "ID", task_id_str);
+ 
+         snprintf(task, sizeof(task), "task%d", i);
+ 
+diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
+index 777cb00..17d96f1 100644
+--- a/rpc/xdr/src/cli1-xdr.x
++++ b/rpc/xdr/src/cli1-xdr.x
+@@ -45,6 +45,8 @@
+     GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+     GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+     GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
++    GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC,
++    GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC,
+     GF_DEFRAG_STATUS_MAX
+ };
+ 
+diff --git a/tests/bugs/glusterd/reset-rebalance-state.t b/tests/bugs/glusterd/reset-rebalance-state.t
+new file mode 100644
+index 0000000..829d2b1
+--- /dev/null
++++ b/tests/bugs/glusterd/reset-rebalance-state.t
+@@ -0,0 +1,46 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../cluster.rc
++. 
$(dirname $0)/../../volume.rc
++
++
++get_rebalance_status() {
++    $CLI volume status $V0 | egrep ^"Status " | awk '{print $3}'
++}
++
++run_rebal_check_status() {
++    TEST $CLI volume rebalance $V0 start
++    EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
++    REBAL_STATE=$(get_rebalance_status)
++    TEST [ $REBAL_STATE == "completed" ]
++}
++
++replace_brick_check_status() {
++    TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_replace commit force
++    REBAL_STATE=$(get_rebalance_status)
++    TEST [ $REBAL_STATE == "reset" ]
++}
++
++reset_brick_check_status() {
++    TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}2 start
++    TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}2 commit force
++    REBAL_STATE=$(get_rebalance_status)
++    TEST [ $REBAL_STATE == "reset" ]
++}
++
++cleanup;
++
++TEST glusterd;
++TEST pidof glusterd;
++
++TEST $CLI volume info;
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..6} force;
++TEST $CLI volume start $V0;
++
++run_rebal_check_status;
++replace_brick_check_status;
++reset_brick_check_status;
++
++cleanup;
++
+diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+index 0615081..80b80e4 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
++++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+@@ -548,8 +548,8 @@ glusterd_op_replace_brick(dict_t *dict, dict_t *rsp_dict)
+         (void)glusterd_svcs_manager(volinfo);
+         goto out;
+     }
+-
+-    volinfo->rebal.defrag_status = 0;
++    if (volinfo->rebal.defrag_status != GF_DEFRAG_STATUS_NOT_STARTED)
++        volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC;
+ 
+     ret = glusterd_svcs_manager(volinfo);
+     if (ret) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
+index cf04ce8..19d7549 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
++++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
+@@ -342,7 +342,8 @@ glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict)
+         goto out;
+     }
+ 
+-    volinfo->rebal.defrag_status = 0;
++    if (volinfo->rebal.defrag_status != GF_DEFRAG_STATUS_NOT_STARTED)
++        volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC;
+ 
+     ret = glusterd_svcs_manager(volinfo);
+     if (ret) {
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0580-cluster-dht-suppress-file-migration-error-for-node-n.patch b/SOURCES/0580-cluster-dht-suppress-file-migration-error-for-node-n.patch
new file mode 100644
index 0000000..06befeb
--- /dev/null
+++ b/SOURCES/0580-cluster-dht-suppress-file-migration-error-for-node-n.patch
@@ -0,0 +1,138 @@
+From a5da8bb830e86b6dd77a06cd59d220052e80b21c Mon Sep 17 00:00:00 2001
+From: Tamar Shacked
+Date: Sun, 6 Jun 2021 11:57:06 +0300
+Subject: [PATCH 580/584] cluster/dht: suppress file migration error for node
+ not supposed to migrate file
+
+A rebalance process does a lookup for every file in the dir it is processing
+before checking if it is supposed to migrate the file.
+In this issue there are two rebalance processes running on a replica subvol:
+R1 is migrating the FILE.
+R2 is not supposed to migrate the FILE, but it does a lookup and
+   finds a stale linkfile which is mostly due to a stale layout.
+   Then, it tries to unlink the stale linkfile and gets EBUSY
+   as the linkfile fd is open due to R1's migration.
+   As a result, a misleading error message about FILE migration failure
+   due to EBUSY is logged in R2's logfile.
+
+Fix:
+suppress the error in case it occurred in a node that
+is not supposed to migrate the file.
+
+Backport of:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24712/
+> fixes: #1371
+> Change-Id: I37832b404e2b0cc40ac5caf45f14c32c891e71f3
+> Signed-off-by: Tamar Shacked
+
+BUG: 1815462
+Signed-off-by: Tamar Shacked
+Change-Id: I915ee8e7470d85a849b198bfa7d58d368a246aae
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245401
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 38 ++++++++++++++++++++++-----------
+ 1 file changed, 25 insertions(+), 13 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index e07dec0..cc0f2c9 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -2604,10 +2604,10 @@ out:
+  * all hardlinks.
+  */
+ 
+-int
++gf_boolean_t
+ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
+ {
+-    int ret = 0;
++    gf_boolean_t ret = _gf_false;
+     int i = local_subvol_index;
+     char *str = NULL;
+     uint32_t hashval = 0;
+@@ -2629,12 +2629,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
+     }
+ 
+     str = uuid_utoa_r(gfid, buf);
+-    ret = dht_hash_compute(this, 0, str, &hashval);
+-    if (ret == 0) {
++    if (dht_hash_compute(this, 0, str, &hashval) == 0) {
+         index = (hashval % entry->count);
+         if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+             /* Index matches this node's nodeuuid.*/
+-            ret = 1;
++            ret = _gf_true;
+             goto out;
+         }
+ 
+@@ -2647,12 +2646,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
+             /* None of the bricks in the subvol are up.
+              * CHILD_DOWN will kill the process soon */
+ 
+-            return 0;
++            return _gf_false;
+         }
+ 
+         if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+             /* Index matches this node's nodeuuid.*/
+-            ret = 1;
++            ret = _gf_true;
+             goto out;
+         }
+     }
+@@ -2701,6 +2700,7 @@ gf_defrag_migrate_single_file(void *opaque)
+     struct iatt *iatt_ptr = NULL;
+     gf_boolean_t update_skippedcount = _gf_true;
+     int i = 0;
++    gf_boolean_t should_i_migrate = _gf_false;
+ 
+     rebal_entry = (struct dht_container *)opaque;
+     if (!rebal_entry) {
+@@ -2754,11 +2754,29 @@ gf_defrag_migrate_single_file(void *opaque)
+         goto out;
+     }
+ 
++    should_i_migrate = gf_defrag_should_i_migrate(
++        this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
++
+     gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
+ 
+     gf_uuid_copy(entry_loc.pargfid, loc->gfid);
+ 
+     ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
++
++    if (!should_i_migrate) {
++        /* this node isn't supposed to migrate the file. 
Suppressing any
++         * potential error from lookup as this file is under migration by
++         * another node */
++        if (ret) {
++            gf_msg_debug(this->name, -ret,
++                         "Ignoring lookup failure: node isn't migrating %s",
++                         entry_loc.path);
++            ret = 0;
++        }
++        gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
++        goto out;
++    }
++
+     if (ret) {
+         gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+                "Migrate file failed: %s lookup failed", entry_loc.path);
+@@ -2779,12 +2797,6 @@ gf_defrag_migrate_single_file(void *opaque)
+         goto out;
+     }
+ 
+-    if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
+-                                    entry->d_stat.ia_gfid)) {
+-        gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+-        goto out;
+-    }
+-
+     iatt_ptr = &iatt;
+ 
+     hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0581-afr-don-t-reopen-fds-on-which-POSIX-locks-are-held.patch b/SOURCES/0581-afr-don-t-reopen-fds-on-which-POSIX-locks-are-held.patch
new file mode 100644
index 0000000..1267608
--- /dev/null
+++ b/SOURCES/0581-afr-don-t-reopen-fds-on-which-POSIX-locks-are-held.patch
@@ -0,0 +1,1431 @@
+From 57c794e31c0333f508ada740227c9afa1889f8ae Mon Sep 17 00:00:00 2001
+From: karthik-us
+Date: Thu, 15 Apr 2021 11:27:57 +0530
+Subject: [PATCH 581/584] afr: don't reopen fds on which POSIX locks are held
+
+When client.strict-locks is enabled on a volume and POSIX locks are
+held on the files, do not re-open such fds after a disconnect and
+reconnection of the clients, since re-opening them might lead to
+multiple clients acquiring the locks and cause data corruption.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/1980/commits/56bde56c2741c5eac59937a6cf951a14f2878460
+> Change-Id: I8777ffbc2cc8d15ab57b58b72b56eb67521787c5
+> Fixes: #1977
+> Signed-off-by: karthik-us
+
+BUG: 1689375
+Change-Id: I8777ffbc2cc8d15ab57b58b72b56eb67521787c5
+Signed-off-by: karthik-us
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245414
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+Reviewed-by: Ravishankar Narayanankutty
+---
+ rpc/rpc-lib/src/protocol-common.h                |   6 +
+ tests/bugs/replicate/do-not-reopen-fd.t          | 206 +++++++++++++++++
+ xlators/cluster/afr/src/afr-common.c             |  15 +-
+ xlators/cluster/afr/src/afr-open.c               | 280 +++++++++++++++++++----
+ xlators/cluster/afr/src/afr.h                    |   3 +
+ xlators/protocol/client/src/client-common.c      | 148 ++++++++----
+ xlators/protocol/client/src/client-common.h     |   4 +
+ xlators/protocol/client/src/client-helpers.c     |  22 +-
+ xlators/protocol/client/src/client-rpc-fops.c    |  23 +-
+ xlators/protocol/client/src/client-rpc-fops_v2.c |  25 +-
+ xlators/protocol/client/src/client.c             |  21 +-
+ xlators/protocol/client/src/client.h             |   8 +-
+ 12 files changed, 654 insertions(+), 107 deletions(-)
+ create mode 100644 tests/bugs/replicate/do-not-reopen-fd.t
+
+diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
+index 779878f..f56aaaa 100644
+--- a/rpc/rpc-lib/src/protocol-common.h
++++ b/rpc/rpc-lib/src/protocol-common.h
+@@ -312,6 +312,12 @@ enum glusterd_mgmt_v3_procnum {
+     GLUSTERD_MGMT_V3_MAXVALUE,
+ };
+ 
++enum gf_fd_reopen_status {
++    FD_REOPEN_ALLOWED = 0,
++    FD_REOPEN_NOT_ALLOWED,
++    FD_BAD,
++};
++
+ typedef struct gf_gsync_detailed_status_ gf_gsync_status_t;
+ 
+ enum gf_get_volume_info_type {
+diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
+new file mode 100644
+index 0000000..76d8e70
+--- /dev/null
++++ 
b/tests/bugs/replicate/do-not-reopen-fd.t +@@ -0,0 +1,206 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fileio.rc ++ ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 client.strict-locks on ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1 ++ ++TEST touch $M0/a ++ ++# Kill one brick and take lock on the fd and do a write. ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'rw' $M0/a ++ ++TEST flock -x $fd1 ++TEST fd_write $fd1 "data-1" ++ ++# Restart the brick and then write. Now fd should not get re-opened but write ++# should still succeed as there were no quorum disconnects. ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++TEST fd_write $fd1 "data-2" ++EXPECT "" cat $B0/${V0}0/a ++EXPECT "data-2" cat $B0/${V0}1/a ++EXPECT "data-2" cat $B0/${V0}2/a ++ ++# Check there is no fd opened on the 1st brick by checking for the gfid inside ++# /proc/pid-of-brick/fd/ directory ++gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a) ++gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a) ++ ++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++TEST fd2=`fd_available` ++TEST fd_open $fd2 'rw' $M1/a ++ ++# Kill 2nd brick and try writing to the file. The write should fail due to ++# quorum failure. ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++TEST ! fd_write $fd1 "data-3" ++TEST ! fd_cat $fd1 ++ ++# Restart the bricks and try writing to the file. This should fail as two bricks ++# which were down previously, will return EBADFD now. ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++TEST ! fd_write $fd1 "data-4" ++TEST ! fd_cat $fd1 ++ ++# Enable heal and check the files will have same content on all the bricks after ++# the heal is completed. ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT "data-4" cat $B0/${V0}0/a ++EXPECT "data-4" cat $B0/${V0}1/a ++EXPECT "data-4" cat $B0/${V0}2/a ++TEST $CLI volume heal $V0 disable ++ ++# Try writing to the file again on the same fd, which should fail again, since ++# it is not yet re-opened. ++TEST ! 
fd_write $fd1 "data-5" ++ ++# At this point only one brick will have the lock. Try taking the lock again on ++# the bad fd, which should also fail with EBADFD. ++TEST ! flock -x $fd1 ++ ++# Kill the only brick that is having lock and try taking lock on another client ++# which should succeed. ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++TEST flock -x $fd2 ++TEST fd_write $fd2 "data-6" ++ ++# Bring the brick up and try writing & reading on the old fd, which should still ++# fail and operations on the 2nd fd should succeed. ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++TEST ! fd_write $fd1 "data-7" ++ ++TEST ! fd_cat $fd1 ++TEST fd_cat $fd2 ++ ++# Close both the fds which will release the locks and then re-open and take lock ++# on the old fd. Operations on that fd should succeed afterwards. ++TEST fd_close $fd1 ++TEST fd_close $fd2 ++ ++TEST ! ls /proc/$$/fd/$fd1 ++TEST ! ls /proc/$$/fd/$fd2 ++EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'rw' $M0/a ++EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++TEST flock -x $fd1 ++TEST fd_write $fd1 "data-8" ++TEST fd_cat $fd1 ++ ++EXPECT "data-8" head -n 1 $B0/${V0}0/a ++EXPECT "data-8" head -n 1 $B0/${V0}1/a ++EXPECT "data-8" head -n 1 $B0/${V0}2/a ++ ++TEST fd_close $fd1 ++ ++# Heal the volume ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++TEST $CLI volume heal $V0 disable ++ ++# Kill one brick and open a fd. ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'rw' $M0/a ++ ++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++# Restart the brick and then write. Now fd should get re-opened and write should ++# succeed on the previously down brick as well since there are no locks held on ++# any of the bricks. 
++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++TEST fd_write $fd1 "data-10" ++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++ ++EXPECT "data-10" head -n 1 $B0/${V0}0/a ++EXPECT "data-10" head -n 1 $B0/${V0}1/a ++EXPECT "data-10" head -n 1 $B0/${V0}2/a ++TEST fd_close $fd1 ++ ++# Kill one brick, open and take lock on a fd. ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'rw' $M0/a ++TEST flock -x $fd1 ++ ++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++# Kill & restart another brick so that it will return EBADFD ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1 ++ ++# Restart the bricks and then write. Now fd should not get re-opened since lock ++# is still held on one brick and write should also fail as there is no quorum. ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++TEST ! fd_write $fd1 "data-11" ++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++EXPECT "data-10" head -n 1 $B0/${V0}0/a ++EXPECT "data-10" head -n 1 $B0/${V0}1/a ++EXPECT "data-11" head -n 1 $B0/${V0}2/a ++ ++TEST fd_close $fd1 ++cleanup +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 416012c..bd46e59 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2067,6 +2067,8 @@ afr_local_cleanup(afr_local_t *local, xlator_t *this) + dict_unref(local->cont.entrylk.xdata); + } + ++ GF_FREE(local->need_open); ++ + if (local->xdata_req) + dict_unref(local->xdata_req); + +@@ -5689,6 +5691,14 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno) + } + local->is_new_entry = _gf_false; + ++ local->need_open = GF_CALLOC(priv->child_count, sizeof(*local->need_open), ++ gf_afr_mt_char); ++ if (!local->need_open) { ++ if (op_errno) ++ *op_errno = ENOMEM; ++ goto out; ++ } ++ + INIT_LIST_HEAD(&local->healer); + return 0; + out: +@@ -6124,9 +6134,8 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) + char *substr = NULL; + char *status = NULL; + +- ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, +- &entry_selfheal, &data_selfheal, +- &metadata_selfheal, &pending); ++ ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, &entry_selfheal, ++ &data_selfheal, &metadata_selfheal, &pending); + + if (ret == -ENOMEM) { + ret = -1; +diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c +index ff72c73..73c1552 100644 +--- a/xlators/cluster/afr/src/afr-open.c ++++ b/xlators/cluster/afr/src/afr-open.c +@@ -35,6 +35,8 @@ + #include "afr-dir-read.h" + 
#include "afr-dir-write.h" + #include "afr-transaction.h" ++#include "afr-self-heal.h" ++#include "protocol-common.h" + + gf_boolean_t + afr_is_fd_fixable(fd_t *fd) +@@ -239,8 +241,32 @@ afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + ++static void ++afr_fd_ctx_reset_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) ++{ ++ afr_fd_ctx_t *fd_ctx = NULL; ++ afr_private_t *priv = NULL; ++ int i = 0; ++ ++ priv = this->private; ++ fd_ctx = afr_fd_ctx_get(fd, this); ++ if (!fd_ctx) ++ return; ++ ++ LOCK(&fd->lock); ++ { ++ for (i = 0; i < priv->child_count; i++) { ++ if (fd_ctx->opened_on[i] == AFR_FD_OPENING && need_open[i]) { ++ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED; ++ need_open[i] = 0; ++ } ++ } ++ } ++ UNLOCK(&fd->lock); ++} ++ + static int +-afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) ++afr_fd_ctx_set_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) + { + afr_fd_ctx_t *fd_ctx = NULL; + afr_private_t *priv = NULL; +@@ -248,7 +274,6 @@ afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) + int count = 0; + + priv = this->private; +- + fd_ctx = afr_fd_ctx_get(fd, this); + if (!fd_ctx) + return 0; +@@ -271,21 +296,217 @@ afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) + return count; + } + ++static int ++afr_do_fix_open(call_frame_t *frame, xlator_t *this) ++{ ++ afr_local_t *local = frame->local; ++ afr_private_t *priv = NULL; ++ int i = 0; ++ int need_open_count = 0; ++ ++ priv = this->private; ++ ++ need_open_count = AFR_COUNT(local->need_open, priv->child_count); ++ if (!need_open_count) { ++ goto out; ++ } ++ gf_msg_debug(this->name, 0, "need open count: %d", need_open_count); ++ local->call_count = need_open_count; ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (!local->need_open[i]) ++ continue; ++ ++ if (IA_IFDIR == local->fd->inode->ia_type) { ++ gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s", ++ local->loc.path, priv->children[i]->name); ++ STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i, ++ priv->children[i], ++ priv->children[i]->fops->opendir, &local->loc, ++ local->fd, NULL); ++ } else { ++ gf_msg_debug(this->name, 0, ++ "opening fd for file %s on subvolume %s", ++ local->loc.path, priv->children[i]->name); ++ ++ STACK_WIND_COOKIE( ++ frame, afr_openfd_fix_open_cbk, (void *)(long)i, ++ priv->children[i], priv->children[i]->fops->open, &local->loc, ++ local->fd_ctx->flags & ~(O_CREAT | O_EXCL | O_TRUNC), local->fd, ++ NULL); ++ } ++ if (!--need_open_count) ++ break; ++ } ++ return 0; ++ ++out: ++ afr_fd_ctx_reset_need_open(local->fd, this, local->need_open); ++ AFR_STACK_DESTROY(frame); ++ return 0; ++} ++ ++static int ++afr_is_reopen_allowed_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct gf_flock *lock, dict_t *xdata) ++{ ++ afr_local_t *local = frame->local; ++ afr_private_t *priv = NULL; ++ int ret = -1; ++ int call_count = 0; ++ int i = (long)cookie; ++ int32_t fd_reopen_status = -1; ++ int32_t final_reopen_status = -1; ++ ++ priv = this->private; ++ local->replies[i].valid = 1; ++ local->replies[i].op_ret = op_ret; ++ local->replies[i].op_errno = op_errno; ++ if (op_ret != 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_DICT_GET_FAILED, ++ "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid)); ++ } ++ ++ if (xdata) ++ local->replies[i].xdata = dict_ref(xdata); ++ ++ call_count = afr_frame_return(frame); ++ ++ if 
(call_count) ++ return 0; ++ ++ /* Currently we get 3 values from the lower layer (protocol/client) in the ++ * getlk_cbk. ++ * FD_REOPEN_ALLOWED : No conflicting locks are held and reopen is allowed ++ * FD_REOPEN_NOT_ALLOWED : Conflicting locks are held and reopen is not ++ * allowed ++ * FD_BAD : FD is not valid ++ * ++ * - If we get FD_REOPEN_NOT_ALLOWED from any of the bricks, will block the ++ * reopen taking this as high priority. ++ * - If we get FD_BAD from all the replies, we will not reopen since we do ++ * not know the correct status. ++ * - If we get FD_BAD from few brick and FD_REOPEN_NOT_ALLOWED from one or ++ * more bricks, then we will block reopen. ++ * - If we get FD_BAD from few bricks and FD_REOPEN_ALLOWED from one or ++ * more bricks, then we will allow the reopen. ++ * ++ * We will update the final_reopen_status only when the value returned ++ * from lower layer is >= FD_REOPEN_ALLOWED and < FD_BAD. We will not set ++ * FD_BAD in final_reopen_status, since it can lead to unexpected ++ * behaviours. ++ * ++ * At the end of this loop, if we still have final_reopen_status as -1 ++ * i.e., the init value, it means we failed to get the fd status from any ++ * of the bricks or we do not have a valid fd on any of the bricks. We ++ * will not reopen the fd in this case as well. ++ */ ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (final_reopen_status != FD_REOPEN_NOT_ALLOWED && ++ local->replies[i].xdata) { ++ ret = dict_get_int32(xdata, "fd-reopen-status", &fd_reopen_status); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED, ++ "Failed to get whether reopen is allowed or not on fd " ++ "for file %s on subvolume %s.", ++ local->loc.path, priv->children[i]->name); ++ } else if (fd_reopen_status >= FD_REOPEN_ALLOWED && ++ fd_reopen_status < FD_BAD) { ++ final_reopen_status = fd_reopen_status; ++ } ++ } ++ ++ if (final_reopen_status == FD_REOPEN_NOT_ALLOWED) ++ break; ++ } ++ ++ if (final_reopen_status == FD_REOPEN_NOT_ALLOWED) { ++ gf_log(this->name, GF_LOG_INFO, ++ "Conflicting locks held on file %s. FD reopen is not allowed.", ++ local->loc.path); ++ } else if (final_reopen_status == -1) { ++ gf_log(this->name, GF_LOG_INFO, ++ "Failed to get the lock information " ++ "on file %s. 
FD reopen is not allowed.", ++ local->loc.path); ++ } else { ++ afr_local_replies_wipe(local, priv); ++ afr_do_fix_open(frame, this); ++ return 0; ++ } ++ ++ afr_fd_ctx_reset_need_open(local->fd, this, local->need_open); ++ AFR_STACK_DESTROY(frame); ++ return 0; ++} ++ + void +-afr_fix_open(fd_t *fd, xlator_t *this) ++afr_is_reopen_allowed(xlator_t *this, call_frame_t *frame) + { + afr_private_t *priv = NULL; ++ afr_local_t *local = NULL; ++ dict_t *xdata = NULL; + int i = 0; ++ int call_count = 0; ++ struct gf_flock flock = { ++ 0, ++ }; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ flock.l_type = F_WRLCK; ++ afr_set_lk_owner(frame, this, frame->root); ++ lk_owner_copy(&flock.l_owner, &frame->root->lk_owner); ++ ++ call_count = AFR_COUNT(local->child_up, priv->child_count); ++ if (!call_count) ++ goto out; ++ local->call_count = call_count; ++ ++ xdata = dict_new(); ++ if (xdata == NULL) ++ goto out; ++ ++ if (dict_set_int32(xdata, "fd-reopen-status", -1)) ++ goto out; ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->child_up[i]) { ++ STACK_WIND_COOKIE(frame, afr_is_reopen_allowed_cbk, (void *)(long)i, ++ priv->children[i], priv->children[i]->fops->lk, ++ local->fd, F_GETLK, &flock, xdata); ++ } else { ++ continue; ++ } ++ ++ if (!--call_count) ++ break; ++ } ++ ++ dict_unref(xdata); ++ return; ++ ++out: ++ if (xdata) ++ dict_unref(xdata); ++ afr_fd_ctx_reset_need_open(local->fd, this, local->need_open); ++ AFR_STACK_DESTROY(frame); ++ return; ++} ++ ++void ++afr_fix_open(fd_t *fd, xlator_t *this) ++{ + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = -1; + int32_t op_errno = 0; + afr_fd_ctx_t *fd_ctx = NULL; +- unsigned char *need_open = NULL; + int call_count = 0; + +- priv = this->private; +- + if (!afr_is_fd_fixable(fd)) + goto out; + +@@ -293,12 +514,6 @@ afr_fix_open(fd_t *fd, xlator_t *this) + if (!fd_ctx) + goto out; + +- need_open = alloca0(priv->child_count); +- +- call_count = afr_fd_ctx_need_open(fd, this, need_open); +- if (!call_count) +- goto out; +- + frame = create_frame(this, this->ctx->pool); + if (!frame) + goto out; +@@ -307,47 +522,24 @@ afr_fix_open(fd_t *fd, xlator_t *this) + if (!local) + goto out; + ++ call_count = afr_fd_ctx_set_need_open(fd, this, local->need_open); ++ if (!call_count) ++ goto out; ++ + local->loc.inode = inode_ref(fd->inode); + ret = loc_path(&local->loc, NULL); + if (ret < 0) + goto out; +- + local->fd = fd_ref(fd); + local->fd_ctx = fd_ctx; + +- local->call_count = call_count; +- +- gf_msg_debug(this->name, 0, "need open count: %d", call_count); +- +- for (i = 0; i < priv->child_count; i++) { +- if (!need_open[i]) +- continue; +- +- if (IA_IFDIR == fd->inode->ia_type) { +- gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s", +- local->loc.path, priv->children[i]->name); +- +- STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i, +- priv->children[i], +- priv->children[i]->fops->opendir, &local->loc, +- local->fd, NULL); +- } else { +- gf_msg_debug(this->name, 0, +- "opening fd for file %s on subvolume %s", +- local->loc.path, priv->children[i]->name); +- +- STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i, +- priv->children[i], priv->children[i]->fops->open, +- &local->loc, fd_ctx->flags & (~O_TRUNC), +- local->fd, NULL); +- } +- +- if (!--call_count) +- break; +- } +- ++ afr_is_reopen_allowed(this, frame); + return; ++ + out: ++ if (call_count) ++ afr_fd_ctx_reset_need_open(fd, this, local->need_open); + if (frame) + AFR_STACK_DESTROY(frame); ++ 
return; + } +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 6a9a763..ffc7317 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -895,6 +895,9 @@ typedef struct _afr_local { + afr_ta_fop_state_t fop_state; + int ta_failed_subvol; + gf_boolean_t is_new_entry; ++ ++ /* For fix_open */ ++ unsigned char *need_open; + } afr_local_t; + + typedef struct afr_spbc_timeout { +diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c +index 1417a60..92cda12 100644 +--- a/xlators/protocol/client/src/client-common.c ++++ b/xlators/protocol/client/src/client-common.c +@@ -343,7 +343,7 @@ client_pre_readv(xlator_t *this, gfs3_read_req *req, fd_t *fd, size_t size, + int op_errno = ESTALE; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_READ, out); + + req->size = size; + req->offset = offset; +@@ -368,7 +368,7 @@ client_pre_writev(xlator_t *this, gfs3_write_req *req, fd_t *fd, size_t size, + int op_errno = ESTALE; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_WRITE, out); + + req->size = size; + req->offset = offset; +@@ -429,7 +429,8 @@ client_pre_flush(xlator_t *this, gfs3_flush_req *req, fd_t *fd, dict_t *xdata) + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FLUSH, out); + + req->fd = remote_fd; + memcpy(req->gfid, fd->inode->gfid, 16); +@@ -450,7 +451,7 @@ client_pre_fsync(xlator_t *this, gfs3_fsync_req *req, fd_t *fd, int32_t flags, + int op_errno = 0; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_FSYNC, out); + + req->fd = remote_fd; + req->data = flags; +@@ -591,7 +592,8 @@ client_pre_fsyncdir(xlator_t *this, gfs3_fsyncdir_req *req, fd_t *fd, + int32_t op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FSYNCDIR, out); + + req->fd = remote_fd; + req->data = flags; +@@ -668,7 +670,8 @@ client_pre_ftruncate(xlator_t *this, gfs3_ftruncate_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = EINVAL; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FTRUNCATE, out); + + req->offset = offset; + req->fd = remote_fd; +@@ -687,7 +690,8 @@ client_pre_fstat(xlator_t *this, gfs3_fstat_req *req, fd_t *fd, dict_t *xdata) + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FSTAT, out); + + req->fd = remote_fd; + memcpy(req->gfid, fd->inode->gfid, 16); +@@ -710,7 +714,8 @@ client_pre_lk(xlator_t *this, gfs3_lk_req *req, int32_t cmd, + int32_t gf_type = 0; + int ret = 0; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_LK, out); + + ret = client_cmd_to_gf_cmd(cmd, &gf_cmd); + if (ret) { +@@ -787,7 +792,8 @@ client_pre_readdir(xlator_t *this, gfs3_readdir_req *req, fd_t *fd, size_t size, + int64_t remote_fd = -1; + int op_errno = 
ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_READDIR, out); + + req->size = size; + req->offset = offset; +@@ -869,7 +875,7 @@ client_pre_finodelk(xlator_t *this, gfs3_finodelk_req *req, fd_t *fd, int cmd, + int32_t gf_cmd = 0; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_FINODELK, out); + + if (cmd == F_GETLK || cmd == F_GETLK64) + gf_cmd = GF_LK_GETLK; +@@ -952,7 +958,8 @@ client_pre_fentrylk(xlator_t *this, gfs3_fentrylk_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FENTRYLK, out); + + req->fd = remote_fd; + req->cmd = cmd_entrylk; +@@ -1013,7 +1020,7 @@ client_pre_fxattrop(xlator_t *this, gfs3_fxattrop_req *req, fd_t *fd, + int64_t remote_fd = -1; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_FXATTROP, out); + + req->fd = remote_fd; + req->flags = flags; +@@ -1039,7 +1046,8 @@ client_pre_fgetxattr(xlator_t *this, gfs3_fgetxattr_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FGETXATTR, out); + + req->namelen = 1; /* Use it as a flag */ + req->fd = remote_fd; +@@ -1065,7 +1073,8 @@ client_pre_fsetxattr(xlator_t *this, gfs3_fsetxattr_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FSETXATTR, out); + + req->fd = remote_fd; + req->flags = flags; +@@ -1091,7 +1100,8 @@ client_pre_rchecksum(xlator_t *this, gfs3_rchecksum_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_RCHECKSUM, out); + + req->len = len; + req->offset = offset; +@@ -1141,7 +1151,8 @@ client_pre_fsetattr(xlator_t *this, gfs3_fsetattr_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FSETATTR, out); + + req->fd = remote_fd; + req->valid = valid; +@@ -1161,7 +1172,8 @@ client_pre_readdirp(xlator_t *this, gfs3_readdirp_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_READDIRP, out); + + req->size = size; + req->offset = offset; +@@ -1187,7 +1199,8 @@ client_pre_fremovexattr(xlator_t *this, gfs3_fremovexattr_req *req, fd_t *fd, + if (!(fd && fd->inode)) + goto out; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FREMOVEXATTR, out); + + memcpy(req->gfid, fd->inode->gfid, 16); + req->name = (char *)name; +@@ -1208,7 +1221,8 @@ client_pre_fallocate(xlator_t *this, 
gfs3_fallocate_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FALLOCATE, out); + + req->fd = remote_fd; + req->flags = flags; +@@ -1230,7 +1244,8 @@ client_pre_discard(xlator_t *this, gfs3_discard_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_DISCARD, out); + + req->fd = remote_fd; + req->offset = offset; +@@ -1251,7 +1266,8 @@ client_pre_zerofill(xlator_t *this, gfs3_zerofill_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_ZEROFILL, out); + + req->fd = remote_fd; + req->offset = offset; +@@ -1286,7 +1302,8 @@ client_pre_seek(xlator_t *this, gfs3_seek_req *req, fd_t *fd, off_t offset, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_SEEK, out); + + memcpy(req->gfid, fd->inode->gfid, 16); + req->fd = remote_fd; +@@ -2508,7 +2525,7 @@ client_pre_readv_v2(xlator_t *this, gfx_read_req *req, fd_t *fd, size_t size, + int op_errno = ESTALE; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_READ, out); + + req->size = size; + req->offset = offset; +@@ -2532,7 +2549,7 @@ client_pre_writev_v2(xlator_t *this, gfx_write_req *req, fd_t *fd, size_t size, + int op_errno = ESTALE; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_WRITE, out); + + req->size = size; + req->offset = offset; +@@ -2567,10 +2584,10 @@ client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, + int op_errno = ESTALE; + + CLIENT_GET_REMOTE_FD(this, fd_in, FALLBACK_TO_ANON_FD, remote_fd_in, +- op_errno, out); ++ op_errno, GFS3_OP_COPY_FILE_RANGE, out); + + CLIENT_GET_REMOTE_FD(this, fd_out, FALLBACK_TO_ANON_FD, remote_fd_out, +- op_errno, out); ++ op_errno, GFS3_OP_COPY_FILE_RANGE, out); + req->size = size; + req->off_in = off_in; + req->off_out = off_out; +@@ -2623,7 +2640,8 @@ client_pre_flush_v2(xlator_t *this, gfx_flush_req *req, fd_t *fd, dict_t *xdata) + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FLUSH, out); + + req->fd = remote_fd; + memcpy(req->gfid, fd->inode->gfid, 16); +@@ -2643,7 +2661,7 @@ client_pre_fsync_v2(xlator_t *this, gfx_fsync_req *req, fd_t *fd, int32_t flags, + int op_errno = 0; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_FSYNC, out); + + req->fd = remote_fd; + req->data = flags; +@@ -2778,7 +2796,8 @@ client_pre_fsyncdir_v2(xlator_t *this, gfx_fsyncdir_req *req, fd_t *fd, + int32_t op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FSYNCDIR, out); + + req->fd = remote_fd; + req->data = 
flags; +@@ -2852,7 +2871,8 @@ client_pre_ftruncate_v2(xlator_t *this, gfx_ftruncate_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = EINVAL; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FTRUNCATE, out); + + req->offset = offset; + req->fd = remote_fd; +@@ -2870,7 +2890,8 @@ client_pre_fstat_v2(xlator_t *this, gfx_fstat_req *req, fd_t *fd, dict_t *xdata) + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FSTAT, out); + + req->fd = remote_fd; + memcpy(req->gfid, fd->inode->gfid, 16); +@@ -2892,7 +2913,8 @@ client_pre_lk_v2(xlator_t *this, gfx_lk_req *req, int32_t cmd, + int32_t gf_type = 0; + int ret = 0; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_LK, out); + + ret = client_cmd_to_gf_cmd(cmd, &gf_cmd); + if (ret) { +@@ -2967,7 +2989,8 @@ client_pre_readdir_v2(xlator_t *this, gfx_readdir_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_READDIR, out); + + req->size = size; + req->offset = offset; +@@ -3048,7 +3071,7 @@ client_pre_finodelk_v2(xlator_t *this, gfx_finodelk_req *req, fd_t *fd, int cmd, + int32_t gf_cmd = 0; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_FINODELK, out); + + if (cmd == F_GETLK || cmd == F_GETLK64) + gf_cmd = GF_LK_GETLK; +@@ -3129,7 +3152,8 @@ client_pre_fentrylk_v2(xlator_t *this, gfx_fentrylk_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FENTRYLK, out); + + req->fd = remote_fd; + req->cmd = cmd_entrylk; +@@ -3185,7 +3209,7 @@ client_pre_fxattrop_v2(xlator_t *this, gfx_fxattrop_req *req, fd_t *fd, + int64_t remote_fd = -1; + + CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, +- out); ++ GFS3_OP_FXATTROP, out); + + req->fd = remote_fd; + req->flags = flags; +@@ -3207,7 +3231,8 @@ client_pre_fgetxattr_v2(xlator_t *this, gfx_fgetxattr_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FGETXATTR, out); + + req->namelen = 1; /* Use it as a flag */ + req->fd = remote_fd; +@@ -3232,7 +3257,8 @@ client_pre_fsetxattr_v2(xlator_t *this, gfx_fsetxattr_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FSETXATTR, out); + + req->fd = remote_fd; + req->flags = flags; +@@ -3256,7 +3282,8 @@ client_pre_rchecksum_v2(xlator_t *this, gfx_rchecksum_req *req, fd_t *fd, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, 
remote_fd, op_errno, ++ GFS3_OP_RCHECKSUM, out); + + req->len = len; + req->offset = offset; +@@ -3304,7 +3331,8 @@ client_pre_fsetattr_v2(xlator_t *this, gfx_fsetattr_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FSETATTR, out); + + memcpy(req->gfid, fd->inode->gfid, 16); + req->fd = remote_fd; +@@ -3324,7 +3352,8 @@ client_pre_readdirp_v2(xlator_t *this, gfx_readdirp_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_READDIRP, out); + + req->size = size; + req->offset = offset; +@@ -3349,7 +3378,8 @@ client_pre_fremovexattr_v2(xlator_t *this, gfx_fremovexattr_req *req, fd_t *fd, + if (!(fd && fd->inode)) + goto out; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FREMOVEXATTR, out); + + memcpy(req->gfid, fd->inode->gfid, 16); + req->name = (char *)name; +@@ -3369,7 +3399,8 @@ client_pre_fallocate_v2(xlator_t *this, gfx_fallocate_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_FALLOCATE, out); + + req->fd = remote_fd; + req->flags = flags; +@@ -3390,7 +3421,8 @@ client_pre_discard_v2(xlator_t *this, gfx_discard_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_DISCARD, out); + + req->fd = remote_fd; + req->offset = offset; +@@ -3410,7 +3442,8 @@ client_pre_zerofill_v2(xlator_t *this, gfx_zerofill_req *req, fd_t *fd, + int op_errno = ESTALE; + int64_t remote_fd = -1; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_ZEROFILL, out); + + req->fd = remote_fd; + req->offset = offset; +@@ -3439,7 +3472,8 @@ client_pre_seek_v2(xlator_t *this, gfx_seek_req *req, fd_t *fd, off_t offset, + int64_t remote_fd = -1; + int op_errno = ESTALE; + +- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); ++ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, ++ GFS3_OP_SEEK, out); + + memcpy(req->gfid, fd->inode->gfid, 16); + req->fd = remote_fd; +@@ -3587,3 +3621,25 @@ client_post_rename_v2(xlator_t *this, gfx_rename_rsp *rsp, struct iatt *stbuf, + + return xdr_to_dict(&rsp->xdata, xdata); + } ++ ++void ++set_fd_reopen_status(xlator_t *this, dict_t *xdata, ++ enum gf_fd_reopen_status fd_reopen_status) ++{ ++ clnt_conf_t *conf = NULL; ++ ++ conf = this->private; ++ if (!conf) { ++ gf_msg_debug(this->name, ENOMEM, "Failed to get client conf"); ++ return; ++ } ++ ++ if (!conf->strict_locks) ++ fd_reopen_status = FD_REOPEN_ALLOWED; ++ ++ if (dict_set_int32(xdata, "fd-reopen-status", fd_reopen_status)) ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, PC_MSG_DICT_SET_FAILED, ++ NULL); ++ ++ return; ++} +diff --git a/xlators/protocol/client/src/client-common.h 
b/xlators/protocol/client/src/client-common.h +index a2043d8..16fb167 100644 +--- a/xlators/protocol/client/src/client-common.h ++++ b/xlators/protocol/client/src/client-common.h +@@ -627,4 +627,8 @@ client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, + off64_t off_out, size_t size, int32_t flags, + dict_t **xdata); + ++void ++set_fd_reopen_status(xlator_t *this, dict_t *xdata, ++ enum gf_fd_reopen_status fd_reopen_allowed); ++ + #endif /* __CLIENT_COMMON_H__ */ +diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c +index 6543100..48b6448 100644 +--- a/xlators/protocol/client/src/client-helpers.c ++++ b/xlators/protocol/client/src/client-helpers.c +@@ -406,11 +406,12 @@ clnt_readdir_rsp_cleanup_v2(gfx_readdir_rsp *rsp) + } + + int +-client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd) ++client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd, ++ enum gf_fop_procnum fop) + { + clnt_fd_ctx_t *fdctx = NULL; + clnt_conf_t *conf = NULL; +- gf_boolean_t locks_held = _gf_false; ++ gf_boolean_t locks_involved = _gf_false; + + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, remote_fd, out); +@@ -423,23 +424,32 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd) + if (fd->anonymous) { + *remote_fd = GF_ANON_FD_NO; + } else { ++ if (conf->strict_locks && ++ (fop == GFS3_OP_WRITE || fop == GFS3_OP_FTRUNCATE || ++ fop == GFS3_OP_FALLOCATE || fop == GFS3_OP_ZEROFILL || ++ fop == GFS3_OP_DISCARD)) { ++ locks_involved = _gf_true; ++ } + *remote_fd = -1; + gf_msg_debug(this->name, EBADF, "not a valid fd for gfid: %s", + uuid_utoa(fd->inode->gfid)); + } + } else { +- if (__is_fd_reopen_in_progress(fdctx)) ++ if (__is_fd_reopen_in_progress(fdctx)) { + *remote_fd = -1; +- else ++ } else { + *remote_fd = fdctx->remote_fd; ++ } + +- locks_held = !list_empty(&fdctx->lock_list); ++ locks_involved = !list_empty(&fdctx->lock_list); + } + } + pthread_spin_unlock(&conf->fd_lock); + +- if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held)) ++ if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && ++ (!locks_involved)) { + *remote_fd = GF_ANON_FD_NO; ++ } + + return 0; + out: +diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c +index 3110c78..46ac544 100644 +--- a/xlators/protocol/client/src/client-rpc-fops.c ++++ b/xlators/protocol/client/src/client-rpc-fops.c +@@ -2439,6 +2439,13 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count, + } + } + ++ if (local->check_reopen) { ++ if (lock.l_type == F_WRLCK) ++ set_fd_reopen_status(this, xdata, FD_REOPEN_NOT_ALLOWED); ++ else ++ set_fd_reopen_status(this, xdata, FD_REOPEN_ALLOWED); ++ } ++ + out: + if ((rsp.op_ret == -1) && (EAGAIN != gf_error_to_errno(rsp.op_errno))) { + gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno), +@@ -5198,6 +5205,7 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) + 0, + }, + }; ++ dict_t *xdata = NULL; + int32_t gf_cmd = 0; + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; +@@ -5224,6 +5232,10 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ ret = dict_get_int32(args->xdata, "fd-reopen-status", &local->check_reopen); ++ if (ret) ++ local->check_reopen = 0; ++ + local->owner = frame->root->lk_owner; + local->cmd = args->cmd; + local->fd = fd_ref(args->fd); +@@ -5237,6 +5249,13 @@ 
client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) + client_is_setlk(local->cmd)) { + client_add_lock_for_recovery(local->fd, args->flock, &local->owner, + local->cmd); ++ } else if (local->check_reopen) { ++ xdata = dict_new(); ++ if (xdata == NULL) { ++ op_errno = ENOMEM; ++ goto unwind; ++ } ++ set_fd_reopen_status(this, xdata, FD_BAD); + } + + goto unwind; +@@ -5254,8 +5273,10 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) + + return 0; + unwind: +- CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); ++ CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, xdata); + GF_FREE(req.xdata.xdata_val); ++ if (xdata) ++ dict_unref(xdata); + + return 0; + } +diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c +index 954fc58..d0055e9 100644 +--- a/xlators/protocol/client/src/client-rpc-fops_v2.c ++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c +@@ -2234,6 +2234,13 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count, + } + } + ++ if (local->check_reopen) { ++ if (lock.l_type == F_WRLCK) ++ set_fd_reopen_status(this, xdata, FD_REOPEN_NOT_ALLOWED); ++ else ++ set_fd_reopen_status(this, xdata, FD_REOPEN_ALLOWED); ++ } ++ + out: + if ((rsp.op_ret == -1) && (EAGAIN != gf_error_to_errno(rsp.op_errno))) { + gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno), +@@ -4759,6 +4766,7 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) + 0, + }, + }; ++ dict_t *xdata = NULL; + int32_t gf_cmd = 0; + clnt_local_t *local = NULL; + clnt_conf_t *conf = NULL; +@@ -4785,6 +4793,10 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ ret = dict_get_int32(args->xdata, "fd-reopen-status", &local->check_reopen); ++ if (ret) ++ local->check_reopen = 0; ++ + local->owner = frame->root->lk_owner; + local->cmd = args->cmd; + local->fd = fd_ref(args->fd); +@@ -4798,6 +4810,13 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) + client_is_setlk(local->cmd)) { + client_add_lock_for_recovery(local->fd, args->flock, &local->owner, + local->cmd); ++ } else if (local->check_reopen) { ++ xdata = dict_new(); ++ if (xdata == NULL) { ++ op_errno = ENOMEM; ++ goto unwind; ++ } ++ set_fd_reopen_status(this, xdata, FD_BAD); + } + + goto unwind; +@@ -4815,8 +4834,10 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) + + return 0; + unwind: +- CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); ++ CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, xdata); + GF_FREE(req.xdata.pairs.pairs_val); ++ if (xdata) ++ dict_unref(xdata); + + return 0; + } +@@ -6094,7 +6115,7 @@ client4_0_rchecksum(call_frame_t *frame, xlator_t *this, void *data) + conf = this->private; + + CLIENT_GET_REMOTE_FD(this, args->fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, +- unwind); ++ GFS3_OP_RCHECKSUM, unwind); + + req.len = args->len; + req.offset = args->offset; +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 63c90ea..35a5340 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -864,9 +864,11 @@ int32_t + client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) + { +- int ret = -1; ++ int ret = 0; ++ int op_errno = ENOTCONN; + clnt_conf_t *conf = NULL; + rpc_clnt_procedure_t *proc = NULL; ++ clnt_fd_ctx_t *fdctx = NULL; + clnt_args_t args = { + 0, + }; +@@ -875,6 +877,21 @@ client_open(call_frame_t *frame, xlator_t 
*this, loc_t *loc, int32_t flags, + if (!conf || !conf->fops) + goto out; + ++ if (conf->strict_locks) { ++ pthread_spin_lock(&conf->fd_lock); ++ { ++ fdctx = this_fd_get_ctx(fd, this); ++ if (fdctx && !list_empty(&fdctx->lock_list)) { ++ ret = -1; ++ op_errno = EBADFD; ++ } ++ } ++ pthread_spin_unlock(&conf->fd_lock); ++ ++ if (ret) ++ goto out; ++ } ++ + args.loc = loc; + args.fd = fd; + args.xdata = xdata; +@@ -888,7 +905,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + + out: + if (ret) +- STACK_UNWIND_STRICT(open, frame, -1, ENOTCONN, NULL, NULL); ++ STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, NULL); + + return 0; + } +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index bde3d1a..2a50625 100644 +--- a/xlators/protocol/client/src/client.h ++++ b/xlators/protocol/client/src/client.h +@@ -98,10 +98,10 @@ typedef enum { + free(_this_rsp->xdata.xdata_val); \ + } while (0) + +-#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, label) \ ++#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, fop, label) \ + do { \ + int _ret = 0; \ +- _ret = client_get_remote_fd(xl, fd, flags, &remote_fd); \ ++ _ret = client_get_remote_fd(xl, fd, flags, &remote_fd, fop); \ + if (_ret < 0) { \ + op_errno = errno; \ + goto label; \ +@@ -286,6 +286,7 @@ typedef struct client_local { + client_posix_lock_t *client_lock; + gf_lkowner_t owner; + int32_t cmd; ++ int32_t check_reopen; + struct list_head lock_list; + pthread_mutex_t mutex; + char *name; +@@ -435,7 +436,8 @@ client_default_reopen_done(clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this); + void + client_attempt_reopen(fd_t *fd, xlator_t *this); + int +-client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd); ++client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd, ++ enum gf_fop_procnum fop); + int + client_fd_fop_prepare_local(call_frame_t *frame, fd_t *fd, int64_t remote_fd); + gf_boolean_t +-- +1.8.3.1 + diff --git a/SOURCES/0582-protocol-client-Fix-lock-memory-leak.patch b/SOURCES/0582-protocol-client-Fix-lock-memory-leak.patch new file mode 100644 index 0000000..3fd1dae --- /dev/null +++ b/SOURCES/0582-protocol-client-Fix-lock-memory-leak.patch @@ -0,0 +1,501 @@ +From adeec3d5d85baad8b50d203f34a47ad5360d7cd7 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 7 Jun 2021 18:36:11 +0530 +Subject: [PATCH 582/584] protocol/client: Fix lock memory leak + +Problem-1: +When an overlapping lock is issued the merged lock is not assigned the +owner. When flush is issued on the fd, this particular lock is not freed +leading to memory leak + +Fix-1: +Assign the owner while merging the locks. + +Problem-2: +On fd-destroy lock structs could be present in fdctx. For some reason +with flock -x command and closing of the bash fd, it leads to this code +path. Which leaks the lock structs. + +Fix-2: +When fdctx is being destroyed in client, make sure to cleanup any lock +structs. 
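+
+Illustration (not part of the change itself): a minimal sketch of the
+merge path described in Problem-1, using simplified stand-in types
+rather than the actual client_posix_lock_t/gf_lkowner_t structures.
+The bug class is dropping the owner while building the merged lock,
+so the per-owner cleanup on flush can never match and free it:
+
+    #include <stdlib.h>
+    #include <sys/types.h>
+
+    typedef struct lock_rec {
+        off_t start;
+        off_t end;
+        pid_t owner;   /* stand-in for the real lk-owner type */
+    } lock_rec_t;
+
+    /* Merge two overlapping byte-range locks held by one owner. */
+    static lock_rec_t *
+    merge_locks(const lock_rec_t *a, const lock_rec_t *b)
+    {
+        lock_rec_t *sum = calloc(1, sizeof(*sum));
+
+        if (!sum)
+            return NULL;
+        sum->start = (a->start < b->start) ? a->start : b->start;
+        sum->end = (a->end > b->end) ? a->end : b->end;
+        sum->owner = a->owner;   /* the step Fix-1 adds: without it the
+                                    merged lock is ownerless and leaks */
+        return sum;
+    }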
+ +> Upstream patch: https://github.com/gluster/glusterfs/pull/2338/commits/926402f639471d2664bf00c6692221ba297c525f +> fixes: gluster#2337 +> Change-Id: I298124213ce5a1cf2b1f1756d5e8a9745d9c0a1c +> Signed-off-by: Pranith Kumar K + +BUG: 1689375 +Change-Id: I298124213ce5a1cf2b1f1756d5e8a9745d9c0a1c +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245603 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/client/issue-2337-lock-mem-leak.c | 52 ++++++++++++++++++ + tests/bugs/client/issue-2337-lock-mem-leak.t | 42 ++++++++++++++ + tests/bugs/replicate/do-not-reopen-fd.t | 65 ++++++++++++++-------- + tests/volume.rc | 8 +++ + xlators/protocol/client/src/client-helpers.c | 10 ++++ + xlators/protocol/client/src/client-lk.c | 82 ++++++++++++++++++---------- + xlators/protocol/client/src/client.h | 8 ++- + 7 files changed, 213 insertions(+), 54 deletions(-) + create mode 100644 tests/bugs/client/issue-2337-lock-mem-leak.c + create mode 100644 tests/bugs/client/issue-2337-lock-mem-leak.t + +diff --git a/tests/bugs/client/issue-2337-lock-mem-leak.c b/tests/bugs/client/issue-2337-lock-mem-leak.c +new file mode 100644 +index 0000000..d4e02a7 +--- /dev/null ++++ b/tests/bugs/client/issue-2337-lock-mem-leak.c +@@ -0,0 +1,52 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int ++main(int argc, char *argv[]) ++{ ++ int fd = -1; ++ char *filename = NULL; ++ struct flock lock = { ++ 0, ++ }; ++ int i = 0; ++ int ret = -1; ++ ++ if (argc != 2) { ++ fprintf(stderr, "Usage: %s ", argv[0]); ++ goto out; ++ } ++ ++ filename = argv[1]; ++ ++ fd = open(filename, O_RDWR | O_CREAT, 0); ++ if (fd < 0) { ++ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno)); ++ goto out; ++ } ++ ++ lock.l_type = F_WRLCK; ++ lock.l_whence = SEEK_SET; ++ lock.l_len = 2; ++ ++ while (i < 100) { ++ lock.l_start = i; ++ ret = fcntl(fd, F_SETLK, &lock); ++ if (ret < 0) { ++ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno)); ++ goto out; ++ } ++ ++ i++; ++ } ++ ++ ret = 0; ++ ++out: ++ return ret; ++} +diff --git a/tests/bugs/client/issue-2337-lock-mem-leak.t b/tests/bugs/client/issue-2337-lock-mem-leak.t +new file mode 100644 +index 0000000..64132a2 +--- /dev/null ++++ b/tests/bugs/client/issue-2337-lock-mem-leak.t +@@ -0,0 +1,42 @@ ++#!/bin/bash ++ ++#Test that lock fop is not leaking any memory for overlapping regions ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fileio.rc ++ ++cleanup; ++ ++LOCK_TEST=$(dirname $0)/issue-2337-lock-mem-leak ++build_tester $(dirname $0)/issue-2337-lock-mem-leak.c -o ${LOCK_TEST} ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}1 ++#Guard against flush-behind ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST touch $M0/a ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'w' $M0/a ++TEST flock -x $fd1 ++statedump=$(generate_mount_statedump $V0 $M0) ++EXPECT_NOT "^nostatedump$" echo $statedump ++#Making sure no one changes this mem-tracker name ++TEST grep gf_client_mt_clnt_lock_t $statedump ++TEST fd_close $fd1 ++ ++statedump=$(generate_mount_statedump $V0 $M0) ++EXPECT_NOT "^nostatedump$" echo $statedump ++TEST ! 
grep gf_client_mt_clnt_lock_t $statedump ++ ++TEST ${LOCK_TEST} $M0/a ++ ++statedump=$(generate_mount_statedump $V0 $M0) ++EXPECT_NOT "^nostatedump$" echo $statedump ++TEST ! grep gf_client_mt_clnt_lock_t $statedump ++TEST cleanup_mount_statedump $V0 ++TEST rm ${LOCK_TEST} ++cleanup +diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t +index 76d8e70..13b5218 100644 +--- a/tests/bugs/replicate/do-not-reopen-fd.t ++++ b/tests/bugs/replicate/do-not-reopen-fd.t +@@ -45,13 +45,17 @@ EXPECT "data-2" cat $B0/${V0}2/a + gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a) + gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a) + +-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + TEST fd2=`fd_available` + TEST fd_open $fd2 'rw' $M1/a + ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ + # Kill 2nd brick and try writing to the file. The write should fail due to + # quorum failure. + TEST kill_brick $V0 $H0 $B0/${V0}1 +@@ -66,6 +70,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 + TEST ! fd_write $fd1 "data-4" + TEST ! fd_cat $fd1 ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + # Enable heal and check the files will have same content on all the bricks after + # the heal is completed. +@@ -89,7 +96,9 @@ TEST ! fd_write $fd1 "data-5" + + # At this point only one brick will have the lock. Try taking the lock again on + # the bad fd, which should also fail with EBADFD. +-TEST ! flock -x $fd1 ++# TODO: At the moment quorum failure in lk leads to unlock on the bricks where ++# lock succeeds. This will change lock state on 3rd brick, commenting for now ++#TEST ! flock -x $fd1 + + # Kill the only brick that is having lock and try taking lock on another client + # which should succeed. +@@ -97,15 +106,25 @@ TEST kill_brick $V0 $H0 $B0/${V0}2 + EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2 + TEST flock -x $fd2 + TEST fd_write $fd2 "data-6" ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++ + + # Bring the brick up and try writing & reading on the old fd, which should still + # fail and operations on the 2nd fd should succeed. 
+ TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2 ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + TEST ! fd_write $fd1 "data-7" + + TEST ! fd_cat $fd1 ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + TEST fd_cat $fd2 + + # Close both the fds which will release the locks and then re-open and take lock +@@ -113,17 +132,15 @@ TEST fd_cat $fd2 + TEST fd_close $fd1 + TEST fd_close $fd2 + +-TEST ! ls /proc/$$/fd/$fd1 +-TEST ! ls /proc/$$/fd/$fd2 +-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a +-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + TEST flock -x $fd1 + TEST fd_write $fd1 "data-8" +@@ -134,6 +151,10 @@ EXPECT "data-8" head -n 1 $B0/${V0}1/a + EXPECT "data-8" head -n 1 $B0/${V0}2/a + + TEST fd_close $fd1 ++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ + + # Heal the volume + TEST $CLI volume heal $V0 enable +@@ -152,9 +173,9 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replica + TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a + +-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + # Restart the brick and then write. 
Now fd should get re-opened and write should + # succeed on the previously down brick as well since there are no locks held on +@@ -163,7 +184,7 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd_write $fd1 "data-10" +-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + + EXPECT "data-10" head -n 1 $B0/${V0}0/a + EXPECT "data-10" head -n 1 $B0/${V0}1/a +@@ -177,9 +198,9 @@ TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a + TEST flock -x $fd1 + +-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + # Kill & restart another brick so that it will return EBADFD + TEST kill_brick $V0 $H0 $B0/${V0}1 +@@ -194,9 +215,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 + TEST ! fd_write $fd1 "data-11" +-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + EXPECT "data-10" head -n 1 $B0/${V0}0/a + EXPECT "data-10" head -n 1 $B0/${V0}1/a +diff --git a/tests/volume.rc b/tests/volume.rc +index f5dd0b1..17c3835 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -407,6 +407,14 @@ function gf_check_file_opened_in_brick { + fi + } + ++function gf_open_file_count_in_brick { ++ vol=$1 ++ host=$2 ++ brick=$3 ++ realpath=$4 ++ ls -l /proc/$(get_brick_pid $vol $host $brick)/fd | grep "${realpath}$" | wc -l ++} ++ + function gf_get_gfid_backend_file_path { + brickpath=$1 + filepath_in_brick=$2 +diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c +index 48b6448..a80f303 100644 +--- a/xlators/protocol/client/src/client-helpers.c ++++ b/xlators/protocol/client/src/client-helpers.c +@@ -3156,11 +3156,14 @@ client_fdctx_destroy(xlator_t *this, clnt_fd_ctx_t *fdctx) + int32_t ret = -1; + char parent_down = 0; + fd_lk_ctx_t *lk_ctx = NULL; ++ gf_lkowner_t null_owner = {0}; ++ struct list_head deleted_list; + + GF_VALIDATE_OR_GOTO("client", this, out); + GF_VALIDATE_OR_GOTO(this->name, fdctx, out); + + conf = (clnt_conf_t *)this->private; ++ INIT_LIST_HEAD(&deleted_list); + + if (fdctx->remote_fd == -1) { + gf_msg_debug(this->name, 0, "not a valid fd"); +@@ -3174,6 +3177,13 @@ client_fdctx_destroy(xlator_t *this, clnt_fd_ctx_t *fdctx) + pthread_mutex_unlock(&conf->lock); + lk_ctx = fdctx->lk_ctx; + fdctx->lk_ctx = NULL; ++ pthread_spin_lock(&conf->fd_lock); ++ { ++ __delete_granted_locks_owner_from_fdctx(fdctx, &null_owner, ++ 
&deleted_list); ++ } ++ pthread_spin_unlock(&conf->fd_lock); ++ destroy_client_locks_from_list(&deleted_list); + + if (lk_ctx) + fd_lk_ctx_unref(lk_ctx); +diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c +index c1fb055..cb4e894 100644 +--- a/xlators/protocol/client/src/client-lk.c ++++ b/xlators/protocol/client/src/client-lk.c +@@ -253,6 +253,7 @@ __insert_and_merge(clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock) + sum = add_locks(lock, conf); + + sum->fd = lock->fd; ++ sum->owner = conf->owner; + + __delete_client_lock(conf); + __destroy_client_lock(conf); +@@ -320,56 +321,77 @@ destroy_client_lock(client_posix_lock_t *lock) + GF_FREE(lock); + } + +-int32_t +-delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner) ++void ++destroy_client_locks_from_list(struct list_head *deleted) + { +- clnt_fd_ctx_t *fdctx = NULL; + client_posix_lock_t *lock = NULL; + client_posix_lock_t *tmp = NULL; +- xlator_t *this = NULL; +- clnt_conf_t *conf = NULL; +- +- struct list_head delete_list; +- int ret = 0; ++ xlator_t *this = THIS; + int count = 0; + +- INIT_LIST_HEAD(&delete_list); +- this = THIS; +- conf = this->private; ++ list_for_each_entry_safe(lock, tmp, deleted, list) ++ { ++ list_del_init(&lock->list); ++ destroy_client_lock(lock); ++ count++; ++ } + +- pthread_spin_lock(&conf->fd_lock); ++ /* FIXME: Need to actually print the locks instead of count */ ++ gf_msg_trace(this->name, 0, "Number of locks cleared=%d", count); ++} + +- fdctx = this_fd_get_ctx(fd, this); +- if (!fdctx) { +- pthread_spin_unlock(&conf->fd_lock); ++void ++__delete_granted_locks_owner_from_fdctx(clnt_fd_ctx_t *fdctx, ++ gf_lkowner_t *owner, ++ struct list_head *deleted) ++{ ++ client_posix_lock_t *lock = NULL; ++ client_posix_lock_t *tmp = NULL; + +- gf_msg(this->name, GF_LOG_WARNING, EINVAL, PC_MSG_FD_CTX_INVALID, +- "fdctx not valid"); +- ret = -1; +- goto out; ++ gf_boolean_t is_null_lkowner = _gf_false; ++ ++ if (is_lk_owner_null(owner)) { ++ is_null_lkowner = _gf_true; + } + + list_for_each_entry_safe(lock, tmp, &fdctx->lock_list, list) + { +- if (is_same_lkowner(&lock->owner, owner)) { ++ if (is_null_lkowner || is_same_lkowner(&lock->owner, owner)) { + list_del_init(&lock->list); +- list_add_tail(&lock->list, &delete_list); +- count++; ++ list_add_tail(&lock->list, deleted); + } + } ++} + +- pthread_spin_unlock(&conf->fd_lock); ++int32_t ++delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner) ++{ ++ clnt_fd_ctx_t *fdctx = NULL; ++ xlator_t *this = NULL; ++ clnt_conf_t *conf = NULL; ++ int ret = 0; ++ struct list_head deleted_locks; + +- if (!list_empty(&delete_list)) { +- list_for_each_entry_safe(lock, tmp, &delete_list, list) +- { +- list_del_init(&lock->list); +- destroy_client_lock(lock); ++ this = THIS; ++ conf = this->private; ++ INIT_LIST_HEAD(&deleted_locks); ++ ++ pthread_spin_lock(&conf->fd_lock); ++ { ++ fdctx = this_fd_get_ctx(fd, this); ++ if (!fdctx) { ++ pthread_spin_unlock(&conf->fd_lock); ++ ++ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, PC_MSG_FD_CTX_INVALID, ++ NULL); ++ ret = -1; ++ goto out; + } ++ __delete_granted_locks_owner_from_fdctx(fdctx, owner, &deleted_locks); + } ++ pthread_spin_unlock(&conf->fd_lock); + +- /* FIXME: Need to actually print the locks instead of count */ +- gf_msg_trace(this->name, 0, "Number of locks cleared=%d", count); ++ destroy_client_locks_from_list(&deleted_locks); + + out: + return ret; +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index 2a50625..f952aea 100644 +--- 
a/xlators/protocol/client/src/client.h ++++ b/xlators/protocol/client/src/client.h +@@ -406,8 +406,12 @@ int + client_attempt_lock_recovery(xlator_t *this, clnt_fd_ctx_t *fdctx); + int32_t + delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner); +-int32_t +-delete_granted_locks_fd(clnt_fd_ctx_t *fdctx); ++void ++__delete_granted_locks_owner_from_fdctx(clnt_fd_ctx_t *fdctx, ++ gf_lkowner_t *owner, ++ struct list_head *deleted); ++void ++destroy_client_locks_from_list(struct list_head *deleted); + int32_t + client_cmd_to_gf_cmd(int32_t cmd, int32_t *gf_cmd); + void +-- +1.8.3.1 + diff --git a/SOURCES/0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch b/SOURCES/0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch new file mode 100644 index 0000000..1ac1777 --- /dev/null +++ b/SOURCES/0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch @@ -0,0 +1,138 @@ +From f114ba25fab57d1ab9a51fc1f101f2b5571f167a Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 7 Jun 2021 19:24:55 +0530 +Subject: [PATCH 583/584] protocol/client: Initialize list head to prevent NULL + de-reference + +> Upstream patch: https://github.com/gluster/glusterfs/pull/2456/commits/00761df0cd14833ff256b69dba7cf8e2b699554c +> fixes: #2443 +> Change-Id: I86ef0270d41d6fb924db97fde3196d7c98c8b564 +> Signed-off-by: Pranith Kumar K + +BUG: 1689375 +Change-Id: I86ef0270d41d6fb924db97fde3196d7c98c8b564 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245613 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/locks/issue-2443-crash.c | 67 +++++++++++++++++++++++++++++++++ + tests/bugs/locks/issue-2443-crash.t | 18 +++++++++ + xlators/protocol/client/src/client-lk.c | 1 + + 3 files changed, 86 insertions(+) + create mode 100644 tests/bugs/locks/issue-2443-crash.c + create mode 100644 tests/bugs/locks/issue-2443-crash.t + +diff --git a/tests/bugs/locks/issue-2443-crash.c b/tests/bugs/locks/issue-2443-crash.c +new file mode 100644 +index 0000000..5f580bf +--- /dev/null ++++ b/tests/bugs/locks/issue-2443-crash.c +@@ -0,0 +1,67 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int ++main(int argc, char *argv[]) ++{ ++ int fd = -1; ++ char *filename = NULL; ++ struct flock lock = { ++ 0, ++ }; ++ int i = 0; ++ int ret = -1; ++ ++ if (argc != 2) { ++ fprintf(stderr, "Usage: %s ", argv[0]); ++ goto out; ++ } ++ ++ filename = argv[1]; ++ ++ fd = open(filename, O_RDWR | O_CREAT, 0); ++ if (fd < 0) { ++ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno)); ++ goto out; ++ } ++ ++ lock.l_start = 0; ++ lock.l_type = F_RDLCK; ++ lock.l_whence = SEEK_SET; ++ lock.l_len = 2; ++ ++ ret = fcntl(fd, F_SETLK, &lock); ++ if (ret < 0) { ++ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno)); ++ goto out; ++ } ++ ++ lock.l_start = 2; ++ lock.l_type = F_WRLCK; ++ lock.l_whence = SEEK_SET; ++ lock.l_len = 2; ++ ++ ret = fcntl(fd, F_SETLK, &lock); ++ if (ret < 0) { ++ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno)); ++ goto out; ++ } ++ ++ lock.l_start = 0; ++ lock.l_type = F_RDLCK; ++ lock.l_whence = SEEK_SET; ++ lock.l_len = 4; ++ ++ ret = fcntl(fd, F_SETLK, &lock); ++ if (ret < 0) { ++ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno)); ++ goto out; ++ } ++out: ++ return ret; ++} +diff --git a/tests/bugs/locks/issue-2443-crash.t b/tests/bugs/locks/issue-2443-crash.t +new file mode 100644 +index 
0000000..162a4d7 +--- /dev/null ++++ b/tests/bugs/locks/issue-2443-crash.t +@@ -0,0 +1,18 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/brick0 ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++build_tester $(dirname $0)/issue-2443-crash.c ++TEST mv $(dirname $0)/issue-2443-crash $M0 ++cd $M0 ++TEST ./issue-2443-crash a ++ ++cd - ++cleanup; +diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c +index cb4e894..37c1d35 100644 +--- a/xlators/protocol/client/src/client-lk.c ++++ b/xlators/protocol/client/src/client-lk.c +@@ -101,6 +101,7 @@ add_locks(client_posix_lock_t *l1, client_posix_lock_t *l2) + sum = GF_CALLOC(1, sizeof(*sum), gf_client_mt_clnt_lock_t); + if (!sum) + return NULL; ++ INIT_LIST_HEAD(&sum->list); + + sum->fl_start = min(l1->fl_start, l2->fl_start); + sum->fl_end = max(l1->fl_end, l2->fl_end); +-- +1.8.3.1 + diff --git a/SOURCES/0584-dht-fixing-xattr-inconsistency.patch b/SOURCES/0584-dht-fixing-xattr-inconsistency.patch new file mode 100644 index 0000000..bf2c6b9 --- /dev/null +++ b/SOURCES/0584-dht-fixing-xattr-inconsistency.patch @@ -0,0 +1,429 @@ +From 2c6c4ad77ba5511a62846af932840deb5bc389ae Mon Sep 17 00:00:00 2001 +From: Tamar Shacked +Date: Mon, 7 Jun 2021 12:25:57 +0300 +Subject: [PATCH 584/584] dht - fixing xattr inconsistency + +The scenario of setting an xattr to a dir, killing one of the bricks, +removing the xattr, bringing back the brick results in xattr +inconsistency - The downed brick will still have the xattr, but the rest +won't. +This patch add a mechanism that will remove the extra xattrs during +lookup. + +Backport of: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24687/ +> fixes: #1324 +> Change-Id: Ifec0b7aea6cd40daa8b0319b881191cf83e031d1 +> Signed-off-by: Barak Sason Rofman + +BUG: 1600379 +Change-Id: I588f69b283e5354cd362d74486d6ec6d226ecc96 +Signed-off-by: Tamar Shacked +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245560 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/common-utils.c | 20 +++++++- + libglusterfs/src/glusterfs/common-utils.h | 6 +++ + tests/bugs/distribute/bug-1600379.t | 54 ++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 14 ++---- + xlators/cluster/dht/src/dht-common.h | 4 -- + xlators/cluster/dht/src/dht-helper.c | 4 ++ + xlators/cluster/dht/src/dht-selfheal.c | 11 ++++ + xlators/storage/posix/src/posix-helpers.c | 19 +++++++ + xlators/storage/posix/src/posix-inode-fd-ops.c | 69 ++++++++++++++++++++++++++ + xlators/storage/posix/src/posix.h | 3 ++ + 10 files changed, 189 insertions(+), 15 deletions(-) + create mode 100644 tests/bugs/distribute/bug-1600379.t + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index c2dfe28..d8b7c6e 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -54,6 +54,7 @@ + #include "xxhash.h" + #include + #include "glusterfs/libglusterfs-messages.h" ++#include "glusterfs/glusterfs-acl.h" + #include "protocol-common.h" + #ifdef __FreeBSD__ + #include +@@ -82,12 +83,21 @@ gf_boolean_t gf_signal_on_assert = false; + typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size); + typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size); + +-void gf_assert(void) 
++char *xattrs_to_heal[] = {"user.", ++ POSIX_ACL_ACCESS_XATTR, ++ POSIX_ACL_DEFAULT_XATTR, ++ QUOTA_LIMIT_KEY, ++ QUOTA_LIMIT_OBJECTS_KEY, ++ GF_SELINUX_XATTR_KEY, ++ GF_XATTR_MDATA_KEY, ++ NULL}; ++ ++void ++gf_assert(void) + { + if (gf_signal_on_assert) { + raise(SIGCONT); + } +- + } + + void +@@ -5430,3 +5440,9 @@ gf_d_type_from_ia_type(ia_type_t type) + return DT_UNKNOWN; + } + } ++ ++char ** ++get_xattrs_to_heal() ++{ ++ return xattrs_to_heal; ++} +diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h +index bd48b6f..8439bb6 100644 +--- a/libglusterfs/src/glusterfs/common-utils.h ++++ b/libglusterfs/src/glusterfs/common-utils.h +@@ -183,6 +183,12 @@ enum _gf_xlator_ipc_targets { + typedef enum _gf_special_pid gf_special_pid_t; + typedef enum _gf_xlator_ipc_targets _gf_xlator_ipc_targets_t; + ++/* Array to hold custom xattr keys */ ++extern char *xattrs_to_heal[]; ++ ++char ** ++get_xattrs_to_heal(); ++ + /* The DHT file rename operation is not a straightforward rename. + * It involves creating linkto and linkfiles, and can unlink or rename the + * source file depending on the hashed and cached subvols for the source +diff --git a/tests/bugs/distribute/bug-1600379.t b/tests/bugs/distribute/bug-1600379.t +new file mode 100644 +index 0000000..8d2f615 +--- /dev/null ++++ b/tests/bugs/distribute/bug-1600379.t +@@ -0,0 +1,54 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2} ++ ++# Verify volume creation ++EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++# Start volume and verify successful start ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++#------------------------------------------------------------ ++ ++# Test case - Remove xattr from killed brick on lookup ++#------------------------------------------------------------ ++# Create a dir and set custom xattr ++TEST mkdir $M0/testdir ++TEST setfattr -n user.attr -v val $M0/testdir ++xattr_val=`getfattr -d $B0/${V0}2/testdir | awk '{print $1}'`; ++TEST ${xattr_val}='user.attr="val"'; ++ ++# Kill 2nd brick process ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count ++ ++# Remove custom xattr ++TEST setfattr -x user.attr $M0/testdir ++ ++# Bring up the killed brick process ++TEST $CLI volume start $V0 force ++ ++# Perform lookup ++sleep 5 ++TEST ls $M0/testdir ++ ++# Check brick xattrs ++xattr_val_2=`getfattr -d $B0/${V0}2/testdir`; ++TEST [ ${xattr_val_2} = ''] ; ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index ce0fbbf..edfc6e7 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -127,15 +128,6 @@ dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf) + int + dht_rmdir_unlock(call_frame_t *frame, xlator_t *this); + +-char *xattrs_to_heal[] = {"user.", +- POSIX_ACL_ACCESS_XATTR, +- POSIX_ACL_DEFAULT_XATTR, +- QUOTA_LIMIT_KEY, +- QUOTA_LIMIT_OBJECTS_KEY, +- GF_SELINUX_XATTR_KEY, +- 
GF_XATTR_MDATA_KEY, +- NULL}; +- + char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL}; + + /* Return true if key exists in array +@@ -143,6 +135,8 @@ char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL}; + static gf_boolean_t + dht_match_xattr(const char *key) + { ++ char **xattrs_to_heal = get_xattrs_to_heal(); ++ + return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0; + } + +@@ -5399,11 +5393,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + int call_cnt = 0; + dht_local_t *local = NULL; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; ++ char **xattrs_to_heal; + + conf = this->private; + local = frame->local; + call_cnt = conf->subvolume_cnt; + local->flags = flags; ++ xattrs_to_heal = get_xattrs_to_heal(); + + if (!gf_uuid_is_null(local->gfid)) { + gf_uuid_unparse(local->gfid, gfid_local); +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 132b3b3..b856c68 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -54,10 +54,6 @@ + #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*" + #define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol." + +-/* Array to hold custom xattr keys +- */ +-extern char *xattrs_to_heal[]; +- + /* Rebalance nodeuuid flags */ + #define REBAL_NODEUUID_MINE 0x01 + +diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c +index 4f7370d..4c3940a 100644 +--- a/xlators/cluster/dht/src/dht-helper.c ++++ b/xlators/cluster/dht/src/dht-helper.c +@@ -2289,6 +2289,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, + int luret = -1; + int luflag = -1; + int i = 0; ++ char **xattrs_to_heal; + + if (!src || !dst) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED, +@@ -2305,6 +2306,9 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, + and set it to dst dict, here index start from 1 because + user xattr already checked in previous statement + */ ++ ++ xattrs_to_heal = get_xattrs_to_heal(); ++ + for (i = 1; xattrs_to_heal[i]; i++) { + keyval = dict_get(src, xattrs_to_heal[i]); + if (keyval) { +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index f4e17d1..8af7301 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -2315,6 +2315,15 @@ dht_dir_heal_xattrs(void *data) + if (subvol == mds_subvol) + continue; + if (uret || uflag) { ++ /* Custom xattr heal is required - let posix handle it */ ++ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, ++ "path=%s", local->loc.path, "key=%s", ++ "sync_backend_xattrs", NULL); ++ goto out; ++ } ++ + ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata, + NULL); + if (ret) { +@@ -2325,6 +2334,8 @@ dht_dir_heal_xattrs(void *data) + "user xattr on path %s on " + "subvol %s, gfid = %s ", + local->loc.path, subvol->name, gfid); ++ } else { ++ dict_del(xdata, "sync_backend_xattrs"); + } + } + } +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 16351d8..40a9ee4 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -3656,3 +3656,22 @@ out: + + return is_stale; + } ++ ++/* Delete user xattr from the file at the file-path specified by data and from ++ * dict */ ++int 
++posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data) ++{ ++ int ret; ++ char *real_path = data; ++ ++ ret = sys_lremovexattr(real_path, k); ++ if (ret) { ++ gf_msg("posix-helpers", GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, ++ "removexattr failed. key %s path %s", k, real_path); ++ } ++ ++ dict_del(dict, k); ++ ++ return ret; ++} +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 4c2983a..be22c5e 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -62,6 +62,7 @@ + #include + #include "posix-gfid-path.h" + #include ++#include + + extern char *marker_xattrs[]; + #define ALIGN_SIZE 4096 +@@ -2733,6 +2734,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t ret = 0; + ssize_t acl_size = 0; + dict_t *xattr = NULL; ++ dict_t *subvol_xattrs = NULL; + posix_xattr_filler_t filler = { + 0, + }; +@@ -2748,6 +2750,10 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + struct mdata_iatt mdata_iatt = { + 0, + }; ++ int8_t sync_backend_xattrs = _gf_false; ++ data_pair_t *custom_xattrs; ++ data_t *keyval = NULL; ++ char **xattrs_to_heal = get_xattrs_to_heal(); + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); +@@ -2930,6 +2936,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + goto out; + } + ++ ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs); ++ if (ret) { ++ gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs"); ++ } ++ ++ if (sync_backend_xattrs) { ++ /* List all custom xattrs */ ++ subvol_xattrs = dict_new(); ++ if (!subvol_xattrs) ++ goto out; ++ ++ ret = dict_set_int32_sizen(xdata, "list-xattr", 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, ++ "Unable to set list-xattr in dict "); ++ goto out; ++ } ++ ++ subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, ++ NULL); ++ ++ /* Remove all user xattrs from the file */ ++ dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr, ++ real_path); ++ ++ /* Remove all custom xattrs from the file */ ++ for (i = 1; xattrs_to_heal[i]; i++) { ++ keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]); ++ if (keyval) { ++ ret = sys_lremovexattr(real_path, xattrs_to_heal[i]); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, ++ errno, "removexattr failed. 
key %s path %s", ++ xattrs_to_heal[i], loc->path); ++ goto out; ++ } ++ ++ dict_del(subvol_xattrs, xattrs_to_heal[i]); ++ keyval = NULL; ++ } ++ } ++ ++ /* Set custom xattrs based on info provided by DHT */ ++ custom_xattrs = dict->members_list; ++ ++ while (custom_xattrs != NULL) { ++ ret = sys_lsetxattr(real_path, custom_xattrs->key, ++ custom_xattrs->value->data, ++ custom_xattrs->value->len, flags); ++ if (ret) { ++ op_errno = errno; ++ gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d", ++ custom_xattrs->key, ret); ++ goto out; ++ } ++ ++ custom_xattrs = custom_xattrs->next; ++ } ++ } ++ + xattr = dict_new(); + if (!xattr) + goto out; +@@ -3037,6 +3103,9 @@ out: + if (xattr) + dict_unref(xattr); + ++ if (subvol_xattrs) ++ dict_unref(subvol_xattrs); ++ + return 0; + } + +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 4be979c..b357d34 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -686,4 +686,7 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + gf_boolean_t + posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); + ++int ++posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data); ++ + #endif /* _POSIX_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch b/SOURCES/0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch new file mode 100644 index 0000000..e3fa401 --- /dev/null +++ b/SOURCES/0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch @@ -0,0 +1,77 @@ +From ba399a083a56963bb7414535ede6eff6afcd1a0a Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Mon, 14 Jun 2021 12:32:06 -0400 +Subject: [PATCH 585/585] ganesha_ha: ganesha_grace RA fails in start() and/or + fails in monitor () (#2523) + +shell [[ ]] string compare fails to match returned attr to the +pattern and subsequently returns status of "not running", resulting +in dependencies such as the IPaddr (cluster_ip) RA not starting + +Change-Id: I2c8d6f5c4cf0480672d52d8aa0d9226950441dc9 +commit 8ec66a43eedd505ec0b40f55c05f13a77fe8074e +PR: https://github.com/gluster/glusterfs/pull/2523 +issue: https://github.com/gluster/glusterfs/issues/2522 +BUG: 1945143 +Signed-off-by: Kaleb S. 
KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/247613 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/ocf/ganesha_grace | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace +index edc6fa2..ca219af 100644 +--- a/extras/ganesha/ocf/ganesha_grace ++++ b/extras/ganesha/ocf/ganesha_grace +@@ -122,15 +122,18 @@ ganesha_grace_start() + + # case 1 + if [[ -z "${attr}" ]]; then ++ ocf_log debug "grace start: returning success case 1" + return ${OCF_SUCCESS} + fi + + # case 2 +- if [[ "${attr}" = *"value=1" ]]; then ++ if [[ "${attr}" = *"host=\"${host}\" value=\"1\"" ]]; then ++ ocf_log debug "grace start: returning success case 2" + return ${OCF_SUCCESS} + fi + + # case 3 ++ ocf_log info "grace start returning: not running case 3 (${attr})" + return ${OCF_NOT_RUNNING} + } + +@@ -162,7 +165,7 @@ ganesha_grace_monitor() + { + local host=$(ocf_local_nodename) + +- ocf_log debug "ganesha_grace monitor ${host}" ++ ocf_log debug "ganesha_grace_monitor ${host}" + + attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then +@@ -174,13 +177,16 @@ ganesha_grace_monitor() + # chance to create it. In which case we'll pretend + # everything is okay this time around + if [[ -z "${attr}" ]]; then ++ ocf_log debug "grace monitor: returning success case 1" + return ${OCF_SUCCESS} + fi + +- if [[ "${attr}" = *"value=1" ]]; then ++ if [[ "${attr}" = *"host=\"${host}\" value=\"1\"" ]]; then ++ ocf_log debug "grace monitor: returning success case 2" + return ${OCF_SUCCESS} + fi + ++ ocf_log info "grace monitor: returning not running case 3 (${attr})" + return ${OCF_NOT_RUNNING} + } + +-- +1.8.3.1 + diff --git a/SOURCES/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch b/SOURCES/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch new file mode 100644 index 0000000..62c574d --- /dev/null +++ b/SOURCES/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch @@ -0,0 +1,298 @@ +From e431321f1348b5d51733a6b6c5e046fd8c6e28cc Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 5 Jul 2021 10:52:10 +0530 +Subject: [PATCH 586/586] protocol/client: Do not reopen fd post handshake if + posix lock is held + +Problem: +With client.strict-locks enabled, in some cases where the posix lock is +taken after a brick gets disconnected, the fd is getting reopened when +the brick gets reconnected to the client as part of client_post_handshake. +In such cases the saved fdctx's lock_list may not have the latest +information. + +Fix: +Check the lock information in the fdctx->lk_ctx as well post handshake +which will have the latest information on the locks. +Also check for this field in other places as well to prevent writes +happening with anonymous fd even without re-opening the fd on the +restarted brick. 
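+
+A minimal sketch of the combined check this patch introduces (condensed
+from the client-helpers.c diff below):
+
+    gf_boolean_t
+    fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
+    {
+        /* The fd is lock-free only when both the saved posix lock
+         * list and the lk_ctx carry no locks. */
+        if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
+            return _gf_true;
+
+        return _gf_false;
+    }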
+ +> Upstream patch: https://github.com/gluster/glusterfs/pull/2582 +> Fixes: #2581 +> Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b +> Signed-off-by: karthik-us ksubrahm@redhat.com + +BUG: 1689375 +Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/252588 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/replicate/do-not-reopen-fd.t | 76 ++++++++++++++++++-------- + xlators/protocol/client/src/client-handshake.c | 2 +- + xlators/protocol/client/src/client-helpers.c | 11 +++- + xlators/protocol/client/src/client.c | 2 +- + xlators/protocol/client/src/client.h | 3 + + 5 files changed, 67 insertions(+), 27 deletions(-) + +diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t +index 13b5218..f346709 100644 +--- a/tests/bugs/replicate/do-not-reopen-fd.t ++++ b/tests/bugs/replicate/do-not-reopen-fd.t +@@ -20,10 +20,41 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1 + + TEST touch $M0/a ++gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a) ++gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a) ++ ++ ++# Open fd from a client, check for open fd on all the bricks. ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'rw' $M0/a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++# Kill a brick and take lock on the fd ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++TEST flock -x $fd1 ++ ++# Restart the brick and check for no open fd on the restarted brick. ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++# Write on the fd. It should fail on the restarted brick. ++TEST fd_write $fd1 "data-0" ++EXPECT "" cat $B0/${V0}0/a ++EXPECT "data-0" cat $B0/${V0}1/a ++EXPECT "data-0" cat $B0/${V0}2/a ++ ++TEST fd_close $fd1 + + # Kill one brick and take lock on the fd and do a write. + TEST kill_brick $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a + +@@ -34,7 +65,7 @@ TEST fd_write $fd1 "data-1" + # should still succeed as there were no quorum disconnects. 
+ TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd_write $fd1 "data-2" + EXPECT "" cat $B0/${V0}0/a + EXPECT "data-2" cat $B0/${V0}1/a +@@ -42,9 +73,6 @@ EXPECT "data-2" cat $B0/${V0}2/a + + # Check there is no fd opened on the 1st brick by checking for the gfid inside + # /proc/pid-of-brick/fd/ directory +-gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a) +-gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a) +- + EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a +@@ -59,7 +87,7 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + # Kill 2nd brick and try writing to the file. The write should fail due to + # quorum failure. + TEST kill_brick $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 1 + TEST ! fd_write $fd1 "data-3" + TEST ! fd_cat $fd1 + +@@ -67,7 +95,7 @@ TEST ! fd_cat $fd1 + # which were down previously, will return EBADFD now. + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1 + TEST ! fd_write $fd1 "data-4" + TEST ! fd_cat $fd1 + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +@@ -79,9 +107,9 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 + TEST $CLI volume heal $V0 enable + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 + + TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +@@ -103,7 +131,7 @@ TEST ! fd_write $fd1 "data-5" + # Kill the only brick that is having lock and try taking lock on another client + # which should succeed. + TEST kill_brick $V0 $H0 $B0/${V0}2 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 2 + TEST flock -x $fd2 + TEST fd_write $fd2 "data-6" + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +@@ -114,17 +142,17 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a + # fail and operations on the 2nd fd should succeed. 
+ TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M1 $V0-replicate-0 2 + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + TEST ! fd_write $fd1 "data-7" + + TEST ! fd_cat $fd1 + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT "^0" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + TEST fd_cat $fd2 + + # Close both the fds which will release the locks and then re-open and take lock +@@ -159,9 +187,9 @@ EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0 + # Heal the volume + TEST $CLI volume heal $V0 enable + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 + + TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +@@ -169,7 +197,7 @@ TEST $CLI volume heal $V0 disable + + # Kill one brick and open a fd. + TEST kill_brick $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a + +@@ -182,7 +210,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + # any of the bricks. + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd_write $fd1 "data-10" + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + +@@ -193,7 +221,7 @@ TEST fd_close $fd1 + + # Kill one brick, open and take lock on a fd. 
+ TEST kill_brick $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a + TEST flock -x $fd1 +@@ -204,7 +232,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + # Kill & restart another brick so that it will return EBADFD + TEST kill_brick $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" brick_up_status $V0 $H0 $B0/${V0}1 + + # Restart the bricks and then write. Now fd should not get re-opened since lock + # is still held on one brick and write should also fail as there is no quorum. +@@ -212,8 +240,8 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1 + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1 + TEST ! fd_write $fd1 "data-11" + EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index a12472b..20e03d8 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -911,7 +911,7 @@ client_post_handshake(call_frame_t *frame, xlator_t *this) + list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos) + { + if (fdctx->remote_fd != -1 || +- (!list_empty(&fdctx->lock_list) && conf->strict_locks)) ++ (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks)) + continue; + + fdctx->reopen_done = client_child_up_reopen_done; +diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c +index a80f303..b4a7294 100644 +--- a/xlators/protocol/client/src/client-helpers.c ++++ b/xlators/protocol/client/src/client-helpers.c +@@ -15,6 +15,15 @@ + #include + #include + ++gf_boolean_t ++fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx) ++{ ++ if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx)) ++ return _gf_true; ++ ++ return _gf_false; ++} ++ + int + client_fd_lk_list_empty(fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock) + { +@@ -441,7 +450,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd, + *remote_fd = fdctx->remote_fd; + } + +- locks_involved = !list_empty(&fdctx->lock_list); ++ locks_involved = !fdctx_lock_lists_empty(fdctx); + } + } + pthread_spin_unlock(&conf->fd_lock); +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 35a5340..6df2ed1 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -881,7 +881,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + pthread_spin_lock(&conf->fd_lock); + { + fdctx = this_fd_get_ctx(fd, this); +- if (fdctx && !list_empty(&fdctx->lock_list)) { ++ if (fdctx && 
!fdctx_lock_lists_empty(fdctx)) { + ret = -1; + op_errno = EBADFD; + } +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index f952aea..799fe6e 100644 +--- a/xlators/protocol/client/src/client.h ++++ b/xlators/protocol/client/src/client.h +@@ -535,4 +535,7 @@ client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock, + int + client_is_setlk(int32_t cmd); + ++gf_boolean_t ++fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx); ++ + #endif /* !_CLIENT_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0587-Update-rfc.sh-to-rhgs-3.5.6.patch b/SOURCES/0587-Update-rfc.sh-to-rhgs-3.5.6.patch new file mode 100644 index 0000000..420a4cf --- /dev/null +++ b/SOURCES/0587-Update-rfc.sh-to-rhgs-3.5.6.patch @@ -0,0 +1,26 @@ +From f72780b560ea8efe1508aa9ddc574e6dc066bf9a Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Wed, 29 Sep 2021 10:44:37 +0200 +Subject: [PATCH 587/610] Update rfc.sh to rhgs-3.5.6 + +Signed-off-by: Csaba Henk +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index daeff32..67798cb 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.5"; ++branch="rhgs-3.5.6"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0588-locks-Fix-null-gfid-in-lock-contention-notifications.patch b/SOURCES/0588-locks-Fix-null-gfid-in-lock-contention-notifications.patch new file mode 100644 index 0000000..1e6c488 --- /dev/null +++ b/SOURCES/0588-locks-Fix-null-gfid-in-lock-contention-notifications.patch @@ -0,0 +1,388 @@ +From e3813685237dbdf8dc7cf28726fff2caf2288706 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Mon, 19 Jul 2021 15:37:02 +0200 +Subject: [PATCH 588/610] locks: Fix null gfid in lock contention notifications + +This patch fixes 3 problems: + +First problem: + +After commit c0bd592e, the pl_inode_t object was also created in the +cbk of lookup requests. Lookup requests are a bit different than any +other request because the inode received may not be completely +initialized. In particular, inode->gfid may be null. + +This caused that the gfid stored in the pl_inode_t object was null in +some cases. This gfid is used mostly for logs, but also to send lock +contention notifications. This meant that some notifications could be +sent with a null gfid, making impossible for the client xlator to +correctly identify the contending inode, so the lock was not released +immediately when eager-lock was also enabled. + +Second problem: + +The feature introduced by c0bd592e needed to track the number of +hardlinks of each inode to detect when it was deleted. However it +was done using the 'get-link-count' special xattr on lookup, while +posix only implements it for unlink and rename. + +Also, the number of hardlinks was not incremented for mkdir, mknod, +rename, ..., so it didn't work correctly for directories. + +Third problem: + +When the last hardlink of an open file is deleted, all locks will be +denied with ESTALE error, but that's not correct. Access to the open +fd must succeed. + +The first problem is fixed by avoiding creating pl_inode_t objects +during lookup. Second and third problems are fixed by completely +ignoring if the file has been deleted or not. Even if we grant a +lock on a non-existing file, the next operation done by the client +inside the lock will return the correct error, which should be enough. 
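+
+As a hypothetical client sequence, the behaviour required by the third
+problem looks like this (plain POSIX calls, not glusterfs code):
+
+    fd = open("/mnt/vol/file", O_RDWR);  /* fd stays valid after unlink */
+    unlink("/mnt/vol/file");             /* last hardlink gone, fd open */
+    flock(fd, LOCK_EX);                  /* must be granted, not ESTALE */
+    write(fd, buf, len);                 /* must succeed on the open fd */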
+ +Upstream patch: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2553 +> Fixes: #2551 +> Change-Id: Ic73e82f6b725b838c1600b6a128ea36a75f13253 +> Signed-off-by: Xavi Hernandez + +BUG: 1962972 +Change-Id: Ic73e82f6b725b838c1600b6a128ea36a75f13253 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279192 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/locks/issue-2551.t | 58 ++++++++++++++++++ + xlators/features/locks/src/common.c | 31 +++------- + xlators/features/locks/src/locks.h | 2 - + xlators/features/locks/src/posix.c | 118 +++--------------------------------- + 4 files changed, 74 insertions(+), 135 deletions(-) + create mode 100644 tests/bugs/locks/issue-2551.t + +diff --git a/tests/bugs/locks/issue-2551.t b/tests/bugs/locks/issue-2551.t +new file mode 100644 +index 0000000..a32af02 +--- /dev/null ++++ b/tests/bugs/locks/issue-2551.t +@@ -0,0 +1,58 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++function check_time() { ++ local max="${1}" ++ local start="$(date +"%s")" ++ ++ shift ++ ++ if "${@}"; then ++ if [[ $(($(date +"%s") - ${start})) -lt ${max} ]]; then ++ return 0 ++ fi ++ fi ++ ++ return 1 ++} ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/brick{0..2} ++TEST $CLI volume set $V0 disperse.eager-lock on ++TEST $CLI volume set $V0 disperse.eager-lock-timeout 30 ++TEST $CLI volume set $V0 features.locks-notify-contention on ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.quick-read off ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0 ++ ++TEST mkdir $M0/dir ++TEST dd if=/dev/zero of=$M0/dir/test bs=4k count=1 ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M1 ++ ++TEST dd if=/dev/zero of=$M0/dir/test bs=4k count=1 conv=notrunc ++TEST check_time 5 dd if=/dev/zero of=$M1/dir/test bs=4k count=1 conv=notrunc +diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c +index cddbfa6..5403086 100644 +--- a/xlators/features/locks/src/common.c ++++ b/xlators/features/locks/src/common.c +@@ -468,9 +468,7 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) + pl_inode->check_mlock_info = _gf_true; + pl_inode->mlock_enforced = _gf_false; + +- /* -2 means never looked up. -1 means something went wrong and link +- * tracking is disabled. 
*/ +- pl_inode->links = -2; ++ pl_inode->remove_running = 0; + + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); + if (ret) { +@@ -1403,11 +1401,6 @@ pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + + pthread_mutex_lock(&pl_inode->mutex); + +- if (pl_inode->removed) { +- error = ESTALE; +- goto unlock; +- } +- + if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { + error = -1; + /* We skip the unlock here because the caller must create a stub when +@@ -1420,7 +1413,6 @@ pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode->is_locked = _gf_true; + pl_inode->remove_running++; + +-unlock: + pthread_mutex_unlock(&pl_inode->mutex); + + done: +@@ -1490,20 +1482,18 @@ pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) + + pthread_mutex_lock(&pl_inode->mutex); + +- if (error == 0) { +- if (pl_inode->links >= 0) { +- pl_inode->links--; +- } +- if (pl_inode->links == 0) { +- pl_inode->removed = _gf_true; +- } +- } +- + pl_inode->remove_running--; + + if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { + pl_inode->is_locked = _gf_false; + ++ /* At this point it's possible that the inode has been deleted, but ++ * there could be open fd's still referencing it, so we can't prevent ++ * pending locks from being granted. If the file has really been ++ * deleted, whatever the client does once the lock is granted will ++ * fail with the appropriate error, so we don't need to worry about ++ * it here. */ ++ + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, +@@ -1555,11 +1545,6 @@ pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) + pl_dom_list_t *dom; + pl_inode_lock_t *ilock; + +- /* If the inode has been deleted, we won't allow any lock. */ +- if (pl_inode->removed) { +- return -ESTALE; +- } +- + /* We only synchronize with locks made for regular operations coming from + * the user. Locks done for internal purposes are hard to control and could + * lead to long delays or deadlocks quite easily. */ +diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h +index 6666feb..2406dcd 100644 +--- a/xlators/features/locks/src/locks.h ++++ b/xlators/features/locks/src/locks.h +@@ -202,10 +202,8 @@ struct __pl_inode { + int fop_wind_count; + pthread_cond_t check_fop_wind_count; + +- int32_t links; /* Number of hard links the inode has. */ + uint32_t remove_running; /* Number of remove operations running. */ + gf_boolean_t is_locked; /* Regular locks will be blocked. */ +- gf_boolean_t removed; /* The inode has been deleted. 
*/ + }; + typedef struct __pl_inode pl_inode_t; + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 22ef5b8..d5effef 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -2975,104 +2975,24 @@ out: + return ret; + } + +-static int32_t +-pl_request_link_count(dict_t **pxdata) +-{ +- dict_t *xdata; +- +- xdata = *pxdata; +- if (xdata == NULL) { +- xdata = dict_new(); +- if (xdata == NULL) { +- return ENOMEM; +- } +- } else { +- dict_ref(xdata); +- } +- +- if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) { +- dict_unref(xdata); +- return ENOMEM; +- } +- +- *pxdata = xdata; +- +- return 0; +-} +- +-static int32_t +-pl_check_link_count(dict_t *xdata) +-{ +- int32_t count; +- +- /* In case we are unable to read the link count from xdata, we take a +- * conservative approach and return -2, which will prevent the inode from +- * being considered deleted. In fact it will cause link tracking for this +- * inode to be disabled completely to avoid races. */ +- +- if (xdata == NULL) { +- return -2; +- } +- +- if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) { +- return -2; +- } +- +- return count; +-} +- + int32_t + pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) + { +- pl_inode_t *pl_inode; +- +- if (op_ret >= 0) { +- pl_inode = pl_inode_get(this, inode, NULL); +- if (pl_inode == NULL) { +- PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- return 0; +- } +- +- pthread_mutex_lock(&pl_inode->mutex); +- +- /* We only update the link count if we previously didn't know it. +- * Doing it always can lead to races since lookup is not executed +- * atomically most of the times. */ +- if (pl_inode->links == -2) { +- pl_inode->links = pl_check_link_count(xdata); +- if (buf->ia_type == IA_IFDIR) { +- /* Directories have at least 2 links. To avoid special handling +- * for directories, we simply decrement the value here to make +- * them equivalent to regular files. 
*/ +- pl_inode->links--; +- } +- } +- +- pthread_mutex_unlock(&pl_inode->mutex); +- } +- + PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata, + postparent); ++ + return 0; + } + + int32_t + pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- int32_t error; ++ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); ++ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xdata); + +- error = pl_request_link_count(&xdata); +- if (error == 0) { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); +- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xdata); +- dict_unref(xdata); +- } else { +- STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL); +- } + return 0; + } + +@@ -3881,9 +3801,7 @@ unlock: + __dump_posixlks(pl_inode); + } + +- gf_proc_dump_write("links", "%d", pl_inode->links); + gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running); +- gf_proc_dump_write("removed", "%u", pl_inode->removed); + } + pthread_mutex_unlock(&pl_inode->mutex); + +@@ -4508,21 +4426,9 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) + { +- pl_inode_t *pl_inode = (pl_inode_t *)cookie; +- +- if (op_ret >= 0) { +- pthread_mutex_lock(&pl_inode->mutex); +- +- /* TODO: can happen pl_inode->links == 0 ? */ +- if (pl_inode->links >= 0) { +- pl_inode->links++; +- } +- +- pthread_mutex_unlock(&pl_inode->mutex); +- } +- + PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); ++ + return 0; + } + +@@ -4530,18 +4436,10 @@ int + pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- pl_inode_t *pl_inode; +- +- pl_inode = pl_inode_get(this, oldloc->inode, NULL); +- if (pl_inode == NULL) { +- STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, +- NULL); +- return 0; +- } +- + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); +- STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); ++ STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); ++ + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0589-extras-fix-for-postscript-failure-on-logrotation-of-.patch b/SOURCES/0589-extras-fix-for-postscript-failure-on-logrotation-of-.patch new file mode 100644 index 0000000..861791f --- /dev/null +++ b/SOURCES/0589-extras-fix-for-postscript-failure-on-logrotation-of-.patch @@ -0,0 +1,63 @@ +From 0bb71e1492b1ad442758399eb8dcb5f087d77f12 Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha +Date: Wed, 28 Apr 2021 02:14:27 +0530 +Subject: [PATCH 589/610] extras: fix for postscript failure on logrotation of + snapd logs (#2310) + +Issue: +On executing the logrotate command, the postscript runs as a separate process, +and when we do a grep for the snapd process it returns the PID of that +short-term process as well, and executing a kill on that throws the error. +To check a similar error could be seen if we replace the killall for bricks +log rotation with a for loop on PIDs. + +Fix: +Use the killall command on the list of snapd processes instead of +using the kill command to individually kill them. 
+ +>Fixes: #2360 +>Change-Id: I1ad6e3e4d74128706e71900d02e715635294ff72 +>Signed-off-by: nik-redhat + +Upstream patch: https://github.com/gluster/glusterfs/pull/2310 +BUG: 1668303 + +Change-Id: I59910fc3660e11e131b1aa813848c2e19cbffefd +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279533 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/glusterfs-logrotate | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate +index 75f700e..2b9028b 100644 +--- a/extras/glusterfs-logrotate ++++ b/extras/glusterfs-logrotate +@@ -45,3 +45,22 @@ + compress + delaycompress + } ++ ++# Rotate snapd log ++/var/log/glusterfs/snaps/*/*.log { ++ sharedscripts ++ weekly ++ maxsize 10M ++ minsize 100k ++ ++ # 6 months of logs are good enough ++ rotate 26 ++ ++ missingok ++ compress ++ delaycompress ++ notifempty ++ postrotate ++ /usr/bin/killall -HUP `pgrep -f "glusterfs.*snapd"` > /dev/null 2>&1 || true ++ endscript ++} +-- +1.8.3.1 + diff --git a/SOURCES/0590-cluster-afr-Don-t-check-for-stale-entry-index.patch b/SOURCES/0590-cluster-afr-Don-t-check-for-stale-entry-index.patch new file mode 100644 index 0000000..c7ff40a --- /dev/null +++ b/SOURCES/0590-cluster-afr-Don-t-check-for-stale-entry-index.patch @@ -0,0 +1,128 @@ +From 87138f86b8cb98d1c9d1a4c9a2393e7978d20b1d Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Tue, 5 Oct 2021 12:33:01 +0530 +Subject: [PATCH 590/610] cluster/afr: Don't check for stale entry-index + +Problem: +In every entry index heal there is a check to see if the +index is stale or not. + 1. If a file is created when the brick is down this +will lead to an extra index lookup because the name is not stale. + 2. If a file is deleted when the brick is down this will also lead to + and extra index lookup because the name is not stale. + 3. If a file is created and deleted when the brick is down then the + index is stale and this will save entry-heal i.e. 2 entrylks and 2 lookups + +Since 1, 2 happen significantly more than 3, this is a bad tradeoff. + +Fix: +Let stale index be removed as part of normal entry heal detecting 'the +name is already deleted' code path. + +> Upstream patch: https://github.com/gluster/glusterfs/pull/2612 +> fixes: gluster#2611 +> Change-Id: I29bcc07f2480877a83b30dbd7e2e5631a74df8e8 +> Signed-off-by: Pranith Kumar K + +BUG: 1994593 +Change-Id: I29bcc07f2480877a83b30dbd7e2e5631a74df8e8 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279606 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-self-heal-entry.c | 46 +++++++-------------------- + 1 file changed, 11 insertions(+), 35 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index a17dd93..14b7417 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -933,37 +933,8 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + loc_t *parent, void *data) + { + int ret = 0; +- loc_t loc = { +- 0, +- }; +- struct iatt iatt = { +- 0, +- }; + afr_granular_esh_args_t *args = data; + +- /* Look up the actual inode associated with entry. If the lookup returns +- * ESTALE or ENOENT, then it means we have a stale index. Remove it. 
+- * This is analogous to the check in afr_shd_index_heal() except that +- * here it is achieved through LOOKUP and in afr_shd_index_heal() through +- * a GETXATTR. +- */ +- +- loc.inode = inode_new(args->xl->itable); +- loc.parent = inode_ref(args->heal_fd->inode); +- gf_uuid_copy(loc.pargfid, loc.parent->gfid); +- loc.name = entry->d_name; +- +- ret = syncop_lookup(args->xl, &loc, &iatt, NULL, NULL, NULL); +- if ((ret == -ENOENT) || (ret == -ESTALE)) { +- /* The name indices under the pgfid index dir are guaranteed +- * to be regular files. Hence the hardcoding. +- */ +- afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); +- ret = 0; +- goto out; +- } +- /* TBD: afr_shd_zero_xattrop? */ +- + ret = afr_selfheal_entry_dirent(args->frame, args->xl, args->heal_fd, + entry->d_name, parent->inode, subvol, + _gf_false); +@@ -974,8 +945,6 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + if (ret == -1) + args->mismatch = _gf_true; + +-out: +- loc_wipe(&loc); + return ret; + } + +@@ -1050,7 +1019,9 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, + local = frame->local; + + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO, +- "performing entry selfheal on %s", uuid_utoa(fd->inode->gfid)); ++ "performing %s entry selfheal on %s", ++ (local->need_full_crawl ? "full" : "granular"), ++ uuid_utoa(fd->inode->gfid)); + + for (i = 0; i < priv->child_count; i++) { + /* Expunge */ +@@ -1112,6 +1083,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + gf_boolean_t did_sh = _gf_true; ++ char *heal_type = "granular entry"; + + priv = this->private; + local = frame->local; +@@ -1194,11 +1166,15 @@ postop_unlock: + afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, + postop_lock, NULL); + out: +- if (did_sh) +- afr_log_selfheal(fd->inode->gfid, this, ret, "entry", source, sources, ++ if (did_sh) { ++ if (local->need_full_crawl) { ++ heal_type = "full entry"; ++ } ++ afr_log_selfheal(fd->inode->gfid, this, ret, heal_type, source, sources, + healed_sinks); +- else ++ } else { + ret = 1; ++ } + + if (locked_replies) + afr_replies_wipe(locked_replies, priv->child_count); +-- +1.8.3.1 + diff --git a/SOURCES/0591-afr-check-for-valid-iatt.patch b/SOURCES/0591-afr-check-for-valid-iatt.patch new file mode 100644 index 0000000..8f1e48e --- /dev/null +++ b/SOURCES/0591-afr-check-for-valid-iatt.patch @@ -0,0 +1,44 @@ +From 19460ebc988795eeabaeb8e25d6eba9a3cf2864b Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 4 Oct 2021 12:44:21 +0530 +Subject: [PATCH 591/610] afr: check for valid iatt + +Problem: +If the entry being processed by afr_shd_anon_inode_cleaner() is no +longer present, gfid lookup fails with ENOENT on all bricks and iatt +will never be assigned, causing a crash due to null dereference. + +Fix: +Add a null-check for iatt. 
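+
+Sketch of the failure mode (simplified; the one-line diff below adds
+the guard):
+
+    struct iatt *iatt = NULL;
+    /* gfid lookup returned ENOENT on every brick, so iatt was never
+     * assigned; when count != 1 the dereference below crashes shd */
+    if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links))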
+ +> Upstream patch: https://github.com/gluster/glusterfs/pull/2660 +> Fixes: gluster#2659 +> Change-Id: I6abfc8063677861ce9388ca4efdf491ec956dc74 +> Signed-off-by: Ravishankar N + +BUG: 1995029 +Change-Id: I6abfc8063677861ce9388ca4efdf491ec956dc74 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279529 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-self-heald.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 18aed93..bc720cf 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -870,7 +870,7 @@ afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + } + + /*Inode is deleted from subvol*/ +- if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { ++ if (count == 1 || (iatt && iatt->ia_type != IA_IFDIR && multiple_links)) { + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, + priv->anon_inode_name, entry->d_name, subvol->name); +-- +1.8.3.1 + diff --git a/SOURCES/0592-md-cache-fix-integer-signedness-mismatch.patch b/SOURCES/0592-md-cache-fix-integer-signedness-mismatch.patch new file mode 100644 index 0000000..94cfe88 --- /dev/null +++ b/SOURCES/0592-md-cache-fix-integer-signedness-mismatch.patch @@ -0,0 +1,119 @@ +From be3448ed5d9d59752cff4df8325ee67eb7d41531 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Mon, 19 Jul 2021 06:56:18 +0200 +Subject: [PATCH 592/610] md-cache: fix integer signedness mismatch + +md-cache uses a mechanism based on a generation number to detect +modifications made by other clients to the entries and invalidate +the cached data. + +This generation number is a 32 bit integer. When it overflows, +special management is done to avoid problems. This overflow condition +is tracked with a single bit. + +For many fops, when they are received, the overflow bit and the +current generation number are recorded in a single 64-bit value +which is used later in the cbk. + +This is the problematic function: + + uint64_t + __mdc_get_generation(xlator_t *this, struct md_cache *mdc) + { + uint64_t gen = 0, rollover; + struct mdc_conf *conf = NULL; + + conf = this->private; + + gen = GF_ATOMIC_INC(conf->generation); + if (gen == 0) { + gf_log("MDC", GF_LOG_NOTICE, "%p Reset 1", mdc); + mdc->gen_rollover = !mdc->gen_rollover; + gen = GF_ATOMIC_INC(conf->generation); + mdc->ia_time = 0; + mdc->generation = 0; + mdc->invalidation_time = gen - 1; + } + + rollover = mdc->gen_rollover; + gen |= (rollover << 32); + return gen; + } + +'conf->generation' is declared as an atomic signed 32-bit integer, +and 'gen' is an unsigned 64-bit value. When 'gen' is assigned from +a signed int, the sign bit is extended to fill the high 32 bits of +'gen'. If the counter has overflown the maximum signed positive +value, it will become negative (sign bit = 1). + +In this case, when 'rollover' is later combined with 'gen', all the +high bits remain at '1'. + +This value is used later in 'mdc_inode_iatt_set_validate' during +callback processing. The overflow condition and generation numbers +from when the operation was received are recovered this way: + + rollover = incident_time >> 32; + incident_time = (incident_time & 0xffffffff); + +('incident_time' is the saved value from '__mdc_get_generation'). 
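+
+The sign extension can be reproduced in isolation (standalone C99
+snippet, not part of the patch):
+
+    #include <inttypes.h>
+    #include <stdint.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        int32_t signed_gen = INT32_MIN;  /* overflowed signed counter */
+        uint64_t gen = signed_gen;   /* sign-extends to 0xffffffff80000000 */
+        printf("rollover=%" PRIx64 "\n", gen >> 32);  /* prints ffffffff */
+        return 0;
+    }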
+ +So here rollover will be 0xffffffff, when it's expected to be 0 +or 1 only. When this is compared later with the cached overflow +bit, it doesn't match, which prevents updating the cached info. + +This is bad in general, but it's even worse when an entry is not +cached and 'rollover' is 0xffffffff the first time. When md-cache +doesn't have cached data it assumes it's everything 0. This causes +a mismatch, which sends an invalidation request to the kernel, but +since the 'rollover' doesn't match, the cached data is not updated. +So the next time the cached data is checked, it will also send an +invalidation to the kernel, indefinitely. + +This patch fixes two things: + +1. The 'generation' field is made unsigned to avoid sign extension. +2. Invalidation requests are only sent if we already had valid cached + data. Otherwise it doesn't make sense to send an invalidation. + +Upstream patch: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2619 +> Fixes: #2617 +> Change-Id: Ie40e68288cf143e1bc1a40f46da98f51bb2d6864 +> Signed-off-by: Xavi Hernandez + +BUG: 1904137 +Change-Id: Ie40e68288cf143e1bc1a40f46da98f51bb2d6864 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279188 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/md-cache/src/md-cache.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c +index bbbee3b..e0256d6 100644 +--- a/xlators/performance/md-cache/src/md-cache.c ++++ b/xlators/performance/md-cache/src/md-cache.c +@@ -79,7 +79,7 @@ struct mdc_conf { + gf_boolean_t cache_statfs; + struct mdc_statfs_cache statfs_cache; + char *mdc_xattr_str; +- gf_atomic_int32_t generation; ++ gf_atomic_uint32_t generation; + }; + + struct mdc_local; +@@ -537,7 +537,7 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, + (iatt->ia_mtime_nsec != mdc->md_mtime_nsec) || + (iatt->ia_ctime != mdc->md_ctime) || + (iatt->ia_ctime_nsec != mdc->md_ctime_nsec)) { +- if (conf->global_invalidation && ++ if (conf->global_invalidation && mdc->valid && + (!prebuf || (prebuf->ia_mtime != mdc->md_mtime) || + (prebuf->ia_mtime_nsec != mdc->md_mtime_nsec) || + (prebuf->ia_ctime != mdc->md_ctime) || +-- +1.8.3.1 + diff --git a/SOURCES/0593-dht-explicit-null-dereference.patch b/SOURCES/0593-dht-explicit-null-dereference.patch new file mode 100644 index 0000000..4ad9eea --- /dev/null +++ b/SOURCES/0593-dht-explicit-null-dereference.patch @@ -0,0 +1,58 @@ +From 76c9faf5c750428e5eb69462b82ee0c12cbdabc0 Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Fri, 25 Sep 2020 18:39:51 +0530 +Subject: [PATCH 593/610] dht: explicit null dereference + +Added a null check for uuid_list_copy, to avoid +null dereference in strtok_r() in case of strdup() +failure. 
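+
+The guarded pattern in general form (sketch; names as in the
+dht-common.c diff below):
+
+    uuid_list_copy = gf_strdup(uuid_list);
+    if (!uuid_list_copy)
+        goto unlock;   /* gf_strdup() returns NULL on allocation
+                        * failure; never hand NULL to strtok_r() */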
+ +CID: 1325612 +CID: 1274223 + +>Updates: #1060 + +>Change-Id: I641a5068cd76d7b2ed92eccf39e7f97d6f7b2480 +>Signed-off-by: nik-redhat + +Upstream link: https://review.gluster.org/c/glusterfs/+/25046 +BUG: 1997447 + +Change-Id: I576b4ce610948bdb84eb30377a684c54df718bdc +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280063 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 2 ++ + xlators/cluster/dht/src/dht-shared.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index edfc6e7..e6a16ff 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -4296,6 +4296,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + index = conf->local_subvols_cnt; + + uuid_list_copy = gf_strdup(uuid_list); ++ if (!uuid_list_copy) ++ goto unlock; + + for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str; + uuid_str = next_uuid_str) { +diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c +index 58e3339..cca272a 100644 +--- a/xlators/cluster/dht/src/dht-shared.c ++++ b/xlators/cluster/dht/src/dht-shared.c +@@ -567,6 +567,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag, + pattern_str = strtok_r(data, ",", &tmp_str); + while (pattern_str) { + dup_str = gf_strdup(pattern_str); ++ if (!dup_str) ++ goto out; + pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1); + if (!pattern_list) { + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0594-glusterd-resource-leaks.patch b/SOURCES/0594-glusterd-resource-leaks.patch new file mode 100644 index 0000000..ccc2f3b --- /dev/null +++ b/SOURCES/0594-glusterd-resource-leaks.patch @@ -0,0 +1,52 @@ +From 663df92f9b4b9f35ae10f84487494829987e2f58 Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Fri, 25 Sep 2020 17:56:19 +0530 +Subject: [PATCH 594/610] glusterd: resource leaks + +Issue: +iobref was not freed before exiting the function. + +Fix: +Modified the code to free iobref before exiting. 
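+
+Condensed sketch of the corrected control flow (labels as in the diff
+below):
+
+    ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL,
+                          0, iobref, frame, NULL, 0, NULL, 0, NULL);
+    /* no early "return ret" here: fall through so the local iobref
+     * reference is released on success and failure alike */
+    free_iobref:
+        iobref_unref(iobref);
+    ...
+    err:
+        return ret;   /* instead of a hard-coded -1 */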
+ +CID: 1430107 +>Updates: #1060 + +>Change-Id: I89351b3aa645792eb8dda6292d1e559057b02d8b +>Signed-off-by: nik-redhat + +Upstream link: https://review.gluster.org/c/glusterfs/+/25042 +BUG: 1997447 + +Change-Id: Iea56afca015a7c0f15ab32f490ea27f5ea323a07 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280066 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 6d40be5..c037933 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -6042,7 +6042,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path, + GF_ATOMIC_INC(conf->blockers); + ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, + iobref, frame, NULL, 0, NULL, 0, NULL); +- return ret; + + free_iobref: + iobref_unref(iobref); +@@ -6051,7 +6050,7 @@ maybe_free_iobuf: + iobuf_unref(iobuf); + } + err: +- return -1; ++ return ret; + } + + extern size_t +-- +1.8.3.1 + diff --git a/SOURCES/0595-glusterd-use-after-free-coverity-issue.patch b/SOURCES/0595-glusterd-use-after-free-coverity-issue.patch new file mode 100644 index 0000000..7430838 --- /dev/null +++ b/SOURCES/0595-glusterd-use-after-free-coverity-issue.patch @@ -0,0 +1,51 @@ +From 025718f1734655c411475ea338cee1659d96763e Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Thu, 3 Sep 2020 15:42:45 +0530 +Subject: [PATCH 595/610] glusterd: use after free (coverity issue) + +Issue: +dict_unref is called on the same dict again, +in the out label of the code, which causes the +use after free issue. + +Fix: +Set the dict to NULL after unref, to avoid +use after free issue. + +CID: 1430127 + +>Updates: #1060 + +>Change-Id: Ide9a5cbc5f496705c671e72b0260da6d4c06f16d +>Signed-off-by: nik-redhat + +Upstream link: https://review.gluster.org/c/glusterfs/+/24946 +BUG: 1997447 + +Change-Id: Id1e58cd6226b9329ad49bd5b75ee96a3a5ec5ab7 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280067 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index 386eed2..b0fa490 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -2039,8 +2039,9 @@ glusterd_update_snaps_synctask(void *opaque) + "Failed to remove snap %s", snap->snapname); + goto out; + } +- if (dict) +- dict_unref(dict); ++ ++ dict_unref(dict); ++ dict = NULL; + } + snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix); + ret = dict_get_int32(peer_data, buf, &val); +-- +1.8.3.1 + diff --git a/SOURCES/0596-locks-null-dereference.patch b/SOURCES/0596-locks-null-dereference.patch new file mode 100644 index 0000000..4ad016f --- /dev/null +++ b/SOURCES/0596-locks-null-dereference.patch @@ -0,0 +1,43 @@ +From 099fcac6fecef6fc367d8fcae8442195f3f174db Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Fri, 25 Sep 2020 18:19:39 +0530 +Subject: [PATCH 596/610] locks: null dereference + +Added a null check before executing the strtok_r() +to avoid null dereference in case of strdup() failure. 
+
+CID: 1407938
+>Updates: #1060
+
+>Change-Id: Iec6e72ae8cb54f6d0a287615c43756325b2026ec
+>Signed-off-by: nik-redhat
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/25045
+BUG: 1997447
+
+Change-Id: I47e6e2402badaf4103607b4164f19142a99a2f71
+Signed-off-by: nik-redhat
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280065
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/features/locks/src/posix.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
+index d5effef..03c4907 100644
+--- a/xlators/features/locks/src/posix.c
++++ b/xlators/features/locks/src/posix.c
+@@ -494,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
+ char *save_ptr = NULL;
+
+ tmp_key = gf_strdup(key);
++ if (!tmp_key)
++ return -1;
++
+ strtok_r(tmp_key, ":", &save_ptr);
+ if (!*save_ptr) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL,
+--
+1.8.3.1
+
diff --git a/SOURCES/0597-glusterd-memory-deallocated-twice.patch b/SOURCES/0597-glusterd-memory-deallocated-twice.patch
new file mode 100644
index 0000000..7e2c49f
--- /dev/null
+++ b/SOURCES/0597-glusterd-memory-deallocated-twice.patch
@@ -0,0 +1,163 @@
+From 59c05230c0df58765e30553c66bbcc0c9965d362 Mon Sep 17 00:00:00 2001
+From: nik-redhat
+Date: Tue, 11 Aug 2020 23:12:26 +0530
+Subject: [PATCH 597/610] glusterd: memory deallocated twice
+
+Issue:
+If the pointer tmpiter is destroyed in the function
+body, the out label still checks for the same pointer
+and tries to destroy it again.
+
+Fix:
+Instead of passing the ptr by value, we pass it by
+reference; setting the ptr to NULL inside the function
+then persists in the calling function, and the next
+time gf_store_iter_destroy() is called it won't try
+to free the ptr again.
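+
+The idiom in isolation (generic C sketch, not glusterfs code):
+
+    void destroy(char **pp)
+    {
+        free(*pp);
+        *pp = NULL;    /* clears the caller's copy of the pointer */
+    }
+
+    destroy(&p);
+    destroy(&p);       /* second call is a harmless free(NULL),
+                        * not a double free */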
+ +CID: 1430122 + +>Updates: #1060 + +>Change-Id: I019cea8e301c7cc87be792c03b58722fc96f04ef +>Signed-off-by: nik-redhat + +Upstream link: https://review.gluster.org/c/glusterfs/+/24855 +BUG: 1997447 + +Change-Id: Ib403efd08d47a69d25f291ae61c9cbfcaaa05da8 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280076 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/store.h | 2 +- + libglusterfs/src/store.c | 12 +++++++----- + xlators/mgmt/glusterd/src/glusterd-store.c | 16 ++++++++-------- + 3 files changed, 16 insertions(+), 14 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h +index 68a20ad..76af2df 100644 +--- a/libglusterfs/src/glusterfs/store.h ++++ b/libglusterfs/src/glusterfs/store.h +@@ -93,7 +93,7 @@ int32_t + gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value); + + int32_t +-gf_store_iter_destroy(gf_store_iter_t *iter); ++gf_store_iter_destroy(gf_store_iter_t **iter); + + char * + gf_store_strerror(gf_store_op_errno_t op_errno); +diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c +index 3af627a..e4931bf 100644 +--- a/libglusterfs/src/store.c ++++ b/libglusterfs/src/store.c +@@ -606,23 +606,25 @@ out: + } + + int32_t +-gf_store_iter_destroy(gf_store_iter_t *iter) ++gf_store_iter_destroy(gf_store_iter_t **iter) + { + int32_t ret = -1; + +- if (!iter) ++ if (!(*iter)) + return 0; + + /* gf_store_iter_new will not return a valid iter object with iter->file + * being NULL*/ +- ret = fclose(iter->file); ++ ret = fclose((*iter)->file); + if (ret) + gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, + "Unable" + " to close file: %s, ret: %d", +- iter->filepath, ret); ++ (*iter)->filepath, ret); ++ ++ GF_FREE(*iter); ++ *iter = NULL; + +- GF_FREE(iter); + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index a8651d8..e027575 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -2576,7 +2576,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo) + ret = 0; + + out: +- if (gf_store_iter_destroy(iter)) { ++ if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; +@@ -2895,13 +2895,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo) + ret = 0; + + out: +- if (gf_store_iter_destroy(tmpiter)) { ++ if (gf_store_iter_destroy(&tmpiter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + } + +- if (gf_store_iter_destroy(iter)) { ++ if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; +@@ -3067,7 +3067,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo) + ret = 0; + + out: +- if (gf_store_iter_destroy(iter)) { ++ if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; +@@ -3379,7 +3379,7 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo) + ret = 0; + + out: +- if (gf_store_iter_destroy(iter)) { ++ if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; +@@ -3574,7 +3574,7 @@ 
glusterd_store_retrieve_options(xlator_t *this)
+         goto out;
+     ret = 0;
+ out:
+-    (void)gf_store_iter_destroy(iter);
++    (void)gf_store_iter_destroy(&iter);
+     gf_store_handle_destroy(shandle);
+     return ret;
+ }
+@@ -4026,7 +4026,7 @@ glusterd_store_update_snap(glusterd_snap_t *snap)
+     ret = 0;
+ 
+ out:
+-    if (gf_store_iter_destroy(iter)) {
++    if (gf_store_iter_destroy(&iter)) {
+         gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+                "Failed to destroy store iter");
+         ret = -1;
+@@ -4774,7 +4774,7 @@ glusterd_store_retrieve_peers(xlator_t *this)
+     is_ok = _gf_true;
+ 
+ next:
+-    (void)gf_store_iter_destroy(iter);
++    (void)gf_store_iter_destroy(&iter);
+ 
+     if (!is_ok) {
+         gf_log(this->name, GF_LOG_WARNING,
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0598-glusterd-null-dereference.patch b/SOURCES/0598-glusterd-null-dereference.patch
new file mode 100644
index 0000000..fac1b8f
--- /dev/null
+++ b/SOURCES/0598-glusterd-null-dereference.patch
@@ -0,0 +1,51 @@
+From 84aaaded4e958a10c7492233c053e3c681f2d575 Mon Sep 17 00:00:00 2001
+From: nik-redhat
+Date: Thu, 2 Jul 2020 18:10:32 +0530
+Subject: [PATCH 598/610] glusterd: null dereference
+
+Issue:
+There has been either an explicit null
+dereference or a dereference after a null
+check in some cases.
+
+Fix:
+Added the proper condition for the null check
+and fixed the null dereferencing.
+
+CID: 1430106 : Dereference after null check
+CID: 1430120 : Explicit null dereferenced
+CID: 1430132 : Dereference after null check
+CID: 1430134 : Dereference after null check
+
+>Change-Id: I7e795cf9f7146a633097c26a766f16b159881fa3
+>Updates: #1060
+>Signed-off-by: nik-redhat
+
+Upstream link: https://review.gluster.org/c/glusterfs/+/24664
+BUG: 1997447
+
+Change-Id: I2b2632c93094d0e7b9fbd65a2ca2b0eaf6212d79
+Signed-off-by: nik-redhat
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280083
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 05c9e11..f1807cd 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1797,7 +1797,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+     pending_node = NULL;
+     ret = 0;
+ out:
+-    if (pending_node)
++    if (pending_node && pending_node->node)
+         glusterd_pending_node_put_rpc(pending_node);
+ 
+     if (rsp_dict)
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0599-afr-null-dereference-nagative-value.patch b/SOURCES/0599-afr-null-dereference-nagative-value.patch
new file mode 100644
index 0000000..7d59cc7
--- /dev/null
+++ b/SOURCES/0599-afr-null-dereference-nagative-value.patch
@@ -0,0 +1,59 @@
+From 4186f81596a481a5c0c5a707fc9b2358ee8f49f0 Mon Sep 17 00:00:00 2001
+From: nik-redhat
+Date: Fri, 3 Jul 2020 17:18:33 +0530
+Subject: [PATCH 599/610] afr: null dereference & negative value
+
+Added a check for NULL before dereferencing
+the object as it may be NULL in a few cases
+inside the function. Also, added a check for
+the negative value of gfid_idx.
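+
+For illustration, the defect class and the guards being added, reduced
+to a standalone C sketch (hypothetical names, not the actual afr code):
+
+    struct heal_ctx {
+        int healed[16];
+    };
+
+    static int
+    record_heal(struct heal_ctx *ctx, int gfid_idx)
+    {
+        /* ctx may legitimately be NULL on some call paths, so check
+         * it before any member access. */
+        if (!ctx)
+            return -1;
+        /* gfid_idx can come back negative on failure; using it to
+         * subscript an array would be undefined behaviour. */
+        if (gfid_idx < 0 || gfid_idx >= 16)
+            return -1;
+        ctx->healed[gfid_idx]++;
+        return 0;
+    }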
+ +CID: 1430140 +CID: 1430145 + +>Change-Id: Ib7d23459b48bbc471dbcccab6d20572261882d11 +>Updates: #1060 +>Signed-off-by: nik-redhat + +Upstream link: https://review.gluster.org/c/glusterfs/+/24671 +BUG: 1997447 + +Change-Id: I7e705a106d97001b67f5cde8589413c0c24ee507 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280085 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-self-heal-common.c | 2 +- + xlators/cluster/afr/src/afr-self-heal-name.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 0954d2c..cbd5117 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -140,7 +140,7 @@ heal: + } + } + out: +- if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) { ++ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) { + ret = -afr_final_errno(local, priv); + } + loc_wipe(&loc); +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 9ec2066..c5ab8d7 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -353,7 +353,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode, + replies, gfid, locked_on, source, sources, + is_gfid_absent, &gfid_idx); +- if (ret) ++ if (ret || (gfid_idx < 0)) + return ret; + + ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname, +-- +1.8.3.1 + diff --git a/SOURCES/0600-dht-xlator-integer-handling-issue.patch b/SOURCES/0600-dht-xlator-integer-handling-issue.patch new file mode 100644 index 0000000..c3970ac --- /dev/null +++ b/SOURCES/0600-dht-xlator-integer-handling-issue.patch @@ -0,0 +1,161 @@ +From 1cd16553d436fa703f5e18d71c35108d0e179e8b Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Thu, 9 Apr 2020 11:36:34 +0530 +Subject: [PATCH 600/610] dht xlator: integer handling issue + +Issue: The ret value is passed to the function +instead of the proper errno value + +Fix: Passing the errno generated to +the log function + +CID: 1415824 : Improper use of negative value +CID: 1420205 : Improper use of negative value +>Change-Id: Iaa7407ebd03eda46a2c027695e6bf0f598b371b2 +>Updates: #1060 +>Signed-off-by: nik-redhat + +Upstream link: https://review.gluster.org/c/glusterfs/+/24314 +BUG: 1997447 + +Change-Id: Ibb7f432dbcc9ffd8dff6be6f984a6705894d6bef +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280086 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 12 ++++++++---- + xlators/cluster/dht/src/dht-common.h | 2 +- + xlators/cluster/dht/src/dht-helper.c | 9 ++++++--- + xlators/cluster/dht/src/dht-selfheal.c | 8 +++++--- + 4 files changed, 20 insertions(+), 11 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index e6a16ff..5eaaa1e 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -672,13 +672,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame) + + if (local->need_xattr_heal && !heal_path) { + local->need_xattr_heal = 0; +- ret = dht_dir_xattr_heal(this, local); +- if (ret) +- gf_msg(this->name, 
GF_LOG_ERROR, ret, ++ ret = dht_dir_xattr_heal(this, local, &op_errno); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "xattr heal failed for " + "directory gfid is %s ", + gfid_local); ++ } + } + } + +@@ -1205,7 +1206,7 @@ dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size, + to non hashed subvol + */ + int +-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) ++dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno) + { + dht_local_t *copy_local = NULL; + call_frame_t *copy = NULL; +@@ -1217,6 +1218,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) + "No gfid exists for path %s " + "so healing xattr is not possible", + local->loc.path); ++ *op_errno = EIO; + goto out; + } + +@@ -1230,6 +1232,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) + "Memory allocation failed " + "for path %s gfid %s ", + local->loc.path, gfid_local); ++ *op_errno = ENOMEM; + DHT_STACK_DESTROY(copy); + } else { + copy_local->stbuf = local->stbuf; +@@ -1244,6 +1247,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) + "Synctask creation failed to heal xattr " + "for path %s gfid %s ", + local->loc.path, gfid_local); ++ *op_errno = ENOMEM; + DHT_STACK_DESTROY(copy); + } + } +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index b856c68..1cb1c0c 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -1493,7 +1493,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, + dict_t *src, int *uret, int *uflag); + + int +-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local); ++dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno); + + int32_t + dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size, +diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c +index 4c3940a..d3444b3 100644 +--- a/xlators/cluster/dht/src/dht-helper.c ++++ b/xlators/cluster/dht/src/dht-helper.c +@@ -2105,6 +2105,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data) + dht_local_t *local = NULL; + xlator_t *this = NULL; + int ret = -1; ++ int op_errno = 0; + + local = heal_frame->local; + main_frame = local->main_frame; +@@ -2114,10 +2115,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data) + dht_set_fixed_dir_stat(&local->postparent); + if (local->need_xattr_heal) { + local->need_xattr_heal = 0; +- ret = dht_dir_xattr_heal(this, local); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ ret = dht_dir_xattr_heal(this, local, &op_errno); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, + "xattr heal failed for directory %s ", local->loc.path); ++ } + } + + DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf, +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index 8af7301..2da9817 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -1471,6 +1471,7 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + { + int missing_dirs = 0; + int i = 0; ++ int op_errno = 0; + int ret = -1; + dht_local_t *local = NULL; + xlator_t *this = NULL; +@@ -1493,13 +1494,14 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + if (!__is_root_gfid(local->stbuf.ia_gfid)) { + if 
(local->need_xattr_heal) { + local->need_xattr_heal = 0; +- ret = dht_dir_xattr_heal(this, local); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, ret, ++ ret = dht_dir_xattr_heal(this, local, &op_errno); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "%s:xattr heal failed for " + "directory (gfid = %s)", + local->loc.path, local->gfid); ++ } + } else { + if (!gf_uuid_is_null(local->gfid)) + gf_uuid_copy(loc->gfid, local->gfid); +-- +1.8.3.1 + diff --git a/SOURCES/0601-coverity-resource-leak-2321.patch b/SOURCES/0601-coverity-resource-leak-2321.patch new file mode 100644 index 0000000..35dc964 --- /dev/null +++ b/SOURCES/0601-coverity-resource-leak-2321.patch @@ -0,0 +1,99 @@ +From 6d7049a19029331266f70f68d860bbccef01a35d Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha +Date: Thu, 8 Jul 2021 11:26:54 +0530 +Subject: [PATCH 601/610] coverity: resource leak (#2321) + +Issue: +Variable `arg` is not freed before the function exits, +and leads to resource leak. + +Fix: +Free the arg variable if the status of function call +`glusterd_compare_friend_volume` is +`GLUSTERD_VOL_COMP_UPDATE_REQ`, or if the `glusterd_launch_synctask` +fails to start the process. + +And, added a check for return value on calling +`glusterd_launch_synctask` function and exit if the +thread creation fails. + +CID: 1401716 +>Updates: #1060 + +>Change-Id: I4abd621771f88853d8d01e9039cdee2f3d862c4f +>Signed-off-by: nik-redhat + +Upstream link: https://github.com/gluster/glusterfs/pull/2321 +BUG: 1997447 + +Change-Id: Ida81dfcd58c5ef45d3ae036d6bd6b36dc6693538 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280090 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 10 +++++++--- + xlators/mgmt/glusterd/src/glusterd-utils.h | 2 +- + 2 files changed, 8 insertions(+), 4 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index c037933..cec9c20 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5371,6 +5371,7 @@ glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status, + + if (GLUSTERD_VOL_COMP_RJT == *status) { + ret = 0; ++ update = _gf_false; + goto out; + } + if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) { +@@ -5385,11 +5386,12 @@ glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status, + * first brick to come up before attaching the subsequent bricks + * in case brick multiplexing is enabled + */ +- glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg); ++ ret = glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, ++ arg); + } + + out: +- if (ret && arg) { ++ if ((ret || !update) && arg) { + dict_unref(arg->peer_data); + dict_unref(arg->peer_ver_data); + GF_FREE(arg); +@@ -13115,7 +13117,7 @@ gd_default_synctask_cbk(int ret, call_frame_t *frame, void *opaque) + return ret; + } + +-void ++int + glusterd_launch_synctask(synctask_fn_t fn, void *opaque) + { + xlator_t *this = NULL; +@@ -13131,6 +13133,8 @@ glusterd_launch_synctask(synctask_fn_t fn, void *opaque) + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SPAWN_SVCS_FAIL, + "Failed to spawn bricks" + " and other volume related services"); ++ ++ return ret; + } + + /* +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 4541471..3f4f3b8 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -681,7 +681,7 @@ int32_t + glusterd_take_lvm_snapshot(glusterd_brickinfo_t *brickinfo, + char *origin_brick_path); + +-void ++int + glusterd_launch_synctask(synctask_fn_t fn, void *opaque); + + int +-- +1.8.3.1 + diff --git a/SOURCES/0602-coverity-null-dereference-2395.patch b/SOURCES/0602-coverity-null-dereference-2395.patch new file mode 100644 index 0000000..6edc3aa --- /dev/null +++ b/SOURCES/0602-coverity-null-dereference-2395.patch @@ -0,0 +1,87 @@ +From 2ff83650a5f05e3f06853df6d79d3b18f88dfb23 Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha +Date: Thu, 6 May 2021 10:45:46 +0530 +Subject: [PATCH 602/610] coverity: null dereference (#2395) + +Fix: +Updated the code to make it more readable and fixed +the NULL dereferencing. + +CID: 1234622 +>Updates: #1060 + +>Change-Id: I05bd203bc46fe84be86398bd664a3485409c3bfe +>Signed-off-by: nik-redhat + +Upstream link: https://github.com/gluster/glusterfs/pull/2395 +BUG: 1997447 + +Change-Id: If39cc85115de673a83b6c97137ea8d1f0f825245 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280093 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-lock.c | 32 +++++++++++++++----------------- + 1 file changed, 15 insertions(+), 17 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c +index f9bac4f..6474dfa 100644 +--- a/xlators/cluster/dht/src/dht-lock.c ++++ b/xlators/cluster/dht/src/dht-lock.c +@@ -914,37 +914,35 @@ dht_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + dht_local_t *local = NULL; + int lk_index = 0, call_cnt = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; ++ dht_ilock_wrap_t *my_layout; + + local = frame->local; + lk_index = (long)cookie; + ++ my_layout = &(local->lock[0].layout.my_layout); ++ + if (op_ret == -1) { +- local->lock[0].layout.my_layout.op_ret = -1; +- local->lock[0].layout.my_layout.op_errno = op_errno; +- +- if (local && local->lock[0].layout.my_layout.locks[lk_index]) { +- uuid_utoa_r(local->lock[0] +- .layout.my_layout.locks[lk_index] +- ->loc.inode->gfid, +- gfid); +- +- gf_msg_debug( +- this->name, op_errno, +- "inodelk failed on gfid: %s " +- "subvolume: %s", +- gfid, +- local->lock[0].layout.my_layout.locks[lk_index]->xl->name); ++ my_layout->op_ret = -1; ++ my_layout->op_errno = op_errno; ++ ++ if (my_layout->locks[lk_index]) { ++ uuid_utoa_r(my_layout->locks[lk_index]->loc.inode->gfid, gfid); ++ ++ gf_msg_debug(this->name, op_errno, ++ "inodelk failed on gfid: %s " ++ "subvolume: %s", ++ gfid, my_layout->locks[lk_index]->xl->name); + } + + goto out; + } + +- local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true; ++ my_layout->locks[lk_index]->locked = _gf_true; + + out: + call_cnt = dht_frame_return(frame); + if (is_last_call(call_cnt)) { +- if (local->lock[0].layout.my_layout.op_ret < 0) { ++ if (my_layout->op_ret < 0) { + dht_inodelk_cleanup(frame); + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0603-Coverity-Resource-leak-fix-CID-1356547.patch b/SOURCES/0603-Coverity-Resource-leak-fix-CID-1356547.patch new file mode 100644 index 0000000..8c6b53b --- /dev/null +++ b/SOURCES/0603-Coverity-Resource-leak-fix-CID-1356547.patch @@ -0,0 +1,51 @@ +From 015e6cac71b0a0c330f1e4792f9d60214b191f45 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Thu, 7 Oct 2021 21:07:46 +0530 +Subject: [PATCH 603/610] Coverity: Resource leak fix (CID: 
1356547)
+
+Issue:
+In function gf_svc_readdirp() there is a chance that memory will be
+allocated for 'local' but not released in the failure path.
+
+Fix:
+Assign 'local' to 'frame->local' immediately after the successful allocation, so
+it will be released by the existing failure path code itself.
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2362/
+> Change-Id: I4474dc4d4be5432d169cb7d434728f211054997e
+> Signed-off-by: karthik-us
+> Updates: gluster#1060
+
+BUG: 1997447
+Change-Id: I4474dc4d4be5432d169cb7d434728f211054997e
+Signed-off-by: karthik-us
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280100
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/features/snapview-client/src/snapview-client.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c
+index 9c789ae..e97db89 100644
+--- a/xlators/features/snapview-client/src/snapview-client.c
++++ b/xlators/features/snapview-client/src/snapview-client.c
+@@ -2156,6 +2156,7 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                "failed to allocate local");
+         goto out;
+     }
++    frame->local = local;
+ 
+     /*
+      * This is mainly for samba shares (or windows clients). As part of
+@@ -2184,7 +2185,6 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ 
+     local->subvolume = subvolume;
+     local->fd = fd_ref(fd);
+-    frame->local = local;
+ 
+     STACK_WIND(frame, gf_svc_readdirp_cbk, subvolume, subvolume->fops->readdirp,
+                fd, size, off, xdata);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0604-Coverity-Fix-dereference-before-null-check-CID-13914.patch b/SOURCES/0604-Coverity-Fix-dereference-before-null-check-CID-13914.patch
new file mode 100644
index 0000000..a680327
--- /dev/null
+++ b/SOURCES/0604-Coverity-Fix-dereference-before-null-check-CID-13914.patch
@@ -0,0 +1,50 @@
+From dee1c932df22ee12fe4568b40e58a475309e62fd Mon Sep 17 00:00:00 2001
+From: karthik-us
+Date: Thu, 7 Oct 2021 21:18:49 +0530
+Subject: [PATCH 604/610] Coverity: Fix dereference before null check (CID:
+ 1391415)
+
+Problem:
+In function gf_client_dump_inodes_to_dict() there is a null check for
+a variable which is already dereferenced in the previous line. This
+means that this variable could be null, but it is not being validated
+for null before being dereferenced in the first place.
+
+Fix:
+Added a null check before dereferencing the variable in the first place.
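+
+The shape of the defect and of the fix, as a standalone C sketch
+(hypothetical struct; the real code checks client->bound_xl):
+
+    #include <string.h>
+
+    struct xl {
+        const char *name;
+    };
+
+    /* Buggy ordering reads p->name before p is known to be valid:
+     *     if (!strcmp(p->name, name)) { if (p && ...) { ... } }
+     * The fixed ordering validates first, then dereferences: */
+    static int
+    name_matches(struct xl *p, const char *name)
+    {
+        return p && p->name && strcmp(p->name, name) == 0;
+    }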
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2369/
+> Change-Id: I988b0e93542782353a8059e33db1522b6a5e55f8
+> Signed-off-by: karthik-us
+> Updates: gluster#1060
+
+BUG: 1997447
+Change-Id: I988b0e93542782353a8059e33db1522b6a5e55f8
+Signed-off-by: karthik-us
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280103
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ libglusterfs/src/client_t.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
+index e875c8b..216900a 100644
+--- a/libglusterfs/src/client_t.c
++++ b/libglusterfs/src/client_t.c
+@@ -828,8 +828,9 @@ gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict)
+                 clienttable->cliententries[count].next_free)
+                 continue;
+             client = clienttable->cliententries[count].client;
+-            if (!strcmp(client->bound_xl->name, this->name)) {
+-                if (client->bound_xl && client->bound_xl->itable) {
++            if (client->bound_xl &&
++                !strcmp(client->bound_xl->name, this->name)) {
++                if (client->bound_xl->itable) {
+                     /* Presently every brick contains only
+                      * one bound_xl for all connections.
+                      * This will lead to duplicating of
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0605-Coverity-Fix-copy-into-fixed-size-buffer-CID-1325542.patch b/SOURCES/0605-Coverity-Fix-copy-into-fixed-size-buffer-CID-1325542.patch
new file mode 100644
index 0000000..849c959
--- /dev/null
+++ b/SOURCES/0605-Coverity-Fix-copy-into-fixed-size-buffer-CID-1325542.patch
@@ -0,0 +1,53 @@
+From 25fc2530f7ee6d7267e2ccc1b75a47a3ae539dff Mon Sep 17 00:00:00 2001
+From: karthik-us
+Date: Thu, 7 Oct 2021 21:29:27 +0530
+Subject: [PATCH 605/610] Coverity: Fix copy into fixed size buffer (CID:
+ 1325542)
+
+Problem:
+In __mnt3_fresh_lookup() mres->resolveloc.path is being copied into
+a fixed-size string mres->remainingdir, using strncpy without checking
+the size of the source string. This could lead to a string overflow.
+
+Fix:
+Copy only up to the destination buffer size and check whether the
+source string overflows it. If so, log an error message and return.
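+
+In isolation, the bounded-copy idiom the fix adopts looks like this
+(standalone C sketch; the helper name is hypothetical):
+
+    #include <stdio.h>
+
+    static int
+    copy_path(char *dst, size_t dst_size, const char *src)
+    {
+        /* snprintf always NUL-terminates and returns the length the
+         * full string would need, so truncation is detectable.
+         * strncpy(dst, src, strlen(src)) bounds the copy by the
+         * source length and does not terminate the destination. */
+        if ((size_t)snprintf(dst, dst_size, "%s", src) >= dst_size)
+            return -1; /* source did not fit in the fixed buffer */
+        return 0;
+    }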
+
+> Upstream patch: https://github.com/gluster/glusterfs/pull/2474/
+> Change-Id: I26dd0653d2636c667ad4e356d12d3d51956c77c3
+> Signed-off-by: karthik-us
+> Updates: gluster#1060
+
+BUG: 1997447
+Change-Id: I26dd0653d2636c667ad4e356d12d3d51956c77c3
+Signed-off-by: karthik-us
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280106
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/nfs/server/src/mount3.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
+index 734453c..3951b9e 100644
+--- a/xlators/nfs/server/src/mount3.c
++++ b/xlators/nfs/server/src/mount3.c
+@@ -1104,8 +1104,13 @@ __mnt3_fresh_lookup(mnt3_resolve_t *mres)
+ {
+     inode_unlink(mres->resolveloc.inode, mres->resolveloc.parent,
+                  mres->resolveloc.name);
+-    strncpy(mres->remainingdir, mres->resolveloc.path,
+-            strlen(mres->resolveloc.path));
++    if (snprintf(mres->remainingdir, sizeof(mres->remainingdir), "%s",
++                 mres->resolveloc.path) >= sizeof(mres->remainingdir)) {
++        gf_msg(GF_MNT, GF_LOG_ERROR, EFAULT, NFS_MSG_RESOLVE_INODE_FAIL,
++               "Failed to copy resolve path: %s", mres->resolveloc.path);
++        nfs_loc_wipe(&mres->resolveloc);
++        return -EFAULT;
++    }
+     nfs_loc_wipe(&mres->resolveloc);
+     return __mnt3_resolve_subdir(mres);
+ }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0606-dht-handle-DHT_SUBVOL_STATUS_KEY-in-dht_pt_getxattr-.patch b/SOURCES/0606-dht-handle-DHT_SUBVOL_STATUS_KEY-in-dht_pt_getxattr-.patch
new file mode 100644
index 0000000..05ca17b
--- /dev/null
+++ b/SOURCES/0606-dht-handle-DHT_SUBVOL_STATUS_KEY-in-dht_pt_getxattr-.patch
@@ -0,0 +1,69 @@
+From a6ba95b73469ad81d8c5a27293f8d09cc26928a3 Mon Sep 17 00:00:00 2001
+From: Ravishankar N
+Date: Fri, 18 Dec 2020 16:28:29 +0530
+Subject: [PATCH 606/610] dht: handle DHT_SUBVOL_STATUS_KEY in dht_pt_getxattr
+ (#1934)
+
+In non-distribute volumes (plain replicate, ec), DHT uses pass-through
+FOPs (dht_pt_getxattr) instead of the usual FOPs (dht_getxattr). The
+pass-through FOP was not handling the DHT_SUBVOL_STATUS_KEY virtual
+xattr, because of which the geo-rep session was going into a faulty
+state. Fixing it now.
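+
+The dispatch pattern being restored, as a generic C sketch (the key
+string and callback types are stand-ins, not the real
+DHT_SUBVOL_STATUS_KEY value or gluster APIs):
+
+    #include <string.h>
+
+    #define VIRT_KEY "example.virtual-status"
+
+    typedef int (*xattr_fn_t)(const char *key);
+
+    static int
+    getxattr_pass_through(const char *key, xattr_fn_t serve_virtual,
+                          xattr_fn_t wind_to_child)
+    {
+        /* Virtual xattrs are owned by this translator and must be
+         * answered locally even on the pass-through path... */
+        if (key && strncmp(key, VIRT_KEY, strlen(VIRT_KEY)) == 0)
+            return serve_virtual(key);
+        /* ...while real keys keep being forwarded to the child. */
+        return wind_to_child(key);
+    }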
+
+> updates: #1925
+> Change-Id: I766b5b5c047c954a9957ab78aca680eedef1ff1f
+> Signed-off-by: Ravishankar N
+
+Upstream patch: https://github.com/gluster/glusterfs/pull/1934
+
+BUG: 2006205
+Change-Id: I766b5b5c047c954a9957ab78aca680eedef1ff1f
+Signed-off-by: Shwetha K Acharya
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280112
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/cluster/dht/src/dht-common.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 5eaaa1e..c8980e5 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -11584,9 +11584,33 @@ int
+ dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 const char *key, dict_t *xdata)
+ {
++    int op_errno = -1;
++    dht_local_t *local = NULL;
++
++    VALIDATE_OR_GOTO(frame, err);
++    VALIDATE_OR_GOTO(this, err);
++    VALIDATE_OR_GOTO(loc, err);
++    VALIDATE_OR_GOTO(loc->inode, err);
++    VALIDATE_OR_GOTO(this->private, err);
++
++    local = dht_local_init(frame, loc, NULL, GF_FOP_GETXATTR);
++    if (!local) {
++        op_errno = ENOMEM;
++        goto err;
++    }
++
++    if (key &&
++        strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
++        dht_vgetxattr_subvol_status(frame, this, key);
++        return 0;
++    }
++
+     STACK_WIND(frame, dht_pt_getxattr_cbk, FIRST_CHILD(this),
+                FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+     return 0;
++err:
++    DHT_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
++    return 0;
+ }
+ 
+ static int
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0607-SELinux-Fix-boolean-management.patch b/SOURCES/0607-SELinux-Fix-boolean-management.patch
new file mode 100644
index 0000000..4a62b03
--- /dev/null
+++ b/SOURCES/0607-SELinux-Fix-boolean-management.patch
@@ -0,0 +1,121 @@
+From 4b65ff0d1a3d70fcf3cfa8ab769135ae12f529d8 Mon Sep 17 00:00:00 2001
+From: nik-redhat
+Date: Thu, 7 Oct 2021 22:02:32 +0530
+Subject: [PATCH 607/610] SELinux: Fix boolean management
+
+Remove %triggerun ganesha
+This trigger shouldn't be needed to begin with since removing
+selinux-policy-targeted means that the user is switching SELinux off, or is
+switching the policy (to "mls" or "minimum"). In either case the
+current boolean setting is not going to be used any more. The last
+option, removal of glusterfs-ganesha, is covered by '%postun ganesha'.
+But more importantly, the trigger is called every time
+selinux-policy-targeted is updated (which can be avoided).
+%triggerun is executed after %triggerin -
+https://docs.fedoraproject.org/en-US/packaging-guidelines/Scriptlets/#ordering
+So when selinux-policy-targeted is updated, the new version is installed
+first, triggering `semanage boolean -m ganesha_use_fusefs --on`,
+and then the old version is uninstalled, triggering
+`semanage boolean -m ganesha_use_fusefs --off`.
+
+* use selinux_[un]set_booleans instead of "semanage boolean"
+  The macro pair properly manages SELinux stores and doesn't disable the
+  boolean in case it was enabled before ${name}-ganesha was installed.
+
+* Only change booleans when the package is first installed or
+  uninstalled
+Updating ${name}-ganesha would disable the boolean because %postun is
+called after %post (same issue as with the triggers).
+
+Signed-off-by: Vit Mojzis
+Signed-off-by: Kaleb S.
KEITHLEY +Change-Id: Ibb926ffbe00c9f000bd740708c0a4b3435ee7871 +PR: https://github.com/gluster/glusterfs/pull/2833 +Issue: https://github.com/gluster/glusterfs/issues/2522 +Resolves: rhbz#1973566 +Resolves: rhbz#1975400 + +BUG: 1973566 +Change-Id: Idef6cbd6bce35151518d6f76e5b74774e5756fc9 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280114 +Tested-by: RHGS Build Bot +Reviewed-by: Kaleb Keithley +--- + glusterfs.spec.in | 34 +++++++++++++++++++++------------- + 1 file changed, 21 insertions(+), 13 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 424f4ab..a9a83b1 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -954,7 +954,10 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) + %post ganesha +-semanage boolean -m ganesha_use_fusefs --on ++# first install ++if [ $1 -eq 1 ]; then ++ %selinux_set_booleans ganesha_use_fusefs=1 ++fi + exit 0 + %endif + %endif +@@ -962,7 +965,9 @@ exit 0 + %if ( 0%{!?_without_georeplication:1} ) + %post geo-replication + %if ( 0%{?rhel} && 0%{?rhel} >= 8 ) +-%selinux_set_booleans %{selinuxbooleans} ++if [ $1 -eq 1 ]; then ++ %selinux_set_booleans %{selinuxbooleans} ++fi + %endif + if [ $1 -ge 1 ]; then + %systemd_postun_with_restart glusterd +@@ -1089,29 +1094,32 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) + %postun ganesha +-semanage boolean -m ganesha_use_fusefs --off ++if [ $1 -eq 0 ]; then ++ # use the value of ganesha_use_fusefs from before glusterfs-ganesha was installed ++ %selinux_unset_booleans ganesha_use_fusefs=1 ++fi + exit 0 + %endif + %endif + +-##----------------------------------------------------------------------------- +-## All %%trigger should be placed here and keep them sorted +-## +-%if ( 0%{!?_without_server:1} ) +-%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +-%trigger ganesha -- selinux-policy-targeted +-semanage boolean -m ganesha_use_fusefs --on ++%if ( 0%{!?_without_georeplication:1} ) ++%postun geo-replication ++%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) ++if [ $1 -eq 0 ]; then ++ %selinux_unset_booleans %{selinuxbooleans} ++fi + exit 0 + %endif + %endif + + ##----------------------------------------------------------------------------- +-## All %%triggerun should be placed here and keep them sorted ++## All %%trigger should be placed here and keep them sorted + ## + %if ( 0%{!?_without_server:1} ) + %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +-%triggerun ganesha -- selinux-policy-targeted +-semanage boolean -m ganesha_use_fusefs --off ++# ensure ganesha_use_fusefs is on in case of policy mode switch (eg. mls->targeted) ++%triggerin ganesha -- selinux-policy-targeted ++semanage boolean -m ganesha_use_fusefs --on -S targeted + exit 0 + %endif + %endif +-- +1.8.3.1 + diff --git a/SOURCES/0608-cluster-ec-Track-heal-statistics-in-shd.patch b/SOURCES/0608-cluster-ec-Track-heal-statistics-in-shd.patch new file mode 100644 index 0000000..b08d7a9 --- /dev/null +++ b/SOURCES/0608-cluster-ec-Track-heal-statistics-in-shd.patch @@ -0,0 +1,143 @@ +From d806760f1d4c78a2519b01f1c2d07aba0c533755 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Fri, 28 Aug 2020 16:03:54 +0530 +Subject: [PATCH 608/610] cluster/ec: Track heal statistics in shd + +With this change we should be able to inspect number of heals +attempted and completed by each shd. 
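+
+The counting scheme, restated with plain C11 atomics for illustration
+(the patch itself uses gluster's GF_ATOMIC_* wrappers):
+
+    #include <stdatomic.h>
+    #include <stdbool.h>
+
+    static atomic_ulong heals_attempted;
+    static atomic_ulong heals_completed;
+
+    static void
+    record_heal_result(bool completed)
+    {
+        /* Every heal attempt bumps the first counter; only heals
+         * whose status reports zero bad bricks bump the second. */
+        atomic_fetch_add(&heals_attempted, 1);
+        if (completed)
+            atomic_fetch_add(&heals_completed, 1);
+    }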
+ +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24926/ +> fixes: #1453 +> Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39 +> Signed-off-by: Pranith Kumar K + +BUG: 1853631 +Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39 +Signed-off-by: Sheetal Pamecha +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280208 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-heald.c | 49 ++++++++++++++++++++++++++++++++++++++- + xlators/cluster/ec/src/ec-types.h | 5 ++++ + xlators/cluster/ec/src/ec.c | 6 +++++ + 3 files changed, 59 insertions(+), 1 deletion(-) + +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index 4f4b6aa..cd4d3ad 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -152,15 +152,58 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name) + return ret; + } + ++static gf_boolean_t ++ec_is_heal_completed(char *status) ++{ ++ char *bad_pos = NULL; ++ char *zero_pos = NULL; ++ ++ if (!status) { ++ return _gf_false; ++ } ++ ++ /*Logic: ++ * Status will be of the form Good: , Bad: ++ * If heal completes, if we do strchr for '0' it should be present after ++ * 'Bad:' i.e. strRchr for ':' ++ * */ ++ ++ zero_pos = strchr(status, '0'); ++ bad_pos = strrchr(status, ':'); ++ if (!zero_pos || !bad_pos) { ++ /*malformed status*/ ++ return _gf_false; ++ } ++ ++ if (zero_pos > bad_pos) { ++ return _gf_true; ++ } ++ ++ return _gf_false; ++} ++ + int + ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, + gf_boolean_t full) + { + dict_t *xdata = NULL; ++ dict_t *dict = NULL; + uint32_t count; + int32_t ret; ++ char *heal_status = NULL; ++ ec_t *ec = healer->this->private; ++ ++ GF_ATOMIC_INC(ec->stats.shd.attempted); ++ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL, ++ &xdata); ++ if (ret == 0) { ++ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) { ++ if (ec_is_heal_completed(heal_status)) { ++ GF_ATOMIC_INC(ec->stats.shd.completed); ++ } ++ } ++ } + +- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata); + if (!full && (loc->inode->ia_type == IA_IFDIR)) { + /* If we have just healed a directory, it's possible that + * other index entries have appeared to be healed. */ +@@ -179,6 +222,10 @@ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, + dict_unref(xdata); + } + ++ if (dict) { ++ dict_unref(dict); ++ } ++ + return ret; + } + +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 700dc39..ef7a7fe 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -626,6 +626,11 @@ struct _ec_statistics { + requests. (Basically memory allocation + errors). 
*/
+     } stripe_cache;
++    struct {
++        gf_atomic_t attempted; /*Number of heals attempted on
++                                 files/directories*/
++        gf_atomic_t completed; /*Number of heals completed on files/directories*/
++    } shd;
+ };
+ 
+ struct _ec {
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 047cdd8..24de9e8 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -649,6 +649,8 @@ ec_statistics_init(ec_t *ec)
+     GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
+     GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
+     GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
++    GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
++    GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
+ }
+ 
+ int32_t
+@@ -1445,6 +1447,10 @@ ec_dump_private(xlator_t *this)
+                        GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
+     gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
+                        GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
++    gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
++                       GF_ATOMIC_GET(ec->stats.shd.attempted));
++    gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
++                       GF_ATOMIC_GET(ec->stats.shd.completed));
+ 
+     return 0;
+ }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0609-feature-shard-wrong-dname-results-in-dentry-not-foun.patch b/SOURCES/0609-feature-shard-wrong-dname-results-in-dentry-not-foun.patch
new file mode 100644
index 0000000..a3290cb
--- /dev/null
+++ b/SOURCES/0609-feature-shard-wrong-dname-results-in-dentry-not-foun.patch
@@ -0,0 +1,43 @@
+From 89cdfb40264c12105a1b4990fa9b45290aa6cef0 Mon Sep 17 00:00:00 2001
+From: Vinayakswami Hariharmath
+Date: Fri, 8 Oct 2021 09:40:41 +0530
+Subject: [PATCH 609/610] feature/shard: wrong dname results in dentry not
+ found error
+
+A wrong dname passed to inode_unlink() in
+shard_evicted_inode_fsync_cbk() results in a "dentry not found"
+error.
+
+This patch addresses the issue.
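+
+Roughly, a shard block's dentry name is derived from the base file's
+gfid plus the block number, along these lines (simplified C sketch;
+gfid formatting reduced to a plain string, assuming the usual
+"<gfid>.<block>" layout under /.shard):
+
+    #include <stdio.h>
+
+    static void
+    block_bname(char *buf, size_t len, const char *base_gfid_str,
+                int block_num)
+    {
+        /* The dentry under /.shard was linked using the base file's
+         * gfid, so the same gfid must be used to unlink it; a name
+         * built from the shard inode's own gfid was never linked,
+         * hence "dentry not found". */
+        snprintf(buf, len, "%s.%d", base_gfid_str, block_num);
+    }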
+
+> upstream patch: https://github.com/gluster/glusterfs/pull/2475
+> Fixes: #2470
+> Change-Id: I6c479980ae3fa7ba558327055a9e5e5c2d2a850f
+> Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com
+
+BUG: 1911665
+Change-Id: I96aa5f57303b69a08990de039ddeecad7e7ae6af
+Signed-off-by: Vinayakswami Hariharmath
+Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280202
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/features/shard/src/shard.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index b828ff9..882373f 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -950,7 +950,7 @@ shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ {
+         __shard_inode_ctx_get(shard_inode, this, &ctx);
+         if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
+-            shard_make_block_bname(ctx->block_num, shard_inode->gfid,
++            shard_make_block_bname(ctx->block_num, ctx->base_gfid,
+                                    block_bname, sizeof(block_bname));
+             inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
+             /* The following unref corresponds to the ref held by
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0610-glusterfs.spec.in-remove-condtionals-from-tar-depend.patch b/SOURCES/0610-glusterfs.spec.in-remove-condtionals-from-tar-depend.patch
new file mode 100644
index 0000000..132da9c
--- /dev/null
+++ b/SOURCES/0610-glusterfs.spec.in-remove-condtionals-from-tar-depend.patch
@@ -0,0 +1,51 @@
+From b3e86a66de224107f6760157a7cb692227e42954 Mon Sep 17 00:00:00 2001
+From: Shwetha Acharya
+Date: Mon, 30 Aug 2021 18:54:15 +0530
+Subject: [PATCH 610/610] glusterfs.spec.in: remove conditionals from tar
+ dependency (#2734)
+
+* glusterfs.spec.in: remove conditionals from tar dependency
+
+The conditional on the rhel minor version fails, so tar is not
+marked as required.
+
+As there is no universal macro to specify the
+minor release, remove the conditionals above the
+"Requires: tar" statement.
+
+With this change, tar will be marked as required
+for geo-rep irrespective of the rhel minor version.
+ +> Change-Id: Id1e3320a0b1a245fc9cd8c7acb09cc119fca18b8 +> Signed-off-by: Shwetha K Acharya + +Upstream patch: https://github.com/gluster/glusterfs/pull/2734 + +BUG: 1901468 +Change-Id: Id1e3320a0b1a245fc9cd8c7acb09cc119fca18b8 +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280116 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +Tested-by: RHGS Build Bot +--- + glusterfs.spec.in | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index a9a83b1..8b6646f 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -521,9 +521,8 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release} + Requires: rsync + Requires: util-linux + Requires: %{name}-libs%{?_isa} = %{version}-%{release} +-%if ( 0%{?rhel} && ( ( 0%{?rhel} == 8 && 0%{?rhel_minor_version} >= 3 ) || 0%{?rhel} >= 9 ) ) + Requires: tar +-%endif ++ + # required for setting selinux bools + %if ( 0%{?rhel} && 0%{?rhel} >= 8 ) + Requires(post): policycoreutils-python-utils +-- +1.8.3.1 + diff --git a/SOURCES/0611-SELinux-Fix-boolean-management-again.patch b/SOURCES/0611-SELinux-Fix-boolean-management-again.patch new file mode 100644 index 0000000..a5b2612 --- /dev/null +++ b/SOURCES/0611-SELinux-Fix-boolean-management-again.patch @@ -0,0 +1,54 @@ +From 5ad4711f40c0e8ab7c196ac1c9025bf78b8b94e0 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Thu, 18 Nov 2021 09:21:56 -0500 +Subject: [PATCH 611/611] SELinux: Fix boolean management, again + +When upgrading from a version of the package that does not include +the previous fix this means the flawed scriptlet is still executed, +undoing the setting of the boolean. + +In order to work the boolean needs to be set in %posttrans. This is +a temporary change that can (or should) be removed in the next version +of RHGS, i.e. 3.5.7. + +Issue: https://github.com/gluster/glusterfs/issues/2522 +Resolves: rhbz#1973566 +Resolves: rhbz#1975400 + +Label: DOWNSTREAM ONLY + +BUG: 1973566 +Change-Id: Ida39a3ee5e6b4b0d3255bfef95601890afd80709 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/292189 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 8b6646f..87176c9 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1123,6 +1123,17 @@ exit 0 + %endif + %endif + ++%if ( 0%{!?_without_server:1} ) ++%if ( ( 0%{?fedora} && 0%{?fedora} > 25 ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) ++# temporary fix to be removed in the next version (i.e. RHGS 3.5.7). This ++# is only needed when upgrading from the flawed versions (e.g. RHGS 3.5.5 ++# and earlier.) 
++%posttrans ganesha ++semanage boolean -m ganesha_use_fusefs --on -S targeted ++exit 0 ++%endif ++%endif ++ + ##----------------------------------------------------------------------------- + ## All %%files should be placed here and keep them grouped + ## +-- +1.8.3.1 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec index c4f7f83..8a7701b 100644 --- a/SPECS/glusterfs.spec +++ b/SPECS/glusterfs.spec @@ -79,6 +79,11 @@ # rpmbuild -ta glusterfs-6.0.tar.gz --without rdma %{?_without_rdma:%global _without_rdma --disable-ibverbs} +# No RDMA Support on 32-bit ARM +%ifarch armv7hl +%global _without_rdma --disable-ibverbs +%endif + # server # if you wish to build rpms without server components, compile like this # rpmbuild -ta glusterfs-6.0.tar.gz --without server @@ -232,7 +237,8 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 49.1%{?dist} +Release: 61%{?dist} +ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ URL: http://docs.gluster.org/ @@ -789,7 +795,137 @@ Patch0477: 0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch Patch0478: 0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch Patch0479: 0479-ganesha-ha-revised-regex-exprs-for-status.patch Patch0480: 0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch -Patch0481: 0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch +Patch0481: 0481-Update-rfc.sh-to-rhgs-3.5.4.patch +Patch0482: 0482-logger-Always-print-errors-in-english.patch +Patch0483: 0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch +Patch0484: 0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch +Patch0485: 0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch +Patch0486: 0486-glusterd-brick-sock-file-deleted-log-error-1560.patch +Patch0487: 0487-Events-Log-file-not-re-opened-after-logrotate.patch +Patch0488: 0488-glusterd-afr-enable-granular-entry-heal-by-default.patch +Patch0489: 0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch +Patch0490: 0490-Segmentation-fault-occurs-during-truncate.patch +Patch0491: 0491-glusterd-mount-directory-getting-truncated-on-mounti.patch +Patch0492: 0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch +Patch0493: 0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch +Patch0494: 0494-glusterd-start-the-brick-on-a-different-port.patch +Patch0495: 0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch +Patch0496: 0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch +Patch0497: 0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch +Patch0498: 0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch +Patch0499: 0499-gfapi-give-appropriate-error-when-size-exceeds.patch +Patch0500: 0500-features-shard-Convert-shard-block-indices-to-uint64.patch +Patch0501: 0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch +Patch0502: 0502-dht-fixing-a-permission-update-issue.patch +Patch0503: 0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch +Patch0504: 0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch +Patch0505: 0505-trash-Create-inode_table-only-while-feature-is-enabl.patch +Patch0506: 0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch +Patch0507: 0507-inode-make-critical-section-smaller.patch +Patch0508: 0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch +Patch0509: 0509-core-configure-optimum-inode-table-hash_size-for-shd.patch +Patch0510: 0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch +Patch0511: 
0511-features-shard-Missing-format-specifier.patch +Patch0512: 0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch +Patch0513: 0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch +Patch0514: 0514-afr-event-gen-changes.patch +Patch0515: 0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch +Patch0516: 0516-afr-return-EIO-for-gfid-split-brains.patch +Patch0517: 0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch +Patch0518: 0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch +Patch0519: 0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch +Patch0520: 0520-performance-open-behind-seek-fop-should-open_and_res.patch +Patch0521: 0521-open-behind-fix-missing-fd-reference.patch +Patch0522: 0522-lcov-improve-line-coverage.patch +Patch0523: 0523-open-behind-rewrite-of-internal-logic.patch +Patch0524: 0524-open-behind-fix-call_frame-leak.patch +Patch0525: 0525-open-behind-implement-create-fop.patch +Patch0526: 0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch +Patch0527: 0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch +Patch0528: 0528-Extras-Removing-xattr_analysis-script.patch +Patch0529: 0529-geo-rep-prompt-should-work-for-ignore_deletes.patch +Patch0530: 0530-gfapi-avoid-crash-while-logging-message.patch +Patch0531: 0531-Glustereventsd-Default-port-change-2091.patch +Patch0532: 0532-glusterd-fix-for-starting-brick-on-new-port.patch +Patch0533: 0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch +Patch0534: 0534-glusterd-Resolve-use-after-free-bug-2181.patch +Patch0535: 0535-multiple-files-use-dict_allocate_and_serialize-where.patch +Patch0536: 0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch +Patch0537: 0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch +Patch0538: 0538-afr-fix-coverity-issue-introduced-by-90cefde.patch +Patch0539: 0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch +Patch0540: 0540-extras-Disable-write-behind-for-group-samba.patch +Patch0541: 0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch +Patch0542: 0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch +Patch0543: 0543-glusterd-handle-custom-xlator-failure-cases.patch +Patch0544: 0544-tests-avoid-empty-paths-in-environment-variables.patch +Patch0545: 0545-tests-Excluded-tests-for-unsupported-components.patch +Patch0546: 0546-Update-rfc.sh-to-rhgs-3.5.5.patch +Patch0547: 0547-perf-write-behind-Clear-frame-local-on-conflict-erro.patch +Patch0548: 0548-Add-tar-as-dependency-to-geo-rep-rpm-for-RHEL-8.3-an.patch +Patch0549: 0549-geo-rep-Change-in-attribute-for-getting-function-nam.patch +Patch0550: 0550-common-ha-stability-fixes-for-ganesha_grace-and-gane.patch +Patch0551: 0551-common-ha-ensure-shared_storage-is-mounted-before-se.patch +Patch0552: 0552-cluster-afr-Change-default-self-heal-window-size-to-.patch +Patch0553: 0553-cluster-ec-Change-self-heal-window-size-to-4MiB-by-d.patch +Patch0554: 0554-dht-fix-rebalance-of-sparse-files.patch +Patch0555: 0555-geo-rep-Improve-handling-of-gfid-mismatches.patch +Patch0556: 0556-dht-don-t-ignore-xdata-in-fgetxattr.patch +Patch0557: 0557-cluster-dht-Fix-stack-overflow-in-readdir-p.patch +Patch0558: 0558-afr-fix-directory-entry-count.patch +Patch0559: 0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch +Patch0560: 0560-posix-Avoid-dict_del-logs-in-posix_is_layout_stale-w.patch +Patch0561: 0561-cluster-ec-Inform-failure-when-some-bricks-are-unava.patch +Patch0562: 0562-shard.c-Fix-formatting.patch +Patch0563: 
0563-features-shard-Use-fd-lookup-post-file-open.patch +Patch0564: 0564-store.c-glusterd-store.c-remove-sys_stat-calls.patch +Patch0565: 0565-libglusterfs-coverity-pointer-to-local-outside-the-s.patch +Patch0566: 0566-enahancement-debug-Option-to-generate-core-dump-with.patch +Patch0567: 0567-inode-create-inode-outside-locked-region.patch +Patch0568: 0568-core-tcmu-runner-process-continuous-growing-logs-lru.patch +Patch0569: 0569-features-shard-optimization-over-shard-lookup-in-cas.patch +Patch0570: 0570-features-shard-avoid-repeatative-calls-to-gf_uuid_un.patch +Patch0571: 0571-NetBSD-build-fixes.patch +Patch0572: 0572-locks-remove-unused-conditional-switch-to-spin_lock-.patch +Patch0573: 0573-features-shard-unlink-fails-due-to-nospace-to-mknod-.patch +Patch0574: 0574-features-shard-delay-unlink-of-a-file-that-has-fd_co.patch +Patch0575: 0575-libglusterfs-add-functions-to-calculate-time-differe.patch +Patch0576: 0576-rpcsvc-Add-latency-tracking-for-rpc-programs.patch +Patch0577: 0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch +Patch0578: 0578-protocol-client-fallback-to-anonymous-fd-for-fsync.patch +Patch0579: 0579-cli-changing-rebal-task-ID-to-None-in-case-status-is.patch +Patch0580: 0580-cluster-dht-suppress-file-migration-error-for-node-n.patch +Patch0581: 0581-afr-don-t-reopen-fds-on-which-POSIX-locks-are-held.patch +Patch0582: 0582-protocol-client-Fix-lock-memory-leak.patch +Patch0583: 0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch +Patch0584: 0584-dht-fixing-xattr-inconsistency.patch +Patch0585: 0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch +Patch0586: 0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch +Patch0587: 0587-Update-rfc.sh-to-rhgs-3.5.6.patch +Patch0588: 0588-locks-Fix-null-gfid-in-lock-contention-notifications.patch +Patch0589: 0589-extras-fix-for-postscript-failure-on-logrotation-of-.patch +Patch0590: 0590-cluster-afr-Don-t-check-for-stale-entry-index.patch +Patch0591: 0591-afr-check-for-valid-iatt.patch +Patch0592: 0592-md-cache-fix-integer-signedness-mismatch.patch +Patch0593: 0593-dht-explicit-null-dereference.patch +Patch0594: 0594-glusterd-resource-leaks.patch +Patch0595: 0595-glusterd-use-after-free-coverity-issue.patch +Patch0596: 0596-locks-null-dereference.patch +Patch0597: 0597-glusterd-memory-deallocated-twice.patch +Patch0598: 0598-glusterd-null-dereference.patch +Patch0599: 0599-afr-null-dereference-nagative-value.patch +Patch0600: 0600-dht-xlator-integer-handling-issue.patch +Patch0601: 0601-coverity-resource-leak-2321.patch +Patch0602: 0602-coverity-null-dereference-2395.patch +Patch0603: 0603-Coverity-Resource-leak-fix-CID-1356547.patch +Patch0604: 0604-Coverity-Fix-dereference-before-null-check-CID-13914.patch +Patch0605: 0605-Coverity-Fix-copy-into-fixed-size-buffer-CID-1325542.patch +Patch0606: 0606-dht-handle-DHT_SUBVOL_STATUS_KEY-in-dht_pt_getxattr-.patch +Patch0607: 0607-SELinux-Fix-boolean-management.patch +Patch0608: 0608-cluster-ec-Track-heal-statistics-in-shd.patch +Patch0609: 0609-feature-shard-wrong-dname-results-in-dentry-not-foun.patch +Patch0610: 0610-glusterfs.spec.in-remove-condtionals-from-tar-depend.patch +Patch0611: 0611-SELinux-Fix-boolean-management-again.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -998,6 +1134,8 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release} Requires: rsync Requires: util-linux Requires: %{name}-libs%{?_isa} = %{version}-%{release} +Requires: tar + # required for setting selinux bools %if 
( 0%{?rhel} && 0%{?rhel} >= 8 ) Requires(post): policycoreutils-python-utils @@ -1501,7 +1639,10 @@ exit 0 %if ( 0%{!?_without_server:1} ) %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) %post ganesha -semanage boolean -m ganesha_use_fusefs --on +# first install +if [ $1 -eq 1 ]; then + %selinux_set_booleans ganesha_use_fusefs=1 +fi exit 0 %endif %endif @@ -1509,7 +1650,9 @@ exit 0 %if ( 0%{!?_without_georeplication:1} ) %post geo-replication %if ( 0%{?rhel} && 0%{?rhel} >= 8 ) -%selinux_set_booleans %{selinuxbooleans} +if [ $1 -eq 1 ]; then + %selinux_set_booleans %{selinuxbooleans} +fi %endif if [ $1 -ge 1 ]; then %systemd_postun_with_restart glusterd @@ -1636,7 +1779,20 @@ exit 0 %if ( 0%{!?_without_server:1} ) %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) %postun ganesha -semanage boolean -m ganesha_use_fusefs --off +if [ $1 -eq 0 ]; then + # use the value of ganesha_use_fusefs from before glusterfs-ganesha was installed + %selinux_unset_booleans ganesha_use_fusefs=1 +fi +exit 0 +%endif +%endif + +%if ( 0%{!?_without_georeplication:1} ) +%postun geo-replication +%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) +if [ $1 -eq 0 ]; then + %selinux_unset_booleans %{selinuxbooleans} +fi exit 0 %endif %endif @@ -1646,19 +1802,20 @@ exit 0 ## %if ( 0%{!?_without_server:1} ) %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) -%trigger ganesha -- selinux-policy-targeted -semanage boolean -m ganesha_use_fusefs --on +# ensure ganesha_use_fusefs is on in case of policy mode switch (eg. mls->targeted) +%triggerin ganesha -- selinux-policy-targeted +semanage boolean -m ganesha_use_fusefs --on -S targeted exit 0 %endif %endif -##----------------------------------------------------------------------------- -## All %%triggerun should be placed here and keep them sorted -## %if ( 0%{!?_without_server:1} ) -%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) -%triggerun ganesha -- selinux-policy-targeted -semanage boolean -m ganesha_use_fusefs --off +%if ( ( 0%{?fedora} && 0%{?fedora} > 25 ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +# temporary fix to be removed in the next version (i.e. RHGS 3.5.7). This +# is only needed when upgrading from the flawed versions (e.g. RHGS 3.5.5 +# and earlier.) 
+%posttrans ganesha +semanage boolean -m ganesha_use_fusefs --on -S targeted exit 0 %endif %endif @@ -1930,7 +2087,6 @@ exit 0 %if ( 0%{!?_without_server:1} ) %files server %doc extras/clear_xattrs.sh -%{_datadir}/glusterfs/scripts/xattr_analysis.py* %{_datadir}/glusterfs/scripts/quota_fsck.py* # sysconf %config(noreplace) %{_sysconfdir}/glusterfs @@ -2533,11 +2689,55 @@ fi %endif %changelog -* Tue Mar 16 2021 CentOS Sources - 6.0-49.1.el7.centos -- remove vendor and/or packager lines +* Mon Nov 29 2021 Gluster Jenkins - 6.0-61 +- fixes bugs bz#1973566 + +* Mon Oct 11 2021 Gluster Jenkins - 6.0-60 +- fixes bugs bz#1668303 bz#1853631 bz#1901468 bz#1904137 bz#1911665 + bz#1962972 bz#1973566 bz#1994593 bz#1995029 bz#1997447 bz#2006205 + +* Tue Jul 06 2021 Gluster Jenkins - 6.0-59 +- fixes bugs bz#1689375 + +* Wed Jun 16 2021 Gluster Jenkins - 6.0-58 +- fixes bugs bz#1945143 + +* Tue Jun 08 2021 Gluster Jenkins - 6.0-57 +- fixes bugs bz#1600379 bz#1689375 bz#1782428 bz#1798897 bz#1815462 + bz#1889966 bz#1891403 bz#1901468 bz#1903911 bz#1908635 bz#1917488 bz#1918018 + bz#1919132 bz#1925425 bz#1927411 bz#1927640 bz#1928676 bz#1942816 bz#1943467 + bz#1945143 bz#1946171 bz#1957191 bz#1957641 + +* Thu May 06 2021 Gluster Jenkins - 6.0-56.2 +- fixes bugs bz#1953901 + +* Thu Apr 22 2021 Gluster Jenkins - 6.0-56.1 +- fixes bugs bz#1927235 + +* Wed Apr 14 2021 Gluster Jenkins - 6.0-56 +- fixes bugs bz#1948547 + +* Fri Mar 19 2021 Gluster Jenkins - 6.0-55 +- fixes bugs bz#1939372 + +* Wed Mar 03 2021 Gluster Jenkins - 6.0-54 +- fixes bugs bz#1832306 bz#1911292 bz#1924044 + +* Thu Feb 11 2021 Gluster Jenkins - 6.0-53 +- fixes bugs bz#1224906 bz#1691320 bz#1719171 bz#1814744 bz#1865796 + +* Thu Jan 28 2021 Gluster Jenkins - 6.0-52 +- fixes bugs bz#1600459 bz#1719171 bz#1830713 bz#1856574 + +* Mon Dec 28 2020 Gluster Jenkins - 6.0-51 +- fixes bugs bz#1640148 bz#1856574 bz#1910119 -* Fri Feb 19 2021 Gluster Jenkins - 6.0-49.1 -- fixes bugs bz#1930561 +* Tue Dec 15 2020 Gluster Jenkins - 6.0-50 +- fixes bugs bz#1224906 bz#1412494 bz#1612973 bz#1663821 bz#1691320 + bz#1726673 bz#1749304 bz#1752739 bz#1779238 bz#1813866 bz#1814744 bz#1821599 + bz#1832306 bz#1835229 bz#1842449 bz#1865796 bz#1878077 bz#1882923 bz#1885966 + bz#1890506 bz#1896425 bz#1898776 bz#1898777 bz#1898778 bz#1898781 bz#1898784 + bz#1903468 * Wed Nov 25 2020 Gluster Jenkins - 6.0-49 - fixes bugs bz#1286171