diff --git a/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch b/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch deleted file mode 100644 index dd9b0ab..0000000 --- a/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 346aa7cbc34b9bbbaca45180215a4d9ffd5055df Mon Sep 17 00:00:00 2001 -From: Rinku Kothiya -Date: Fri, 19 Feb 2021 06:19:07 +0000 -Subject: [PATCH 481/481] RHGS-3.5.3 rebuild to ship with RHEL. - -Label: DOWNSTREAM ONLY -BUG: 1930561 - -Change-Id: I9c7f30cc6bc616344b27072bfde056c7bba1e143 -Signed-off-by: Rinku Kothiya -Reviewed-on: https://code.engineering.redhat.com/gerrit/228413 -Tested-by: RHGS Build Bot -Reviewed-by: Sunil Kumar Heggodu Gopala Acharya ---- - glusterfs.spec.in | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/glusterfs.spec.in b/glusterfs.spec.in -index 30d7162..52f9b40 100644 ---- a/glusterfs.spec.in -+++ b/glusterfs.spec.in -@@ -1983,6 +1983,8 @@ fi - %endif - - %changelog -+* Fri Feb 19 2021 Rinku Kothiya -+- Build RGHS clients for RHEL (#1930561) - - * Mon May 11 2020 Sunny Kumar - - added requires policycoreutils-python-utils on rhel8 for geo-replication --- -1.8.3.1 - diff --git a/SOURCES/0481-Update-rfc.sh-to-rhgs-3.5.4.patch b/SOURCES/0481-Update-rfc.sh-to-rhgs-3.5.4.patch new file mode 100644 index 0000000..0ba12d2 --- /dev/null +++ b/SOURCES/0481-Update-rfc.sh-to-rhgs-3.5.4.patch @@ -0,0 +1,26 @@ +From 828be8e789db3c77587c708f930d7fe8c9456e3b Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Fri, 4 Dec 2020 05:18:45 +0530 +Subject: [PATCH 481/511] Update rfc.sh to rhgs-3.5.4 + +Signed-off-by: Rinku Kothiya +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index 1dca29f..c0559b9 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.3"; ++branch="rhgs-3.5.4"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git 
a/SOURCES/0482-logger-Always-print-errors-in-english.patch b/SOURCES/0482-logger-Always-print-errors-in-english.patch new file mode 100644 index 0000000..e454bec --- /dev/null +++ b/SOURCES/0482-logger-Always-print-errors-in-english.patch @@ -0,0 +1,49 @@ +From e43af5b15d14e43c3201fd0fb7bf02663e3e0127 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Sat, 7 Nov 2020 12:09:36 +0530 +Subject: [PATCH 482/511] logger: Always print errors in english + +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1657 +> fixes: #1302 +> Change-Id: If0e21f016155276a953c64a8dd13ff3eb281d09d +> Signed-off-by: Rinku Kothiya + +BUG: 1896425 + +Change-Id: If0e21f016155276a953c64a8dd13ff3eb281d09d +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/219999 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/logging.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c +index 7f0eff6..5874c34 100644 +--- a/libglusterfs/src/logging.c ++++ b/libglusterfs/src/logging.c +@@ -513,6 +513,7 @@ gf_openlog(const char *ident, int option, int facility) + { + int _option = option; + int _facility = facility; ++ char *language = NULL; + + if (-1 == _option) { + _option = LOG_PID | LOG_NDELAY; +@@ -522,7 +523,10 @@ gf_openlog(const char *ident, int option, int facility) + } + + /* TODO: Should check for errors here and return appropriately */ +- setlocale(LC_ALL, ""); ++ language = setlocale(LC_ALL, "en_US.UTF-8"); ++ if (!language) ++ setlocale(LC_ALL, ""); ++ + setlocale(LC_NUMERIC, "C"); /* C-locale for strtod, ... 
*/ + /* close the previous syslog if open as we are changing settings */ + closelog(); +-- +1.8.3.1 + diff --git a/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch b/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch new file mode 100644 index 0000000..c0f2118 --- /dev/null +++ b/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch @@ -0,0 +1,150 @@ +From 8c366f34a279a5ab2a6301bfd93534fe746a23e8 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Mon, 7 Dec 2020 09:53:27 +0530 +Subject: [PATCH 483/511] afr: more quorum checks in lookup and new entry + marking + +Problem: See upstream github issue for details. + +Fix: +-In lookup if the entry exists in 2 out of 3 bricks, don't fail the +lookup with ENOENT just because there is an entrylk on the parent. +Consider quorum before deciding. + +-If entry FOP does not succeed on quorum no. of bricks, do not perform +new entry mark. + +Upstream patch details: +> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24499/ +> Fixes: #1303 +> Change-Id: I56df8c89ad53b29fa450c7930a7b7ccec9f4a6c5 +> Signed-off-by: Ravishankar N + +BUG: 1821599 +Change-Id: If513e8a7d6088a676288927630d8e616269bf5d5 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/220363 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 2 -- + xlators/cluster/afr/src/afr-common.c | 24 ++++++++++++---------- + xlators/cluster/afr/src/afr-dir-write.c | 8 ++++++++ + xlators/cluster/afr/src/afr.h | 4 ++++ + 4 files changed, 25 insertions(+), 13 deletions(-) + +diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t +index 26f9049..49c4dea 100644 +--- a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t ++++ 
b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t +@@ -53,8 +53,6 @@ TEST ! ls $B0/${V0}1/file$i + TEST ls $B0/${V0}2/file$i + dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2) + TEST [ "$dirty" != "000000000000000000000000" ] +-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i +-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i + + TEST $CLI volume set $V0 self-heal-daemon on + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 89e2483..851ccad 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -1236,7 +1236,7 @@ refresh_done: + return 0; + } + +-static void ++void + afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies) + { +@@ -2290,6 +2290,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + 0, + }; + gf_boolean_t locked_entry = _gf_false; ++ gf_boolean_t in_flight_create = _gf_false; + gf_boolean_t can_interpret = _gf_true; + inode_t *parent = NULL; + ia_type_t ia_type = IA_INVAL; +@@ -2333,17 +2334,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + if (!replies[i].valid) + continue; + +- if (locked_entry && replies[i].op_ret == -1 && +- replies[i].op_errno == ENOENT) { +- /* Second, check entry is still +- "underway" in creation */ +- local->op_ret = -1; +- local->op_errno = ENOENT; +- goto error; +- } +- +- if (replies[i].op_ret == -1) ++ if (replies[i].op_ret == -1) { ++ if (locked_entry && replies[i].op_errno == ENOENT) { ++ in_flight_create = _gf_true; ++ } + continue; ++ } + + if (read_subvol == -1 || !readable[read_subvol]) { + read_subvol = i; +@@ -2353,6 +2349,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + } + } + ++ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) { ++ local->op_ret = -1; ++ 
local->op_errno = ENOENT; ++ goto error; ++ } ++ + if (read_subvol == -1) + goto error; + /* We now have a read_subvol, which is readable[] (if there +diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c +index 84e2a34..416c19d 100644 +--- a/xlators/cluster/afr/src/afr-dir-write.c ++++ b/xlators/cluster/afr/src/afr-dir-write.c +@@ -349,6 +349,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) + afr_private_t *priv = NULL; + int pre_op_count = 0; + int failed_count = 0; ++ unsigned char *success_replies = NULL; + + local = frame->local; + priv = this->private; +@@ -364,9 +365,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) + failed_count = AFR_COUNT(local->transaction.failed_subvols, + priv->child_count); + ++ /* FOP succeeded on all bricks. */ + if (pre_op_count == priv->child_count && !failed_count) + return; + ++ /* FOP did not suceed on quorum no. of bricks. */ ++ success_replies = alloca0(priv->child_count); ++ afr_fill_success_replies(local, priv, success_replies); ++ if (!afr_has_quorum(success_replies, this, NULL)) ++ return; ++ + if (priv->thin_arbiter_count) { + /*Mark new entry using ta file*/ + local->is_new_entry = _gf_true; +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index ff96246..ed5096e 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -1334,4 +1334,8 @@ afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this); + + void + afr_selfheal_childup(xlator_t *this, afr_private_t *priv); ++ ++void ++afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, ++ unsigned char *replies); + #endif /* __AFR_H__ */ +-- +1.8.3.1 + diff --git a/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch b/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch new file mode 100644 index 0000000..56d4feb --- /dev/null +++ 
b/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch @@ -0,0 +1,90 @@ +From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Fri, 26 Jun 2020 12:10:31 +0530 +Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after + reboot + +problem: while the rebalance is in progress, if a node is +rebooted rebalance v status shows the stats of this node as +0 once the node is back. + +Reason: when the node is rebooted, once it is back +glusterd_volume_defrag_restart() starts the rebalance and +creates the rpc. but due to some race, rebalance process is +sending disconnect event, so rpc object is getting destroyed. As +the rpc object is null, request for fetching the latest stats is +not sent to rebalance process. and stats are shown as default values +which is 0. + +Solution: When the rpc object is null, we should create the rpc if the +rebalance process is up. so that request can be sent to rebalance +process using the rpc. 
+ +>fixes: #1339 +>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c +>Signed-off-by: Sanju Rakonde +Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641 + +BUG: 1832306 +Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c +Signed-off-by: Srijan Sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/220369 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++--------- + 1 file changed, 20 insertions(+), 9 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index c78983a..df78fef 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + rpc_clnt_t *rpc = NULL; + dict_t *rsp_dict = NULL; + int32_t cmd = GF_OP_CMD_NONE; ++ glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + rsp_dict = dict_new(); +@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + cds_list_for_each_entry_safe(pending_node, tmp, &selected, list) + { + rpc = glusterd_pending_node_get_rpc(pending_node); ++ /* In the case of rebalance if the rpc object is null, we try to ++ * create the rpc object. if the rebalance daemon is down, it returns ++ * -1. otherwise, rpc object will be created and referenced. 
++ */ + if (!rpc) { +- if (pending_node->type == GD_NODE_REBALANCE) { +- ret = 0; +- glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx); ++ if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) { ++ volinfo = pending_node->node; ++ ret = glusterd_rebalance_rpc_create(volinfo); ++ if (ret) { ++ ret = 0; ++ glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx); ++ goto out; ++ } else { ++ rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag); ++ } ++ } else { ++ ret = -1; ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, ++ "Brick Op failed " ++ "due to rpc failure."); + goto out; + } +- +- ret = -1; +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, +- "Brick Op failed " +- "due to rpc failure."); +- goto out; + } + + /* Redirect operation to be detach tier via rebalance flow. */ +-- +1.8.3.1 + diff --git a/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch b/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch new file mode 100644 index 0000000..6ed4f1c --- /dev/null +++ b/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch @@ -0,0 +1,87 @@ +From 2e6a5e504e66bc95208420e4882e453a53ac9ea2 Mon Sep 17 00:00:00 2001 +From: schaffung +Date: Mon, 2 Nov 2020 11:18:01 +0530 +Subject: [PATCH 485/511] cli-rpc: conditional init of global quota rpc (#1578) + +Issue: It is seem that the initialization of rpc to +connect with quotad is done in every glusterfs cli command, +irrespective of whether the quota feature is enabled or disabled. +This seems to be an overkill. + +Code change: The file /var/run/quotad/quotad.pid is present +signals that quotad is enabled. Hence we can put a conditional +check for seeing when this file exists and if it doesn't we +just skip over the initialization of the global quotad rpc. + +This will go on to reduce the extra rpc calls and operations +being performed in the kernel space. 
+ +>Fixes: #1577 +>Change-Id: Icb69d35330f76ce95626f59af75a12726eb620ff +>Signed-off-by: srijan-sivakumar +Upstream Patch : https://github.com/gluster/glusterfs/pull/1578 + +BUG: 1885966 +Change-Id: Icb69d35330f76ce95626f59af75a12726eb620ff +Signed-off-by: Srijan Sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/220371 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli.c | 18 +++++++++++++----- + cli/src/cli.h | 3 +++ + 2 files changed, 16 insertions(+), 5 deletions(-) + +diff --git a/cli/src/cli.c b/cli/src/cli.c +index 99a16a0..a76c5a2 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -64,8 +64,7 @@ + extern int connected; + /* using argp for command line parsing */ + +-const char *argp_program_version = +- PACKAGE_NAME" "PACKAGE_VERSION; ++const char *argp_program_version = PACKAGE_NAME " " PACKAGE_VERSION; + const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">"; + + struct rpc_clnt *global_quotad_rpc; +@@ -840,9 +839,18 @@ main(int argc, char *argv[]) + if (!global_rpc) + goto out; + +- global_quotad_rpc = cli_quotad_clnt_rpc_init(); +- if (!global_quotad_rpc) +- goto out; ++ /* ++ * Now, one doesn't need to initialize global rpc ++ * for quota unless and until quota is enabled. ++ * So why not put a check to save all the rpc related ++ * ops here. ++ */ ++ ret = sys_access(QUOTAD_PID_PATH, F_OK); ++ if (!ret) { ++ global_quotad_rpc = cli_quotad_clnt_rpc_init(); ++ if (!global_quotad_rpc) ++ goto out; ++ } + + ret = cli_cmds_register(&state); + if (ret) +diff --git a/cli/src/cli.h b/cli/src/cli.h +index 37e4d9d..c30ae9c 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -30,6 +30,9 @@ + #define CLI_TAB_LENGTH 8 + #define CLI_BRICK_STATUS_LINE_LEN 78 + ++// Quotad pid path. 
++#define QUOTAD_PID_PATH "/var/run/gluster/quotad/quotad.pid" ++ + /* Geo-rep command positional arguments' index */ + #define GEO_REP_CMD_INDEX 1 + #define GEO_REP_CMD_CONFIG_INDEX 4 +-- +1.8.3.1 + diff --git a/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch b/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch new file mode 100644 index 0000000..60750db --- /dev/null +++ b/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch @@ -0,0 +1,87 @@ +From 9b19d4841fc3002d30ec3e44c85ec37682c11bfb Mon Sep 17 00:00:00 2001 +From: schaffung +Date: Thu, 22 Oct 2020 13:07:09 +0530 +Subject: [PATCH 486/511] glusterd: brick sock file deleted, log error (#1560) + +Issue: The satus of the brick as tracked by glusterd is +stopped if the socket file corresponding to a running +brick process is absent in /var/run/gluster. The glusterd +keeps on trying to reconnect ( rpc layer ) but it fails. + +Code change: Rather than registering the rpc connection +with the help of the given sockfilepath which is not +even present as it keeps on reconnecting, why not log +this as an error and not try to reconnect using the +non-existing sock file path. 
+ +>Fixes: #1526 +>Change-Id: I6c81691ab1624c66dec74f5ffcc6c383201ac757 +>Signed-off-by: srijan-sivakumar +Upstream Patch : https://github.com/gluster/glusterfs/pull/1560 + +BUG: 1882923 +Change-Id: I6c81691ab1624c66dec74f5ffcc6c383201ac757 +Signed-off-by: Srijan Sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/220376 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 27 +++++++++++++++++++++++++-- + 1 file changed, 25 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index d25fc8a..a72c494 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -6310,7 +6310,7 @@ find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo, + check if passed pid is match with running glusterfs process + */ + +-int ++static int + glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) + { + char fname[128] = ""; +@@ -6383,7 +6383,17 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) + + if (tmpsockpath[0]) { + strncpy(sockpath, tmpsockpath, i); +- ret = 0; ++ /* ++ * Condition to check if the brick socket file is present ++ * in the stated path or not. This helps in preventing ++ * constant re-connect triggered in the RPC layer and also ++ * a log message would help out the user. ++ */ ++ ret = sys_access(sockpath, F_OK); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND, ++ "%s not found", sockpath, NULL); ++ } + } + + return ret; +@@ -6581,7 +6591,20 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo, + if (!is_brick_mx_enabled()) { + glusterd_set_brick_socket_filepath( + volinfo, brickinfo, socketpath, sizeof(socketpath)); ++ /* ++ * Condition to check if the brick socket file is present ++ * in the stated path or not. 
This helps in preventing ++ * constant re-connect triggered in the RPC layer and also ++ * a log message would help out the user. ++ */ ++ ret = sys_access(socketpath, F_OK); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND, ++ "%s not found", socketpath, NULL); ++ goto out; ++ } + } ++ + gf_log(this->name, GF_LOG_DEBUG, + "Using %s as sockfile for brick %s of volume %s ", + socketpath, brickinfo->path, volinfo->volname); +-- +1.8.3.1 + diff --git a/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch b/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch new file mode 100644 index 0000000..ac0d1cc --- /dev/null +++ b/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch @@ -0,0 +1,56 @@ +From c961ee1d7c1abb2552b79ed39ed7fd1bd1b3962f Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar +Date: Fri, 7 Aug 2020 15:02:07 +0530 +Subject: [PATCH 487/511] Events: Log file not re-opened after logrotate. + +Issue: The logging is being done in the same file +even after the logrotate utility has changed the file. +This causes the logfile to grow indefinitely. + +Code Changes: Using the WatchedFileHandler class instead +of FileHandler class. This watches the file it is logging +into and if the file changes, it is closed and reopened +using the file name. Hence after file rotate, a new file +will be used for logging instead of continuing with +the same old file. 
+ +>Fixes: #1289 +>Change-Id: I773d04f17613a03709cb682692efb39fd8e664e2 +>Signed-off-by: srijan-sivakumar +Upstream Patch : https://review.gluster.org/c/glusterfs/+/24820 + +BUG: 1814744 +Change-Id: I773d04f17613a03709cb682692efb39fd8e664e2 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/220370 +Reviewed-by: Shwetha Acharya +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + events/src/utils.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/events/src/utils.py b/events/src/utils.py +index 38b707a..6d4e079 100644 +--- a/events/src/utils.py ++++ b/events/src/utils.py +@@ -13,6 +13,7 @@ import sys + import json + import os + import logging ++import logging.handlers + import fcntl + from errno import EBADF + from threading import Thread +@@ -98,7 +99,7 @@ def setup_logger(): + logger.setLevel(logging.INFO) + + # create the logging file handler +- fh = logging.FileHandler(LOG_FILE) ++ fh = logging.handlers.WatchedFileHandler(LOG_FILE) + + formatter = logging.Formatter("[%(asctime)s] %(levelname)s " + "[%(module)s - %(lineno)s:%(funcName)s] " +-- +1.8.3.1 + diff --git a/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch b/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch new file mode 100644 index 0000000..310bc53 --- /dev/null +++ b/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch @@ -0,0 +1,864 @@ +From 0502383024cbf7e4776816e0a992dccc484a3cf2 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Tue, 8 Dec 2020 17:23:22 +0530 +Subject: [PATCH 488/511] glusterd/afr: enable granular-entry-heal by default + +XXXXXXXXXXXXXXXXXXX + IMPORTANT: +XXXXXXXXXXXXXXXXXXXX +I see that for rhgs-3.5.3, GD_OP_VERSION_MAX is GD_OP_VERSION_7_0. Since +this patch should only act on new volumes in rhgs-3.5.4, I am bumping +the op-version to GD_OP_VERSION_7_1. 
In glusterfs upstream, the patch +acts only if op-version >= GD_OP_VERSION_9_0 as seen in the commit +message below. + +Upstream patch details: +/------------------------------------------------------------------------------/ +1. The option has been enabled and tested for quite some time now in RHHI-V +downstream and I think it is safe to make it 'on' by default. Since it +is not possible to simply change it from 'off' to 'on' without breaking +rolling upgrades, old clients etc., I have made it default only for new volumes +starting from op-version GD_OP_VERSION_9_0. + +Note: If you do a volume reset, the option will be turned back off. +This is okay as the dir's gfid will be captured in 'xattrop' folder and heals +will proceed. There might be stale entries inside 'entry-changes' folder, +which will be removed when we enable the option again. + +2. I encountered a cust. issue where entry heal was pending on a dir. with +236436 files in it and the glustershd.log output was just stuck at +"performing entry selfheal", so I have added logs to give us +more info in DEBUG level about whether entry heal and data heal are +progressing (metadata heal doesn't take much time). That way, we have a +quick visual indication to say things are not 'stuck' if we briefly +enable debug logs, instead of taking statedumps or checking profile info +etc. 
+ +>Fixes: #1483 +>Change-Id: I4f116f8c92f8cd33f209b758ff14f3c7e1981422 +>Signed-off-by: Ravishankar N +Upstream Patch: https://github.com/gluster/glusterfs/pull/1621 +/------------------------------------------------------------------------------/ + +BUG: 1890506 +Change-Id: If449a1e873633616cfc508d74b5c22eb434b55ae +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/220555 +Tested-by: Sunil Kumar Heggodu Gopala Acharya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/globals.h | 4 +- + libglusterfs/src/syncop-utils.c | 4 +- + tests/basic/afr/add-brick-self-heal-non-granular.t | 75 +++++++++++++ + tests/basic/afr/add-brick-self-heal.t | 4 +- + tests/basic/afr/bug-1130892-non-granular.t | 77 ++++++++++++++ + .../basic/afr/bug-1493415-gfid-heal-non-granular.t | 79 ++++++++++++++ + ...507-type-mismatch-error-handling-non-granular.t | 117 +++++++++++++++++++++ + ...1749322-entry-heal-not-happening-non-granular.t | 90 ++++++++++++++++ + .../afr/replace-brick-self-heal-non-granular.t | 65 ++++++++++++ + tests/basic/afr/replace-brick-self-heal.t | 2 +- + tests/bugs/replicate/bug-1130892.t | 2 +- + tests/bugs/replicate/bug-1493415-gfid-heal.t | 2 +- + .../bug-1722507-type-mismatch-error-handling.t | 26 +++-- + .../bug-1749322-entry-heal-not-happening.t | 7 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 5 + + xlators/cluster/afr/src/afr-self-heal-data.c | 3 + + xlators/cluster/afr/src/afr-self-heal-entry.c | 7 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 13 +++ + 18 files changed, 558 insertions(+), 24 deletions(-) + create mode 100644 tests/basic/afr/add-brick-self-heal-non-granular.t + create mode 100644 tests/basic/afr/bug-1130892-non-granular.t + create mode 100644 tests/basic/afr/bug-1493415-gfid-heal-non-granular.t + create mode 100644 tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t + create mode 100644 
tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t + create mode 100644 tests/basic/afr/replace-brick-self-heal-non-granular.t + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 31717ed..cc145cd 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,7 +50,7 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_7_1 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ +@@ -138,6 +138,8 @@ + + #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ + ++#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */ ++ + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index be03527..2269c76 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -495,9 +495,7 @@ syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data, + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + +- ret = fn(subvol, entry, loc, data); +- if (ret) +- break; ++ ret |= fn(subvol, entry, loc, data); + } + gf_dirent_free(&entries); + if (ret) +diff --git a/tests/basic/afr/add-brick-self-heal-non-granular.t b/tests/basic/afr/add-brick-self-heal-non-granular.t +new file mode 100644 +index 0000000..19caf24 +--- /dev/null ++++ b/tests/basic/afr/add-brick-self-heal-non-granular.t +@@ -0,0 +1,75 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++EXPECT 'Created' volinfo_field $V0 'Status'; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status'; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++ ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++ ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++for i in {1..5} ++do ++ echo $i > $M0/file$i.txt ++done ++ ++# Metadata changes ++TEST setfattr -n user.test -v qwerty $M0/file5.txt ++ ++# Add brick1 ++TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++ ++# New-brick should accuse the old-bricks (Simulating case for data-loss) ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/ ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/ ++ ++# Check if pending xattr and dirty-xattr are set for newly-added-brick ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status 
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++ ++# Wait for heal to complete ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Check if entry-heal has happened ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort) ++TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort) ++ ++# Test if data was healed ++TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt ++ ++# Test if metadata was healed and exists on both the bricks ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt ++ ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2 ++ ++cleanup; +diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t +index c847e22..7ebf4f6 100644 +--- a/tests/basic/afr/add-brick-self-heal.t ++++ b/tests/basic/afr/add-brick-self-heal.t +@@ -38,8 +38,8 @@ TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0 + TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/ + + # Check if pending xattr and dirty-xattr are set for newly-added-brick +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 + EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2 + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" 
afr_child_up_status $V0 0 +diff --git a/tests/basic/afr/bug-1130892-non-granular.t b/tests/basic/afr/bug-1130892-non-granular.t +new file mode 100644 +index 0000000..3cdbc7d +--- /dev/null ++++ b/tests/basic/afr/bug-1130892-non-granular.t +@@ -0,0 +1,77 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info; ++ ++# Create a 1X2 replica ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1} ++EXPECT 'Created' volinfo_field $V0 'Status'; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++ ++# Disable self-heal daemon ++TEST gluster volume set $V0 self-heal-daemon off ++ ++# Enable Client side heal ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++ ++# Disable all perf-xlators ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++ ++# Volume start ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++# FUSE Mount ++TEST ${GFS} -s $H0 --volfile-id $V0 $M0 ++ ++# Create files and dirs ++TEST mkdir -p $M0/one/two/ ++TEST `echo "Carpe diem" > $M0/one/two/three` ++ ++# Simulate disk-replacement ++TEST kill_brick $V0 $H0 $B0/${V0}-1 ++EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1 ++TEST rm -rf $B0/${V0}-1/one ++TEST rm -rf $B0/${V0}-1/.glusterfs ++ ++#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI ++#which will create .glusterfs folder. 
++mkdir $B0/${V0}-1/.glusterfs && chmod 600 $B0/${V0}-1/.glusterfs ++ ++# Start force ++TEST $CLI volume start $V0 force ++ ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++ ++TEST stat $M0/one ++ ++sleep 1 ++ ++# Check pending xattrs ++EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data ++EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry ++EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata ++ ++TEST gluster volume set $V0 self-heal-daemon on ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two/three ++ ++cleanup; +diff --git a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t +new file mode 100644 +index 0000000..aff001c +--- /dev/null ++++ b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t +@@ -0,0 +1,79 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST $CLI volume set $V0 self-heal-daemon off ++ ++# Create base entry in indices/xattrop ++echo "Data" > $M0/FILE ++ ++#------------------------------------------------------------------------------# ++TEST touch $M0/f1 ++gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1) ++gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1) ++ ++# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a ++# brick crash at the point where file got created but no xattrs were set. ++TEST setfattr -x trusted.gfid $B0/${V0}1/f1 ++TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 ++ ++# storage/posix considers that a file without gfid changed less than a second ++# before doesn't exist, so we need to wait for a second to force posix to ++# consider that this is a valid file but without gfid. ++sleep 2 ++ ++# Assume there were no pending xattrs on parent dir due to 1st brick crashing ++# too. Then name heal from client must heal the gfid. 
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST stat $M0/f1 ++EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1 ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 ++ ++#------------------------------------------------------------------------------# ++TEST mkdir $M0/dir ++TEST touch $M0/dir/f2 ++gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2) ++gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2) ++ ++# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a ++# brick crash at the point where file got created but no xattrs were set. ++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2 ++TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 ++ ++#Now simulate setting of pending entry xattr on parent dir of 1st brick. ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir ++create_brick_xattrop_entry $B0/${V0}0 dir ++ ++# storage/posix considers that a file without gfid changed less than a second ++# before doesn't exist, so we need to wait for a second to force posix to ++# consider that this is a valid file but without gfid. 
++sleep 2 ++ ++#Trigger entry-heal via shd ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2 ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 ++ ++#------------------------------------------------------------------------------# ++cleanup; +diff --git a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t +new file mode 100644 +index 0000000..9079c93 +--- /dev/null ++++ b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t +@@ -0,0 +1,117 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++ ++########################################################################################## ++# GFID link file and the GFID is missing on one brick and all the bricks are being blamed. 
++ ++TEST touch $M0/dir/file ++TEST `echo append>> $M0/dir/file` ++ ++#B0 and B2 must blame B1 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir to trigger index heal. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Remove the gfid xattr and the link file on one brick. ++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file) ++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file) ++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file ++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++# Wait for 2 second to force posix to consider that this is a valid file but ++# without gfid. ++sleep 2 ++TEST $CLI volume heal $V0 ++ ++# Heal should not fail as the file is missing gfid xattr and the link file, ++# which is not actually the gfid or type mismatch. 
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file ++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++rm -f $M0/dir/file ++ ++ ++########################################################################################### ++# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed. ++ ++TEST $CLI volume heal $V0 disable ++TEST touch $M0/dir/file ++#TEST kill_brick $V0 $H0 $B0/$V0"1" ++ ++#B0 and B2 must blame B1 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir to trigger index heal. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Remove the gfid xattr and the link file on two bricks. 
++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file) ++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file) ++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file ++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file ++TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++# Wait for 2 second to force posix to consider that this is a valid file but ++# without gfid. ++sleep 2 ++TEST $CLI volume heal $V0 ++ ++# Heal should not fail as the file is missing gfid xattr and the link file, ++# which is not actually the gfid or type mismatch. ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file ++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++cleanup +diff --git a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t +new file mode 100644 +index 0000000..4f27da4 +--- /dev/null ++++ b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t +@@ -0,0 +1,90 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++function check_gfid_and_link_count ++{ ++ local file=$1 ++ ++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) ++ TEST [ ! 
-z $file_gfid_b0 ] ++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) ++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) ++ EXPECT $file_gfid_b0 echo $file_gfid_b1 ++ EXPECT $file_gfid_b0 echo $file_gfid_b2 ++ ++ EXPECT "2" stat -c %h $B0/${V0}0/$file ++ EXPECT "2" stat -c %h $B0/${V0}1/$file ++ EXPECT "2" stat -c %h $B0/${V0}2/$file ++} ++TESTS_EXPECTED_IN_LOOP=18 ++ ++################################################################################ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++TEST `echo "File 1 " > $M0/dir/file1` ++TEST touch $M0/dir/file{2..4} ++ ++# Remove file2 from 1st & 3rd bricks ++TEST rm -f $B0/$V0"0"/dir/file2 ++TEST rm -f $B0/$V0"2"/dir/file2 ++ ++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks ++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3) ++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3) ++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm -f $B0/$V0"0"/dir/file3 ++TEST rm -f $B0/$V0"1"/dir/file3 ++ ++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick ++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4) ++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4) ++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4 ++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 ++ ++# B0 and B2 blame each other ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n 
trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir on first brick. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# All the files must be present on all the bricks after conservative merge and ++# should have the gfid xattr and the .glusterfs hardlink. ++check_gfid_and_link_count dir/file1 ++check_gfid_and_link_count dir/file2 ++check_gfid_and_link_count dir/file3 ++check_gfid_and_link_count dir/file4 ++ ++cleanup +diff --git a/tests/basic/afr/replace-brick-self-heal-non-granular.t b/tests/basic/afr/replace-brick-self-heal-non-granular.t +new file mode 100644 +index 0000000..c86bff1 +--- /dev/null ++++ b/tests/basic/afr/replace-brick-self-heal-non-granular.t +@@ -0,0 +1,65 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++for i in {1..5} ++do ++ echo $i > $M0/file$i.txt ++done ++ ++# Metadata changes ++TEST setfattr -n user.test -v qwerty $M0/file5.txt ++ ++# Replace brick1 ++TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force ++ ++# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss) ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/ ++ ++# Check if pending xattr and dirty-xattr are set for replaced-brick ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++ ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++ ++# Wait for heal to complete ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++# Check if entry-heal has happened ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort) ++ ++# To make sure that files were not lost from brick0 ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort) ++EXPECT "000000000000000000000000" 
get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++ ++# Test if data was healed ++TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt ++# To make sure that data was not lost from brick0 ++TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt ++ ++# Test if metadata was healed and exists on both the bricks ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt ++ ++cleanup; +diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t +index 0360db7..da31c87 100644 +--- a/tests/basic/afr/replace-brick-self-heal.t ++++ b/tests/basic/afr/replace-brick-self-heal.t +@@ -30,7 +30,7 @@ TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit forc + TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/ + + # Check if pending xattr and dirty-xattr are set for replaced-brick +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 + EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t +index 0f57d66..e23eb26 100644 +--- a/tests/bugs/replicate/bug-1130892.t ++++ b/tests/bugs/replicate/bug-1130892.t +@@ -56,7 +56,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + TEST stat $M0/one + + # Check pending xattrs +-EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data + EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry + EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 
metadata + +diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t +index 125c35a..9714d5e 100644 +--- a/tests/bugs/replicate/bug-1493415-gfid-heal.t ++++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t +@@ -49,7 +49,7 @@ TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2 + TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 + + #Now simulate setting of pending entry xattr on parent dir of 1st brick. +-TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/${V0}0/dir + create_brick_xattrop_entry $B0/${V0}0 dir + + #Trigger entry-heal via shd +diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +index 0aeaaaf..1fdf7ea 100644 +--- a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t ++++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +@@ -23,19 +23,21 @@ TEST mkdir $M0/dir + ########################################################################################## + # GFID link file and the GFID is missing on one brick and all the bricks are being blamed. + +-TEST touch $M0/dir/file +-#TEST kill_brick $V0 $H0 $B0/$V0"1" ++TEST `echo append>> $M0/dir/file` + + #B0 and B2 must blame B1 +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. ++# Another way is to create the needed entries inside indices/entry-changes ++# folder. 
++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir to trigger index heal. + xattrop_dir0=$(afr_get_index_path $B0/$V0"0") + base_entry_b0=`ls $xattrop_dir0` + gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) +-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str + EXPECT "^1$" get_pending_heal_count $V0 + + # Remove the gfid xattr and the link file on one brick. +@@ -70,18 +72,20 @@ rm -f $M0/dir/file + + TEST $CLI volume heal $V0 disable + TEST touch $M0/dir/file +-#TEST kill_brick $V0 $H0 $B0/$V0"1" + + #B0 and B2 must blame B1 +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. ++# Another way is to create the needed entries inside indices/entry-changes ++# folder. ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir to trigger index heal. + xattrop_dir0=$(afr_get_index_path $B0/$V0"0") + base_entry_b0=`ls $xattrop_dir0` + gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) +-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str + EXPECT "^1$" get_pending_heal_count $V0 + + # Remove the gfid xattr and the link file on two bricks. 
+diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +index 9627908..3da873a 100644 +--- a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t ++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +@@ -59,8 +59,11 @@ TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_ + TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 + + # B0 and B2 blame each other +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. ++# Another way is to create the needed entries inside indices/entry-changes ++# folder. ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir on first brick. 
+ xattrop_dir0=$(afr_get_index_path $B0/$V0"0") +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 1608f75..36fd3a9 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -2549,6 +2549,11 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid) + } + } + ++ gf_msg_debug( ++ this->name, 0, ++ "heals needed for %s: [entry-heal=%d, metadata-heal=%d, data-heal=%d]", ++ uuid_utoa(gfid), entry_selfheal, metadata_selfheal, data_selfheal); ++ + if (data_selfheal && priv->data_self_heal) + data_ret = afr_selfheal_data(frame, this, fd); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index cdff4a5..b97c66b 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -239,6 +239,9 @@ afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd, + sink_count = AFR_COUNT(healed_sinks, priv->child_count); + data_lock = alloca0(priv->child_count); + ++ gf_msg_debug(this->name, 0, "gfid:%s, offset=%jd, size=%zu", ++ uuid_utoa(fd->inode->gfid), offset, size); ++ + ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size, + data_lock); + { +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 40be898..00b5b2d 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -206,8 +206,11 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + replies); + } else { + if (!gf_uuid_compare(replies[i].poststat.ia_gfid, +- replies[source].poststat.ia_gfid)) ++ replies[source].poststat.ia_gfid)) { ++ gf_msg_debug(this->name, 0, "skipping %s, no heal needed.", ++ name); + continue; ++ } + + ret = afr_selfheal_recreate_entry(frame, i, source, sources, + fd->inode, name, inode, 
replies); +@@ -839,7 +842,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + + out: + loc_wipe(&loc); +- return 0; ++ return ret; + } + + static int +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index a72c494..bd17a82 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13181,6 +13181,19 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + goto out; + } + } ++ ++ if ((conf->op_version >= GD_OP_VERSION_7_1) && ++ (volinfo->status == GLUSTERD_STATUS_NONE)) { ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, ++ "cluster.granular-entry-heal", "on"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set option 'cluster.granular-entry-heal' " ++ "on volume %s", ++ volinfo->volname); ++ goto out; ++ } ++ } + out: + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch b/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch new file mode 100644 index 0000000..dde2156 --- /dev/null +++ b/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch @@ -0,0 +1,141 @@ +From 2d172144810956225eac3599c943416c4a7e25d0 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Tue, 8 Dec 2020 20:30:23 +0530 +Subject: [PATCH 489/511] glusterd: fix bug in enabling granular-entry-heal + +Upstream patch details: +/------------------------------------------------------------------------------/ +commit f5e1eb87d4af44be3b317b7f99ab88f89c2f0b1a meant to enable the +volume option only for replica volumes but inadvertently enabled +it for all volume types. Fixing it now. + +Also found a bug in glusterd where disabling the option on plain +distribute was succeeding even though setting it in the first place +fails. Fixed that too. 
+ +>Fixes: #1483 +>Change-Id: Icb6c169a8eec44cc4fb4dd636405d3b3485e91b4 +>Reported-by: Sheetal Pamecha +>Signed-off-by: Ravishankar N +Upstream Patch: https://github.com/gluster/glusterfs/pull/1752 +/------------------------------------------------------------------------------/ + +BUG: 1890506 +Change-Id: Id63655dac08d2cfda4899d7ee0efe96e72cd6986 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/220556 +Tested-by: Sunil Kumar Heggodu Gopala Acharya +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/afr/granular-esh/cli.t | 30 ++++++++++++++++++++----- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 12 +++++----- + 3 files changed, 34 insertions(+), 11 deletions(-) + +diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t +index 995d93e..5ab2e39 100644 +--- a/tests/basic/afr/granular-esh/cli.t ++++ b/tests/basic/afr/granular-esh/cli.t +@@ -11,25 +11,38 @@ TESTS_EXPECTED_IN_LOOP=4 + TEST glusterd + TEST pidof glusterd + +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +-# Test that enabling the option should work on a newly created volume +-TEST $CLI volume set $V0 cluster.granular-entry-heal on +-TEST $CLI volume set $V0 cluster.granular-entry-heal off +- + ######################### + ##### DISPERSE TEST ##### + ######################### + # Execute the same command on a disperse volume and make sure it fails. + TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2} ++EXPECT "no" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume start $V1 ++TEST ! $CLI volume heal $V1 granular-entry-heal enable ++TEST ! 
$CLI volume heal $V1 granular-entry-heal disable ++ ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 ++ ++######################### ++##### PLAIN DISTRIBUTE TEST ##### ++######################### ++# Execute the same command on a distribute volume and make sure it fails. ++TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1,2} ++EXPECT "no" volume_get_field $V1 cluster.granular-entry-heal + TEST $CLI volume start $V1 + TEST ! $CLI volume heal $V1 granular-entry-heal enable + TEST ! $CLI volume heal $V1 granular-entry-heal disable ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 + + ####################### + ###### TIER TEST ###### + ####################### + # Execute the same command on a disperse + replicate tiered volume and make + # sure the option is set on the replicate leg of the volume ++TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2} ++TEST $CLI volume start $V1 + TEST $CLI volume tier $V1 attach replica 2 $H0:$B0/${V1}{3,4} + TEST $CLI volume heal $V1 granular-entry-heal enable + EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal +@@ -52,10 +65,17 @@ TEST kill_brick $V1 $H0 $B0/${V1}3 + # failed. + TEST ! 
$CLI volume heal $V1 granular-entry-heal enable + EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 + + ###################### + ### REPLICATE TEST ### + ###################### ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++EXPECT "on" volume_get_field $V0 cluster.granular-entry-heal ++# Test that enabling the option should work on a newly created volume ++TEST $CLI volume set $V0 cluster.granular-entry-heal on ++TEST $CLI volume set $V0 cluster.granular-entry-heal off + TEST $CLI volume start $V0 + TEST $CLI volume set $V0 cluster.data-self-heal off + TEST $CLI volume set $V0 cluster.metadata-self-heal off +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index bd17a82..ad3750e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13183,7 +13183,8 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + } + + if ((conf->op_version >= GD_OP_VERSION_7_1) && +- (volinfo->status == GLUSTERD_STATUS_NONE)) { ++ (volinfo->status == GLUSTERD_STATUS_NONE) && ++ (volinfo->type == GF_CLUSTER_TYPE_REPLICATE)) { + ret = dict_set_dynstr_with_alloc(volinfo->dict, + "cluster.granular-entry-heal", "on"); + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 134b04c..09e6ead 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -621,11 +621,13 @@ glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict, + goto out; + } + +- if (((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || +- (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) && +- (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = -1; +- goto out; ++ if ((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || ++ (heal_op == 
GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) { ++ if ((volinfo->type != GF_CLUSTER_TYPE_REPLICATE) && ++ (volinfo->type != GF_CLUSTER_TYPE_TIER)) { ++ ret = -1; ++ goto out; ++ } + } + + if ((heal_op == GF_SHD_OP_HEAL_ENABLE) || +-- +1.8.3.1 + diff --git a/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch b/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch new file mode 100644 index 0000000..bd3c777 --- /dev/null +++ b/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch @@ -0,0 +1,57 @@ +From 5a110946b41619577b365cdceddc4da551ff49f0 Mon Sep 17 00:00:00 2001 +From: kinsu +Date: Thu, 19 Sep 2019 08:34:32 +0000 +Subject: [PATCH 490/511] Segmentation fault occurs during truncate + +Problem: +Segmentation fault occurs when bricks are nearly full 100% and in +parallel truncate of a file is attempted (No space left on device). +Prerequisite is that performance xlators are activated +(read-ahead, write-behind etc) +while stack unwind of the frames following an error response +from brick (No space left on device) frame->local includes a memory +location that is not allocated via mem_get but via calloc. +The destroyed frame is always ra_truncate_cbk winded from ra_ftruncate +and the inode ptr is copied to the frame local in the wb_ftruncate. 
+ +Fix: +extra check is added for the pool ptr + +>Change-Id: Ic5d3bd0ab7011e40b2811c6dece063b256e4d9d1 +>Fixes: bz#1797882 +>Signed-off-by: kinsu + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23445 + +BUG: 1842449 +Change-Id: Ic5d3bd0ab7011e40b2811c6dece063b256e4d9d1 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/220540 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/mem-pool.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 73503e0..1390747 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -857,6 +857,14 @@ mem_put(void *ptr) + /* Not one of ours; don't touch it. */ + return; + } ++ ++ if (!hdr->pool_list) { ++ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL, ++ LG_MSG_INVALID_ARG, ++ "invalid argument hdr->pool_list NULL"); ++ return; ++ } ++ + pool_list = hdr->pool_list; + pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST]; + +-- +1.8.3.1 + diff --git a/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch b/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch new file mode 100644 index 0000000..375cfd2 --- /dev/null +++ b/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch @@ -0,0 +1,56 @@ +From 0fed8ca9c6c9e3a9041951bc748c7936d0abc8cf Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Tue, 15 Sep 2020 16:20:19 +0530 +Subject: [PATCH 491/511] glusterd: mount directory getting truncated on + mounting shared_storage + +Issue: +In case of a user created volume the mount point +is the brick path 'ex: /data/brick' but in case of +shared_storage the mount point is '/'.So, here +we increment the array by one so as to get the exact +path of brick without '/', which works fine for other +volumes as the pointer of the brick_dir variable is +at '/', but for shared_storage it is at 
'v'(where v is +starting letter of 'var' directory). So, on incrementing +the path we get in case of shared_storage starts from +'ar/lib/glusterd/...' + +Fix: +Only, increment the pointer if the current position is '/', +else the path will be wrong. + +>Fixes: #1480 + +>Change-Id: Id31bb13f58134ae2099884fbc5984c4e055fb357 +>Signed-off-by: nik-redhat + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24989 + +BUG: 1878077 +Change-Id: Id31bb13f58134ae2099884fbc5984c4e055fb357 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/220536 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ad3750e..b343eee 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1221,7 +1221,8 @@ glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir) + } + + brick_dir = &brickpath[strlen(mnt_pt)]; +- brick_dir++; ++ if (brick_dir[0] == '/') ++ brick_dir++; + + snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir); + } +-- +1.8.3.1 + diff --git a/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch b/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch new file mode 100644 index 0000000..a983baa --- /dev/null +++ b/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch @@ -0,0 +1,188 @@ +From bde1ad97f8739f8370a2bbb92229b1b397ecd82c Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Tue, 8 Dec 2020 19:06:03 +0530 +Subject: [PATCH 492/511] afr/lookup: Pass xattr_req in while doing a selfheal + in lookup + +We were not passing xattr_req when doing a name self heal +as well as a meta data heal. 
Because of this, some xdata +was missing which causes i/o errors + +Upstream patch details: +> Change-Id: Ibfb1205a7eb0195632dc3820116ffbbb8043545f +> Fixes: bz#1728770 +> Signed-off-by: Mohammed Rafi KC +Upstream Patch : https://review.gluster.org/#/c/glusterfs/+/23024/ + +BUG: 1726673 +Change-Id: Ibfb1205a7eb0195632dc3820116ffbbb8043545f +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/220538 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/replicate/bug-1728770-pass-xattrs.t | 52 ++++++++++++++++++++++++++ + tests/include.rc | 1 + + xlators/cluster/afr/src/afr-common.c | 8 +++- + xlators/cluster/afr/src/afr-self-heal-common.c | 9 ++++- + xlators/cluster/afr/src/afr-self-heal.h | 2 +- + 5 files changed, 67 insertions(+), 5 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1728770-pass-xattrs.t + +diff --git a/tests/bugs/replicate/bug-1728770-pass-xattrs.t b/tests/bugs/replicate/bug-1728770-pass-xattrs.t +new file mode 100644 +index 0000000..159c4fc +--- /dev/null ++++ b/tests/bugs/replicate/bug-1728770-pass-xattrs.t +@@ -0,0 +1,52 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../snapshot.rc ++ ++cleanup; ++ ++function fop_on_bad_disk { ++ local path=$1 ++ mkdir $path/dir{1..1000} 2>/dev/null ++ mv $path/dir1 $path/newdir ++ touch $path/foo.txt ++ echo $? ++} ++ ++function ls_fop_on_bad_disk { ++ local path=$1 ++ ls $path ++ echo $? 
++} ++ ++TEST init_n_bricks 6; ++TEST setup_lvm 6; ++ ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3 $H0:$L4 $H0:$L5 $H0:$L6; ++TEST $CLI volume set $V0 health-check-interval 1000; ++ ++TEST $CLI volume start $V0; ++ ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0; ++#corrupt last disk ++dd if=/dev/urandom of=/dev/mapper/patchy_snap_vg_6-brick_lvm bs=512K count=200 status=progress && sync ++ ++ ++# Test the disk is now returning EIO for touch and ls ++EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^1$" fop_on_bad_disk "$L6" ++EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^2$" ls_fop_on_bad_disk "$L6" ++ ++TEST touch $M0/foo{1..100} ++TEST $CLI volume remove-brick $V0 replica 3 $H0:$L4 $H0:$L5 $H0:$L6 start ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$L4 $H0:$L5 $H0:$L6"; ++ ++#check that remove-brick status should not have any failed or skipped files ++var=`$CLI volume remove-brick $V0 $H0:$L4 $H0:$L5 $H0:$L6 status | grep completed` ++TEST [ `echo $var | awk '{print $5}'` = "0" ] ++TEST [ `echo $var | awk '{print $6}'` = "0" ] ++ ++cleanup; +diff --git a/tests/include.rc b/tests/include.rc +index 762c5e2..c925941 100644 +--- a/tests/include.rc ++++ b/tests/include.rc +@@ -89,6 +89,7 @@ GRAPH_SWITCH_TIMEOUT=10 + UNLINK_TIMEOUT=5 + MDC_TIMEOUT=5 + IO_WAIT_TIMEOUT=5 ++DISK_FAIL_TIMEOUT=80 + + LOGDIR=$(gluster --print-logdir) + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 851ccad..fca2cd5 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2609,6 +2609,10 @@ afr_lookup_sh_metadata_wrap(void *opaque) + dict = dict_new(); + if (!dict) + goto out; ++ if (local->xattr_req) { ++ dict_copy(local->xattr_req, dict); ++ } ++ + ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to set link-count in dict "); +@@ 
-2617,7 +2621,7 @@ afr_lookup_sh_metadata_wrap(void *opaque) + if (loc_is_nameless(&local->loc)) { + ret = afr_selfheal_unlocked_discover_on(frame, local->inode, + local->loc.gfid, local->replies, +- local->child_up); ++ local->child_up, dict); + } else { + inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent, + local->loc.name, local->replies, +@@ -2791,7 +2795,7 @@ afr_lookup_selfheal_wrap(void *opaque) + + inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent, + local->loc.name, local->replies, +- local->child_up, NULL); ++ local->child_up, local->xattr_req); + if (inode) + inode_unref(inode); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 36fd3a9..9b6575f 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1861,7 +1861,7 @@ afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict) + int + afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, +- unsigned char *discover_on) ++ unsigned char *discover_on, dict_t *dict) + { + loc_t loc = { + 0, +@@ -1876,6 +1876,8 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + xattr_req = dict_new(); + if (!xattr_req) + return -ENOMEM; ++ if (dict) ++ dict_copy(dict, xattr_req); + + if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) { + dict_unref(xattr_req); +@@ -1906,11 +1908,14 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, + struct afr_reply *replies) + { + afr_local_t *local = NULL; ++ dict_t *dict = NULL; + + local = frame->local; ++ if (local && local->xattr_req) ++ dict = local->xattr_req; + + return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies, +- local->child_up); ++ local->child_up, dict); + } + + unsigned int +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h 
+index b39af02..8f6fb00 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -188,7 +188,7 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, + int + afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, +- unsigned char *discover_on); ++ unsigned char *discover_on, dict_t *dict); + inode_t * + afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent, + const char *name, struct afr_reply *replies, +-- +1.8.3.1 + diff --git a/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch b/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch new file mode 100644 index 0000000..e712886 --- /dev/null +++ b/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch @@ -0,0 +1,283 @@ +From 03de45e5fb1c8aa5369848ed9e52abd1365e1d21 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Wed, 31 Jul 2019 11:34:19 +0530 +Subject: [PATCH 493/511] geo-rep: Note section is required for ignore_deletes + +There exists a window of 15 sec, where the deletes are picked up +by history crawl when the ignore_deletes is set to true. +And it eventually deletes the file/s from slave which is/are not +supposed to be deleted. Though it is working as per design, a +note regarding this is needed. + +Added a warning message indicating the same. +Also logged info when the worker restarts after ignore-deletes +option set. 
+ +>fixes: bz#1708603 +>Change-Id: I103be882fac18b4cef935efa355f5037a396f7c1 +>Signed-off-by: Shwetha K Acharya +Upstream patch: https://review.gluster.org/c/glusterfs/+/22702 + +BUG: 1224906 +Change-Id: I103be882fac18b4cef935efa355f5037a396f7c1 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/220757 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-parser.c | 45 ++++++++++++++++++++------ + cli/src/cli-cmd-volume.c | 20 ++++++++---- + cli/src/cli.h | 3 +- + geo-replication/syncdaemon/gsyncd.py | 2 +- + geo-replication/syncdaemon/master.py | 6 ++++ + tests/00-geo-rep/bug-1708603.t | 63 ++++++++++++++++++++++++++++++++++++ + 6 files changed, 120 insertions(+), 19 deletions(-) + create mode 100644 tests/00-geo-rep/bug-1708603.t + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 5fd05f4..34f17c9 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -2901,7 +2901,8 @@ out: + } + + int32_t +-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) ++cli_cmd_gsync_set_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, char **errstr) + { + int32_t ret = -1; + dict_t *dict = NULL; +@@ -2918,6 +2919,8 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + char *save_ptr = NULL; + char *slave_temp = NULL; + char *token = NULL; ++ gf_answer_t answer = GF_ANSWER_NO; ++ const char *question = NULL; + + GF_ASSERT(words); + GF_ASSERT(options); +@@ -2990,8 +2993,10 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + + if (masteri && gsyncd_url_check(words[masteri])) + goto out; +- if (slavei && !glob && !gsyncd_url_check(words[slavei])) ++ if (slavei && !glob && !gsyncd_url_check(words[slavei])) { ++ gf_asprintf(errstr, "Invalid slave url: %s", words[slavei]); + goto out; ++ } + + w = str_getunamb(words[cmdi], opwords); + if (!w) +@@ 
-3101,16 +3106,36 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + } + if (!ret) + ret = dict_set_int32(dict, "type", type); +- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) ++ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) { ++ if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") && ++ !strcmp((char *)words[wordcount - 1], "true")) { ++ question = ++ "There exists ~15 seconds delay for the option to take" ++ " effect from stime of the corresponding brick. Please" ++ " check the log for the time, the option is effective." ++ " Proceed"; ++ ++ answer = cli_cmd_get_confirmation(state, question); ++ ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_INFO, ++ "Operation " ++ "cancelled, exiting"); ++ *errstr = gf_strdup("Aborted by user."); ++ ret = -1; ++ goto out; ++ } ++ } ++ + ret = config_parse(words, wordcount, dict, cmdi, glob); ++ } + + out: + if (slave_temp) + GF_FREE(slave_temp); +- if (ret) { +- if (dict) +- dict_unref(dict); +- } else ++ if (ret && dict) ++ dict_unref(dict); ++ else + *options = dict; + + return ret; +@@ -5659,9 +5684,9 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + int32_t ret = -1; + char *w = NULL; + char *volname = NULL; +- char *opwords[] = { +- "enable", "disable", "scrub-throttle", "scrub-frequency", "scrub", +- "signing-time", "signer-threads", NULL}; ++ char *opwords[] = {"enable", "disable", "scrub-throttle", ++ "scrub-frequency", "scrub", "signing-time", ++ "signer-threads", NULL}; + char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL}; + char *scrub_freq_values[] = {"hourly", "daily", "weekly", "biweekly", + "monthly", "minute", NULL}; +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 72504ca..6f5bf8b 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2457,6 +2457,7 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word, + rpc_clnt_procedure_t *proc = NULL; 
+ call_frame_t *frame = NULL; + cli_local_t *local = NULL; ++ char *errstr = NULL; + #if (USE_EVENTS) + int ret1 = -1; + int cmd_type = -1; +@@ -2468,16 +2469,21 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word, + + proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GSYNC_SET]; + +- frame = create_frame(THIS, THIS->ctx->pool); +- if (frame == NULL) { +- ret = -1; ++ ret = cli_cmd_gsync_set_parse(state, words, wordcount, &options, &errstr); ++ if (ret) { ++ if (errstr) { ++ cli_err("%s", errstr); ++ GF_FREE(errstr); ++ } else { ++ cli_usage_out(word->pattern); ++ } ++ parse_err = 1; + goto out; + } + +- ret = cli_cmd_gsync_set_parse(words, wordcount, &options); +- if (ret) { +- cli_usage_out(word->pattern); +- parse_err = 1; ++ frame = create_frame(THIS, THIS->ctx->pool); ++ if (frame == NULL) { ++ ret = -1; + goto out; + } + +diff --git a/cli/src/cli.h b/cli/src/cli.h +index c30ae9c..7b4f446 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -269,7 +269,8 @@ int32_t + cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **opt); + + int32_t +-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **opt); ++cli_cmd_gsync_set_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **opt, char **errstr); + + int32_t + cli_cmd_quota_parse(const char **words, int wordcount, dict_t **opt); +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 8940384..215c62d 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -315,7 +315,7 @@ def main(): + + # Log message for loaded config file + if config_file is not None: +- logging.info(lf("Using session config file", path=config_file)) ++ logging.debug(lf("Using session config file", path=config_file)) + + set_term_handler() + excont = FreeObject(exval=0) +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 08e98f8..98637e7 
100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -1549,6 +1549,12 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin): + data_stime = self.get_data_stime() + + end_time = int(time.time()) ++ ++ #as start of historical crawl marks Geo-rep worker restart ++ if gconf.get("ignore-deletes"): ++ logging.info(lf('ignore-deletes config option is set', ++ stime=data_stime)) ++ + logging.info(lf('starting history crawl', + turns=self.history_turns, + stime=data_stime, +diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t +new file mode 100644 +index 0000000..26913f1 +--- /dev/null ++++ b/tests/00-geo-rep/bug-1708603.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../geo-rep.rc ++. $(dirname $0)/../env.rc ++ ++SCRIPT_TIMEOUT=300 ++ ++##Cleanup and start glusterd ++cleanup; ++TEST glusterd; ++TEST pidof glusterd ++ ++ ++##Variables ++GEOREP_CLI="gluster volume geo-replication" ++master=$GMV0 ++SH0="127.0.0.1" ++slave=${SH0}::${GSV0} ++num_active=2 ++num_passive=2 ++master_mnt=$M0 ++slave_mnt=$M1 ++ ++############################################################ ++#SETUP VOLUMES AND GEO-REPLICATION ++############################################################ ++ ++##create_and_start_master_volume ++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $GMV0 ++ ++##create_and_start_slave_volume ++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4}; ++TEST $CLI volume start $GSV0 ++ ++##Mount master ++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++#Create geo-rep session ++TEST create_georep_session $master $slave ++ ++echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1 ++EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++echo y | $GEOREP_CLI $master $slave config 
ignore-deletes true ++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Delete Geo-rep ++TEST $GEOREP_CLI $master $slave delete ++ ++#Cleanup authorized keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++ ++cleanup; ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +-- +1.8.3.1 + diff --git a/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch b/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch new file mode 100644 index 0000000..d11b138 --- /dev/null +++ b/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch @@ -0,0 +1,54 @@ +From 1b24bc4319203128a9ff7f97fe14f4b3622c4eec Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 26 Aug 2020 20:05:35 +0530 +Subject: [PATCH 494/511] glusterd: start the brick on a different port + +Problem: brick fails to start when the port provided by +glusterd is in use by any other process + +Solution: glusterd should check errno set by runner_run() +and if it is set to EADDRINUSE, it should allocate a new +port to the brick and try to start it again. + +Previously ret value is checked instead of errno, so the +retry part never executed. Now, we initialize errno to 0 +before calling runner framework. and afterwards store the +errno into ret to avoid modification of errno in subsequent +function calls. 
+ +>fixes: #1101 + +>Change-Id: I1aa048a77c5f8b035dece36976d60602d9753b1a +>Signed-off-by: Sanju Rakonde +>Signed-off-by: nik-redhat + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24923/ + +BUG: 1865796 +Change-Id: I1aa048a77c5f8b035dece36976d60602d9753b1a +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/220541 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index b343eee..f7030fb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2289,7 +2289,10 @@ retry: + + if (wait) { + synclock_unlock(&priv->big_lock); ++ errno = 0; + ret = runner_run(&runner); ++ if (errno != 0) ++ ret = errno; + synclock_lock(&priv->big_lock); + + if (ret == EADDRINUSE) { +-- +1.8.3.1 + diff --git a/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch b/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch new file mode 100644 index 0000000..6b3f6f5 --- /dev/null +++ b/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch @@ -0,0 +1,60 @@ +From 17a2a880290d2038c913c23985df620e3c9741b3 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Mon, 16 Mar 2020 15:17:23 +0000 +Subject: [PATCH 495/511] geo-rep: descriptive message when worker crashes due + to EIO + +With this patch now you can notice log if it is due to EIO: + +[2020-03-16 16:24:48.293837] E [syncdutils(worker /bricks/brick1/mbr3):348:log_raise_exception] : Getting "Input/Output error" is most likely due to a. Brick is down or b. Split brain issue. +[2020-03-16 16:24:48.293915] E [syncdutils(worker /bricks/brick1/mbr3):352:log_raise_exception] : This is expected as per design to keep the consistency of the file system. 
Once the above issue is resolved geo-rep would automatically proceed further. + +>Change-Id: Ie33f2440bc96089731ce12afa8dab91d9550a7ca +>Fixes: #1104 +>Signed-off-by: Sunny Kumar +>Upstream Patch : https://review.gluster.org/c/glusterfs/+/24228/ + +BUG: 1412494 +Change-Id: Ie33f2440bc96089731ce12afa8dab91d9550a7ca +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/220874 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/syncdutils.py | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index f43e13b..d5a94d4 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -22,7 +22,7 @@ import socket + from subprocess import PIPE + from threading import Lock, Thread as baseThread + from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED +-from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode ++from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO + from signal import signal, SIGTERM + import select as oselect + from os import waitpid as owaitpid +@@ -346,6 +346,17 @@ def log_raise_exception(excont): + ECONNABORTED): + logging.error(lf('Gluster Mount process exited', + error=errorcode[exc.errno])) ++ elif isinstance(exc, OSError) and exc.errno == EIO: ++ logging.error("Getting \"Input/Output error\" " ++ "is most likely due to " ++ "a. Brick is down or " ++ "b. Split brain issue.") ++ logging.error("This is expected as per design to " ++ "keep the consistency of the file system. 
" ++ "Once the above issue is resolved " ++ "geo-replication would automatically " ++ "proceed further.") ++ logtag = "FAIL" + else: + logtag = "FAIL" + if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG): +-- +1.8.3.1 + diff --git a/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch b/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch new file mode 100644 index 0000000..590aea3 --- /dev/null +++ b/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch @@ -0,0 +1,139 @@ +From 5893e64ca8c147b7acfa12cd9824f254d53ee261 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Wed, 4 Nov 2020 09:02:03 +0530 +Subject: [PATCH 496/511] posix: Use MALLOC instead of alloca to allocate + memory for xattrs list (#1730) + +In case of file is having huge xattrs on backend a brick process is +crashed while alloca(size) limit has been crossed 256k because iot_worker +stack size is 256k. + +> Fixes: #1699 +> Signed-off-by: Mohit Agrawal +> Change-Id: I100468234f83329a7d65b43cbe4e10450c1ccecd +> (Cherry pick from commit fd666caa35ac84dd1cba55399761982011b77112) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/pull/1828) + +Change-Id: I100468234f83329a7d65b43cbe4e10450c1ccecd +Bug: 1903468 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/220872 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix-gfid-path.c | 5 ++++- + xlators/storage/posix/src/posix-helpers.c | 3 ++- + xlators/storage/posix/src/posix-inode-fd-ops.c | 12 +++++++++--- + 3 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-gfid-path.c b/xlators/storage/posix/src/posix-gfid-path.c +index 64b5c6c..01315ac 100644 +--- a/xlators/storage/posix/src/posix-gfid-path.c ++++ b/xlators/storage/posix/src/posix-gfid-path.c +@@ -195,7 +195,8 @@ posix_get_gfid2path(xlator_t *this, inode_t *inode, const char 
*real_path, + if (size == 0) + goto done; + } +- list = alloca(size); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + *op_errno = errno; + goto err; +@@ -309,6 +310,7 @@ done: + GF_FREE(paths[j]); + } + ret = 0; ++ GF_FREE(list); + return ret; + err: + if (path) +@@ -317,5 +319,6 @@ err: + if (paths[j]) + GF_FREE(paths[j]); + } ++ GF_FREE(list); + return ret; + } +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 73a44be..ceac52a 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -349,7 +349,7 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler) + goto out; + } + +- list = alloca(size); ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + goto out; + } +@@ -379,6 +379,7 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler) + ret = 0; + + out: ++ GF_FREE(list); + return ret; + } + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 21119ea..1d37aed 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -3305,7 +3305,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + goto out; + } + +- list = alloca(size); ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + *op_errno = errno; + goto out; +@@ -3385,6 +3385,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + op_ret = 0; + + out: ++ GF_FREE(list); + return op_ret; + } + +@@ -3810,7 +3811,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + if (size == 0) + goto done; + } +- list = alloca(size); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + op_errno = errno; + goto out; +@@ -3937,6 +3939,7 @@ out: + dict_unref(dict); + } + ++ GF_FREE(list); + return 0; + } + +@@ -4136,7 +4139,8 @@ posix_fgetxattr(call_frame_t *frame, xlator_t 
*this, fd_t *fd, const char *name, + if (size == 0) + goto done; + } +- list = alloca(size + 1); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + op_ret = -1; + op_errno = ENOMEM; +@@ -4240,6 +4244,8 @@ out: + if (dict) + dict_unref(dict); + ++ GF_FREE(list); ++ + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch b/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch new file mode 100644 index 0000000..9d477ae --- /dev/null +++ b/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch @@ -0,0 +1,80 @@ +From 85a5cce40dba0393e636c0eb5af9d8f8746f2315 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 2 Jan 2020 10:23:52 +0530 +Subject: [PATCH 497/511] socket: Use AES128 cipher in SSL if AES is supported + by CPU + +SSL performance is improved after configuring AES128 cipher +so use AES128 cipher as a default cipher on the CPU those +enabled AES bits otherwise ssl use AES256 cipher + +> Change-Id: I91c50fe987cbb22ed76f8012094730c592c63506 +> Fixes: #1050 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 177cc09d24515596eb51739ce0a276c26e3c52f1) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23952/) + +Change-Id: I91c50fe987cbb22ed76f8012094730c592c63506 +Bug: 1612973 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/220870 +Tested-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 54cd5df..1ee7320 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4238,6 +4238,34 @@ static void __attribute__((destructor)) fini_openssl_mt(void) + ERR_free_strings(); + } + ++/* The 
function returns 0 if AES bit is enabled on the CPU */ ++static int ++ssl_check_aes_bit(void) ++{ ++ FILE *fp = fopen("/proc/cpuinfo", "r"); ++ int ret = 1; ++ size_t len = 0; ++ char *line = NULL; ++ char *match = NULL; ++ ++ GF_ASSERT(fp != NULL); ++ ++ while (getline(&line, &len, fp) > 0) { ++ if (!strncmp(line, "flags", 5)) { ++ match = strstr(line, " aes"); ++ if ((match != NULL) && ((match[4] == ' ') || (match[4] == 0))) { ++ ret = 0; ++ break; ++ } ++ } ++ } ++ ++ free(line); ++ fclose(fp); ++ ++ return ret; ++} ++ + static int + ssl_setup_connection_params(rpc_transport_t *this) + { +@@ -4261,6 +4289,10 @@ ssl_setup_connection_params(rpc_transport_t *this) + return 0; + } + ++ if (!ssl_check_aes_bit()) { ++ cipher_list = "AES128:" DEFAULT_CIPHER_LIST; ++ } ++ + priv->ssl_own_cert = DEFAULT_CERT_PATH; + if (dict_get_str(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) { + if (!priv->ssl_enabled) { +-- +1.8.3.1 + diff --git a/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch b/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch new file mode 100644 index 0000000..078c390 --- /dev/null +++ b/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch @@ -0,0 +1,69 @@ +From 11d648660b8bd246756f87b2f40c72fbabf084d1 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Tue, 19 May 2020 16:13:01 +0100 +Subject: [PATCH 498/511] geo-rep: Fix corner case in rename on mkdir during + hybrid crawl +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Problem: +The issue is being hit during hybrid mode while handling rename on slave. +In this special case the rename is recorded as mkdir and geo-rep processes it +by resolving the path from the backend. + +While resolving the backend path during this special handling one corner case is not considered. 
+ + +Traceback (most recent call last): +  File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in worker +    res = getattr(self.obj, rmeth)(*in_data[2:]) +  File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", line 588, in entry_ops +    src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) +  File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 710, in get_slv_dir_path +    dir_entry = os.path.join(pfx, pargfid, basename) +  File "/usr/lib64/python2.7/posixpath.py", line 75, in join +    if b.startswith('/'): +AttributeError: 'int' object has no attribute 'startswith' + +In pyhthon3: +Traceback (most recent call last): + File "", line 1, in + File "/usr/lib64/python3.8/posixpath.py", line 90, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.8/genericpath.py", line 152, in _check_arg_types + raise TypeError(f'{funcname}() argument must be str, bytes, or ' +TypeError: join() argument must be str, bytes, or os.PathLike object, not 'int' + + +>Change-Id: I8b926899c60ad8c4ffc886d57028ba70fd21e332 +>Fixes: #1250 +>Signed-off-by: Sunny Kumar +Upstream Patch: https://review.gluster.org/c/glusterfs/+/24468/ + +BUG: 1835229 +Change-Id: I8b926899c60ad8c4ffc886d57028ba70fd21e332 +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/220867 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/syncdutils.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index d5a94d4..26c79d0 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -732,6 +732,8 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): + else: + dirpath = dirpath.strip("/") + pargfid = get_gfid_from_mnt(dirpath) ++ if isinstance(pargfid, int): ++ return None + dir_entry = os.path.join(pfx, pargfid, basename) + 
return dir_entry + +-- +1.8.3.1 + diff --git a/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch b/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch new file mode 100644 index 0000000..edeca1a --- /dev/null +++ b/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch @@ -0,0 +1,63 @@ +From f78a5d86c55149d80b6efdf60eae7221c238654e Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 24 Sep 2020 12:43:51 +0000 +Subject: [PATCH 499/511] gfapi: give appropriate error when size exceeds + +This patch helps generate an appropriate error message +when the gfapi tries to write data equal to or +greater than 1 GB due to the limitation at the +socket layer. + +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1557 +> fixes: #1518 +> Change-Id: I1234a0b5a6e675a0b20c6b1afe0f4390fd721f6f +> Signed-off-by: Rinku Kothiya + +BUG: 1691320 +Change-Id: I1234a0b5a6e675a0b20c6b1afe0f4390fd721f6f +Signed-off-by: Rinku Kothiya +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/219998 +Tested-by: RHGS Build Bot +--- + api/src/gfapi-messages.h | 4 +++- + api/src/glfs-fops.c | 8 ++++++++ + 2 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/api/src/gfapi-messages.h b/api/src/gfapi-messages.h +index 68d1242..2ffd5ac 100644 +--- a/api/src/gfapi-messages.h ++++ b/api/src/gfapi-messages.h +@@ -49,6 +49,8 @@ GLFS_MSGID(API, API_MSG_MEM_ACCT_INIT_FAILED, API_MSG_MASTER_XLATOR_INIT_FAILED, + API_MSG_INODE_LINK_FAILED, API_MSG_STATEDUMP_FAILED, + API_MSG_XREADDIRP_R_FAILED, API_MSG_LOCK_INSERT_MERGE_FAILED, + API_MSG_SETTING_LOCK_TYPE_FAILED, API_MSG_INODE_FIND_FAILED, +- API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED); ++ API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED, ++ API_MSG_INVALID_ARG); + ++#define API_MSG_INVALID_ARG_STR "Invalid" + #endif /* !_GFAPI_MESSAGES_H__ */ +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index e6adea5..051541f 100644 +--- 
a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -1525,6 +1525,14 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + + GF_REF_GET(glfd); + ++ if (iovec->iov_len >= GF_UNIT_GB) { ++ ret = -1; ++ errno = EINVAL; ++ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG, ++ "size >= %llu is not allowed", GF_UNIT_GB, NULL); ++ goto out; ++ } ++ + subvol = glfs_active_subvol(glfd->fs); + if (!subvol) { + ret = -1; +-- +1.8.3.1 + diff --git a/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch b/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch new file mode 100644 index 0000000..4898422 --- /dev/null +++ b/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch @@ -0,0 +1,104 @@ +From 60789c658ea22063c26168cb4ce15ac5fd279e58 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Mon, 14 Dec 2020 10:57:03 +0530 +Subject: [PATCH 500/511] features/shard: Convert shard block indices to uint64 + +This patch fixes a crash in FOPs that operate on really large sharded +files where number of participant shards could sometimes exceed +signed int32 max. + +The patch also adds GF_ASSERTs to ensure that number of participating +shards is always greater than 0 for files that do have more than one +shard. 
+ +Upstream: +> https://review.gluster.org/#/c/glusterfs/+/23407/ +> Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 +> Fixes: #1348 +> Signed-off-by: Krutika Dhananjay + +BUG: 1752739 +Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 +Signed-off-by: Vinayakswami Hariharmath +Reviewed-on: https://code.engineering.redhat.com/gerrit/221061 +Tested-by: Ravishankar Narayanankutty +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + xlators/features/shard/src/shard.c | 14 ++++++++------ + xlators/features/shard/src/shard.h | 6 +++--- + 2 files changed, 11 insertions(+), 9 deletions(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 16d557b..a967f35 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1855,10 +1855,9 @@ int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, + */ + if (!inode) { + gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent" +- " in backend: %s. Directly proceeding to update " +- "file size", +- uuid_utoa(inode->gfid)); ++ "Last shard to be truncated absent in backend: " PRIu64 ++ " of gfid: %s. Directly proceeding to update file size", ++ local->first_block, uuid_utoa(local->loc.inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; +@@ -2389,6 +2388,7 @@ int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { + get_highest_block(0, local->prebuf.ia_size, local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = + (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; + +@@ -4809,6 +4809,7 @@ int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { + get_highest_block(local->offset, local->total_size, local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = local->loc.inode; + + local->inode_list = +@@ -5266,6 +5267,7 @@ int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + local->last_block = + get_highest_block(local->offset, local->total_size, local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->inode_list = + GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); + if (!local->inode_list) { +@@ -5274,8 +5276,8 @@ int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + } + + gf_msg_trace( +- this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " " +- "last_block=%" PRIu32 " num_blocks=%" PRIu32 ++ this->name, 0, "%s: gfid=%s first_block=%" PRIu64 " " ++ "last_block=%" PRIu64 " num_blocks=%" PRIu64 + " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", + gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), + local->first_block, local->last_block, local->num_blocks, local->offset, +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 1721417..4fe181b 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame, + typedef struct shard_local { + int op_ret; + int op_errno; +- int first_block; +- int last_block; +- int num_blocks; ++ uint64_t first_block; ++ uint64_t last_block; ++ uint64_t num_blocks; + int call_count; + int eexist_count; + int create_count; +-- +1.8.3.1 + diff --git a/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch 
b/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch new file mode 100644 index 0000000..5152df8 --- /dev/null +++ b/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch @@ -0,0 +1,48 @@ +From 070698ede9c3765c95364e8207c8311dbf895499 Mon Sep 17 00:00:00 2001 +From: kiyer +Date: Tue, 8 Dec 2020 15:18:49 +0530 +Subject: [PATCH 501/511] Cli: Removing old syntax of tier cmds from help menu + +Remove old syntax of attach-tier and detach-tier +commands from help menu. + +Label: DOWNSTREAM ONLY +BUG: 1813866 + +Change-Id: If86e4828b475fb593a5105ca8deac96374f9542d +Signed-off-by: kiyer +Reviewed-on: https://code.engineering.redhat.com/gerrit/220510 +Tested-by: RHGS Build Bot +Reviewed-by: Mohit Agrawal +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 13 ------------- + 1 file changed, 13 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 6f5bf8b..b6bef80 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3331,19 +3331,6 @@ struct cli_cmd tier_cmds[] = { + {"volume tier detach ", + cli_cmd_volume_tier_cbk, "Detach the hot tier from "}, + +- {"volume attach-tier [] ...", +- cli_cmd_volume_tier_cbk, +- "NOTE: this is old syntax, will be deprecated in next release. " +- "Please use gluster volume tier attach " +- "[] ..."}, +- +- {"volume detach-tier " +- "", +- cli_cmd_volume_tier_cbk, +- "NOTE: this is old syntax, will be deprecated in next release. 
" +- "Please use gluster volume tier detach " +- "{start|stop|commit} [force]"}, +- + {"volume tier status\n" + "volume tier start [force]\n" + "volume tier stop\n" +-- +1.8.3.1 + diff --git a/SOURCES/0502-dht-fixing-a-permission-update-issue.patch b/SOURCES/0502-dht-fixing-a-permission-update-issue.patch new file mode 100644 index 0000000..7c136d0 --- /dev/null +++ b/SOURCES/0502-dht-fixing-a-permission-update-issue.patch @@ -0,0 +1,225 @@ +From 3f1eee125a35c33ecb078e5d3bfd80d80e63881d Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman +Date: Wed, 15 Jan 2020 12:02:05 +0200 +Subject: [PATCH 502/511] dht - fixing a permission update issue + +When bringing back a downed brick and performing lookup from the client +side, the permission on said brick aren't updated on the first lookup, +but only on the second. + +This patch modifies permission update logic so the first lookup will +trigger a permission update on the downed brick. + +LIMITATIONS OF THE PATCH: +As the choice of source depends on whether the directory has layout or not. +Even the directories on the newly added brick will have layout xattr[zeroed], but the same is not true for a root directory. +Hence, in case in the entire cluster only the newly added bricks are up [and others are down], then any change in permission during this time will be overwritten by the older permissions when the cluster is restarted. 
+ +Upstream: +> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24020/ +> fixes: #999 +> Change-Id: Ieb70246d41e59f9cae9f70bc203627a433dfbd33 +> Signed-off-by: Barak Sason Rofman + +BUG: 1663821 +Change-Id: Ieb70246d41e59f9cae9f70bc203627a433dfbd33 +Signed-off-by: Barak Sason Rofman +Reviewed-on: https://code.engineering.redhat.com/gerrit/221116 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/bug-1064147.t | 71 ++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 28 ++++++++++--- + xlators/cluster/dht/src/dht-selfheal.c | 15 +++++-- + xlators/storage/posix/src/posix-common.c | 16 +++---- + 4 files changed, 111 insertions(+), 19 deletions(-) + create mode 100755 tests/bugs/bug-1064147.t + +diff --git a/tests/bugs/bug-1064147.t b/tests/bugs/bug-1064147.t +new file mode 100755 +index 0000000..617a1aa +--- /dev/null ++++ b/tests/bugs/bug-1064147.t +@@ -0,0 +1,71 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $V0 $H0:/${V0}{1,2}; ++ ++# Verify volume creation ++ EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++ EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++# Start volume and verify successful start ++ TEST $CLI volume start $V0; ++ EXPECT 'Started' volinfo_field $V0 'Status'; ++ TEST glusterfs -s $H0 --volfile-id=$V0 $M0 ++#------------------------------------------------------------ ++ ++# Test case 1 - Subvolume down + Healing ++#------------------------------------------------------------ ++# Kill 2nd brick process ++TEST kill -9 `ps aux | grep glusterfsd | grep ${V0}2 | grep -v grep | awk '{print $2}'`; ++ ++# Change root permissions ++TEST chmod 444 $M0 ++ ++# Store permission for comparision ++TEST permission_new=`stat -c "%A" $M0` ++ ++# Bring up the killed brick process ++TEST $CLI volume start $V0 force ++ ++# Perform lookup ++sleep 5 ++TEST ls $M0 ++ ++# Check brick permissions ++TEST brick_perm=`stat -c "%A" /${V0}2` ++TEST [ ${brick_perm} = ${permission_new} ] ++#------------------------------------------------------------ ++ ++# Test case 2 - Add-brick + Healing ++#------------------------------------------------------------ ++# Change root permissions ++TEST chmod 777 $M0 ++ ++# Store permission for comparision ++TEST permission_new_2=`stat -c "%A" $M0` ++ ++# Add a 3rd brick ++TEST $CLI volume add-brick $V0 $H0:/${V0}3 ++ ++# Perform lookup ++sleep 5 ++TEST ls $M0 ++ ++# Check permissions on the new brick ++TEST brick_perm2=`stat -c "%A" /${V0}3` ++ ++TEST [ ${brick_perm2} = ${permission_new_2} ] ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 4db89df..fe1d0ee 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ 
b/xlators/cluster/dht/src/dht-common.c +@@ -1363,13 +1363,29 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + dht_aggregate_xattr(local->xattr, xattr); + } + ++ if (__is_root_gfid(stbuf->ia_gfid)) { ++ ret = dht_dir_has_layout(xattr, conf->xattr_name); ++ if (ret >= 0) { ++ if (is_greater_time(local->prebuf.ia_ctime, ++ local->prebuf.ia_ctime_nsec, ++ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) { ++ /* Choose source */ ++ local->prebuf.ia_gid = stbuf->ia_gid; ++ local->prebuf.ia_uid = stbuf->ia_uid; ++ ++ local->prebuf.ia_ctime = stbuf->ia_ctime; ++ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec; ++ local->prebuf.ia_prot = stbuf->ia_prot; ++ } ++ } ++ } ++ + if (local->stbuf.ia_type != IA_INVAL) { + /* This is not the first subvol to respond */ +- if (!__is_root_gfid(stbuf->ia_gfid) && +- ((local->stbuf.ia_gid != stbuf->ia_gid) || +- (local->stbuf.ia_uid != stbuf->ia_uid) || +- (is_permission_different(&local->stbuf.ia_prot, +- &stbuf->ia_prot)))) { ++ if ((local->stbuf.ia_gid != stbuf->ia_gid) || ++ (local->stbuf.ia_uid != stbuf->ia_uid) || ++ (is_permission_different(&local->stbuf.ia_prot, ++ &stbuf->ia_prot))) { + local->need_attrheal = 1; + } + } +@@ -10969,7 +10985,7 @@ dht_notify(xlator_t *this, int event, void *data, ...) 
+ if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) +- gf_defrag_status_get(conf, output, _gf_false); ++ gf_defrag_status_get(conf, output, _gf_false); + else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) + gf_defrag_start_detach_tier(defrag); + else if (cmd == GF_DEFRAG_CMD_DETACH_START) +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index f5dfff9..f4e17d1 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -2097,9 +2097,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref(this, layout); + +- if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) { +- /*Use the one in the mds_stbuf*/ +- local->stbuf = local->mds_stbuf; ++ if (local->need_attrheal) { ++ if (__is_root_gfid(local->stbuf.ia_gfid)) { ++ local->stbuf.ia_gid = local->prebuf.ia_gid; ++ local->stbuf.ia_uid = local->prebuf.ia_uid; ++ ++ local->stbuf.ia_ctime = local->prebuf.ia_ctime; ++ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec; ++ local->stbuf.ia_prot = local->prebuf.ia_prot; ++ ++ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) { ++ local->stbuf = local->mds_stbuf; ++ } + } + + if (!__is_root_gfid(local->stbuf.ia_gfid)) { +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index c5a43a1..e5c6e62 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -598,6 +598,7 @@ posix_init(xlator_t *this) + int force_directory = -1; + int create_mask = -1; + int create_directory_mask = -1; ++ char value; + + dir_data = dict_get(this->options, "directory"); + +@@ -654,16 +655,11 @@ posix_init(xlator_t *this) + } + + /* Check for Extended attribute support, if not present, log it */ +- op_ret = sys_lsetxattr(dir_data->data, 
"trusted.glusterfs.test", "working", +- 8, 0); +- if (op_ret != -1) { +- ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); +- if (ret) { +- gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, +- "failed to remove xattr: " +- "trusted.glusterfs.test"); +- } +- } else { ++ size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); ++ ++ if ((size == -1) && (errno == EOPNOTSUPP)) { ++ gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, ++ "getxattr returned %zd", size); + tmp_data = dict_get(this->options, "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { +-- +1.8.3.1 + diff --git a/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch b/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch new file mode 100644 index 0000000..466bf4e --- /dev/null +++ b/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch @@ -0,0 +1,179 @@ +From 5946a6ec18976c0f52162fe0f47e9b5171af87ec Mon Sep 17 00:00:00 2001 +From: Soumya Koduri +Date: Mon, 6 Apr 2020 12:36:44 +0530 +Subject: [PATCH 503/511] gfapi: Suspend synctasks instead of blocking them + +There are certain conditions which blocks the current +execution thread (like waiting on mutex lock or condition +variable or I/O response). In such cases, if it is a +synctask thread, we should suspend the task instead +of blocking it (like done in SYNCOP using synctask_yield) + +This is to avoid deadlock like the one mentioned below - + +1) synctaskA sets fs->migration_in_progress to 1 and + does I/O (LOOKUP) +2) Other synctask threads wait for fs->migration_in_progress + to be reset to 0 by synctaskA and hence blocked +3) but synctaskA cannot resume as all synctask threads are blocked + on (2). + +Note: this same approach is already used by few other components +like syncbarrier etc. 
+ +>Change-Id: If90f870d663bb242c702a5b86ac52eeda67c6f0d +>Fixes: #1146 +>Signed-off-by: Soumya Koduri +Upstream patch: https://review.gluster.org/c/glusterfs/+/24276 + +BUG: 1779238 +Change-Id: If90f870d663bb242c702a5b86ac52eeda67c6f0d +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/221081 +Tested-by: RHGS Build Bot +Reviewed-by: Soumya Koduri +--- + api/src/glfs-internal.h | 34 ++++++++++++++++++++++++++++++++-- + api/src/glfs-resolve.c | 9 +++++++++ + api/src/glfs.c | 9 +++++++++ + 3 files changed, 50 insertions(+), 2 deletions(-) + +diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h +index 55401b2..15cf0ee 100644 +--- a/api/src/glfs-internal.h ++++ b/api/src/glfs-internal.h +@@ -16,6 +16,7 @@ + #include + #include "glfs-handles.h" + #include ++#include + + #define GLFS_SYMLINK_MAX_FOLLOW 2048 + +@@ -207,6 +208,7 @@ struct glfs { + glfs_upcall_cbk up_cbk; /* upcall cbk function to be registered */ + void *up_data; /* Opaque data provided by application + * during upcall registration */ ++ struct list_head waitq; /* waiting synctasks */ + }; + + /* This enum is used to maintain the state of glfd. 
In case of async fops +@@ -442,6 +444,34 @@ glfs_process_upcall_event(struct glfs *fs, void *data) + THIS = glfd->fd->inode->table->xl->ctx->master; \ + } while (0) + ++#define __GLFS_LOCK_WAIT(fs) \ ++ do { \ ++ struct synctask *task = NULL; \ ++ \ ++ task = synctask_get(); \ ++ \ ++ if (task) { \ ++ list_add_tail(&task->waitq, &fs->waitq); \ ++ pthread_mutex_unlock(&fs->mutex); \ ++ synctask_yield(task, NULL); \ ++ pthread_mutex_lock(&fs->mutex); \ ++ } else { \ ++ /* non-synctask */ \ ++ pthread_cond_wait(&fs->cond, &fs->mutex); \ ++ } \ ++ } while (0) ++ ++#define __GLFS_SYNCTASK_WAKE(fs) \ ++ do { \ ++ struct synctask *waittask = NULL; \ ++ \ ++ while (!list_empty(&fs->waitq)) { \ ++ waittask = list_entry(fs->waitq.next, struct synctask, waitq); \ ++ list_del_init(&waittask->waitq); \ ++ synctask_wake(waittask); \ ++ } \ ++ } while (0) ++ + /* + By default all lock attempts from user context must + use glfs_lock() and glfs_unlock(). This allows +@@ -466,10 +496,10 @@ glfs_lock(struct glfs *fs, gf_boolean_t wait_for_migration) + pthread_mutex_lock(&fs->mutex); + + while (!fs->init) +- pthread_cond_wait(&fs->cond, &fs->mutex); ++ __GLFS_LOCK_WAIT(fs); + + while (wait_for_migration && fs->migration_in_progress) +- pthread_cond_wait(&fs->cond, &fs->mutex); ++ __GLFS_LOCK_WAIT(fs); + + return 0; + } +diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c +index 062b7dc..58b6ace 100644 +--- a/api/src/glfs-resolve.c ++++ b/api/src/glfs-resolve.c +@@ -65,6 +65,9 @@ __glfs_first_lookup(struct glfs *fs, xlator_t *subvol) + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return ret; + } + +@@ -154,6 +157,9 @@ __glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode, + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return newinode; + } + +@@ -841,6 +847,9 @@ 
__glfs_migrate_fd(struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd) + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return newfd; + } + +diff --git a/api/src/glfs.c b/api/src/glfs.c +index f36616d..ae994fa 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -740,6 +740,7 @@ glfs_new_fs(const char *volname) + + INIT_LIST_HEAD(&fs->openfds); + INIT_LIST_HEAD(&fs->upcall_list); ++ INIT_LIST_HEAD(&fs->waitq); + + PTHREAD_MUTEX_INIT(&fs->mutex, NULL, fs->pthread_flags, GLFS_INIT_MUTEX, + err); +@@ -1228,6 +1229,7 @@ pub_glfs_fini(struct glfs *fs) + call_pool_t *call_pool = NULL; + int fs_init = 0; + int err = -1; ++ struct synctask *waittask = NULL; + + DECLARE_OLD_THIS; + +@@ -1249,6 +1251,13 @@ pub_glfs_fini(struct glfs *fs) + + call_pool = fs->ctx->pool; + ++ /* Wake up any suspended synctasks */ ++ while (!list_empty(&fs->waitq)) { ++ waittask = list_entry(fs->waitq.next, struct synctask, waitq); ++ list_del_init(&waittask->waitq); ++ synctask_wake(waittask); ++ } ++ + while (countdown--) { + /* give some time for background frames to finish */ + pthread_mutex_lock(&fs->mutex); +-- +1.8.3.1 + diff --git a/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch b/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch new file mode 100644 index 0000000..21d7f7f --- /dev/null +++ b/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch @@ -0,0 +1,109 @@ +From baa566be8832a56fdea7068d84844ec1ec84d8d9 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Thu, 15 Oct 2020 16:28:58 +0530 +Subject: [PATCH 504/511] io-stats: Configure ios_sample_buf_size based on + sample_interval value (#1574) + +io-stats xlator declares an ios_sample_buf_size 64k object(10M) per xlator +but in case sample_interval is 0 this big buffer is not required so +declare the default value only while sample_interval is not 0. The new +change would be 
helpful to reduce RSS size for a brick and shd process +while the number of volumes are huge. + +> Change-Id: I3e82cca92e40549355edfac32580169f3ce51af8 +> Fixes: #1542 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit f71660eb879a9cd5761e5adbf10c783e959a990a) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1542) + +Change-Id: I3e82cca92e40549355edfac32580169f3ce51af8 +BUG: 1898778 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221183 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/glusterd/daemon-log-level-option.t | 8 ++++---- + xlators/debug/io-stats/src/io-stats.c | 26 ++++++++++++++++++++++---- + 2 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/tests/bugs/glusterd/daemon-log-level-option.t b/tests/bugs/glusterd/daemon-log-level-option.t +index 66e55e3..5352a63 100644 +--- a/tests/bugs/glusterd/daemon-log-level-option.t ++++ b/tests/bugs/glusterd/daemon-log-level-option.t +@@ -61,8 +61,8 @@ rm -f /var/log/glusterfs/glustershd.log + TEST $CLI volume set all cluster.daemon-log-level WARNING + TEST $CLI volume start $V0 + +-# log should not have any info messages +-EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log" ++# log does have 1 info message specific to configure ios_sample_buf_size in io-stats xlator ++EXPECT 1 Info_messages_count "/var/log/glusterfs/glustershd.log" + + # log should not have any debug messages + EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log" +@@ -78,8 +78,8 @@ rm -f /var/log/glusterfs/glustershd.log + TEST $CLI volume set all cluster.daemon-log-level ERROR + TEST $CLI volume start $V0 + +-# log should not have any info messages +-EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log" ++# log does have 1 info message specific to configure ios_sample_buf_size in io-stats xlator ++EXPECT 1 Info_messages_count "/var/log/glusterfs/glustershd.log" + + # log should 
not have any warning messages + EXPECT 0 Warning_messages_count "/var/log/glusterfs/glustershd.log" +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index aa91a0a..9b34895 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -3724,6 +3724,15 @@ xlator_set_loglevel(xlator_t *this, int log_level) + } + } + ++void ++ios_sample_buf_size_configure(char *name, struct ios_conf *conf) ++{ ++ conf->ios_sample_buf_size = 1024; ++ gf_log(name, GF_LOG_INFO, ++ "Configure ios_sample_buf " ++ " size is 1024 because ios_sample_interval is 0"); ++} ++ + int + reconfigure(xlator_t *this, dict_t *options) + { +@@ -3779,8 +3788,13 @@ reconfigure(xlator_t *this, dict_t *options) + int32, out); + GF_OPTION_RECONF("ios-dump-format", dump_format_str, options, str, out); + ios_set_log_format_code(conf, dump_format_str); +- GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, options, +- int32, out); ++ if (conf->ios_sample_interval) { ++ GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, ++ options, int32, out); ++ } else { ++ ios_sample_buf_size_configure(this->name, conf); ++ } ++ + GF_OPTION_RECONF("sys-log-level", sys_log_str, options, str, out); + if (sys_log_str) { + sys_log_level = glusterd_check_log_level(sys_log_str); +@@ -3947,8 +3961,12 @@ init(xlator_t *this) + GF_OPTION_INIT("ios-dump-format", dump_format_str, str, out); + ios_set_log_format_code(conf, dump_format_str); + +- GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32, +- out); ++ if (conf->ios_sample_interval) { ++ GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32, ++ out); ++ } else { ++ ios_sample_buf_size_configure(this->name, conf); ++ } + + ret = ios_init_sample_buf(conf); + if (ret) { +-- +1.8.3.1 + diff --git a/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch 
b/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch new file mode 100644 index 0000000..a0f6b62 --- /dev/null +++ b/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch @@ -0,0 +1,107 @@ +From 43a8e2c7441b14f5f238cb11d83f32f248b16abb Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 13 Oct 2020 18:56:20 +0530 +Subject: [PATCH 505/511] trash: Create inode_table only while feature is + enabled + +Currently trash xlator create a inode table(1M) even if +feature is not enabled.In brick_mux environment while 250 +bricks are attached with a single brick process and feature +is not enable brick process increase RSS size unnecessarily. + +Solution: Create inode_table only while a feature is enabled. +The patch reduces 250M RSS size per brick process +if trash feature is not enabled. + +> Change-Id: I11a6fd2b8419fe2988f398be6ec30fb4f3b99a5d +> Fixes: #1543 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 32f25e7b1b4b080ab2640e178b407c878e629376) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1543) + +Change-Id: I11a6fd2b8419fe2988f398be6ec30fb4f3b99a5d +BUG: 1898781 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221184 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/trash/src/trash.c | 47 +++++++++++++++++++++++++++++++++++--- + 1 file changed, 44 insertions(+), 3 deletions(-) + +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index f96ed73..93f020f 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -2235,16 +2235,47 @@ reconfigure(xlator_t *this, dict_t *options) + char trash_dir[PATH_MAX] = { + 0, + }; ++ gf_boolean_t active_earlier = _gf_false; ++ gf_boolean_t active_now = _gf_false; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("trash", priv, out); + ++ active_earlier = priv->state; ++ 
GF_OPTION_RECONF("trash", active_now, options, bool, out); ++ ++ /* Disable of trash feature is not allowed at this point until ++ we are not able to find an approach to cleanup resource ++ gracefully. Here to disable the feature need to destroy inode ++ table and currently it is difficult to ensure inode is not ++ being used ++ */ ++ if (active_earlier && !active_now) { ++ gf_log(this->name, GF_LOG_INFO, ++ "Disable of trash feature is not allowed " ++ "during graph reconfigure"); ++ ret = 0; ++ goto out; ++ } ++ ++ if (!active_earlier && active_now) { ++ if (!priv->trash_itable) { ++ priv->trash_itable = inode_table_new(0, this); ++ if (!priv->trash_itable) { ++ ret = -ENOMEM; ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to create trash inode_table" ++ " during graph reconfigure"); ++ goto out; ++ } ++ } ++ priv->state = active_now; ++ } ++ + GF_OPTION_RECONF("trash-internal-op", priv->internal, options, bool, out); + GF_OPTION_RECONF("trash-dir", tmp, options, str, out); + +- GF_OPTION_RECONF("trash", priv->state, options, bool, out); +- + if (priv->state) { + ret = create_or_rename_trash_directory(this); + +@@ -2501,7 +2532,17 @@ init(xlator_t *this) + goto out; + } + +- priv->trash_itable = inode_table_new(0, this); ++ if (priv->state) { ++ priv->trash_itable = inode_table_new(0, this); ++ if (!priv->trash_itable) { ++ ret = -ENOMEM; ++ priv->state = _gf_false; ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to create trash inode_table disable trash"); ++ goto out; ++ } ++ } ++ + gf_log(this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path); + + this->private = (void *)priv; +-- +1.8.3.1 + diff --git a/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch b/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch new file mode 100644 index 0000000..cf978f5 --- /dev/null +++ b/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch @@ -0,0 +1,499 @@ +From 17a9ce965ef2fec9ee5c8e4b76981bb7cbcf1352 Mon Sep 
17 00:00:00 2001 +From: mohit84 +Date: Mon, 9 Nov 2020 17:15:42 +0530 +Subject: [PATCH 506/511] posix: Attach a posix_spawn_disk_thread with + glusterfs_ctx (#1595) + +Currently posix xlator spawns posix_disk_space_threads per brick and in +case of brick_mux environment while glusterd attached bricks at maximum +level(250) with a single brick process in that case 250 threads are +spawned for all bricks and brick process memory size also increased. + +Solution: Attach a posix_disk_space thread with glusterfs_ctx to + spawn a thread per process basis instead of spawning a per brick + +> Fixes: #1482 +> Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7 +> Cherry-picked from commit 3f93be77e1acf5baacafa97a320e91e6879d1c0e +> Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1482 +> Signed-off-by: Mohit Agrawal + +Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7 +Bug: 1898776 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/220366 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd.c | 4 + + libglusterfs/src/glusterfs/glusterfs.h | 6 ++ + xlators/storage/posix/src/posix-common.c | 68 +++++++++++-- + xlators/storage/posix/src/posix-handle.h | 3 +- + xlators/storage/posix/src/posix-helpers.c | 131 ++++++++++++++----------- + xlators/storage/posix/src/posix-inode-fd-ops.c | 3 +- + xlators/storage/posix/src/posix-mem-types.h | 1 + + xlators/storage/posix/src/posix.h | 12 ++- + 8 files changed, 160 insertions(+), 68 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 955bf1d..ac25255 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1840,9 +1840,13 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) + INIT_LIST_HEAD(&cmd_args->xlator_options); + INIT_LIST_HEAD(&cmd_args->volfile_servers); + ctx->pxl_count = 0; ++ ctx->diskxl_count = 0; + pthread_mutex_init(&ctx->fd_lock, NULL); + 
pthread_cond_init(&ctx->fd_cond, NULL); + INIT_LIST_HEAD(&ctx->janitor_fds); ++ pthread_mutex_init(&ctx->xl_lock, NULL); ++ pthread_cond_init(&ctx->xl_cond, NULL); ++ INIT_LIST_HEAD(&ctx->diskth_xl); + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index bf6a987..d3400bf 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -740,7 +740,13 @@ struct _glusterfs_ctx { + pthread_t janitor; + /* The variable is use to save total posix xlator count */ + uint32_t pxl_count; ++ uint32_t diskxl_count; + ++ /* List of posix xlator use by disk thread*/ ++ struct list_head diskth_xl; ++ pthread_mutex_t xl_lock; ++ pthread_cond_t xl_cond; ++ pthread_t disk_space_check; + char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ + }; + typedef struct _glusterfs_ctx glusterfs_ctx_t; +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index e5c6e62..2c9030b 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -138,6 +138,36 @@ posix_inode(xlator_t *this) + return 0; + } + ++static void ++delete_posix_diskxl(xlator_t *this) ++{ ++ struct posix_private *priv = this->private; ++ struct posix_diskxl *pxl = priv->pxl; ++ glusterfs_ctx_t *ctx = this->ctx; ++ uint32_t count = 1; ++ ++ if (pxl) { ++ pthread_mutex_lock(&ctx->xl_lock); ++ { ++ pxl->detach_notify = _gf_true; ++ while (pxl->is_use) ++ pthread_cond_wait(&pxl->cond, &ctx->xl_lock); ++ list_del_init(&pxl->list); ++ priv->pxl = NULL; ++ count = --ctx->diskxl_count; ++ if (count == 0) ++ pthread_cond_signal(&ctx->xl_cond); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); ++ pthread_cond_destroy(&pxl->cond); ++ GF_FREE(pxl); ++ if (count == 0) { ++ pthread_join(ctx->disk_space_check, NULL); ++ ctx->disk_space_check = 0; ++ } ++ } ++} ++ + /** + * notify - when parent 
sends PARENT_UP, send CHILD_UP event from here + */ +@@ -194,6 +224,8 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + } + pthread_mutex_unlock(&ctx->fd_lock); + ++ delete_posix_diskxl(this); ++ + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); +@@ -318,6 +350,7 @@ posix_reconfigure(xlator_t *this, dict_t *options) + int32_t force_directory_mode = -1; + int32_t create_mask = -1; + int32_t create_directory_mask = -1; ++ double old_disk_reserve = 0.0; + + priv = this->private; + +@@ -383,6 +416,7 @@ posix_reconfigure(xlator_t *this, dict_t *options) + " fallback to :"); + } + ++ old_disk_reserve = priv->disk_reserve; + GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, + out); + /* option can be any one of percent or bytes */ +@@ -390,11 +424,19 @@ posix_reconfigure(xlator_t *this, dict_t *options) + if (priv->disk_reserve < 100.0) + priv->disk_unit = 'p'; + +- if (priv->disk_reserve) { ++ /* Delete a pxl object from a list of disk_reserve while something ++ is changed for reserve option during graph reconfigure ++ */ ++ if (old_disk_reserve != priv->disk_reserve) { ++ delete_posix_diskxl(this); ++ old_disk_reserve = 0; ++ } ++ ++ if (!old_disk_reserve && priv->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, +- "Getting disk space check from thread failed"); ++ "Getting disk space check from thread failed "); + goto out; + } + } +@@ -1008,13 +1050,13 @@ posix_init(xlator_t *this) + " fallback to :"); + } + +- _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; + + GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); + + /* option can be any one of percent or bytes */ + _private->disk_unit = 0; ++ pthread_cond_init(&_private->fd_cond, NULL); + if (_private->disk_reserve < 100.0) + 
_private->disk_unit = 'p'; + +@@ -1162,12 +1204,6 @@ posix_fini(xlator_t *this) + priv->health_check = 0; + } + +- if (priv->disk_space_check) { +- priv->disk_space_check_active = _gf_false; +- (void)gf_thread_cleanup_xint(priv->disk_space_check); +- priv->disk_space_check = 0; +- } +- + if (priv->janitor) { + /*TODO: Make sure the synctask is also complete */ + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); +@@ -1192,10 +1228,24 @@ posix_fini(xlator_t *this) + pthread_join(ctx->janitor, NULL); + } + ++ pthread_mutex_lock(&ctx->xl_lock); ++ { ++ count = --ctx->diskxl_count; ++ if (count == 0) ++ pthread_cond_signal(&ctx->xl_cond); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); ++ ++ if (count == 0) { ++ pthread_join(ctx->disk_space_check, NULL); ++ ctx->disk_space_check = 0; ++ } ++ + if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; + } ++ + /*unlock brick dir*/ + if (priv->mount_lock) + (void)sys_closedir(priv->mount_lock); +diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h +index c4d7cb1..8e4c719 100644 +--- a/xlators/storage/posix/src/posix-handle.h ++++ b/xlators/storage/posix/src/posix-handle.h +@@ -206,5 +206,6 @@ int + posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata); + + void +-posix_disk_space_check(xlator_t *this); ++posix_disk_space_check(struct posix_private* priv); ++ + #endif /* !_POSIX_HANDLE_H */ +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index ceac52a..110d383 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -2284,9 +2284,8 @@ unlock: + } + + void +-posix_disk_space_check(xlator_t *this) ++posix_disk_space_check(struct posix_private *priv) + { +- struct posix_private *priv = NULL; + char *subvol_path = NULL; + int op_ret = 0; + double size = 0; +@@ -2295,16 +2294,14 @@ posix_disk_space_check(xlator_t 
*this) + double totsz = 0; + double freesz = 0; + +- GF_VALIDATE_OR_GOTO(this->name, this, out); +- priv = this->private; +- GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ GF_VALIDATE_OR_GOTO("posix-helpers", priv, out); + + subvol_path = priv->base_path; + + op_ret = sys_statvfs(subvol_path, &buf); + + if (op_ret == -1) { +- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, ++ gf_msg("posix-disk", GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + "statvfs failed on %s", subvol_path); + goto out; + } +@@ -2328,78 +2325,102 @@ out: + } + + static void * +-posix_disk_space_check_thread_proc(void *data) ++posix_ctx_disk_thread_proc(void *data) + { +- xlator_t *this = NULL; + struct posix_private *priv = NULL; ++ glusterfs_ctx_t *ctx = NULL; + uint32_t interval = 0; +- int ret = -1; +- +- this = data; +- priv = this->private; ++ struct posix_diskxl *pthis = NULL; ++ xlator_t *this = NULL; ++ struct timespec sleep_till = { ++ 0, ++ }; + ++ ctx = data; + interval = 5; +- gf_msg_debug(this->name, 0, +- "disk-space thread started, " ++ ++ gf_msg_debug("glusterfs_ctx", 0, ++ "Ctx disk-space thread started, " + "interval = %d seconds", + interval); +- while (1) { +- /* aborting sleep() is a request to exit this thread, sleep() +- * will normally not return when cancelled */ +- ret = sleep(interval); +- if (ret > 0) +- break; +- /* prevent thread errors while doing the health-check(s) */ +- pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); +- +- /* Do the disk-check.*/ +- posix_disk_space_check(this); +- if (!priv->disk_space_check_active) +- goto out; +- pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); +- } + +-out: +- gf_msg_debug(this->name, 0, "disk space check thread exiting"); +- LOCK(&priv->lock); ++ pthread_mutex_lock(&ctx->xl_lock); + { +- priv->disk_space_check_active = _gf_false; ++ while (ctx->diskxl_count > 0) { ++ list_for_each_entry(pthis, &ctx->diskth_xl, list) ++ { ++ pthis->is_use = _gf_true; ++ pthread_mutex_unlock(&ctx->xl_lock); ++ ++ THIS 
= this = pthis->xl; ++ priv = this->private; ++ ++ posix_disk_space_check(priv); ++ ++ pthread_mutex_lock(&ctx->xl_lock); ++ pthis->is_use = _gf_false; ++ /* Send a signal to posix_notify function */ ++ if (pthis->detach_notify) ++ pthread_cond_signal(&pthis->cond); ++ } ++ ++ timespec_now_realtime(&sleep_till); ++ sleep_till.tv_sec += 5; ++ (void)pthread_cond_timedwait(&ctx->xl_cond, &ctx->xl_lock, ++ &sleep_till); ++ } + } +- UNLOCK(&priv->lock); ++ pthread_mutex_unlock(&ctx->xl_lock); + + return NULL; + } + + int +-posix_spawn_disk_space_check_thread(xlator_t *xl) ++posix_spawn_disk_space_check_thread(xlator_t *this) + { +- struct posix_private *priv = NULL; +- int ret = -1; ++ int ret = 0; ++ glusterfs_ctx_t *ctx = this->ctx; ++ struct posix_diskxl *pxl = NULL; ++ struct posix_private *priv = this->private; + +- priv = xl->private; ++ pxl = GF_CALLOC(1, sizeof(struct posix_diskxl), gf_posix_mt_diskxl_t); ++ if (!pxl) { ++ ret = -ENOMEM; ++ gf_log(this->name, GF_LOG_ERROR, ++ "Calloc is failed to allocate " ++ "memory for diskxl object"); ++ goto out; ++ } ++ pthread_cond_init(&pxl->cond, NULL); + +- LOCK(&priv->lock); ++ pthread_mutex_lock(&ctx->xl_lock); + { +- /* cancel the running thread */ +- if (priv->disk_space_check_active == _gf_true) { +- pthread_cancel(priv->disk_space_check); +- priv->disk_space_check_active = _gf_false; +- } ++ if (ctx->diskxl_count++ == 0) { ++ ret = gf_thread_create(&ctx->disk_space_check, NULL, ++ posix_ctx_disk_thread_proc, ctx, ++ "posixctxres"); + +- ret = gf_thread_create(&priv->disk_space_check, NULL, +- posix_disk_space_check_thread_proc, xl, +- "posix_reserve"); +- if (ret) { +- priv->disk_space_check_active = _gf_false; +- gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, +- "unable to setup disk space check thread"); +- goto unlock; ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, ++ "spawning disk space check thread failed"); ++ ctx->diskxl_count--; ++ 
pthread_mutex_unlock(&ctx->xl_lock); ++ goto out; ++ } + } ++ pxl->xl = this; ++ priv->pxl = (void *)pxl; ++ list_add_tail(&pxl->list, &ctx->diskth_xl); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); + +- priv->disk_space_check_active = _gf_true; ++out: ++ if (ret) { ++ if (pxl) { ++ pthread_cond_destroy(&pxl->cond); ++ GF_FREE(pxl); ++ } + } +-unlock: +- UNLOCK(&priv->lock); + return ret; + } + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 1d37aed..761e018 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -37,6 +37,7 @@ + #include + #endif /* HAVE_LINKAT */ + ++#include "posix-handle.h" + #include + #include + #include +@@ -713,7 +714,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + option behaviour + */ + if (priv->disk_reserve) +- posix_disk_space_check(this); ++ posix_disk_space_check(priv); + + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); + +diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h +index 2253f38..bb4c56d 100644 +--- a/xlators/storage/posix/src/posix-mem-types.h ++++ b/xlators/storage/posix/src/posix-mem-types.h +@@ -20,6 +20,7 @@ enum gf_posix_mem_types_ { + gf_posix_mt_paiocb, + gf_posix_mt_inode_ctx_t, + gf_posix_mt_mdata_attr, ++ gf_posix_mt_diskxl_t, + gf_posix_mt_end + }; + #endif +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 07f367b..4be979c 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -36,7 +36,6 @@ + #include + #include + #include "posix-mem-types.h" +-#include "posix-handle.h" + #include + + #ifdef HAVE_LIBAIO +@@ -138,6 +137,14 @@ struct posix_fd { + char _pad[4]; /* manual padding */ + }; + ++struct posix_diskxl { ++ pthread_cond_t cond; ++ struct list_head list; ++ xlator_t *xl; ++ gf_boolean_t 
detach_notify; ++ gf_boolean_t is_use; ++}; ++ + struct posix_private { + char *base_path; + int32_t base_path_length; +@@ -207,6 +214,7 @@ struct posix_private { + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; ++ pthread_cond_t disk_cond; + int fsync_queue_count; + + enum { +@@ -233,7 +241,6 @@ struct posix_private { + char disk_unit; + uint32_t disk_space_full; + pthread_t disk_space_check; +- gf_boolean_t disk_space_check_active; + + #ifdef GF_DARWIN_HOST_OS + enum { +@@ -263,6 +270,7 @@ struct posix_private { + gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; + uint32_t rel_fdcount; ++ void *pxl; + }; + + typedef struct { +-- +1.8.3.1 + diff --git a/SOURCES/0507-inode-make-critical-section-smaller.patch b/SOURCES/0507-inode-make-critical-section-smaller.patch new file mode 100644 index 0000000..3b1dac5 --- /dev/null +++ b/SOURCES/0507-inode-make-critical-section-smaller.patch @@ -0,0 +1,764 @@ +From b3a17b67a69142eef1b4adde3409d5e54dda1e0b Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Sat, 9 Feb 2019 13:23:06 +0530 +Subject: [PATCH 507/511] inode: make critical section smaller + +do all the 'static' tasks outside of locked region. + +* hash_dentry() and hash_gfid() are now called outside locked region. +* remove extra __dentry_hash exported in libglusterfs.sym +* avoid checks in locked functions, if the check is done in calling + function. 
+* implement dentry_destroy(), which handles freeing of dentry separately, + from that of dentry_unset (which takes care of separating dentry from + inode, and table) + +> Updates: bz#1670031 +> Change-Id: I584213e0748464bb427fbdef3c4ab6615d7d5eb0 +> Signed-off-by: Amar Tumballi +> (Cherry pick from commit 8a90d346b9d3f69ff11241feb0011c90a8e57e30) +> (Review on upstream link https://review.gluster.org/#/c/glusterfs/+/22184/) + +Change-Id: I584213e0748464bb427fbdef3c4ab6615d7d5eb0 +BUG: 1898777 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221189 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/inode.h | 3 - + libglusterfs/src/inode.c | 323 +++++++++++++------------------------ + libglusterfs/src/libglusterfs.sym | 1 - + 3 files changed, 111 insertions(+), 216 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index 4421c47..c875653 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -167,9 +167,6 @@ inode_rename(inode_table_t *table, inode_t *olddir, const char *oldname, + inode_t *newdir, const char *newname, inode_t *inode, + struct iatt *stbuf); + +-dentry_t * +-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name); +- + inode_t * + inode_grep(inode_table_t *table, inode_t *parent, const char *name); + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 4c3c546..71b2d2a 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -159,27 +159,15 @@ hash_dentry(inode_t *parent, const char *name, int mod) + static int + hash_gfid(uuid_t uuid, int mod) + { +- int ret = 0; +- +- ret = uuid[15] + (uuid[14] << 8); +- +- return ret; ++ return ((uuid[15] + (uuid[14] << 8)) % mod); + } + + static void +-__dentry_hash(dentry_t *dentry) ++__dentry_hash(dentry_t *dentry, const int hash) + { + inode_table_t *table = NULL; +- int 
hash = 0; +- +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); +- return; +- } + + table = dentry->inode->table; +- hash = hash_dentry(dentry->parent, dentry->name, table->hashsize); + + list_del_init(&dentry->hash); + list_add(&dentry->hash, &table->name_hash[hash]); +@@ -188,49 +176,44 @@ __dentry_hash(dentry_t *dentry) + static int + __is_dentry_hashed(dentry_t *dentry) + { +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); +- return 0; +- } +- + return !list_empty(&dentry->hash); + } + + static void + __dentry_unhash(dentry_t *dentry) + { +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); +- return; +- } +- + list_del_init(&dentry->hash); + } + + static void +-__dentry_unset(dentry_t *dentry) ++dentry_destroy(dentry_t *dentry) + { +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); ++ if (!dentry) + return; +- } ++ ++ GF_FREE(dentry->name); ++ dentry->name = NULL; ++ mem_put(dentry); ++ ++ return; ++} ++ ++static dentry_t * ++__dentry_unset(dentry_t *dentry) ++{ ++ if (!dentry) ++ return NULL; + + __dentry_unhash(dentry); + + list_del_init(&dentry->inode_list); + +- GF_FREE(dentry->name); +- dentry->name = NULL; +- + if (dentry->parent) { + __inode_unref(dentry->parent, false); + dentry->parent = NULL; + } + +- mem_put(dentry); ++ return dentry; + } + + static int +@@ -289,22 +272,14 @@ static int + __is_dentry_cyclic(dentry_t *dentry) + { + int ret = 0; +- inode_t *inode = NULL; +- char *name = ""; + + ret = __foreach_ancestor_dentry(dentry, __check_cycle, dentry->inode); + if (ret) { +- inode = dentry->inode; +- +- if (dentry->name) +- name = dentry->name; +- + gf_msg(dentry->inode->table->name, GF_LOG_CRITICAL, 0, + LG_MSG_DENTRY_CYCLIC_LOOP, +- "detected cyclic loop " +- "formation during inode 
linkage. inode (%s) linking " +- "under itself as %s", +- uuid_utoa(inode->gfid), name); ++ "detected cyclic loop formation during inode linkage. " ++ "inode (%s) linking under itself as %s", ++ uuid_utoa(dentry->inode->gfid), dentry->name); + } + + return ret; +@@ -313,41 +288,19 @@ __is_dentry_cyclic(dentry_t *dentry) + static void + __inode_unhash(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_del_init(&inode->hash); + } + + static int + __is_inode_hashed(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return 0; +- } +- + return !list_empty(&inode->hash); + } + + static void +-__inode_hash(inode_t *inode) ++__inode_hash(inode_t *inode, const int hash) + { +- inode_table_t *table = NULL; +- int hash = 0; +- +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- +- table = inode->table; +- hash = hash_gfid(inode->gfid, 65536); ++ inode_table_t *table = inode->table; + + list_del_init(&inode->hash); + list_add(&inode->hash, &table->inode_hash[hash]); +@@ -359,12 +312,6 @@ __dentry_search_for_inode(inode_t *inode, uuid_t pargfid, const char *name) + dentry_t *dentry = NULL; + dentry_t *tmp = NULL; + +- if (!inode || !name) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, +- "inode || name not found"); +- return NULL; +- } +- + /* earlier, just the ino was sent, which could have been 0, now + we deal with gfid, and if sent gfid is null or 0, no need to + continue with the check */ +@@ -390,12 +337,6 @@ __inode_ctx_free(inode_t *inode) + xlator_t *xl = NULL; + xlator_t *old_THIS = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + if (!inode->_ctx) { + gf_msg(THIS->name, 
GF_LOG_WARNING, 0, LG_MSG_CTX_NULL, + "_ctx not found"); +@@ -424,12 +365,6 @@ noctx: + static void + __inode_destroy(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + __inode_ctx_free(inode); + + LOCK_DESTROY(&inode->lock); +@@ -472,9 +407,6 @@ inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode) + static void + __inode_activate(inode_t *inode) + { +- if (!inode) +- return; +- + list_move(&inode->list, &inode->table->active); + inode->table->active_size++; + } +@@ -485,19 +417,13 @@ __inode_passivate(inode_t *inode) + dentry_t *dentry = NULL; + dentry_t *t = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_move_tail(&inode->list, &inode->table->lru); + inode->table->lru_size++; + + list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) + { + if (!__is_dentry_hashed(dentry)) +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + } + } + +@@ -507,12 +433,6 @@ __inode_retire(inode_t *inode) + dentry_t *dentry = NULL; + dentry_t *t = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_move_tail(&inode->list, &inode->table->purge); + inode->table->purge_size++; + +@@ -520,7 +440,7 @@ __inode_retire(inode_t *inode) + + list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) + { +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + } + } + +@@ -547,9 +467,6 @@ __inode_unref(inode_t *inode, bool clear) + xlator_t *this = NULL; + uint64_t nlookup = 0; + +- if (!inode) +- return NULL; +- + /* + * Root inode should always be in active list of inode table. So unrefs + * on root inode are no-ops. 
+@@ -677,16 +594,10 @@ inode_ref(inode_t *inode) + } + + static dentry_t * +-__dentry_create(inode_t *inode, inode_t *parent, const char *name) ++dentry_create(inode_t *inode, inode_t *parent, const char *name) + { + dentry_t *newd = NULL; + +- if (!inode || !parent || !name) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, +- "inode || parent || name not found"); +- return NULL; +- } +- + newd = mem_get0(parent->table->dentry_pool); + if (newd == NULL) { + goto out; +@@ -702,10 +613,6 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name) + goto out; + } + +- if (parent) +- newd->parent = __inode_ref(parent, false); +- +- list_add(&newd->inode_list, &inode->dentry_list); + newd->inode = inode; + + out: +@@ -717,14 +624,6 @@ __inode_create(inode_table_t *table) + { + inode_t *newi = NULL; + +- if (!table) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, +- LG_MSG_INODE_TABLE_NOT_FOUND, +- "table not " +- "found"); +- return NULL; +- } +- + newi = mem_get0(table->inode_pool); + if (!newi) { + goto out; +@@ -795,9 +694,6 @@ __inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) + { + uint64_t nlookup = 0; + +- if (!inode) +- return NULL; +- + GF_ASSERT(inode->ref >= nref); + + inode->ref -= nref; +@@ -837,17 +733,12 @@ inode_forget_atomic(inode_t *inode, uint64_t nlookup) + } + + dentry_t * +-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name) ++__dentry_grep(inode_table_t *table, inode_t *parent, const char *name, ++ const int hash) + { +- int hash = 0; + dentry_t *dentry = NULL; + dentry_t *tmp = NULL; + +- if (!table || !name || !parent) +- return NULL; +- +- hash = hash_dentry(parent, name, table->hashsize); +- + list_for_each_entry(tmp, &table->name_hash[hash], hash) + { + if (tmp->parent == parent && !strcmp(tmp->name, name)) { +@@ -872,15 +763,16 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) + return NULL; + } + ++ int hash = hash_dentry(parent, name, table->hashsize); 
++ + pthread_mutex_lock(&table->lock); + { +- dentry = __dentry_grep(table, parent, name); +- +- if (dentry) ++ dentry = __dentry_grep(table, parent, name, hash); ++ if (dentry) { + inode = dentry->inode; +- +- if (inode) +- __inode_ref(inode, false); ++ if (inode) ++ __inode_ref(inode, false); ++ } + } + pthread_mutex_unlock(&table->lock); + +@@ -947,17 +839,18 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name, + return ret; + } + ++ int hash = hash_dentry(parent, name, table->hashsize); ++ + pthread_mutex_lock(&table->lock); + { +- dentry = __dentry_grep(table, parent, name); +- +- if (dentry) ++ dentry = __dentry_grep(table, parent, name, hash); ++ if (dentry) { + inode = dentry->inode; +- +- if (inode) { +- gf_uuid_copy(gfid, inode->gfid); +- *type = inode->ia_type; +- ret = 0; ++ if (inode) { ++ gf_uuid_copy(gfid, inode->gfid); ++ *type = inode->ia_type; ++ ret = 0; ++ } + } + } + pthread_mutex_unlock(&table->lock); +@@ -978,25 +871,14 @@ __is_root_gfid(uuid_t gfid) + } + + inode_t * +-__inode_find(inode_table_t *table, uuid_t gfid) ++__inode_find(inode_table_t *table, uuid_t gfid, const int hash) + { + inode_t *inode = NULL; + inode_t *tmp = NULL; +- int hash = 0; +- +- if (!table) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, +- LG_MSG_INODE_TABLE_NOT_FOUND, +- "table not " +- "found"); +- goto out; +- } + + if (__is_root_gfid(gfid)) + return table->root; + +- hash = hash_gfid(gfid, 65536); +- + list_for_each_entry(tmp, &table->inode_hash[hash], hash) + { + if (gf_uuid_compare(tmp->gfid, gfid) == 0) { +@@ -1005,7 +887,6 @@ __inode_find(inode_table_t *table, uuid_t gfid) + } + } + +-out: + return inode; + } + +@@ -1022,9 +903,11 @@ inode_find(inode_table_t *table, uuid_t gfid) + return NULL; + } + ++ int hash = hash_gfid(gfid, 65536); ++ + pthread_mutex_lock(&table->lock); + { +- inode = __inode_find(table, gfid); ++ inode = __inode_find(table, gfid, hash); + if (inode) + __inode_ref(inode, false); + } +@@ -1035,7 +918,7 @@ 
inode_find(inode_table_t *table, uuid_t gfid) + + static inode_t * + __inode_link(inode_t *inode, inode_t *parent, const char *name, +- struct iatt *iatt) ++ struct iatt *iatt, const int dhash) + { + dentry_t *dentry = NULL; + dentry_t *old_dentry = NULL; +@@ -1043,16 +926,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + inode_table_t *table = NULL; + inode_t *link_inode = NULL; + +- if (!inode) { +- errno = EINVAL; +- return NULL; +- } +- + table = inode->table; +- if (!table) { +- errno = EINVAL; +- return NULL; +- } + + if (parent) { + /* We should prevent inode linking between different +@@ -1090,14 +964,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + return NULL; + } + +- old_inode = __inode_find(table, iatt->ia_gfid); ++ int ihash = hash_gfid(iatt->ia_gfid, 65536); ++ ++ old_inode = __inode_find(table, iatt->ia_gfid, ihash); + + if (old_inode) { + link_inode = old_inode; + } else { + gf_uuid_copy(inode->gfid, iatt->ia_gfid); + inode->ia_type = iatt->ia_type; +- __inode_hash(inode); ++ __inode_hash(inode, ihash); + } + } else { + /* @old_inode serves another important purpose - it indicates +@@ -1112,22 +988,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + old_inode = inode; + } + +- if (name) { +- if (!strcmp(name, ".") || !strcmp(name, "..")) +- return link_inode; +- +- if (strchr(name, '/')) { +- GF_ASSERT(!"inode link attempted with '/' in name"); +- return NULL; +- } ++ if (name && (!strcmp(name, ".") || !strcmp(name, ".."))) { ++ return link_inode; + } + + /* use only link_inode beyond this point */ + if (parent) { +- old_dentry = __dentry_grep(table, parent, name); ++ old_dentry = __dentry_grep(table, parent, name, dhash); + + if (!old_dentry || old_dentry->inode != link_inode) { +- dentry = __dentry_create(link_inode, parent, name); ++ dentry = dentry_create(link_inode, parent, name); + if (!dentry) { + gf_msg_callingfn( + THIS->name, GF_LOG_ERROR, 0, LG_MSG_DENTRY_CREATE_FAILED, +@@ 
-1137,15 +1007,20 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + errno = ENOMEM; + return NULL; + } ++ ++ /* dentry linking needs to happen inside lock */ ++ dentry->parent = __inode_ref(parent, false); ++ list_add(&dentry->inode_list, &link_inode->dentry_list); ++ + if (old_inode && __is_dentry_cyclic(dentry)) { + errno = ELOOP; +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + return NULL; + } +- __dentry_hash(dentry); ++ __dentry_hash(dentry, dhash); + + if (old_dentry) +- __dentry_unset(old_dentry); ++ dentry_destroy(__dentry_unset(old_dentry)); + } + } + +@@ -1155,6 +1030,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + inode_t * + inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + { ++ int hash = 0; + inode_table_t *table = NULL; + inode_t *linked_inode = NULL; + +@@ -1166,10 +1042,18 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + + table = inode->table; + ++ if (parent && name) { ++ hash = hash_dentry(parent, name, table->hashsize); ++ } ++ ++ if (name && strchr(name, '/')) { ++ GF_ASSERT(!"inode link attempted with '/' in name"); ++ return NULL; ++ } ++ + pthread_mutex_lock(&table->lock); + { +- linked_inode = __inode_link(inode, parent, name, iatt); +- ++ linked_inode = __inode_link(inode, parent, name, iatt, hash); + if (linked_inode) + __inode_ref(linked_inode, false); + } +@@ -1312,48 +1196,47 @@ inode_invalidate(inode_t *inode) + return ret; + } + +-static void ++static dentry_t * + __inode_unlink(inode_t *inode, inode_t *parent, const char *name) + { + dentry_t *dentry = NULL; + char pgfid[64] = {0}; + char gfid[64] = {0}; + +- if (!inode || !parent || !name) +- return; +- + dentry = __dentry_search_for_inode(inode, parent->gfid, name); + + /* dentry NULL for corrupted backend */ + if (dentry) { +- __dentry_unset(dentry); ++ dentry = __dentry_unset(dentry); + } else { + gf_msg("inode", GF_LOG_WARNING, 0, 
LG_MSG_DENTRY_NOT_FOUND, + "%s/%s: dentry not found in %s", + uuid_utoa_r(parent->gfid, pgfid), name, + uuid_utoa_r(inode->gfid, gfid)); + } ++ ++ return dentry; + } + + void + inode_unlink(inode_t *inode, inode_t *parent, const char *name) + { +- inode_table_t *table = NULL; ++ inode_table_t *table; ++ dentry_t *dentry; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); ++ if (!inode || !parent || !name) + return; +- } + + table = inode->table; + + pthread_mutex_lock(&table->lock); + { +- __inode_unlink(inode, parent, name); ++ dentry = __inode_unlink(inode, parent, name); + } + pthread_mutex_unlock(&table->lock); + ++ dentry_destroy(dentry); ++ + inode_table_prune(table); + } + +@@ -1362,6 +1245,9 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + inode_t *dstdir, const char *dstname, inode_t *inode, + struct iatt *iatt) + { ++ int hash = 0; ++ dentry_t *dentry = NULL; ++ + if (!inode) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, + "inode not found"); +@@ -1370,13 +1256,26 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + + table = inode->table; + ++ if (dstname && strchr(dstname, '/')) { ++ GF_ASSERT(!"inode link attempted with '/' in name"); ++ return -1; ++ } ++ ++ if (dstdir && dstname) { ++ hash = hash_dentry(dstdir, dstname, table->hashsize); ++ } ++ + pthread_mutex_lock(&table->lock); + { +- __inode_link(inode, dstdir, dstname, iatt); +- __inode_unlink(inode, srcdir, srcname); ++ __inode_link(inode, dstdir, dstname, iatt, hash); ++ /* pick the old dentry */ ++ dentry = __inode_unlink(inode, srcdir, srcname); + } + pthread_mutex_unlock(&table->lock); + ++ /* free the old dentry */ ++ dentry_destroy(dentry); ++ + inode_table_prune(table); + + return 0; +@@ -1447,12 +1346,6 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name) + static int + __inode_has_dentry(inode_t *inode) + { +- if (!inode) { 
+- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return 0; +- } +- + return !list_empty(&inode->dentry_list); + } + +@@ -1461,6 +1354,12 @@ inode_has_dentry(inode_t *inode) + { + int dentry_present = 0; + ++ if (!inode) { ++ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, ++ "inode not found"); ++ return 0; ++ } ++ + LOCK(&inode->lock); + { + dentry_present = __inode_has_dentry(inode); +@@ -1720,7 +1619,7 @@ __inode_table_init_root(inode_table_t *table) + iatt.ia_ino = 1; + iatt.ia_type = IA_IFDIR; + +- __inode_link(root, NULL, NULL, &iatt); ++ __inode_link(root, NULL, NULL, &iatt, 0); + table->root = root; + } + +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 5a721e0..d060292 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -357,7 +357,6 @@ default_copy_file_range + default_copy_file_range_cbk + default_copy_file_range_failure_cbk + default_copy_file_range_resume +-__dentry_grep + dht_is_linkfile + dict_add + dict_addn +-- +1.8.3.1 + diff --git a/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch b/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch new file mode 100644 index 0000000..9ccc1b5 --- /dev/null +++ b/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch @@ -0,0 +1,232 @@ +From 87b7689f7727a542c5afa22bdebd3781dd650a2f Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Fri, 17 Jul 2020 11:33:36 +0200 +Subject: [PATCH 508/511] fuse: fetch arbitrary number of groups from + /proc/[pid]/status + +Glusterfs so far constrained itself with an arbitrary limit (32) +for the number of groups read from /proc/[pid]/status (this was +the number of groups shown there prior to Linux commit +v3.7-9553-g8d238027b87e (v3.8-rc1~74^2~59); since this commit, all +groups are shown). 
+ +With this change we'll read groups up to the number Glusterfs +supports in general (64k). + +Note: the actual number of groups that are made use of in a +regular Glusterfs setup shall still be capped at ~93 due to limitations +of the RPC transport. To be able to handle more groups than that, +brick side gid resolution (server.manage-gids option) can be used along +with NIS, LDAP or other such networked directory service (see +https://github.com/gluster/glusterdocs/blob/5ba15a2/docs/Administrator%20Guide/Handling-of-users-with-many-groups.md#limit-in-the-glusterfs-protocol +). + +Also adding some diagnostic messages to frame_fill_groups(). + +Upstream: +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/24721 +> Change-Id: I271f3dc3e6d3c44d6d989c7a2073ea5f16c26ee0 +> fixes: #1075 +> Signed-off-by: Csaba Henk + +BUG: 1749304 +Change-Id: I80bf99d34087fb95768bf2259d8c4774d9f5d0c5 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/220920 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs/stack.h | 7 ++++ + tests/bugs/fuse/many-groups-for-acl.t | 13 ++++++- + xlators/mount/fuse/src/fuse-helpers.c | 71 +++++++++++++++++++++++------------ + 3 files changed, 65 insertions(+), 26 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h +index 1758550..bd466d8 100644 +--- a/libglusterfs/src/glusterfs/stack.h ++++ b/libglusterfs/src/glusterfs/stack.h +@@ -429,6 +429,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) + if (ngrps <= SMALL_GROUP_COUNT) { + stack->groups = stack->groups_small; + } else { ++ GF_FREE(stack->groups_large); + stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t), + gf_common_mt_groups_t); + if (!stack->groups_large) +@@ -442,6 +443,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) + } + + static inline int ++call_stack_groups_capacity(call_stack_t *stack) ++{ ++ return max(stack->ngrps, 
SMALL_GROUP_COUNT); ++} ++ ++static inline int + call_frames_count(call_stack_t *call_stack) + { + call_frame_t *pos; +diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t +index d959f75..a51b1bc 100755 +--- a/tests/bugs/fuse/many-groups-for-acl.t ++++ b/tests/bugs/fuse/many-groups-for-acl.t +@@ -38,6 +38,13 @@ do + done + TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} + ++# Linux < 3.8 exports only first 32 gids of pid to userspace ++kernel_exports_few_gids=0 ++if [ "$OSTYPE" = Linux ] && \ ++ su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then ++ kernel_exports_few_gids=1 ++fi ++ + # preparation done, start the tests + + TEST glusterd +@@ -48,6 +55,8 @@ TEST $CLI volume set $V0 nfs.disable off + TEST $CLI volume set ${V0} server.manage-gids off + TEST $CLI volume start ${V0} + ++# This is just a synchronization hack to make sure the bricks are ++# up before going on. + EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available + + # mount the volume with POSIX ACL support, without --resolve-gids +@@ -69,8 +78,8 @@ TEST [ $? -eq 0 ] + su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null" + TEST [ $? -eq 0 ] + +-su -m ${NEW_USER} -c "touch ${M0}/gid-64/failure > /dev/null" +-TEST [ $? -ne 0 ] ++su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null" ++TEST [ $? -eq $kernel_exports_few_gids ] + + su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null" + TEST [ $? 
-ne 0 ] +diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c +index 5bfc40c..6e04cd4 100644 +--- a/xlators/mount/fuse/src/fuse-helpers.c ++++ b/xlators/mount/fuse/src/fuse-helpers.c +@@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh) + return state; + } + +-#define FUSE_MAX_AUX_GROUPS \ +- 32 /* We can get only up to 32 aux groups from /proc */ + void + frame_fill_groups(call_frame_t *frame) + { +@@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame) + char filename[32]; + char line[4096]; + char *ptr = NULL; +- FILE *fp = NULL; +- int idx = 0; + long int id = 0; + char *saveptr = NULL; + char *endptr = NULL; +@@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame) + + call_stack_set_groups(frame->root, ngroups, &mygroups); + } else { ++ FILE *fp = NULL; ++ + ret = snprintf(filename, sizeof filename, "/proc/%d/status", + frame->root->pid); +- if (ret >= sizeof filename) ++ if (ret >= sizeof filename) { ++ gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size"); + goto out; ++ } + + fp = fopen(filename, "r"); +- if (!fp) ++ if (!fp) { ++ gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename, ++ strerror(errno)); + goto out; ++ } + +- if (call_stack_alloc_groups(frame->root, ngroups) != 0) +- goto out; ++ for (;;) { ++ gf_boolean_t found_groups = _gf_false; ++ int idx = 0; + +- while ((ptr = fgets(line, sizeof line, fp))) { +- if (strncmp(ptr, "Groups:", 7) != 0) +- continue; ++ if (call_stack_alloc_groups(frame->root, ngroups) != 0) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to allocate gid buffer"); ++ goto out; ++ } + ++ while ((ptr = fgets(line, sizeof line, fp))) { ++ if (strncmp(ptr, "Groups:", 7) == 0) { ++ found_groups = _gf_true; ++ break; ++ } ++ } ++ if (!found_groups) { ++ gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s", ++ filename); ++ break; ++ } + ptr = line + 8; + + for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr; + ptr = 
strtok_r(NULL, " \t\r\n", &saveptr)) { + errno = 0; + id = strtol(ptr, &endptr, 0); +- if (errno == ERANGE) +- break; +- if (!endptr || *endptr) ++ if (errno == ERANGE || !endptr || *endptr) { ++ gf_log(this->name, GF_LOG_ERROR, "failed to parse %s", ++ filename); + break; +- frame->root->groups[idx++] = id; +- if (idx == FUSE_MAX_AUX_GROUPS) ++ } ++ if (idx < call_stack_groups_capacity(frame->root)) ++ frame->root->groups[idx] = id; ++ idx++; ++ if (idx == GF_MAX_AUX_GROUPS) + break; + } +- +- frame->root->ngrps = idx; +- break; ++ if (idx > call_stack_groups_capacity(frame->root)) { ++ ngroups = idx; ++ rewind(fp); ++ } else { ++ frame->root->ngrps = idx; ++ break; ++ } + } ++ out: ++ if (fp) ++ fclose(fp); + } +- +-out: +- if (fp) +- fclose(fp); + #elif defined(GF_SOLARIS_HOST_OS) + char filename[32]; + char scratch[128]; +@@ -245,7 +268,7 @@ out: + fp = fopen(filename, "r"); + if (fp != NULL) { + if (fgets(scratch, sizeof scratch, fp) != NULL) { +- ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS); ++ ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS); + if (call_stack_alloc_groups(frame->root, ngrps) != 0) { + fclose(fp); + return; +-- +1.8.3.1 + diff --git a/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch b/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch new file mode 100644 index 0000000..fdfc9bb --- /dev/null +++ b/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch @@ -0,0 +1,407 @@ +From a18f03cbf2b5652f8617cb4dd236bb4ca9838d96 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 6 Oct 2020 16:54:15 +0530 +Subject: [PATCH 509/511] core: configure optimum inode table hash_size for shd + +In brick_mux environment a shd process consume high memory. 
+After print the statedump i have found it allocates 1M per afr xlator +for all bricks.In case of configure 4k volumes it consumes almost total +6G RSS size in which 4G consumes by inode_tables + +[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage] +size=1273488 +num_allocs=2 +max_size=1273488 +max_num_allocs=2 +total_allocs=2 + +inode_new_table function allocates memory(1M) for a list of inode and dentry hash. +For shd lru_limit size is 1 so we don't need to create a big hash table so to reduce +RSS size for shd process pass optimum bucket count at the time of creating inode_table. + +> Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb +> Fixes: #1538 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit ca6bbc486e76fdb9a8e07119bb10d7fa45b2e93b) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1538) + +Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb +BUG: 1898777 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221191 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-master.c | 2 +- + libglusterfs/src/glusterfs/inode.h | 17 +++++---- + libglusterfs/src/inode.c | 53 +++++++++++++++++--------- + xlators/cluster/afr/src/afr.c | 10 ++++- + xlators/cluster/dht/src/dht-rebalance.c | 3 +- + xlators/cluster/ec/src/ec.c | 2 +- + xlators/features/bit-rot/src/bitd/bit-rot.c | 2 +- + xlators/features/quota/src/quotad-helpers.c | 2 +- + xlators/features/trash/src/trash.c | 4 +- + xlators/mount/fuse/src/fuse-bridge.c | 6 +-- + xlators/nfs/server/src/nfs.c | 2 +- + xlators/protocol/server/src/server-handshake.c | 3 +- + 12 files changed, 66 insertions(+), 40 deletions(-) + +diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c +index b4473b1..9e604d3 100644 +--- a/api/src/glfs-master.c ++++ b/api/src/glfs-master.c +@@ -45,7 +45,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph) + } + + if (!new_subvol->itable) { 
+- itable = inode_table_new(131072, new_subvol); ++ itable = inode_table_new(131072, new_subvol, 0, 0); + if (!itable) { + errno = ENOMEM; + ret = -1; +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index c875653..62c093d 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -35,11 +35,12 @@ typedef struct _dentry dentry_t; + + struct _inode_table { + pthread_mutex_t lock; +- size_t hashsize; /* bucket size of inode hash and dentry hash */ +- char *name; /* name of the inode table, just for gf_log() */ +- inode_t *root; /* root directory inode, with number 1 */ +- xlator_t *xl; /* xlator to be called to do purge */ +- uint32_t lru_limit; /* maximum LRU cache size */ ++ size_t dentry_hashsize; /* Number of buckets for dentry hash*/ ++ size_t inode_hashsize; /* Size of inode hash table */ ++ char *name; /* name of the inode table, just for gf_log() */ ++ inode_t *root; /* root directory inode, with number 1 */ ++ xlator_t *xl; /* xlator to be called to do purge */ ++ uint32_t lru_limit; /* maximum LRU cache size */ + struct list_head *inode_hash; /* buckets for inode hash table */ + struct list_head *name_hash; /* buckets for dentry hash table */ + struct list_head active; /* list of inodes currently active (in an fop) */ +@@ -116,12 +117,14 @@ struct _inode { + #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1) + + inode_table_t * +-inode_table_new(uint32_t lru_limit, xlator_t *xl); ++inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size, ++ uint32_t inodehash_size); + + inode_table_t * + inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), +- xlator_t *invalidator_xl); ++ xlator_t *invalidator_xl, uint32_t dentry_hashsize, ++ uint32_t inode_hashsize); + + void + inode_table_destroy_all(glusterfs_ctx_t *ctx); +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 71b2d2a..98f8ea6 100644 +--- 
a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -763,7 +763,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) + return NULL; + } + +- int hash = hash_dentry(parent, name, table->hashsize); ++ int hash = hash_dentry(parent, name, table->dentry_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -839,7 +839,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name, + return ret; + } + +- int hash = hash_dentry(parent, name, table->hashsize); ++ int hash = hash_dentry(parent, name, table->dentry_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -903,7 +903,7 @@ inode_find(inode_table_t *table, uuid_t gfid) + return NULL; + } + +- int hash = hash_gfid(gfid, 65536); ++ int hash = hash_gfid(gfid, table->inode_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -964,7 +964,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + return NULL; + } + +- int ihash = hash_gfid(iatt->ia_gfid, 65536); ++ int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize); + + old_inode = __inode_find(table, iatt->ia_gfid, ihash); + +@@ -1043,7 +1043,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + table = inode->table; + + if (parent && name) { +- hash = hash_dentry(parent, name, table->hashsize); ++ hash = hash_dentry(parent, name, table->dentry_hashsize); + } + + if (name && strchr(name, '/')) { +@@ -1262,7 +1262,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + } + + if (dstdir && dstname) { +- hash = hash_dentry(dstdir, dstname, table->hashsize); ++ hash = hash_dentry(dstdir, dstname, table->dentry_hashsize); + } + + pthread_mutex_lock(&table->lock); +@@ -1626,7 +1626,8 @@ __inode_table_init_root(inode_table_t *table) + inode_table_t * + inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), +- xlator_t *invalidator_xl) ++ xlator_t *invalidator_xl, uint32_t 
dentry_hashsize, ++ uint32_t inode_hashsize) + { + inode_table_t *new = NULL; + uint32_t mem_pool_size = lru_limit; +@@ -1644,7 +1645,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + new->invalidator_fn = invalidator_fn; + new->invalidator_xl = invalidator_xl; + +- new->hashsize = 14057; /* TODO: Random Number?? */ ++ if (dentry_hashsize == 0) { ++ /* Prime number for uniform distribution */ ++ new->dentry_hashsize = 14057; ++ } else { ++ new->dentry_hashsize = dentry_hashsize; ++ } ++ ++ if (inode_hashsize == 0) { ++ /* The size of hash table always should be power of 2 */ ++ new->inode_hashsize = 65536; ++ } else { ++ new->inode_hashsize = inode_hashsize; ++ } + + /* In case FUSE is initing the inode table. */ + if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) +@@ -1658,13 +1671,13 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + if (!new->dentry_pool) + goto out; + +- new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head), +- gf_common_mt_list_head); ++ new->inode_hash = (void *)GF_CALLOC( ++ new->inode_hashsize, sizeof(struct list_head), gf_common_mt_list_head); + if (!new->inode_hash) + goto out; + +- new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head), +- gf_common_mt_list_head); ++ new->name_hash = (void *)GF_CALLOC( ++ new->dentry_hashsize, sizeof(struct list_head), gf_common_mt_list_head); + if (!new->name_hash) + goto out; + +@@ -1675,11 +1688,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + if (!new->fd_mem_pool) + goto out; + +- for (i = 0; i < 65536; i++) { ++ for (i = 0; i < new->inode_hashsize; i++) { + INIT_LIST_HEAD(&new->inode_hash[i]); + } + +- for (i = 0; i < new->hashsize; i++) { ++ for (i = 0; i < new->dentry_hashsize; i++) { + INIT_LIST_HEAD(&new->name_hash[i]); + } + +@@ -1717,10 +1730,12 @@ out: + } + + inode_table_t * +-inode_table_new(uint32_t lru_limit, xlator_t *xl) ++inode_table_new(uint32_t lru_limit, xlator_t *xl, 
uint32_t dentry_hashsize, ++ uint32_t inode_hashsize) + { + /* Only fuse for now requires the inode table with invalidator */ +- return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); ++ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL, ++ dentry_hashsize, inode_hashsize); + } + + int +@@ -2439,8 +2454,10 @@ inode_table_dump(inode_table_t *itable, char *prefix) + return; + } + +- gf_proc_dump_build_key(key, prefix, "hashsize"); +- gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize); ++ gf_proc_dump_build_key(key, prefix, "dentry_hashsize"); ++ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize); ++ gf_proc_dump_build_key(key, prefix, "inode_hashsize"); ++ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize); + gf_proc_dump_build_key(key, prefix, "name"); + gf_proc_dump_write(key, "%s", itable->name); + +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 8f9e71f..bfa464f 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -594,7 +594,15 @@ init(xlator_t *this) + goto out; + } + +- this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this); ++ if (priv->shd.iamshd) { ++ /* Number of hash bucket should be prime number so declare 131 ++ total dentry hash buckets ++ */ ++ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128); ++ } else { ++ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0); ++ } ++ + if (!this->itable) { + ret = -ENOMEM; + goto out; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 16ac16c..072896d 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1168,7 +1168,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + break; + } + +- + offset += ret; + total += ret; + +@@ -2467,7 +2466,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode) + 0, + }; + +- itable = 
inode_table_new(0, this); ++ itable = inode_table_new(0, this, 0, 0); + if (!itable) + return; + +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 3f31c74..4118c3b 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -734,7 +734,7 @@ init(xlator_t *this) + GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed); + GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed); + +- this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this); ++ this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0); + if (!this->itable) + goto failed; + +diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c +index 424c0d5..4e0e798 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.c ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c +@@ -1658,7 +1658,7 @@ notify(xlator_t *this, int32_t event, void *data, ...) + child->child_up = 1; + child->xl = subvol; + if (!child->table) +- child->table = inode_table_new(4096, subvol); ++ child->table = inode_table_new(4096, subvol, 0, 0); + + _br_qchild_event(this, child, br_brick_connect); + pthread_cond_signal(&priv->cond); +diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c +index d9f0351..46ac116 100644 +--- a/xlators/features/quota/src/quotad-helpers.c ++++ b/xlators/features/quota/src/quotad-helpers.c +@@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req) + UNLOCK(&priv->lock); + + if (active_subvol->itable == NULL) +- active_subvol->itable = inode_table_new(4096, active_subvol); ++ active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0); + + state->itable = active_subvol->itable; + +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index 93f020f..099c887 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -2261,7 +2261,7 @@ 
reconfigure(xlator_t *this, dict_t *options) + + if (!active_earlier && active_now) { + if (!priv->trash_itable) { +- priv->trash_itable = inode_table_new(0, this); ++ priv->trash_itable = inode_table_new(0, this, 0, 0); + if (!priv->trash_itable) { + ret = -ENOMEM; + gf_log(this->name, GF_LOG_ERROR, +@@ -2533,7 +2533,7 @@ init(xlator_t *this) + } + + if (priv->state) { +- priv->trash_itable = inode_table_new(0, this); ++ priv->trash_itable = inode_table_new(0, this, 0, 0); + if (!priv->trash_itable) { + ret = -ENOMEM; + priv->state = _gf_false; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 1bddac2..919eea3 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -6298,10 +6298,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph) + } + + #if FUSE_KERNEL_MINOR_VERSION >= 11 +- itable = inode_table_with_invalidator(priv->lru_limit, graph->top, +- fuse_inode_invalidate_fn, this); ++ itable = inode_table_with_invalidator( ++ priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0); + #else +- itable = inode_table_new(0, graph->top); ++ itable = inode_table_new(0, graph->top, 0, 0); + #endif + if (!itable) { + ret = -1; +diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c +index ebded41..402be30 100644 +--- a/xlators/nfs/server/src/nfs.c ++++ b/xlators/nfs/server/src/nfs.c +@@ -564,7 +564,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl) + return -1; + + lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT; +- xl->itable = inode_table_new(lrusize, xl); ++ xl->itable = inode_table_new(lrusize, xl, 0, 0); + if (!xl->itable) { + gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY, + "Failed to allocate inode table"); +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index 1d1177d..eeca73c 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ 
b/xlators/protocol/server/src/server-handshake.c +@@ -36,7 +36,6 @@ gf_compare_client_version(rpcsvc_request_t *req, int fop_prognum, + return ret; + } + +- + int + server_getspec(rpcsvc_request_t *req) + { +@@ -629,7 +628,7 @@ server_setvolume(rpcsvc_request_t *req) + + /* TODO: what is this ? */ + client->bound_xl->itable = inode_table_new(conf->inode_lru_limit, +- client->bound_xl); ++ client->bound_xl, 0, 0); + } + } + UNLOCK(&conf->itable_lock); +-- +1.8.3.1 + diff --git a/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch b/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch new file mode 100644 index 0000000..e8a4906 --- /dev/null +++ b/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch @@ -0,0 +1,784 @@ +From 5294c82e0528059b10cbaab7805b20e76ffdd66b Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Mon, 30 Nov 2020 17:39:53 +0530 +Subject: [PATCH 510/511] glusterd[brick_mux]: Optimize friend handshake code + to avoid call_bail (#1614) + +During glusterd handshake glusterd received a volume dictionary +from peer end to compare the own volume dictionary data.If the options +are differ it sets the key to recognize volume options are changed +and call import syntask to delete/start the volume.In brick_mux +environment while number of volumes are high(5k) the dict api in function +glusterd_compare_friend_volume takes time because the function +glusterd_handle_friend_req saves all peer volume data in a single dictionary. +Due to time taken by the function glusterd_handle_friend RPC requests receives +a call_bail from a peer end gluster(CLI) won't be able to show volume status. + +Solution: To optimize the code done below changes +1) Populate a new specific dictionary to save the peer end version specific + data so that function won't take much time to take the decision about the + peer end has some volume updates. 
+2) In case of volume has differ version set the key in status_arr instead + of saving in a dictionary to make the operation is faster. + +Note: To validate the changes followed below procedure +1) Setup 5100 distributed volumes 3x1 +2) Enable brick_mux +3) Start all the volumes +4) Kill all gluster processes on 3rd node +5) Run a loop to update volume option on a 1st node + for i in {1..5100}; do gluster v set vol$i performance.open-behind off; done +6) Start the glusterd process on the 3rd node +7) Wait to finish handshake and check there should not be any call_bail message + in the logs + +> Change-Id: Ibad7c23988539cc369ecc39dea2ea6985470bee1 +> Fixes: #1613 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 12545d91eed27ff9abb0505a12c7d4e75b45a53e) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1613) + +Change-Id: Ibad7c23988539cc369ecc39dea2ea6985470bee1 +BUG: 1898784 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/221193 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/ctx.c | 4 + + libglusterfs/src/dict.c | 166 ++++++++++++++++++++++++++- + libglusterfs/src/globals.c | 2 - + libglusterfs/src/glusterfs/dict.h | 5 + + libglusterfs/src/glusterfs/globals.h | 2 + + libglusterfs/src/libglusterfs.sym | 1 + + xlators/mgmt/glusterd/src/glusterd-handler.c | 39 ++++--- + xlators/mgmt/glusterd/src/glusterd-sm.c | 6 +- + xlators/mgmt/glusterd/src/glusterd-sm.h | 1 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 148 ++++++++++++++---------- + xlators/mgmt/glusterd/src/glusterd-utils.h | 2 +- + xlators/mgmt/glusterd/src/glusterd.h | 8 +- + 12 files changed, 301 insertions(+), 83 deletions(-) + +diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c +index 4a001c2..ae1a77a 100644 +--- a/libglusterfs/src/ctx.c ++++ b/libglusterfs/src/ctx.c +@@ -14,6 +14,7 @@ + #include "glusterfs/glusterfs.h" + #include "timer-wheel.h" + ++glusterfs_ctx_t 
*global_ctx = NULL; + glusterfs_ctx_t * + glusterfs_ctx_new() + { +@@ -51,6 +52,9 @@ glusterfs_ctx_new() + GF_ATOMIC_INIT(ctx->stats.max_dict_pairs, 0); + GF_ATOMIC_INIT(ctx->stats.total_pairs_used, 0); + GF_ATOMIC_INIT(ctx->stats.total_dicts_used, 0); ++ ++ if (!global_ctx) ++ global_ctx = ctx; + out: + return ctx; + } +diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c +index d8cdda4..e5f619c 100644 +--- a/libglusterfs/src/dict.c ++++ b/libglusterfs/src/dict.c +@@ -56,7 +56,13 @@ struct dict_cmp { + static data_t * + get_new_data() + { +- data_t *data = mem_get(THIS->ctx->dict_data_pool); ++ data_t *data = NULL; ++ ++ if (global_ctx) { ++ data = mem_get(global_ctx->dict_data_pool); ++ } else { ++ data = mem_get(THIS->ctx->dict_data_pool); ++ } + + if (!data) + return NULL; +@@ -3503,3 +3509,161 @@ unlock: + UNLOCK(&dict->lock); + return 0; + } ++ ++/* Popluate specific dictionary on the basis of passed key array at the ++ time of unserialize buffer ++*/ ++int32_t ++dict_unserialize_specific_keys(char *orig_buf, int32_t size, dict_t **fill, ++ char **suffix_key_arr, dict_t **specific_dict, ++ int totkeycount) ++{ ++ char *buf = orig_buf; ++ int ret = -1; ++ int32_t count = 0; ++ int i = 0; ++ int j = 0; ++ ++ data_t *value = NULL; ++ char *key = NULL; ++ int32_t keylen = 0; ++ int32_t vallen = 0; ++ int32_t hostord = 0; ++ xlator_t *this = NULL; ++ int32_t keylenarr[totkeycount]; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (!buf) { ++ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, ++ "buf is null!"); ++ goto out; ++ } ++ ++ if (size == 0) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "size is 0!"); ++ goto out; ++ } ++ ++ if (!fill) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "fill is null!"); ++ goto out; ++ } ++ ++ if (!*fill) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "*fill is null!"); ++ goto out; ++ } ++ ++ if ((buf + 
DICT_HDR_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer " ++ "passed. available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + DICT_HDR_LEN)); ++ goto out; ++ } ++ ++ memcpy(&hostord, buf, sizeof(hostord)); ++ count = ntoh32(hostord); ++ buf += DICT_HDR_LEN; ++ ++ if (count < 0) { ++ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO, ++ "count=%d", count, NULL); ++ goto out; ++ } ++ ++ /* Compute specific key length and save in array */ ++ for (i = 0; i < totkeycount; i++) { ++ keylenarr[i] = strlen(suffix_key_arr[i]); ++ } ++ ++ for (i = 0; i < count; i++) { ++ if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized " ++ "buffer passed. available (%lu) < " ++ "required (%lu)", ++ (long)(orig_buf + size), ++ (long)(buf + DICT_DATA_HDR_KEY_LEN)); ++ goto out; ++ } ++ memcpy(&hostord, buf, sizeof(hostord)); ++ keylen = ntoh32(hostord); ++ buf += DICT_DATA_HDR_KEY_LEN; ++ ++ if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized " ++ "buffer passed. available (%lu) < " ++ "required (%lu)", ++ (long)(orig_buf + size), ++ (long)(buf + DICT_DATA_HDR_VAL_LEN)); ++ goto out; ++ } ++ memcpy(&hostord, buf, sizeof(hostord)); ++ vallen = ntoh32(hostord); ++ buf += DICT_DATA_HDR_VAL_LEN; ++ ++ if ((keylen < 0) || (vallen < 0)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized length passed " ++ "key:%d val:%d", ++ keylen, vallen); ++ goto out; ++ } ++ if ((buf + keylen) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer passed. 
" ++ "available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + keylen)); ++ goto out; ++ } ++ key = buf; ++ buf += keylen + 1; /* for '\0' */ ++ ++ if ((buf + vallen) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer passed. " ++ "available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + vallen)); ++ goto out; ++ } ++ value = get_new_data(); ++ ++ if (!value) { ++ ret = -1; ++ goto out; ++ } ++ value->len = vallen; ++ value->data = gf_memdup(buf, vallen); ++ value->data_type = GF_DATA_TYPE_STR_OLD; ++ value->is_static = _gf_false; ++ buf += vallen; ++ ++ ret = dict_addn(*fill, key, keylen, value); ++ if (ret < 0) { ++ data_destroy(value); ++ goto out; ++ } ++ for (j = 0; j < totkeycount; j++) { ++ if (keylen > keylenarr[j]) { ++ if (!strcmp(key + keylen - keylenarr[j], suffix_key_arr[j])) { ++ ret = dict_addn(*specific_dict, key, keylen, value); ++ break; ++ } ++ } ++ } ++ ++ if (ret < 0) ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} +diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c +index e433ee8..30c15b6 100644 +--- a/libglusterfs/src/globals.c ++++ b/libglusterfs/src/globals.c +@@ -96,7 +96,6 @@ const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { + /* This global ctx is a bad hack to prevent some of the libgfapi crashes. 
+ * This should be removed once the patch on resource pool is accepted + */ +-glusterfs_ctx_t *global_ctx = NULL; + pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; + xlator_t global_xlator; + static int gf_global_mem_acct_enable = 1; +@@ -236,7 +235,6 @@ __glusterfs_this_location() + if (*this_location == NULL) { + thread_xlator = &global_xlator; + } +- + return this_location; + } + +diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h +index 8239c7a..6e469c7 100644 +--- a/libglusterfs/src/glusterfs/dict.h ++++ b/libglusterfs/src/glusterfs/dict.h +@@ -423,4 +423,9 @@ dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result); + + int + dict_serialized_length_lk(dict_t *this); ++ ++int32_t ++dict_unserialize_specific_keys(char *orig_buf, int32_t size, dict_t **fill, ++ char **specific_key_arr, dict_t **specific_dict, ++ int totkeycount); + #endif +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index cc145cd..33fb023 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -199,4 +199,6 @@ int + gf_global_mem_acct_enable_get(void); + int + gf_global_mem_acct_enable_set(int val); ++ ++extern glusterfs_ctx_t *global_ctx; + #endif /* !_GLOBALS_H */ +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index d060292..bc770e2 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -436,6 +436,7 @@ dict_clear_flag + dict_check_flag + dict_unref + dict_unserialize ++dict_unserialize_specific_keys + drop_token + eh_destroy + eh_dump +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index b8799ab..908361c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -86,6 +86,9 @@ glusterd_big_locked_handler(rpcsvc_request_t *req, rpcsvc_actor actor_fn) + 
return ret; + } + ++static char *specific_key_suffix[] = {".quota-cksum", ".ckusm", ".version", ++ ".quota-version", ".name"}; ++ + static int + glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + int port, gd1_mgmt_friend_req *friend_req) +@@ -97,6 +100,8 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + char rhost[UNIX_PATH_MAX + 1] = {0}; + uuid_t friend_uuid = {0}; + dict_t *dict = NULL; ++ dict_t *peer_ver = NULL; ++ int totcount = sizeof(specific_key_suffix) / sizeof(specific_key_suffix[0]); + + gf_uuid_parse(uuid_utoa(uuid), friend_uuid); + if (!port) +@@ -104,8 +109,19 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + + ret = glusterd_remote_hostname_get(req, rhost, sizeof(rhost)); + ++ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t); ++ dict = dict_new(); ++ peer_ver = dict_new(); ++ + RCU_READ_LOCK; + ++ if (!ctx || !dict || !peer_ver) { ++ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Unable to allocate memory"); ++ ret = -1; ++ goto out; ++ } ++ + peerinfo = glusterd_peerinfo_find(uuid, rhost); + + if (peerinfo == NULL) { +@@ -130,28 +146,14 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); + +- ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t); +- +- if (!ctx) { +- gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, +- "Unable to allocate memory"); +- ret = -1; +- goto out; +- } +- + gf_uuid_copy(ctx->uuid, uuid); + if (hostname) + ctx->hostname = gf_strdup(hostname); + ctx->req = req; + +- dict = dict_new(); +- if (!dict) { +- ret = -1; +- goto out; +- } +- +- ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len, +- &dict); ++ ret = dict_unserialize_specific_keys( ++ friend_req->vols.vols_val, friend_req->vols.vols_len, &dict, ++ specific_key_suffix, 
&peer_ver, totcount); + + if (ret) + goto out; +@@ -159,6 +161,7 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + dict->extra_stdfree = friend_req->vols.vols_val; + + ctx->vols = dict; ++ ctx->peer_ver = peer_ver; + event->ctx = ctx; + + ret = glusterd_friend_sm_inject_event(event); +@@ -188,6 +191,8 @@ out: + } else { + free(friend_req->vols.vols_val); + } ++ if (peer_ver) ++ dict_unref(peer_ver); + if (event) + GF_FREE(event->peername); + GF_FREE(event); +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 044da3d..d10a792 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -106,6 +106,8 @@ glusterd_destroy_friend_req_ctx(glusterd_friend_req_ctx_t *ctx) + + if (ctx->vols) + dict_unref(ctx->vols); ++ if (ctx->peer_ver) ++ dict_unref(ctx->peer_ver); + GF_FREE(ctx->hostname); + GF_FREE(ctx); + } +@@ -936,8 +938,8 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx) + // Build comparison logic here. 
+ pthread_mutex_lock(&conf->import_volumes); + { +- ret = glusterd_compare_friend_data(ev_ctx->vols, &status, +- event->peername); ++ ret = glusterd_compare_friend_data(ev_ctx->vols, ev_ctx->peer_ver, ++ &status, event->peername); + if (ret) { + pthread_mutex_unlock(&conf->import_volumes); + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h +index ce008ac..efdf68e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.h ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.h +@@ -174,6 +174,7 @@ typedef struct glusterd_friend_req_ctx_ { + rpcsvc_request_t *req; + int port; + dict_t *vols; ++ dict_t *peer_ver; // Dictionary to save peer ver data + } glusterd_friend_req_ctx_t; + + typedef struct glusterd_friend_update_ctx_ { +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index f7030fb..cf32bd9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3709,12 +3709,14 @@ out: + return ret; + } + +-int32_t +-glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, +- int32_t *status, char *hostname) ++static int32_t ++glusterd_compare_friend_volume(dict_t *peer_data, ++ glusterd_friend_synctask_args_t *arg, ++ int32_t count, int32_t *status, char *hostname) + { + int32_t ret = -1; + char key[64] = ""; ++ char key_prefix[32]; + int keylen; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; +@@ -3726,15 +3728,20 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + xlator_t *this = NULL; + + GF_ASSERT(peer_data); ++ GF_ASSERT(arg); + GF_ASSERT(status); + + this = THIS; + GF_ASSERT(this); + +- keylen = snprintf(key, sizeof(key), "volume%d.name", count); +- ret = dict_get_strn(peer_data, key, keylen, &volname); +- if (ret) ++ snprintf(key_prefix, sizeof(key_prefix), "volume%d", count); ++ keylen = snprintf(key, sizeof(key), "%s.name", key_prefix); ++ ret = 
dict_get_strn(arg->peer_ver_data, key, keylen, &volname); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { +@@ -3750,10 +3757,13 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + goto out; + } + +- keylen = snprintf(key, sizeof(key), "volume%d.version", count); +- ret = dict_get_int32n(peer_data, key, keylen, &version); +- if (ret) ++ keylen = snprintf(key, sizeof(key), "%s.version", key_prefix); ++ ret = dict_get_int32n(arg->peer_ver_data, key, keylen, &version); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + if (version > volinfo->version) { + // Mismatch detected +@@ -3772,10 +3782,13 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + + // Now, versions are same, compare cksums. + // +- snprintf(key, sizeof(key), "volume%d.ckusm", count); +- ret = dict_get_uint32(peer_data, key, &cksum); +- if (ret) ++ snprintf(key, sizeof(key), "%s.ckusm", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, &cksum); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + if (cksum != volinfo->cksum) { + ret = 0; +@@ -3790,8 +3803,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + goto skip_quota; + +- snprintf(key, sizeof(key), "volume%d.quota-version", count); +- ret = dict_get_uint32(peer_data, key, &quota_version); ++ snprintf(key, sizeof(key), "%s.quota-version", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, &quota_version); + if (ret) { + gf_msg_debug(this->name, 0, + "quota-version key absent for" +@@ -3809,6 +3822,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, 
int32_t count, + "%d on peer %s", + volinfo->volname, volinfo->quota_conf_version, quota_version, + hostname); ++ GF_ATOMIC_INIT(volinfo->volpeerupdate, 1); + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + goto out; + } else if (quota_version < volinfo->quota_conf_version) { +@@ -3819,8 +3833,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + + // Now, versions are same, compare cksums. + // +- snprintf(key, sizeof(key), "volume%d.quota-cksum", count); +- ret = dict_get_uint32(peer_data, key, &quota_cksum); ++ snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, &quota_cksum); + if (ret) { + gf_msg_debug(this->name, 0, + "quota checksum absent for " +@@ -3846,13 +3860,12 @@ skip_quota: + *status = GLUSTERD_VOL_COMP_SCS; + + out: +- keylen = snprintf(key, sizeof(key), "volume%d.update", count); +- + if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) { +- ret = dict_set_int32n(peer_data, key, keylen, 1); +- } else { +- ret = dict_set_int32n(peer_data, key, keylen, 0); ++ /*Set the status to ensure volume is updated on the peer ++ */ ++ arg->status_arr[(count / 64)] ^= 1UL << (count % 64); + } ++ + if (*status == GLUSTERD_VOL_COMP_RJT) { + gf_event(EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s", + volinfo->volname); +@@ -4935,8 +4948,9 @@ out: + return ret; + } + +-int32_t +-glusterd_import_friend_volume(dict_t *peer_data, int count) ++static int32_t ++glusterd_import_friend_volume(dict_t *peer_data, int count, ++ glusterd_friend_synctask_args_t *arg) + { + int32_t ret = -1; + glusterd_conf_t *priv = NULL; +@@ -4954,10 +4968,27 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + priv = this->private; + GF_ASSERT(priv); + +- ret = snprintf(key, sizeof(key), "volume%d.update", count); +- ret = dict_get_int32n(peer_data, key, ret, &update); +- if (ret || !update) { ++ if (arg) { ++ /*Check if the volume options are updated on the other peers ++ */ ++ update = (1UL & (arg->status_arr[(count / 64)] >> 
(count % 64))); ++ } else { ++ ret = snprintf(key, sizeof(key), "volume%d.update", count); ++ ret = dict_get_int32n(peer_data, key, ret, &update); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s", key, NULL); ++ goto out; ++ } ++ } ++ ++ if (!update) { + /* if update is 0 that means the volume is not imported */ ++ gf_log(this->name, GF_LOG_DEBUG, ++ "The volume%d does" ++ " not have any peer change", ++ count); ++ ret = 0; + goto out; + } + +@@ -5045,6 +5076,8 @@ glusterd_import_friend_volumes_synctask(void *opaque) + glusterd_conf_t *conf = NULL; + dict_t *peer_data = NULL; + glusterd_friend_synctask_args_t *arg = NULL; ++ uint64_t bm = 0; ++ uint64_t mask = 0; + + this = THIS; + GF_ASSERT(this); +@@ -5056,17 +5089,7 @@ glusterd_import_friend_volumes_synctask(void *opaque) + if (!arg) + goto out; + +- peer_data = dict_new(); +- if (!peer_data) { +- goto out; +- } +- +- ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data); +- if (ret) { +- errno = ENOMEM; +- goto out; +- } +- ++ peer_data = arg->peer_data; + ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); + if (ret) + goto out; +@@ -5083,11 +5106,18 @@ glusterd_import_friend_volumes_synctask(void *opaque) + conf->restart_bricks = _gf_true; + + while (i <= count) { +- ret = glusterd_import_friend_volume(peer_data, i); +- if (ret) { +- break; ++ bm = arg->status_arr[i / 64]; ++ while (bm != 0) { ++ /* mask will contain the lowest bit set from bm. 
*/ ++ mask = bm & (-bm); ++ bm ^= mask; ++ ret = glusterd_import_friend_volume(peer_data, i + ffsll(mask) - 2, ++ arg); ++ if (ret < 0) { ++ break; ++ } + } +- i++; ++ i += 64; + } + if (i > count) { + glusterd_svcs_manager(NULL); +@@ -5095,11 +5125,9 @@ glusterd_import_friend_volumes_synctask(void *opaque) + conf->restart_bricks = _gf_false; + synccond_broadcast(&conf->cond_restart_bricks); + out: +- if (peer_data) +- dict_unref(peer_data); + if (arg) { +- if (arg->dict_buf) +- GF_FREE(arg->dict_buf); ++ dict_unref(arg->peer_data); ++ dict_unref(arg->peer_ver_data); + GF_FREE(arg); + } + +@@ -5121,7 +5149,7 @@ glusterd_import_friend_volumes(dict_t *peer_data) + goto out; + + while (i <= count) { +- ret = glusterd_import_friend_volume(peer_data, i); ++ ret = glusterd_import_friend_volume(peer_data, i, NULL); + if (ret) + goto out; + i++; +@@ -5260,7 +5288,8 @@ out: + } + + int32_t +-glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) ++glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status, ++ char *hostname) + { + int32_t ret = -1; + int32_t count = 0; +@@ -5289,8 +5318,19 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + if (ret) + goto out; + ++ arg = GF_CALLOC(1, sizeof(*arg) + sizeof(uint64_t) * (count / 64), ++ gf_common_mt_char); ++ if (!arg) { ++ ret = -1; ++ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Out Of Memory"); ++ goto out; ++ } ++ arg->peer_data = dict_ref(peer_data); ++ arg->peer_ver_data = dict_ref(cmp); + while (i <= count) { +- ret = glusterd_compare_friend_volume(peer_data, i, status, hostname); ++ ret = glusterd_compare_friend_volume(peer_data, arg, i, status, ++ hostname); + if (ret) + goto out; + +@@ -5310,21 +5350,13 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + * first brick to come up before attaching the subsequent bricks + * in case brick multiplexing is enabled + */ +- arg = GF_CALLOC(1, 
sizeof(*arg), gf_common_mt_char); +- ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf, +- &arg->dictlen); +- if (ret < 0) { +- gf_log(this->name, GF_LOG_ERROR, +- "dict_serialize failed while handling " +- " import friend volume request"); +- goto out; +- } +- + glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg); + } + + out: + if (ret && arg) { ++ dict_unref(arg->peer_data); ++ dict_unref(arg->peer_ver_data); + GF_FREE(arg); + } + gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 5f5de82..02d85d2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -231,7 +231,7 @@ glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, + u_int *length); + + int32_t +-glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, ++glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status, + char *hostname); + + int +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index f739b5d..efe4d0e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -234,8 +234,12 @@ typedef struct glusterd_add_dict_args { + } glusterd_add_dict_args_t; + + typedef struct glusterd_friend_synctask_args { +- char *dict_buf; +- u_int dictlen; ++ dict_t *peer_data; ++ dict_t *peer_ver_data; // Dictionary to save peer version data ++ /* This status_arr[1] is not a real size, real size of the array ++ is dynamically allocated ++ */ ++ uint64_t status_arr[1]; + } glusterd_friend_synctask_args_t; + + typedef enum gf_brick_status { +-- +1.8.3.1 + diff --git a/SOURCES/0511-features-shard-Missing-format-specifier.patch b/SOURCES/0511-features-shard-Missing-format-specifier.patch new file mode 100644 index 0000000..baf6cf4 --- /dev/null +++ 
b/SOURCES/0511-features-shard-Missing-format-specifier.patch @@ -0,0 +1,39 @@ +From 868d346cc35c222d19b95bd9c367674c9ea859df Mon Sep 17 00:00:00 2001 +From: Vinayakswami Hariharmath +Date: Tue, 15 Dec 2020 16:23:49 +0530 +Subject: [PATCH 511/511] features/shard: Missing format specifier + +PRIu64 format specifier explicitly needs (percent sign) as +prefix and that was missing as part of the below commit on +downstream + +https://code.engineering.redhat.com/gerrit/#/c/221061/ + +BUG: 1752739 +Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 + +Signed-off-by: Vinayakswami Hariharmath +Change-Id: I4598893e3fcca3a2b3e6e8ef9b64b3e5e98923e6 +Reviewed-on: https://code.engineering.redhat.com/gerrit/221217 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + xlators/features/shard/src/shard.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index a967f35..099b062 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1855,7 +1855,7 @@ int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, + */ + if (!inode) { + gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent in backend: " PRIu64 ++ "Last shard to be truncated absent in backend:%" PRIu64 + " of gfid: %s. 
Directly proceeding to update file size", + local->first_block, uuid_utoa(local->loc.inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, +-- +1.8.3.1 + diff --git a/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch b/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch new file mode 100644 index 0000000..37de503 --- /dev/null +++ b/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch @@ -0,0 +1,105 @@ +From c963653a89c3f6466af9a3e8f19246a7907f7f8c Mon Sep 17 00:00:00 2001 +From: nik-redhat +Date: Thu, 30 Jul 2020 13:04:52 +0530 +Subject: [PATCH 512/517] glusterd: shared storage mount fails in ipv6 + environment + +Issue: +In case of ipv6 environment, the mounting of glusterd_shared_storage +volume fails as it doesn't recognises the ipv6 enviornment. + +Fix: +In case of ipv6 environment, the address-family is passed +to the hooks script on creating shared-storage, then depending +upon the address-family --xlator-option=transport.address-family=inet6 +option is added to the mount command, and the mounting succeeds. 
+ +>Fixes: #1406 +> +>Change-Id: Ib1888c34d85e6c01618b0ba214cbe1f57576908d +>Signed-off-by: nik-redhat + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24797 +BUG: 1856574 + +Change-Id: Ib1888c34d85e6c01618b0ba214cbe1f57576908d +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/221844 +Tested-by: RHGS Build Bot +Reviewed-by: Srijan Sivakumar +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../set/post/S32gluster_enable_shared_storage.sh | 11 +++++++++-- + xlators/mgmt/glusterd/src/glusterd-hooks.c | 19 +++++++++++++++++++ + 2 files changed, 28 insertions(+), 2 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +index 3bae37c..9597503 100755 +--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh ++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +@@ -104,8 +104,15 @@ function check_volume_status() + echo $status + } + +-mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ +- /run/gluster/shared_storage" ++key=`echo $5 | cut -d '=' -f 1` ++val=`echo $5 | cut -d '=' -f 2` ++if [ "$key" == "transport.address-family" ]; then ++ mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \ ++ $local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage" ++else ++ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ ++ /var/run/gluster/shared_storage" ++fi + + if [ "$option" == "enable" ]; then + retry=0; +diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c +index 216cdf7..4f0d775 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c ++++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c +@@ -200,11 +200,16 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) + int i = 0; + int count = 0; + int ret = -1; ++ int flag = 0; + char 
query[1024] = { + 0, + }; + char *key = NULL; + char *value = NULL; ++ char *inet_family = NULL; ++ xlator_t *this = NULL; ++ this = THIS; ++ GF_ASSERT(this); + + ret = dict_get_int32(dict, "count", &count); + if (ret) +@@ -228,9 +233,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) + continue; + + runner_argprintf(runner, "%s=%s", key, value); ++ if ((strncmp(key, "cluster.enable-shared-storage", ++ SLEN("cluster.enable-shared-storage")) == 0 || ++ strncmp(key, "enable-shared-storage", ++ SLEN("enable-shared-storage")) == 0) && ++ strncmp(value, "enable", SLEN("enable")) == 0) ++ flag = 1; + } + + glusterd_hooks_add_custom_args(dict, runner); ++ if (flag == 1) { ++ ret = dict_get_str_sizen(this->options, "transport.address-family", ++ &inet_family); ++ if (!ret) { ++ runner_argprintf(runner, "transport.address-family=%s", ++ inet_family); ++ } ++ } + + ret = 0; + out: +-- +1.8.3.1 + diff --git a/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch b/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch new file mode 100644 index 0000000..ebd5609 --- /dev/null +++ b/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch @@ -0,0 +1,191 @@ +From 708c17a8a69b2657f384affaedfcf4ba0a123893 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 23 Dec 2020 14:45:07 +0530 +Subject: [PATCH 513/517] afr: mark pending xattrs as a part of metadata heal + +...if pending xattrs are zero for all children. + +Problem: +If there are no pending xattrs and a metadata heal needs to be +performed, it can be possible that we end up with xattrs inadvertendly +deleted from all bricks, as explained in the BZ. + +Fix: +After picking one among the sources as the good copy, mark pending xattrs on +all sources to blame the sinks. Now even if this metadata heal fails midway, +a subsequent heal will still choose one of the valid sources that it +picked previously. 
+ +Upstream patch details: +> Fixes: #1067 +> Change-Id: If1b050b70b0ad911e162c04db4d89b263e2b8d7b +> Signed-off-by: Ravishankar N +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/21922/ + +BUG: 1640148 +Change-Id: If1b050b70b0ad911e162c04db4d89b263e2b8d7b +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/222073 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + tests/bugs/replicate/mdata-heal-no-xattrs.t | 59 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-metadata.c | 62 +++++++++++++++++++++++- + 2 files changed, 120 insertions(+), 1 deletion(-) + create mode 100644 tests/bugs/replicate/mdata-heal-no-xattrs.t + +diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t +new file mode 100644 +index 0000000..d3b0c50 +--- /dev/null ++++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t +@@ -0,0 +1,59 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.self-heal-daemon off ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++echo "Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++ ++# Change permission on brick-0: simulates the case where there is metadata ++# mismatch but no pending xattrs. This brick will become the source for heal. 
++TEST chmod +x $B0/$V0"0"/FILE ++ ++# Add gfid to xattrop ++xattrop_b0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_b0` ++gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE)) ++TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE ++EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0 ++ ++TEST $CLI volume set $V0 cluster.self-heal-daemon on ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Brick-0 should contain xattrs blaming other 2 bricks. ++# The values will be zero because heal is over. ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/FILE ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}0/FILE ++ ++# Brick-1 and Brick-2 must not contain any afr xattrs. ++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}1/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}1/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}1/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}2/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}2/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE ++ ++# check permission bits. 
++EXPECT '755' stat -c %a $B0/${V0}0/FILE ++EXPECT '755' stat -c %a $B0/${V0}1/FILE ++EXPECT '755' stat -c %a $B0/${V0}2/FILE ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index f4e31b6..03f43ba 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -190,6 +190,59 @@ out: + return ret; + } + ++static int ++__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this, ++ inode_t *inode, ++ struct afr_reply *replies, ++ unsigned char *sources) ++{ ++ int ret = 0; ++ int i = 0; ++ int m_idx = 0; ++ afr_private_t *priv = NULL; ++ int raw[AFR_NUM_CHANGE_LOGS] = {0}; ++ dict_t *xattr = NULL; ++ ++ priv = this->private; ++ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION); ++ raw[m_idx] = 1; ++ ++ xattr = dict_new(); ++ if (!xattr) ++ return -ENOMEM; ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (sources[i]) ++ continue; ++ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw, ++ sizeof(int) * AFR_NUM_CHANGE_LOGS); ++ if (ret) { ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (!sources[i]) ++ continue; ++ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO, ++ "Failed to set pending metadata xattr on child %d for %s", i, ++ uuid_utoa(inode->gfid)); ++ goto out; ++ } ++ } ++ ++ afr_replies_wipe(replies, priv->child_count); ++ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies); ++ ++out: ++ if (xattr) ++ dict_unref(xattr); ++ return ret; ++} ++ + /* + * Look for mismatching uid/gid or mode or user xattrs even if + * AFR xattrs don't say so, and pick one arbitrarily as winner. 
*/ +@@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + }; + int source = -1; + int sources_count = 0; ++ int ret = 0; + + priv = this->private; + +@@ -300,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + healed_sinks[i] = 1; + } + } +- ++ if ((sources_count == priv->child_count) && (source > -1) && ++ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) { ++ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode, ++ replies, sources); ++ if (ret < 0) ++ return ret; ++ } + out: + afr_mark_active_sinks(this, sources, locked_on, healed_sinks); + return source; +-- +1.8.3.1 + diff --git a/SOURCES/0514-afr-event-gen-changes.patch b/SOURCES/0514-afr-event-gen-changes.patch new file mode 100644 index 0000000..9f9562e --- /dev/null +++ b/SOURCES/0514-afr-event-gen-changes.patch @@ -0,0 +1,308 @@ +From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 23 Dec 2020 14:57:51 +0530 +Subject: [PATCH 514/517] afr: event gen changes + +The general idea of the changes is to prevent resetting event generation +to zero in the inode ctx, since event gen is something that should +follow 'causal order'. + +Change #1: +For a read txn, in inode refresh cbk, if event_generation is +found zero, we are failing the read fop. This is not needed +because change in event gen is only a marker for the next inode refresh to +happen and should not be taken into account by the current read txn. + +Change #2: +The event gen being zero above can happen if there is a racing lookup, +which resets even get (in afr_lookup_done) if there are non zero afr +xattrs. The resetting is done only to trigger an inode refresh and a +possible client side heal on the next lookup. That can be acheived by +setting the need_refresh flag in the inode ctx. So replaced all +occurences of resetting even gen to zero with a call to +afr_inode_need_refresh_set(). 
+ +Change #3: +In both lookup and discover path, we are doing an inode refresh which is +not required since all 3 essentially do the same thing- update the inode +ctx with the good/bad copies from the brick replies. Inode refresh also +triggers background heals, but I think it is okay to do it when we call +refresh during the read and write txns and not in the lookup path. + +The .ts which relied on inode refresh in lookup path to trigger heals are +now changed to do read txn so that inode refresh and the heal happens. + +Upstream patch details: +> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec +> Fixes: #1179 +> Signed-off-by: Ravishankar N +> Reported-by: Erik Jacobson +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/ + +BUG: 1640148 +Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/222074 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + ...fid-mismatch-resolution-with-fav-child-policy.t | 8 +- + xlators/cluster/afr/src/afr-common.c | 92 +++++----------------- + xlators/cluster/afr/src/afr-dir-write.c | 6 +- + xlators/cluster/afr/src/afr.h | 5 +- + 4 files changed, 29 insertions(+), 82 deletions(-) + +diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +index f4aa351..12af0c8 100644 +--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t ++++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ] + #We know that second brick has the bigger size file + BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1) + +-TEST ls $M0/f3 +-TEST cat $M0/f3 ++TEST ls $M0 #Trigger entry heal via readdir inode refresh ++TEST cat $M0/f3 #Trigger data heal via readv inode refresh + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + #gfid split-brain should be 
resolved +@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +-TEST ls $M0/f4 +-TEST cat $M0/f4 ++TEST ls $M0 #Trigger entry heal via readdir inode refresh ++TEST cat $M0/f4 #Trigger data heal via readv inode refresh + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + #gfid split-brain should be resolved +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index fca2cd5..90b4f14 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, + metadatamap |= (1 << index); + } + if (metadatamap_old != metadatamap) { +- event = 0; ++ __afr_inode_need_refresh_set(inode, this); + } + break; + +@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, + datamap |= (1 << index); + } + if (datamap_old != datamap) +- event = 0; ++ __afr_inode_need_refresh_set(inode, this); + break; + + default: +@@ -461,34 +461,6 @@ out: + } + + int +-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- uint16_t datamap = 0; +- uint16_t metadatamap = 0; +- uint32_t event = 0; +- uint64_t val = 0; +- afr_inode_ctx_t *ctx = NULL; +- +- ret = __afr_inode_ctx_get(this, inode, &ctx); +- if (ret) +- return ret; +- +- val = ctx->read_subvol; +- +- metadatamap = (val & 0x000000000000ffff) >> 0; +- datamap = (val & 0x00000000ffff0000) >> 16; +- event = 0; +- +- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | +- (((uint64_t)event) << 32); +- +- ctx->read_subvol = val; +- +- return ret; +-} +- +-int + __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int *event_p) + { +@@ -559,22 +531,6 @@ out: + } + + int +-__afr_inode_event_gen_reset(inode_t *inode, 
xlator_t *this) +-{ +- afr_private_t *priv = NULL; +- int ret = -1; +- +- priv = this->private; +- +- if (priv->child_count <= 16) +- ret = __afr_inode_event_gen_reset_small(inode, this); +- else +- ret = -1; +- +- return ret; +-} +- +-int + afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int *event_p) + { +@@ -723,30 +679,22 @@ out: + return need_refresh; + } + +-static int +-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) ++int ++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) + { + int ret = -1; + afr_inode_ctx_t *ctx = NULL; + +- GF_VALIDATE_OR_GOTO(this->name, inode, out); +- +- LOCK(&inode->lock); +- { +- ret = __afr_inode_ctx_get(this, inode, &ctx); +- if (ret) +- goto unlock; +- ++ ret = __afr_inode_ctx_get(this, inode, &ctx); ++ if (ret == 0) { + ctx->need_refresh = _gf_true; + } +-unlock: +- UNLOCK(&inode->lock); +-out: ++ + return ret; + } + + int +-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) ++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) + { + int ret = -1; + +@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) + + LOCK(&inode->lock); + { +- ret = __afr_inode_event_gen_reset(inode, this); ++ ret = __afr_inode_need_refresh_set(inode, this); + } + UNLOCK(&inode->lock); + out: +@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) + ret = afr_inode_get_readable(frame, inode, this, local->readable, + &event_generation, local->transaction.type); + +- if (ret == -EIO || (local->is_read_txn && !event_generation)) { ++ if (ret == -EIO) { + /* No readable subvolume even after refresh ==> splitbrain.*/ + if (!priv->fav_child_policy) { + err = EIO; +@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + if (read_subvol == -1) + goto cant_interpret; + if (ret) { +- afr_inode_event_gen_reset(local->inode, this); ++ afr_inode_need_refresh_set(local->inode, this); + 
dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY); + } + } else { +@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int read_subvol = -1; ++ int ret = 0; + unsigned char *data_readable = NULL; + unsigned char *success_replies = NULL; + +@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) + if (!afr_has_quorum(success_replies, this, frame)) + goto unwind; + +- afr_replies_interpret(frame, this, local->inode, NULL); ++ ret = afr_replies_interpret(frame, this, local->inode, NULL); ++ if (ret) { ++ afr_inode_need_refresh_set(local->inode, this); ++ } + + read_subvol = afr_read_subvol_decide(local->inode, this, NULL, + data_readable); +@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + afr_read_subvol_get(loc->inode, this, NULL, NULL, &event, + AFR_DATA_TRANSACTION, NULL); + +- if (afr_is_inode_refresh_reqd(loc->inode, this, event, +- local->event_generation)) +- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do); +- else +- afr_discover_do(frame, this, 0); ++ afr_discover_do(frame, this, 0); + + return 0; + out: +@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + afr_read_subvol_get(loc->parent, this, NULL, NULL, &event, + AFR_DATA_TRANSACTION, NULL); + +- if (afr_is_inode_refresh_reqd(loc->inode, this, event, +- local->event_generation)) +- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do); +- else +- afr_lookup_do(frame, this, 0); ++ afr_lookup_do(frame, this, 0); + + return 0; + out: +diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c +index 416c19d..d419bfc 100644 +--- a/xlators/cluster/afr/src/afr-dir-write.c ++++ b/xlators/cluster/afr/src/afr-dir-write.c +@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this) + continue; + 
if (local->replies[i].op_ret < 0) { + if (local->inode) +- afr_inode_event_gen_reset(local->inode, this); ++ afr_inode_need_refresh_set(local->inode, this); + if (local->parent) +- afr_inode_event_gen_reset(local->parent, this); ++ afr_inode_need_refresh_set(local->parent, this); + if (local->parent2) +- afr_inode_event_gen_reset(local->parent2, this); ++ afr_inode_need_refresh_set(local->parent2, this); + continue; + } + +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index ed5096e..3a2b26d 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, + int event_generation); + + int +-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this); ++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); ++ ++int ++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); + + int + afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this, +-- +1.8.3.1 + diff --git a/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch b/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch new file mode 100644 index 0000000..9c7693a --- /dev/null +++ b/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch @@ -0,0 +1,2155 @@ +From aab8a587360214432c4a2ab59134411f1d38c509 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 9 Dec 2020 10:46:31 +0530 +Subject: [PATCH 515/517] cluster/afr: Heal directory rename without + rmdir/mkdir + +Problem1: +When a directory is renamed while a brick +is down entry-heal always did an rm -rf on that directory on +the sink on old location and did mkdir and created the directory +hierarchy again in the new location. This is inefficient. + +Problem2: +Renamedir heal order may lead to a scenario where directory in +the new location could be created before deleting it from old +location leading to 2 directories with same gfid in posix. 
+ +Fix: +As part of heal, if oldlocation is healed first and is not present in +source-brick always rename it into a hidden directory inside the +sink-brick so that when heal is triggered in new-location shd can +rename it from this hidden directory to the new-location. + +If new-location heal is triggered first and it detects that the +directory already exists in the brick, then it should skip healing the +directory until it appears in the hidden directory. + +Credits: Ravi for rename-data-loss.t script + +Upstream patch details: +> Fixes: #1211 +> Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843 +> Signed-off-by: Pranith Kumar K +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24373/ + +BUG: 1640148 +Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/220660 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + tests/afr.rc | 16 + + tests/basic/afr/afr-anon-inode-no-quorum.t | 63 ++++ + tests/basic/afr/afr-anon-inode.t | 114 ++++++ + tests/basic/afr/entry-self-heal-anon-dir-off.t | 464 ++++++++++++++++++++++++ + tests/basic/afr/rename-data-loss.t | 72 ++++ + tests/bugs/replicate/bug-1744548-heal-timeout.t | 6 +- + tests/features/trash.t | 74 ++-- + xlators/cluster/afr/src/afr-common.c | 46 ++- + xlators/cluster/afr/src/afr-dir-read.c | 12 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 182 ++++++++++ + xlators/cluster/afr/src/afr-self-heal-entry.c | 206 +++++++++-- + xlators/cluster/afr/src/afr-self-heal-name.c | 33 +- + xlators/cluster/afr/src/afr-self-heal.h | 5 + + xlators/cluster/afr/src/afr-self-heald.c | 178 ++++++++- + xlators/cluster/afr/src/afr-self-heald.h | 2 +- + xlators/cluster/afr/src/afr.c | 40 +- + xlators/cluster/afr/src/afr.h | 11 + + xlators/mgmt/glusterd/src/glusterd-volgen.c | 39 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 + + 19 files changed, 1442 insertions(+), 127 deletions(-) + create mode 100644 
tests/basic/afr/afr-anon-inode-no-quorum.t + create mode 100644 tests/basic/afr/afr-anon-inode.t + create mode 100644 tests/basic/afr/entry-self-heal-anon-dir-off.t + create mode 100644 tests/basic/afr/rename-data-loss.t + +diff --git a/tests/afr.rc b/tests/afr.rc +index 35f352d..2417899 100644 +--- a/tests/afr.rc ++++ b/tests/afr.rc +@@ -105,3 +105,19 @@ function get_quorum_type() + local repl_id="$3" + cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}' + } ++ ++function afr_private_key_value() ++{ ++ local v=$1 ++ local m=$2 ++ local replica_id=$3 ++ local key=$4 ++#xargs at the end will strip leading spaces ++ grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs ++} ++ ++function afr_anon_entry_count() ++{ ++ local b=$1 ++ ls $b/.glusterfs-anonymous-inode* | wc -l ++} +diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t +new file mode 100644 +index 0000000..896ba0c +--- /dev/null ++++ b/tests/basic/afr/afr-anon-inode-no-quorum.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++#Test that anon-inode entry is not cleaned up as long as there exists at least ++#one valid entry ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.readdir-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST touch $M0/a $M0/b ++ ++gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a)) ++gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b)) ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST mv $M0/a $M0/a-new ++TEST mv $M0/b $M0/b-new ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++TEST ! ls $M0/a ++TEST ! 
ls $M0/b ++anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b ++#Make sure index heal doesn't happen after enabling heal ++TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 ++TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/* ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++#Allow time for a scan ++sleep 5 ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b ++inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b) ++TEST rm -f $M0/a-new ++TEST stat $M0/b-new ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 ++EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new ++ ++cleanup +diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t +new file mode 100644 +index 0000000..f4cf37a +--- /dev/null ++++ b/tests/basic/afr/afr-anon-inode.t +@@ -0,0 +1,114 @@ ++#!/bin/bash ++#Tests that afr-anon-inode test cases work fine as expected ++#These are cases where in entry-heal/name-heal we dont know entry for an inode ++#so these inodes are kept in a special directory ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST $CLI volume set $V0 cluster.use-anonymous-inode no ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST $CLI volume set $V0 cluster.use-anonymous-inode yes ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST mkdir -p $M0/d1/b $M0/d2/a ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST mv $M0/d2/a $M0/d1 ++TEST mv $M0/d1/b $M0/d2 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) ++TEST [[ -d $B0/${V0}1/$anon_inode_name ]] ++TEST [[ -d $B0/${V0}2/$anon_inode_name ]] ++anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name) ++EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name ++EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name ++ ++TEST ! 
ls $M0/$anon_inode_name ++EXPECT "^4$" echo $(ls -a $M0 | wc -l) ++ ++#Test purging code path by shd ++TEST $CLI volume heal $V0 disable ++TEST mkdir $M0/l0 $M0/l1 $M0/l2 ++TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file ++TEST ln $M0/del-file $M0/del-file-link ++TEST ln $M0/l0/file $M0/l1/file-link1 ++TEST ln $M0/l0/file $M0/l2/file-link2 ++TEST mkdir -p $M0/del-recursive-dir/d1 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST rm -f $M0/del-file $M0/del-file-nolink ++TEST rm -rf $M0/del-recursive-dir ++TEST mv $M0/d1/a $M0/d2 ++TEST mv $M0/l0/file $M0/l0/renamed-file ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0 ++ ++nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink)) ++link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file)) ++dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir)) ++rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a)) ++rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file)) ++TEST ! stat $M0/del-file ++TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid ++TEST ! stat $M0/del-file-nolink ++TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid ++TEST ! stat $M0/del-recursive-dir ++TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid ++TEST ! stat $M0/d1/a ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid ++TEST ! stat $M0/l0/file ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid ++ ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 ++TEST ! 
stat $M0/l1/file-link1 ++TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid ++ ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2 ++TEST ! stat $M0/l2/file-link2 ++TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid ++ ++#Simulate only anon-inodes present in all bricks ++TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2 ++ ++#Test that shd doesn't cleanup anon-inodes when some bricks are down ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST $CLI volume heal $V0 enable ++$CLI volume heal $V0 ++sleep 5 #Allow time for completion of one scan ++TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid ++TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid ++rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid) ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2 ++ ++#Test that rename indeed happened instead of rmdir/mkdir ++renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a) ++EXPECT "$rename_dir_inum" echo $renamed_dir_inum ++cleanup; +diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t +new file mode 100644 +index 0000000..0803a08 +--- /dev/null ++++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t +@@ -0,0 +1,464 @@ ++#!/bin/bash ++ ++#This file checks if missing entry self-heal and entry self-heal are working ++#as expected. ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++function get_file_type { ++ stat -c "%a:%F:%g:%t:%T:%u" $1 ++} ++ ++function diff_dirs { ++ diff <(ls $1 | sort) <(ls $2 | sort) ++} ++ ++function heal_status { ++ local f1_path="${1}/${3}" ++ local f2_path="${2}/${3}" ++ local insync="" ++ diff_dirs $f1_path $f2_path ++ if [ $? -eq 0 ]; ++ then ++ insync="Y" ++ else ++ insync="N" ++ fi ++ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path) ++ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path) ++ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path) ++ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path) ++ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path) ++ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path) ++ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi ++ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi ++ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi ++ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi ++ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi ++ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi ++ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2} ++} ++ ++function is_heal_done { ++ local zero_xattr="000000000000000000000000" ++ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ]; ++ then ++ echo "Y" ++ else ++ echo "N" ++ fi ++} ++ ++function print_pending_heals { ++ local result=":" ++ for i in "$@"; ++ do ++ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ]; ++ then ++ result="$result:$i" ++ fi ++ done ++#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names ++ if [ $result == ":" ]; then result="~"; fi ++ echo $result ++} ++ ++zero_xattr="000000000000000000000000" ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume heal $V0 
disable ++TEST $CLI volume set $V0 cluster.use-anonymous-inode off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.readdir-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 cluster.data-self-heal on ++TEST $CLI volume set $V0 cluster.metadata-self-heal on ++TEST $CLI volume set $V0 cluster.entry-self-heal on ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0 ++cd $M0 ++#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens ++#spb is split-brain, fool is all fool ++ ++#source_self_accusing means there exists source and a sink which self-accuses. ++#This simulates failures where fops failed on the bricks without it going down. 
++#Something like EACCESS/EDQUOT etc ++ ++TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing ++TEST mkfifo source_deletions_heal/fifo ++TEST mknod source_deletions_heal/block b 4 5 ++TEST mknod source_deletions_heal/char c 1 5 ++TEST touch source_deletions_heal/file ++TEST ln -s source_deletions_heal/file source_deletions_heal/slink ++TEST mkdir source_deletions_heal/dir1 ++TEST mkdir source_deletions_heal/dir1/dir2 ++ ++TEST mkfifo source_deletions_me/fifo ++TEST mknod source_deletions_me/block b 4 5 ++TEST mknod source_deletions_me/char c 1 5 ++TEST touch source_deletions_me/file ++TEST ln -s source_deletions_me/file source_deletions_me/slink ++TEST mkdir source_deletions_me/dir1 ++TEST mkdir source_deletions_me/dir1/dir2 ++ ++TEST mkfifo source_self_accusing/fifo ++TEST mknod source_self_accusing/block b 4 5 ++TEST mknod source_self_accusing/char c 1 5 ++TEST touch source_self_accusing/file ++TEST ln -s source_self_accusing/file source_self_accusing/slink ++TEST mkdir source_self_accusing/dir1 ++TEST mkdir source_self_accusing/dir1/dir2 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++ ++TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0 ++TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1 ++TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1 ++TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1 ++ ++#Test that the files are deleted ++TEST ! 
stat $B0/${V0}1/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}1/source_deletions_heal/block ++TEST ! stat $B0/${V0}1/source_deletions_heal/char ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/slink ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 ++TEST ! stat $B0/${V0}1/source_deletions_me/fifo ++TEST ! stat $B0/${V0}1/source_deletions_me/block ++TEST ! stat $B0/${V0}1/source_deletions_me/char ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/slink ++TEST ! stat $B0/${V0}1/source_deletions_me/dir1 ++TEST ! stat $B0/${V0}1/source_self_accusing/fifo ++TEST ! stat $B0/${V0}1/source_self_accusing/block ++TEST ! stat $B0/${V0}1/source_self_accusing/char ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/slink ++TEST ! stat $B0/${V0}1/source_self_accusing/dir1 ++ ++ ++TEST mkfifo source_creations_heal/fifo ++TEST mknod source_creations_heal/block b 4 5 ++TEST mknod source_creations_heal/char c 1 5 ++TEST touch source_creations_heal/file ++TEST ln -s source_creations_heal/file source_creations_heal/slink ++TEST mkdir source_creations_heal/dir1 ++TEST mkdir source_creations_heal/dir1/dir2 ++ ++TEST mkfifo source_creations_me/fifo ++TEST mknod source_creations_me/block b 4 5 ++TEST mknod source_creations_me/char c 1 5 ++TEST touch source_creations_me/file ++TEST ln -s source_creations_me/file source_creations_me/slink ++TEST mkdir source_creations_me/dir1 ++TEST mkdir source_creations_me/dir1/dir2 ++ ++$CLI volume stop $V0 ++ ++#simulate fool fool scenario for fool_* dirs ++setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} ++ ++#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty 
++setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me} ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me} ++ ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++ ++TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1 ++ ++$CLI volume stop $V0 ++ ++#simulate fool fool scenario for fool_* dirs ++setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me} ++setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} ++ ++#simulate self-accusing for source_self_accusing ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing ++ ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++# Check if conservative merges happened correctly on _me_ dirs ++TEST stat spb_me_heal/1 ++TEST stat $B0/${V0}0/spb_me_heal/1 ++TEST stat $B0/${V0}1/spb_me_heal/1 ++ ++TEST stat spb_me_heal/0 ++TEST stat $B0/${V0}0/spb_me_heal/0 ++TEST stat $B0/${V0}1/spb_me_heal/0 ++ ++TEST stat fool_me/1 ++TEST stat $B0/${V0}0/fool_me/1 ++TEST stat $B0/${V0}1/fool_me/1 ++ ++TEST stat fool_me/0 ++TEST stat $B0/${V0}0/fool_me/0 ++TEST stat $B0/${V0}1/fool_me/0 ++ ++TEST stat v1_fool_me/0 ++TEST stat $B0/${V0}0/v1_fool_me/0 ++TEST stat $B0/${V0}1/v1_fool_me/0 ++ ++TEST stat v1_fool_me/1 ++TEST stat $B0/${V0}0/v1_fool_me/1 ++TEST stat $B0/${V0}1/v1_fool_me/1 ++ ++TEST stat v1_dirty_me/0 ++TEST stat $B0/${V0}0/v1_dirty_me/0 ++TEST stat $B0/${V0}1/v1_dirty_me/0 ++ ++#Check if files that have gfid-mismatches in _me_ are giving EIO ++TEST ! 
stat spb_me/0 ++ ++#Check if stale files are deleted on access ++TEST ! stat source_deletions_me/fifo ++TEST ! stat $B0/${V0}0/source_deletions_me/fifo ++TEST ! stat $B0/${V0}1/source_deletions_me/fifo ++TEST ! stat source_deletions_me/block ++TEST ! stat $B0/${V0}0/source_deletions_me/block ++TEST ! stat $B0/${V0}1/source_deletions_me/block ++TEST ! stat source_deletions_me/char ++TEST ! stat $B0/${V0}0/source_deletions_me/char ++TEST ! stat $B0/${V0}1/source_deletions_me/char ++TEST ! stat source_deletions_me/file ++TEST ! stat $B0/${V0}0/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat source_deletions_me/file ++TEST ! stat $B0/${V0}0/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat source_deletions_me/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2 ++TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2 ++TEST ! stat source_deletions_me/dir1 ++TEST ! stat $B0/${V0}0/source_deletions_me/dir1 ++TEST ! 
stat $B0/${V0}1/source_deletions_me/dir1 ++ ++#Test if the files created as part of access are healed correctly ++r=$(get_file_type source_creations_me/fifo) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo ++TEST [ -p source_creations_me/fifo ] ++ ++r=$(get_file_type source_creations_me/block) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block ++TEST [ -b source_creations_me/block ] ++ ++r=$(get_file_type source_creations_me/char) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char ++TEST [ -c source_creations_me/char ] ++ ++r=$(get_file_type source_creations_me/file) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file ++TEST [ -f source_creations_me/file ] ++ ++r=$(get_file_type source_creations_me/slink) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink ++TEST [ -h source_creations_me/slink ] ++ ++r=$(get_file_type source_creations_me/dir1/dir2) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2 ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2 ++TEST [ -d source_creations_me/dir1/dir2 ] ++ ++r=$(get_file_type source_creations_me/dir1) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1 ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1 ++TEST [ -d source_creations_me/dir1 ] ++ ++#Trigger heal and check _heal dirs are healed properly ++#Trigger 
change in event generation number. That way inodes would get refreshed during lookup ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++TEST stat spb_heal ++TEST stat spb_me_heal ++TEST stat fool_heal ++TEST stat fool_me ++TEST stat v1_fool_heal ++TEST stat v1_fool_me ++TEST stat source_deletions_heal ++TEST stat source_deletions_me ++TEST stat source_self_accusing ++TEST stat source_creations_heal ++TEST stat source_creations_me ++TEST stat v1_dirty_heal ++TEST stat v1_dirty_me ++TEST $CLI volume stop $V0 ++TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/* ++ ++$CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++#Create base entry in indices/xattrop ++echo "Data" > $M0/FILE ++rm -f $M0/FILE ++EXPECT "1" count_index_entries $B0/${V0}0 ++EXPECT "1" count_index_entries $B0/${V0}1 ++ ++TEST $CLI volume stop $V0; ++ ++#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal ++create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 ++ ++$CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++$CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++ ++TEST $CLI volume heal $V0; ++EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing ++ ++EXPECT 
"Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me ++ ++#Don't access the files/dirs from mount point as that may cause self-heals ++# Check if conservative merges happened correctly on heal dirs ++TEST stat $B0/${V0}0/spb_heal/1 ++TEST 
stat $B0/${V0}1/spb_heal/1 ++ ++TEST stat $B0/${V0}0/spb_heal/0 ++TEST stat $B0/${V0}1/spb_heal/0 ++ ++TEST stat $B0/${V0}0/fool_heal/1 ++TEST stat $B0/${V0}1/fool_heal/1 ++ ++TEST stat $B0/${V0}0/fool_heal/0 ++TEST stat $B0/${V0}1/fool_heal/0 ++ ++TEST stat $B0/${V0}0/v1_fool_heal/0 ++TEST stat $B0/${V0}1/v1_fool_heal/0 ++ ++TEST stat $B0/${V0}0/v1_fool_heal/1 ++TEST stat $B0/${V0}1/v1_fool_heal/1 ++ ++TEST stat $B0/${V0}0/v1_dirty_heal/0 ++TEST stat $B0/${V0}1/v1_dirty_heal/0 ++ ++#Check if files that have gfid-mismatches in spb are giving EIO ++TEST ! stat spb/0 ++ ++#Check if stale files are deleted on access ++TEST ! stat $B0/${V0}0/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}1/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}0/source_deletions_heal/block ++TEST ! stat $B0/${V0}1/source_deletions_heal/block ++TEST ! stat $B0/${V0}0/source_deletions_heal/char ++TEST ! stat $B0/${V0}1/source_deletions_heal/char ++TEST ! stat $B0/${V0}0/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}0/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2 ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_deletions_heal/dir1 ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 ++ ++#Check if stale files are deleted on access ++TEST ! stat $B0/${V0}0/source_self_accusing/fifo ++TEST ! stat $B0/${V0}1/source_self_accusing/fifo ++TEST ! stat $B0/${V0}0/source_self_accusing/block ++TEST ! stat $B0/${V0}1/source_self_accusing/block ++TEST ! stat $B0/${V0}0/source_self_accusing/char ++TEST ! stat $B0/${V0}1/source_self_accusing/char ++TEST ! stat $B0/${V0}0/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}0/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2 ++TEST ! 
stat $B0/${V0}1/source_self_accusing/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_self_accusing/dir1 ++TEST ! stat $B0/${V0}1/source_self_accusing/dir1 ++ ++#Test if the files created as part of full self-heal correctly ++r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo ++TEST [ -p $B0/${V0}0/source_creations_heal/fifo ] ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/block) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/char) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/file) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file ++TEST [ -f $B0/${V0}0/source_creations_heal/file ] ++ ++r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink ++TEST [ -h $B0/${V0}0/source_creations_heal/slink ] ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2 ++TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ] ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1 ++TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ] ++ ++cd - ++ ++#Anonymous directory shouldn't be created ++TEST mkdir $M0/rename-dir ++before_rename=$(STAT_INO $B0/${V0}1/rename-dir) ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST mv $M0/rename-dir $M0/new-name ++TEST $CLI volume start $V0 force ++#Since features.ctime is not enabled by default in downstream, the 
below test ++#will fail. If ctime feature is enabled, there will be trusted.glusterfs.mdata ++#xattr set which will differ for the parent in the gfid split-brain scenario ++#and when lookup is triggered, the gfid gets added to indices/xattrop leading ++#the below test to pass in upstream. Hence commenting it here. ++#'spb' is in split-brain so pending-heal-count will be 2 ++#EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++after_rename=$(STAT_INO $B0/${V0}1/new-name) ++EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l) ++EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l) ++EXPECT_NOT "$before_rename" echo $after_rename ++cleanup +diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t +new file mode 100644 +index 0000000..256ee2a +--- /dev/null ++++ b/tests/basic/afr/rename-data-loss.t +@@ -0,0 +1,72 @@ ++#!/bin/bash ++#Self-heal tests ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} ++TEST $CLI volume set $V0 write-behind off ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $CLI volume set $V0 data-self-heal off ++TEST $CLI volume set $V0 metadata-self-heal off ++TEST $CLI volume set $V0 entry-self-heal off ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++cd $M0 ++TEST `echo "line1" >> file1` ++TEST mkdir dir1 ++TEST mkdir dir2 ++TEST mkdir -p dir1/dira/dirb ++TEST `echo "line1">>dir1/dira/dirb/file1` ++TEST mkdir delete_me ++TEST `echo "line1" >> delete_me/file1` ++ ++#brick0 has witnessed the second write while brick1 is down. ++TEST kill_brick $V0 $H0 $B0/brick1 ++TEST `echo "line2" >> file1` ++TEST `echo "line2" >> dir1/dira/dirb/file1` ++TEST `echo "line2" >> delete_me/file1` ++ ++#Toggle the bricks that are up/down. 
++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST kill_brick $V0 $H0 $B0/brick0 ++ ++#Rename when the 'source' brick0 for data-selfheals is down. ++mv file1 file2 ++mv dir1/dira dir2 ++ ++#Delete a dir when brick0 is down. ++rm -rf delete_me ++cd - ++ ++#Bring everything up and trigger heal ++TEST $CLI volume set $V0 self-heal-daemon on ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1 ++ ++#Remount to avoid reading from caches ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; ++EXPECT "line2" tail -1 $M0/file2 ++EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1 ++TEST ! stat $M0/delete_me/file1 ++TEST ! stat $M0/delete_me ++ ++anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode) ++TEST [[ -d $B0/brick0/$anon_inode_name ]] ++TEST [[ -d $B0/brick1/$anon_inode_name ]] ++cleanup +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +index c208112..0115350 100644 +--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -25,14 +25,14 @@ TEST ! 
$CLI volume heal $V0 + TEST $CLI volume profile $V0 start + TEST $CLI volume profile $V0 info clear + TEST $CLI volume heal $V0 enable +-# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes +-EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count ++# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode ++EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count + + # Check that a change in heal-timeout is honoured immediately. + TEST $CLI volume set $V0 cluster.heal-timeout 5 + sleep 10 + # Two crawls must have happened. +-EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count + + # shd must not heal if it is disabled and heal-timeout is changed. + TEST $CLI volume heal $V0 disable +diff --git a/tests/features/trash.t b/tests/features/trash.t +index 472e909..da5b50b 100755 +--- a/tests/features/trash.t ++++ b/tests/features/trash.t +@@ -94,105 +94,105 @@ wildcard_not_exists() { + if [ $? 
-eq 0 ]; then echo "Y"; else echo "N"; fi + } + +-# testing glusterd [1-3] ++# testing glusterd + TEST glusterd + TEST pidof glusterd + TEST $CLI volume info + +-# creating distributed volume [4] ++# creating distributed volume + TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2} + +-# checking volume status [5-7] ++# checking volume status + EXPECT "$V0" volinfo_field $V0 'Volume Name' + EXPECT 'Created' volinfo_field $V0 'Status' + EXPECT '2' brick_count $V0 + +-# test without enabling trash translator [8] ++# test without enabling trash translator + TEST start_vol $V0 $M0 + +-# test on enabling trash translator [9-10] ++# test on enabling trash translator + TEST $CLI volume set $V0 features.trash on + EXPECT 'on' volinfo_field $V0 'features.trash' + +-# files directly under mount point [11] ++# files directly under mount point + create_files $M0/file1 $M0/file2 + TEST file_exists $V0 file1 file2 + +-# perform unlink [12] ++# perform unlink + TEST unlink_op file1 + +-# perform truncate [13] ++# perform truncate + TEST truncate_op file2 4 + +-# create files directory hierarchy and check [14] ++# create files directory hierarchy and check + mkdir -p $M0/1/2/3 + create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2 + TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2 + +-# perform unlink [15] ++# perform unlink + TEST unlink_op 1/2/3/foo1 + +-# perform truncate [16] ++# perform truncate + TEST truncate_op 1/2/3/foo2 4 + + # create a directory for eliminate pattern + mkdir $M0/a + +-# set the eliminate pattern [17-18] ++# set the eliminate pattern + TEST $CLI volume set $V0 features.trash-eliminate-path /a + EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path' + +-# create two files and check [19] ++# create two files and check + create_files $M0/a/test1 $M0/a/test2 + TEST file_exists $V0 a/test1 a/test2 + +-# remove from eliminate pattern [20] ++# remove from eliminate pattern + rm -f $M0/a/test1 + EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1* + +-# truncate from 
eliminate path [21-23] ++# truncate from eliminate path + truncate -s 2 $M0/a/test2 + TEST [ -e $M0/a/test2 ] + TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ] + EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2* + +-# set internal op on [24-25] ++# set internal op on + TEST $CLI volume set $V0 features.trash-internal-op on + EXPECT 'on' volinfo_field $V0 'features.trash-internal-op' + +-# again create two files and check [26] ++# again create two files and check + create_files $M0/inop1 $M0/inop2 + TEST file_exists $V0 inop1 inop2 + +-# perform unlink [27] ++# perform unlink + TEST unlink_op inop1 + +-# perform truncate [28] ++# perform truncate + TEST truncate_op inop2 4 + +-# remove one brick and restart the volume [28-31] ++# remove one brick and restart the volume + TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $CLI volume stop $V0 + TEST start_vol $V0 $M0 $M0/.trashcan + +-# again create two files and check [33] ++# again create two files and check + create_files $M0/rebal1 $M0/rebal2 + TEST file_exists $V0 rebal1 rebal2 + +-# add one brick [34-35] ++# add one brick + TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 + TEST [ -d $B0/${V0}3 ] + + +-# perform rebalance [36] ++# perform rebalance + TEST $CLI volume rebalance $V0 start force + EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed + + #Find out which file was migrated to the new brick + file_name=$(ls $B0/${V0}3/rebal*| xargs basename) + +-# check whether rebalance was succesful [37-40] ++# check whether rebalance was succesful + EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name* + EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name* + +@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + # force required in case rebalance is not over + TEST $CLI volume stop $V0 force + +-# create a replicated volume [41] ++# create a replicated volume + TEST $CLI volume create $V1 replica 2 
$H0:$B0/${V1}{1,2} + +-# checking volume status [42-45] ++# checking volume status + EXPECT "$V1" volinfo_field $V1 'Volume Name' + EXPECT 'Replicate' volinfo_field $V1 'Type' + EXPECT 'Created' volinfo_field $V1 'Status' + EXPECT '2' brick_count $V1 + +-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50] ++# enable trash with options and start the replicate volume by disabling automatic self-heal + TEST $CLI volume set $V1 features.trash on + TEST $CLI volume set $V1 features.trash-internal-op on + EXPECT 'on' volinfo_field $V1 'features.trash' + EXPECT 'on' volinfo_field $V1 'features.trash-internal-op' + TEST start_vol $V1 $M1 $M1/.trashcan + +-# mount and check for trash directory [51] ++# mount and check for trash directory + TEST [ -d $M1/.trashcan/internal_op ] + +-# create a file and check [52] ++# create a file and check + touch $M1/self + TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ] + +-# kill one brick and delete the file from mount point [53-54] ++# kill one brick and delete the file from mount point + kill_brick $V1 $H0 $B0/${V1}1 + EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count + rm -f $M1/self + EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self* + +-# force start the volume and trigger the self-heal manually [55-57] +-TEST $CLI volume start $V1 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-# Since we created the file under root of the volume, it will be +-# healed automatically +- +-# check for the removed file in trashcan [58] +-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self* +- +-# check renaming of trash directory through cli [59-62] ++# check renaming of trash directory through cli + TEST $CLI volume set $V0 trash-dir abc + TEST start_vol $V0 $M0 $M0/abc + TEST [ -e $M0/abc -a ! 
-e $M0/.trashcan ] + EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal* + +-# ensure that rename and delete operation on trash directory fails [63-65] ++# ensure that rename and delete operation on trash directory fails + rm -rf $M0/abc/internal_op + TEST [ -e $M0/abc/internal_op ] + rm -rf $M0/abc/ +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 90b4f14..6f2da11 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -47,6 +47,41 @@ afr_quorum_errno(afr_private_t *priv) + return ENOTCONN; + } + ++gf_boolean_t ++afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, ++ pid_t pid) ++{ ++ if (!__is_root_gfid(pargfid)) { ++ return _gf_false; ++ } ++ ++ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { ++ /*For backward compatibility /.landfill is private*/ ++ return _gf_true; ++ } ++ ++ if (pid == GF_CLIENT_PID_GSYNCD) { ++ /*geo-rep needs to create/sync private directory on slave because ++ * it appears in changelog*/ ++ return _gf_false; ++ } ++ ++ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { ++ if (strcmp(name, priv->anon_inode_name) == 0) { ++ /* anonymous-inode dir is private*/ ++ return _gf_true; ++ } ++ } else { ++ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == ++ 0) { ++ /* anonymous-inode dir prefix is private for geo-rep to work*/ ++ return _gf_true; ++ } ++ } ++ ++ return _gf_false; ++} ++ + int + afr_fav_child_reset_sink_xattrs(void *opaque); + +@@ -3301,11 +3336,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + return 0; + } + +- if (__is_root_gfid(loc->parent->gfid)) { +- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { +- op_errno = EPERM; +- goto out; +- } ++ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, ++ frame->root->pid)) { ++ op_errno = EPERM; ++ goto out; + } + + local = AFR_FRAME_INIT(frame, op_errno); +@@ 
-4832,6 +4866,7 @@ afr_priv_dump(xlator_t *this) + priv->background_self_heal_count); + gf_proc_dump_write("healers", "%d", priv->healers); + gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); ++ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); + if (priv->quorum_count == AFR_QUORUM_AUTO) { + gf_proc_dump_write("quorum-type", "auto"); + } else if (priv->quorum_count == 0) { +@@ -5792,6 +5827,7 @@ afr_priv_destroy(afr_private_t *priv) + GF_FREE(priv->local); + GF_FREE(priv->pending_key); + GF_FREE(priv->children); ++ GF_FREE(priv->anon_inode); + GF_FREE(priv->child_up); + GF_FREE(priv->child_latency); + LOCK_DESTROY(&priv->lock); +diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c +index 6307b63..d64b6a9 100644 +--- a/xlators/cluster/afr/src/afr-dir-read.c ++++ b/xlators/cluster/afr/src/afr-dir-read.c +@@ -158,8 +158,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) + } + + static void +-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, +- gf_dirent_t *entries, fd_t *fd) ++afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, ++ int subvol, gf_dirent_t *entries, fd_t *fd) + { + int ret = -1; + gf_dirent_t *entry = NULL; +@@ -177,8 +177,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, + + list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) + { +- if (__is_root_gfid(fd->inode->gfid) && +- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { ++ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, ++ frame->root->pid)) { + continue; + } + +@@ -222,8 +222,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + } + + if (op_ret >= 0) +- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, +- local->fd); ++ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, ++ &entries, local->fd); + + AFR_STACK_UNWIND(readdir, frame, 
op_ret, op_errno, &entries, xdata); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 9b6575f..0a8a7fd 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -2753,3 +2753,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, + out: + return source; + } ++ ++static int ++afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ afr_local_t *local = frame->local; ++ int i = (long)cookie; ++ ++ local->replies[i].valid = 1; ++ local->replies[i].op_ret = op_ret; ++ local->replies[i].op_errno = op_errno; ++ if (op_ret == 0) { ++ local->op_ret = 0; ++ local->replies[i].poststat = *buf; ++ local->replies[i].preparent = *preparent; ++ local->replies[i].postparent = *postparent; ++ } ++ if (xdata) { ++ local->replies[i].xdata = dict_ref(xdata); ++ } ++ ++ syncbarrier_wake(&local->barrier); ++ return 0; ++} ++ ++int ++afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) ++{ ++ call_frame_t *frame = NULL; ++ afr_local_t *local = NULL; ++ afr_private_t *priv = this->private; ++ unsigned char *mkdir_on = alloca0(priv->child_count); ++ unsigned char *lookup_on = alloca0(priv->child_count); ++ loc_t loc = {0}; ++ int32_t op_errno = 0; ++ int32_t child_op_errno = 0; ++ struct iatt iatt = {0}; ++ dict_t *xdata = NULL; ++ uuid_t anon_inode_gfid = {0}; ++ int mkdir_count = 0; ++ int i = 0; ++ ++ /*Try to mkdir everywhere and return success if the dir exists on 'child' ++ */ ++ ++ if (!priv->use_anon_inode) { ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ frame = afr_frame_create(this, &op_errno); ++ if (op_errno) { ++ goto out; ++ } ++ local = frame->local; ++ if (!local->child_up[child]) { ++ /*Other bricks may need mkdir so don't error out 
yet*/ ++ child_op_errno = ENOTCONN; ++ } ++ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); ++ for (i = 0; i < priv->child_count; i++) { ++ if (!local->child_up[i]) ++ continue; ++ ++ if (priv->anon_inode[i]) { ++ mkdir_on[i] = 0; ++ } else { ++ mkdir_on[i] = 1; ++ mkdir_count++; ++ } ++ } ++ ++ if (mkdir_count == 0) { ++ *linked_inode = inode_find(this->itable, anon_inode_gfid); ++ if (*linked_inode) { ++ op_errno = 0; ++ goto out; ++ } ++ } ++ ++ loc.parent = inode_ref(this->itable->root); ++ loc.name = priv->anon_inode_name; ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ ++ xdata = dict_new(); ++ if (!xdata) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ ++ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); ++ if (op_errno) { ++ goto out; ++ } ++ ++ if (mkdir_count == 0) { ++ memcpy(lookup_on, local->child_up, priv->child_count); ++ goto lookup; ++ } ++ ++ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, ++ xdata); ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (!mkdir_on[i]) { ++ continue; ++ } ++ ++ if (local->replies[i].op_ret == 0) { ++ priv->anon_inode[i] = 1; ++ iatt = local->replies[i].poststat; ++ } else if (local->replies[i].op_ret < 0 && ++ local->replies[i].op_errno == EEXIST) { ++ lookup_on[i] = 1; ++ } else if (i == child) { ++ child_op_errno = local->replies[i].op_errno; ++ } ++ } ++ ++ if (AFR_COUNT(lookup_on, priv->child_count) == 0) { ++ goto link; ++ } ++ ++lookup: ++ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ xdata); ++ for (i = 0; i < priv->child_count; i++) { ++ if (!lookup_on[i]) { ++ continue; ++ } ++ ++ if (local->replies[i].op_ret == 0) { ++ if (gf_uuid_compare(anon_inode_gfid, ++ local->replies[i].poststat.ia_gfid) == 0) { ++ priv->anon_inode[i] = 1; ++ iatt = local->replies[i].poststat; ++ } else { ++ if (i == child) ++ child_op_errno = EINVAL; ++ gf_msg(this->name, GF_LOG_ERROR, 0, 
AFR_MSG_INVALID_DATA, ++ "%s has gfid: %s", priv->anon_inode_name, ++ uuid_utoa(local->replies[i].poststat.ia_gfid)); ++ } ++ } else if (i == child) { ++ child_op_errno = local->replies[i].op_errno; ++ } ++ } ++link: ++ if (!gf_uuid_is_null(iatt.ia_gfid)) { ++ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); ++ if (*linked_inode) { ++ op_errno = 0; ++ inode_lookup(*linked_inode); ++ } else { ++ op_errno = ENOMEM; ++ } ++ goto out; ++ } ++ ++out: ++ if (xdata) ++ dict_unref(xdata); ++ loc_wipe(&loc); ++ /*child_op_errno takes precedence*/ ++ if (child_op_errno == 0) { ++ child_op_errno = op_errno; ++ } ++ ++ if (child_op_errno && *linked_inode) { ++ inode_unref(*linked_inode); ++ *linked_inode = NULL; ++ } ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ return -child_op_errno; ++} +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 00b5b2d..20b07dd 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -16,54 +16,170 @@ + #include + #include + +-static int +-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, +- inode_t *inode, int child, struct afr_reply *replies) ++int ++afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, ++ struct afr_reply *replies, ++ gf_boolean_t *anon_inode) + { + afr_private_t *priv = NULL; ++ afr_local_t *local = NULL; + xlator_t *subvol = NULL; + int ret = 0; ++ int i = 0; ++ char g[64] = {0}; ++ unsigned char *lookup_success = NULL; ++ call_frame_t *frame = NULL; ++ loc_t loc2 = { ++ 0, ++ }; + loc_t loc = { + 0, + }; +- char g[64]; + + priv = this->private; +- + subvol = priv->children[child]; ++ lookup_success = alloca0(priv->child_count); ++ uuid_utoa_r(replies[child].poststat.ia_gfid, g); ++ loc.inode = inode_new(inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ if 
(replies[child].poststat.ia_type == IA_IFDIR) { ++ /* This directory may have sub-directory hierarchy which may need to ++ * be preserved for subsequent heals. So unconditionally move the ++ * directory to anonymous-inode directory*/ ++ *anon_inode = _gf_true; ++ goto anon_inode; ++ } ++ ++ frame = afr_frame_create(this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ local = frame->local; ++ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ NULL); ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ lookup_success[i] = 1; ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ ret = -local->replies[i].op_errno; ++ } ++ } ++ ++ if (priv->quorum_count) { ++ if (afr_has_quorum(lookup_success, this, NULL)) { ++ *anon_inode = _gf_true; ++ } ++ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { ++ *anon_inode = _gf_true; ++ } else if (ret) { ++ goto out; ++ } ++ ++anon_inode: ++ if (!*anon_inode) { ++ ret = 0; ++ goto out; ++ } + + loc.parent = inode_ref(dir); + gf_uuid_copy(loc.pargfid, dir->gfid); + loc.name = name; +- loc.inode = inode_ref(inode); + +- if (replies[child].valid && replies[child].op_ret == 0) { +- switch (replies[child].poststat.ia_type) { +- case IA_IFDIR: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), +- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), +- subvol->name); +- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), +- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), +- subvol->name); +- ret = syncop_unlink(subvol, &loc, NULL, NULL); +- break; +- } ++ ret = afr_anon_inode_create(this, child, &loc2.parent); 
++ if (ret < 0) ++ goto out; ++ ++ loc2.name = g; ++ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "Rename to %s dir %s/%s (%s) on %s failed", ++ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, ++ subvol->name); ++ } else { ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "Rename to %s dir %s/%s (%s) on %s successful", ++ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, ++ subvol->name); + } + ++out: + loc_wipe(&loc); ++ loc_wipe(&loc2); ++ if (frame) { ++ AFR_STACK_DESTROY(frame); ++ } + + return ret; + } + + int ++afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, struct afr_reply *replies) ++{ ++ char g[64] = {0}; ++ afr_private_t *priv = NULL; ++ xlator_t *subvol = NULL; ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t anon_inode = _gf_false; ++ ++ priv = this->private; ++ subvol = priv->children[child]; ++ ++ if ((!replies[child].valid) || (replies[child].op_ret < 0)) { ++ /*Nothing to do*/ ++ ret = 0; ++ goto out; ++ } ++ ++ if (priv->use_anon_inode) { ++ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, ++ replies, &anon_inode); ++ if (ret < 0 || anon_inode) ++ goto out; ++ } ++ ++ loc.parent = inode_ref(dir); ++ loc.inode = inode_new(inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ loc.name = name; ++ switch (replies[child].poststat.ia_type) { ++ case IA_IFDIR: ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, ++ uuid_utoa_r(replies[child].poststat.ia_gfid, g), ++ subvol->name); ++ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), ++ name, 
uuid_utoa_r(replies[child].poststat.ia_gfid, g), ++ subvol->name); ++ ret = syncop_unlink(subvol, &loc, NULL, NULL); ++ break; ++ } ++ ++out: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int + afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + unsigned char *sources, inode_t *dir, + const char *name, inode_t *inode, +@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + loc_t srcloc = { + 0, + }; ++ loc_t anonloc = { ++ 0, ++ }; + xlator_t *this = frame->this; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; +@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + 0, + }; + unsigned char *newentry = NULL; ++ char iatt_uuid_str[64] = {0}; ++ char dir_uuid_str[64] = {0}; + + priv = this->private; + iatt = &replies[source].poststat; ++ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); + if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, + "Invalid ia_type (%d) or gfid(%s). 
source brick=%d, " + "pargfid=%s, name=%s", +- iatt->ia_type, uuid_utoa(iatt->ia_gfid), source, +- uuid_utoa(dir->gfid), name); ++ iatt->ia_type, iatt_uuid_str, source, ++ uuid_utoa_r(dir->gfid, dir_uuid_str), name); + ret = -EINVAL; + goto out; + } +@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + + srcloc.inode = inode_ref(inode); + gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); +- if (iatt->ia_type != IA_IFDIR) +- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); +- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { ++ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); ++ if (ret == -ENOENT || ret == -ESTALE) { + newentry[dst] = 1; + ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, + sources, newentry); + if (ret) + goto out; ++ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { ++ // Try rename from hidden directory ++ ret = afr_anon_inode_create(this, dst, &anonloc.parent); ++ if (ret < 0) ++ goto out; ++ anonloc.inode = inode_ref(inode); ++ anonloc.name = iatt_uuid_str; ++ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); ++ if (ret == -ENOENT || ret == -ESTALE) ++ ret = -1; /*This sets 'mismatch' to true*/ ++ goto out; + } + + mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); +@@ -165,6 +297,7 @@ out: + GF_FREE(linkname); + loc_wipe(&loc); + loc_wipe(&srcloc); ++ loc_wipe(&anonloc); + return ret; + } + +@@ -580,6 +713,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + + priv = this->private; + ++ if (afr_is_private_directory(priv, fd->inode->gfid, name, ++ GF_CLIENT_PID_SELF_HEALD)) { ++ return 0; ++ } ++ + xattr = dict_new(); + if (!xattr) + return -ENOMEM; +@@ -628,7 +766,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + replies); + + if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { +- ret = afr_shd_index_purge(subvol, 
parent_idx_inode, name, ++ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, + inode->ia_type); + /* Why is ret force-set to 0? We do not care about + * index purge failing for full heal as it is quite +@@ -758,10 +896,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + +- if (__is_root_gfid(fd->inode->gfid) && +- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) +- continue; +- + ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, + loc.inode, subvol, + local->need_full_crawl); +@@ -824,7 +958,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + /* The name indices under the pgfid index dir are guaranteed + * to be regular files. Hence the hardcoding. + */ +- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ++ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + ret = 0; + goto out; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index dace071..51e3d8c 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + struct afr_reply *replies) + { +- loc_t loc = { +- 0, +- }; + int i = 0; + afr_private_t *priv = NULL; +- char g[64]; + int ret = 0; + + priv = this->private; + +- loc.parent = inode_ref(parent); +- gf_uuid_copy(loc.pargfid, pargfid); +- loc.name = bname; +- loc.inode = inode_ref(inode); +- + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; +@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, + if (replies[i].op_ret) + continue; + +- switch (replies[i].poststat.ia_type) { +- case IA_IFDIR: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- 
AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), +- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), +- priv->children[i]->name); +- +- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), +- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), +- priv->children[i]->name); +- +- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); +- break; +- } ++ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, ++ replies); + } + +- loc_wipe(&loc); +- + return ret; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 8f6fb00..c8dc384 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -370,4 +370,9 @@ gf_boolean_t + afr_is_file_empty_on_all_children(afr_private_t *priv, + struct afr_reply *replies); + ++int ++afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, struct afr_reply *replies); ++int ++afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); + #endif /* !_AFR_SELFHEAL_H */ +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 95ac5f2..939a135 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -222,7 +222,7 @@ out: + } + + int +-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, ++afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, + ia_type_t type) + { + int ret = 0; +@@ -422,7 +422,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + ret = afr_shd_selfheal(healer, healer->subvol, gfid); + + if (ret == -ENOENT || ret == -ESTALE) +- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); ++ 
afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); + + if (ret == 2) + /* If bricks crashed in pre-op after creating indices/xattrop +@@ -798,6 +798,176 @@ afr_bricks_available_for_heal(afr_private_t *priv) + return _gf_true; + } + ++static int ++afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ++ void *data) ++{ ++ struct subvol_healer *healer = data; ++ afr_private_t *priv = healer->this->private; ++ call_frame_t *frame = NULL; ++ afr_local_t *local = NULL; ++ int ret = 0; ++ loc_t loc = {0}; ++ int count = 0; ++ int i = 0; ++ int op_errno = 0; ++ struct iatt *iatt = NULL; ++ gf_boolean_t multiple_links = _gf_false; ++ unsigned char *gfid_present = alloca0(priv->child_count); ++ unsigned char *entry_present = alloca0(priv->child_count); ++ char *type = "file"; ++ ++ frame = afr_frame_create(healer->this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ local = frame->local; ++ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { ++ gf_msg_debug(healer->this->name, 0, ++ "Not all bricks are up. Skipping " ++ "cleanup of %s on %s", ++ entry->d_name, subvol->name); ++ ret = 0; ++ goto out; ++ } ++ ++ loc.inode = inode_new(parent->inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ret = gf_uuid_parse(entry->d_name, loc.gfid); ++ if (ret) { ++ ret = 0; ++ goto out; ++ } ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ NULL); ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ count++; ++ gfid_present[i] = 1; ++ iatt = &local->replies[i].poststat; ++ if (iatt->ia_type == IA_IFDIR) { ++ type = "dir"; ++ } ++ ++ if (i == healer->subvol) { ++ if (local->replies[i].poststat.ia_nlink > 1) { ++ multiple_links = _gf_true; ++ } ++ } ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ /*We don't have complete view. 
Skip the entry*/ ++ gf_msg_debug(healer->this->name, local->replies[i].op_errno, ++ "Skipping cleanup of %s on %s", entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ ++ /*Inode is deleted from subvol*/ ++ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { ++ gf_msg(healer->this->name, GF_LOG_WARNING, 0, ++ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, ++ priv->anon_inode_name, entry->d_name, subvol->name); ++ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, ++ iatt->ia_type); ++ if (ret == -ENOENT || ret == -ESTALE) ++ ret = 0; ++ } else if (count > 1) { ++ loc_wipe(&loc); ++ loc.parent = inode_ref(parent->inode); ++ loc.name = entry->d_name; ++ loc.inode = inode_new(parent->inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, ++ &loc, NULL); ++ count = 0; ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ count++; ++ entry_present[i] = 1; ++ iatt = &local->replies[i].poststat; ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ /*We don't have complete view. 
Skip the entry*/ ++ gf_msg_debug(healer->this->name, local->replies[i].op_errno, ++ "Skipping cleanup of %s on %s", entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ for (i = 0; i < priv->child_count; i++) { ++ if (gfid_present[i] && !entry_present[i]) { ++ /*Entry is not anonymous on at least one subvol*/ ++ gf_msg_debug(healer->this->name, 0, ++ "Valid entry present on %s " ++ "Skipping cleanup of %s on %s", ++ priv->children[i]->name, entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ ++ gf_msg(healer->this->name, GF_LOG_WARNING, 0, ++ AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, ++ entry->d_name); ++ ret = 0; ++ for (i = 0; i < priv->child_count; i++) { ++ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, ++ entry->d_name, iatt->ia_type); ++ if (op_errno != ENOENT && op_errno != ESTALE) { ++ ret |= -op_errno; ++ } ++ } ++ } ++ ++out: ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++static void ++afr_cleanup_anon_inode_dir(struct subvol_healer *healer) ++{ ++ int ret = 0; ++ call_frame_t *frame = NULL; ++ afr_private_t *priv = healer->this->private; ++ loc_t loc = {0}; ++ ++ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); ++ if (ret) ++ goto out; ++ ++ frame = afr_frame_create(healer->this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ ++ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, ++ GF_CLIENT_PID_SELF_HEALD, healer, ++ afr_shd_anon_inode_cleaner, NULL, ++ priv->shd.max_threads, priv->shd.wait_qlength); ++out: ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ loc_wipe(&loc); ++ return; ++} ++ + void * + afr_shd_index_healer(void *data) + { +@@ -854,6 +1024,10 @@ afr_shd_index_healer(void *data) + sleep(1); + } while (ret > 0); + ++ if (ret == 0) { ++ afr_cleanup_anon_inode_dir(healer); ++ } ++ + if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { 
+ afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, + pre_crawl_xdata); +diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h +index 1990539..acd567e 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.h ++++ b/xlators/cluster/afr/src/afr-self-heald.h +@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, + char **path_p); + + int +-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, ++afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, + ia_type_t type); + #endif /* !_AFR_SELF_HEALD_H */ +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index bfa464f..33fe4d8 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) + } + } + ++void ++afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) ++{ ++ char *volfile_id_str = NULL; ++ uuid_t anon_inode_gfid = {0}; ++ ++ /*If volume id is not present don't enable anything*/ ++ if (dict_get_str(options, "volume-id", &volfile_id_str)) ++ return; ++ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); ++ /*anon_inode_name is not supposed to change once assigned*/ ++ if (!priv->anon_inode_name[0]) { ++ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", ++ AFR_ANON_DIR_PREFIX, volfile_id_str); ++ gf_uuid_parse(volfile_id_str, anon_inode_gfid); ++ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/ ++ anon_inode_gfid[0] ^= 1; ++ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); ++ } ++} ++ + int + reconfigure(xlator_t *this, dict_t *options) + { +@@ -287,6 +308,10 @@ reconfigure(xlator_t *this, dict_t *options) + consistent_io = _gf_false; + priv->consistent_io = consistent_io; + ++ afr_handle_anon_inode_options(priv, options); ++ ++ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, ++ out); 
+ if (priv->shd.enabled) { + if ((priv->shd.enabled != enabled_old) || + (timeout_old != priv->shd.timeout)) +@@ -535,7 +560,9 @@ init(xlator_t *this) + + GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); + GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); ++ afr_handle_anon_inode_options(priv, this->options); + ++ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); + if (priv->quorum_count != 0) + priv->consistent_io = _gf_false; + +@@ -547,13 +574,16 @@ init(xlator_t *this) + goto out; + } + ++ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, ++ gf_afr_mt_char); ++ + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + + priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count, + gf_afr_mt_child_latency_t); + +- if (!priv->child_up || !priv->child_latency) { ++ if (!priv->child_up || !priv->child_latency || !priv->anon_inode) { + ret = -ENOMEM; + goto out; + } +@@ -1218,6 +1248,14 @@ struct volume_options options[] = { + .tags = {"replicate"}, + .description = "This option exists only for backward compatibility " + "and configuring it doesn't have any effect"}, ++ {.key = {"use-anonymous-inode"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .default_value = "no", ++ .op_version = {GD_OP_VERSION_7_0}, ++ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, ++ .tags = {"replicate"}, ++ .description = "Setting this option heals directory renames efficiently"}, ++ + {.key = {NULL}}, + }; + +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 3a2b26d..6a9a763 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -40,6 +40,8 @@ + #define AFR_TA_DOM_MODIFY "afr.ta.dom-modify" + + #define AFR_HALO_MAX_LATENCY 99999 ++#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" ++ + + #define PFLAG_PENDING (1 << 0) + #define PFLAG_SBRAIN (1 << 1) +@@ -155,6 +157,7 @@ typedef struct _afr_private { + 
struct list_head ta_waitq; + struct list_head ta_onwireq; + ++ unsigned char *anon_inode; + unsigned char *child_up; + int64_t *child_latency; + unsigned char *local; +@@ -240,6 +243,11 @@ typedef struct _afr_private { + gf_boolean_t esh_granular; + gf_boolean_t consistent_io; + gf_boolean_t data_self_heal; /* on/off */ ++ gf_boolean_t use_anon_inode; ++ ++ /*For anon-inode handling */ ++ char anon_inode_name[NAME_MAX + 1]; ++ char anon_gfid_str[UUID_SIZE + 1]; + } afr_private_t; + + typedef enum { +@@ -1341,4 +1349,7 @@ afr_selfheal_childup(xlator_t *this, afr_private_t *priv); + void + afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies); ++gf_boolean_t ++afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, ++ pid_t pid); + #endif /* __AFR_H__ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 094a71f..1920284 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -3867,6 +3867,38 @@ out: + } + + static int ++set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, ++ int clusters) ++{ ++ xlator_t *xlator = NULL; ++ int i = 0; ++ int ret = -1; ++ glusterd_conf_t *conf = NULL; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ ++ if (conf->op_version < GD_OP_VERSION_7_1) ++ return 0; ++ xlator = first_of(graph); ++ ++ for (i = 0; i < clusters; i++) { ++ ret = xlator_set_fixed_option(xlator, "volume-id", ++ uuid_utoa(volinfo->volume_id)); ++ if (ret) ++ goto out; ++ ++ xlator = xlator->next; ++ } ++ ++out: ++ return ret; ++} ++ ++static int + volgen_graph_build_afr_clusters(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo) + { +@@ -3906,6 +3938,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph, + clusters = -1; + goto out; + } ++ ++ ret 
= set_volfile_id_option(graph, volinfo, clusters); ++ if (ret) { ++ clusters = -1; ++ goto out; ++ } ++ + if (!volinfo->arbiter_count) + goto out; + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 62acadf..c1ca190 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3789,4 +3789,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, ++ ++ {.key = "cluster.use-anonymous-inode", ++ .voltype = "cluster/replicate", ++ .op_version = GD_OP_VERSION_7_1, ++ .value = "yes", ++ .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = NULL}}; +-- +1.8.3.1 + diff --git a/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch b/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch new file mode 100644 index 0000000..0f6249e --- /dev/null +++ b/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch @@ -0,0 +1,338 @@ +From 8d24d891aade910b0bb86b27c25a8d2382e19ba0 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Tue, 15 Dec 2020 15:04:19 +0530 +Subject: [PATCH 516/517] afr: return -EIO for gfid split-brains. + +Problem: +entry-self-heal-anon-dir-off.t was failing occasionally because +afr_gfid_split_brain_source() returned -1 instead of -EIO for +split-brains, causing the code to proceed to afr_lookup_done(), which +in turn succeeded the lookup if there was a parallel client side heal +going on. + +Fix: +Return -EIO instead of -1 so that lookup fails. + +Also, afr_selfheal_name() was using the same dict to get and set values. This +could be problematic if the caller passed local->xdata_req, since +setting a response in a request dict can lead to bugs. So changed it to use +separate request and response dicts. 
+ +Upstream patch details: +> Fixes: #1739 +> Credits Pranith Karampuri +> Signed-off-by: Ravishankar N +>Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443 +Upstream patch: https://github.com/gluster/glusterfs/pull/1819/ + +BUG: 1640148 +Signed-off-by: karthik-us +Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443 +Reviewed-on: https://code.engineering.redhat.com/gerrit/221209 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + xlators/cluster/afr/src/afr-common.c | 12 ++++++++---- + xlators/cluster/afr/src/afr-self-heal-common.c | 27 +++++++++++++------------- + xlators/cluster/afr/src/afr-self-heal-entry.c | 8 ++++---- + xlators/cluster/afr/src/afr-self-heal-name.c | 23 +++++++++++----------- + xlators/cluster/afr/src/afr-self-heal.h | 5 +++-- + xlators/cluster/afr/src/afr-self-heald.c | 2 +- + 6 files changed, 42 insertions(+), 35 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 6f2da11..416012c 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2366,7 +2366,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + /* If we were called from glfsheal and there is still a gfid + * mismatch, succeed the lookup and let glfsheal print the + * response via gfid-heal-msg.*/ +- if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", ++ if (!dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", + &gfid_heal_msg)) + goto cant_interpret; + +@@ -2421,7 +2421,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + goto error; + } + +- ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); ++ ret = dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", &gfid_heal_msg); + if (!ret) { + ret = dict_set_str_sizen(local->replies[read_subvol].xdata, + "gfid-heal-msg", gfid_heal_msg); +@@ -2768,9 +2768,12 @@ afr_lookup_selfheal_wrap(void *opaque) + local = frame->local; + this = frame->this; + 
loc_pargfid(&local->loc, pargfid); ++ if (!local->xattr_rsp) ++ local->xattr_rsp = dict_new(); + + ret = afr_selfheal_name(frame->this, pargfid, local->loc.name, +- &local->cont.lookup.gfid_req, local->xattr_req); ++ &local->cont.lookup.gfid_req, local->xattr_req, ++ local->xattr_rsp); + if (ret == -EIO) + goto unwind; + +@@ -2786,7 +2789,8 @@ afr_lookup_selfheal_wrap(void *opaque) + return 0; + + unwind: +- AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL); ++ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, local->xattr_rsp, ++ NULL); + return 0; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 0a8a7fd..0954d2c 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -245,7 +245,8 @@ int + afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, +- unsigned char *locked_on, int *src, dict_t *xdata) ++ unsigned char *locked_on, int *src, dict_t *req, ++ dict_t *rsp) + { + afr_private_t *priv = NULL; + char g1[64] = { +@@ -266,8 +267,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "All the bricks should be up to resolve the gfid split " + "barin"); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SALL_BRICKS_UP_TO_RESOLVE); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, +@@ -277,8 +278,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + goto out; + } + +- if (xdata) { +- ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op); ++ if (req) { ++ ret = dict_get_int32_sizen(req, "heal-op", &heal_op); + if (ret) + goto fav_child; + } else { +@@ -292,8 +293,8 @@ 
afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SNO_BIGGER_FILE); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SNO_BIGGER_FILE); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -310,8 +311,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SNO_DIFF_IN_MTIME); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SNO_DIFF_IN_MTIME); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -323,7 +324,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + break; + + case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: +- ret = dict_get_str_sizen(xdata, "child-name", &src_brick); ++ ret = dict_get_str_sizen(req, "child-name", &src_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Error getting the source " +@@ -335,8 +336,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SERROR_GETTING_SRC_BRICK); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SERROR_GETTING_SRC_BRICK); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -400,7 +401,7 @@ out: + uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx, + priv->children[src_idx]->name, src_idx, + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2)); +- return -1; ++ return -EIO; + } + return 0; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 20b07dd..a17dd93 100644 +--- 
a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -399,7 +399,7 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this, + (ia_type == replies[i].poststat.ia_type)) { + ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, + bname, src_idx, i, locked_on, src, +- NULL); ++ NULL, NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Skipping conservative merge on the " +@@ -474,7 +474,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + return ret; + + /* In case of type mismatch / unable to resolve gfid mismatch on the +- * entry, return -1.*/ ++ * entry, return -EIO.*/ + ret = afr_selfheal_detect_gfid_and_type_mismatch( + this, replies, inode, fd->inode->gfid, name, source, locked_on, &src); + +@@ -905,7 +905,7 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, + break; + } + +- if (ret == -1) { ++ if (ret == -EIO) { + /* gfid or type mismatch. */ + mismatch = _gf_true; + ret = 0; +@@ -1072,7 +1072,7 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, + else + ret = afr_selfheal_entry_do_subvol(frame, this, fd, i); + +- if (ret == -1) { ++ if (ret == -EIO) { + /* gfid or type mismatch. 
*/ + mismatch = _gf_true; + ret = 0; +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 51e3d8c..9ec2066 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -217,7 +217,8 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, + int source, unsigned char *sources, + int *gfid_idx, uuid_t pargfid, + const char *bname, inode_t *inode, +- unsigned char *locked_on, dict_t *xdata) ++ unsigned char *locked_on, dict_t *req, ++ dict_t *rsp) + { + int i = 0; + int gfid_idx_iter = -1; +@@ -245,11 +246,11 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, + if (sources[i] || source == -1) { + if ((sources[gfid_idx_iter] || source == -1) && + gf_uuid_compare(gfid, gfid1)) { +- ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, +- bname, gfid_idx_iter, i, +- locked_on, gfid_idx, xdata); ++ ret = afr_gfid_split_brain_source( ++ this, replies, inode, pargfid, bname, gfid_idx_iter, i, ++ locked_on, gfid_idx, req, rsp); + if (!ret && *gfid_idx >= 0) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + "GFID split-brain resolved"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -303,7 +304,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, int source, + unsigned char *locked_on, struct afr_reply *replies, +- void *gfid_req, dict_t *xdata) ++ void *gfid_req, dict_t *req, dict_t *rsp) + { + int gfid_idx = -1; + int ret = -1; +@@ -333,7 +334,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + + ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources, + &gfid_idx, pargfid, bname, +- inode, locked_on, xdata); ++ inode, locked_on, req, rsp); + if (ret) + return 
ret; + +@@ -450,7 +451,7 @@ out: + int + afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + uuid_t pargfid, const char *bname, void *gfid_req, +- dict_t *xdata) ++ dict_t *req, dict_t *rsp) + { + afr_private_t *priv = NULL; + unsigned char *sources = NULL; +@@ -505,7 +506,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + + ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode, + sources, sinks, healed_sinks, source, +- locked_on, replies, gfid_req, xdata); ++ locked_on, replies, gfid_req, req, rsp); + } + unlock: + afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on, +@@ -578,7 +579,7 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this, + + int + afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, +- void *gfid_req, dict_t *xdata) ++ void *gfid_req, dict_t *req, dict_t *rsp) + { + inode_t *parent = NULL; + call_frame_t *frame = NULL; +@@ -600,7 +601,7 @@ afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, + + if (need_heal) { + ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname, +- gfid_req, xdata); ++ gfid_req, req, rsp); + if (ret) + goto out; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index c8dc384..6b0bf69 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -127,7 +127,7 @@ afr_throttled_selfheal(call_frame_t *frame, xlator_t *this); + + int + afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req, +- dict_t *xdata); ++ dict_t *req, dict_t *rsp); + + int + afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd); +@@ -357,7 +357,8 @@ int + afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, +- unsigned char *locked_on, int *src, dict_t *xdata); ++ unsigned 
char *locked_on, int *src, dict_t *req, ++ dict_t *rsp); + int + afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources, + unsigned char *sinks, +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 939a135..18aed93 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -295,7 +295,7 @@ afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent, + { + int ret = -1; + +- ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL); ++ ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL, NULL); + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch b/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch new file mode 100644 index 0000000..bc1b263 --- /dev/null +++ b/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch @@ -0,0 +1,388 @@ +From da75c2857fd8b173d47fb7fc3b925ffd14105f64 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Wed, 23 Dec 2020 07:39:13 -0500 +Subject: [PATCH 517/517] gfapi: 'glfs_h_creat_open' - new API to create handle + and open fd + +Right now we have two separate APIs, one +- 'glfs_h_creat_handle' to create handle & another +- 'glfs_h_open' to create a glfd to return to application + +Having two separate routines can result in access errors +while trying to create and write into a read-only file. 
+ +Since a fd is opened even during file/directory creation, +introducing a new API to make these two operations atomic i.e, +which can create both handle & fd and pass them to application + +This is backport of below mainline patch - +- https://review.gluster.org/#/c/glusterfs/+/23448/ +- bz#1753569 + +> Signed-off-by: Soumya Koduri +> Change-Id: Ibf513fcfcdad175f4d7eb6fa7a61b8feec6d33b5 +> release-6: commit 5a2af2fd06356f6fc79d591c352caffd4c511c9e +> master: commit 41a0f2aa755ec7162facd30209f2fa3f40308766 + +BUG: 1910119 +Change-Id: Ib397dbe82a6928d8f24251809d30febddd007bfc +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/222083 +Reviewed-by: Soumya Koduri +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/gfapi.aliases | 1 + + api/src/gfapi.map | 5 ++ + api/src/glfs-handleops.c | 135 ++++++++++++++++++++++++++++++++++ + api/src/glfs-handles.h | 5 ++ + tests/basic/gfapi/glfs_h_creat_open.c | 118 +++++++++++++++++++++++++++++ + tests/basic/gfapi/glfs_h_creat_open.t | 27 +++++++ + 6 files changed, 291 insertions(+) + create mode 100644 tests/basic/gfapi/glfs_h_creat_open.c + create mode 100755 tests/basic/gfapi/glfs_h_creat_open.t + +diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases +index 692ae13..3d3415c 100644 +--- a/api/src/gfapi.aliases ++++ b/api/src/gfapi.aliases +@@ -197,3 +197,4 @@ _pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0 + _pub_glfs_setattr _glfs_setattr$GFAPI_6.0 + + _pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_6.4 ++_pub_glfs_h_creat_open _glfs_h_creat_open@GFAPI_6.6 +diff --git a/api/src/gfapi.map b/api/src/gfapi.map +index df65837..614f3f6 100644 +--- a/api/src/gfapi.map ++++ b/api/src/gfapi.map +@@ -276,3 +276,8 @@ GFAPI_6.4 { + global: + glfs_set_statedump_path; + } GFAPI_PRIVATE_6.1; ++ ++GFAPI_6.6 { ++ global: ++ glfs_h_creat_open; ++} GFAPI_6.4; +diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c +index d4e1545..7b8ff14 
100644 +--- a/api/src/glfs-handleops.c ++++ b/api/src/glfs-handleops.c +@@ -843,6 +843,141 @@ invalid_fs: + GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2); + + struct glfs_object * ++pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, ++ const char *path, int flags, mode_t mode, ++ struct stat *stat, struct glfs_fd **out_fd) ++{ ++ int ret = -1; ++ struct glfs_fd *glfd = NULL; ++ xlator_t *subvol = NULL; ++ inode_t *inode = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ struct iatt iatt = { ++ 0, ++ }; ++ uuid_t gfid; ++ dict_t *xattr_req = NULL; ++ struct glfs_object *object = NULL; ++ dict_t *fop_attr = NULL; ++ ++ /* validate in args */ ++ if ((fs == NULL) || (parent == NULL) || (path == NULL) || ++ (out_fd == NULL)) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ DECLARE_OLD_THIS; ++ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs); ++ ++ /* get the active volume */ ++ subvol = glfs_active_subvol(fs); ++ if (!subvol) { ++ ret = -1; ++ goto out; ++ } ++ ++ /* get/refresh the in arg objects inode in correlation to the xlator */ ++ inode = glfs_resolve_inode(fs, subvol, parent); ++ if (!inode) { ++ ret = -1; ++ goto out; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ gf_uuid_generate(gfid); ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true); ++ if (ret) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path); ++ ++ glfd = glfs_fd_new(fs); ++ if (!glfd) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ glfd->fd = fd_create(loc.inode, getpid()); ++ if (!glfd->fd) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ glfd->fd->flags = flags; ++ ++ ret = get_fop_attr_thrd_key(&fop_attr); ++ if (ret) ++ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed"); ++ ++ /* fop/op */ ++ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt, xattr_req, ++ NULL); ++ DECODE_SYNCOP_ERR(ret); ++ ++ /* populate out args */ ++ 
if (ret == 0) { ++ glfd->fd->flags = flags; ++ ++ ret = glfs_loc_link(&loc, &iatt); ++ if (ret != 0) { ++ goto out; ++ } ++ ++ if (stat) ++ glfs_iatt_to_stat(fs, &iatt, stat); ++ ++ ret = glfs_create_object(&loc, &object); ++ } ++ ++out: ++ if (ret && object != NULL) { ++ /* Release the held reference */ ++ glfs_h_close(object); ++ object = NULL; ++ } ++ ++ loc_wipe(&loc); ++ ++ if (inode) ++ inode_unref(inode); ++ ++ if (fop_attr) ++ dict_unref(fop_attr); ++ ++ if (xattr_req) ++ dict_unref(xattr_req); ++ ++ if (ret && glfd) { ++ GF_REF_PUT(glfd); ++ } else if (glfd) { ++ glfd_set_state_bind(glfd); ++ *out_fd = glfd; ++ } ++ ++ glfs_subvol_done(fs, subvol); ++ ++ __GLFS_EXIT_FS; ++ ++invalid_fs: ++ return object; ++} ++ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6); ++ ++struct glfs_object * + pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, struct stat *stat) + { +diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h +index f7e6a06..4d039b9 100644 +--- a/api/src/glfs-handles.h ++++ b/api/src/glfs-handles.h +@@ -250,6 +250,11 @@ int + glfs_h_access(glfs_t *fs, glfs_object_t *object, int mask) __THROW + GFAPI_PUBLIC(glfs_h_access, 3.6.0); + ++struct glfs_object * ++glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, const char *path, ++ int flags, mode_t mode, struct stat *stat, ++ struct glfs_fd **out_fd) __THROW ++ GFAPI_PUBLIC(glfs_h_creat_open, 6.6); + /* + SYNOPSIS + +diff --git a/tests/basic/gfapi/glfs_h_creat_open.c b/tests/basic/gfapi/glfs_h_creat_open.c +new file mode 100644 +index 0000000..7672561 +--- /dev/null ++++ b/tests/basic/gfapi/glfs_h_creat_open.c +@@ -0,0 +1,118 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define LOG_ERR(func, ret) \ ++ do { \ ++ if (ret != 0) { \ ++ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \ ++ ret, errno); \ ++ exit(1); \ ++ } else { \ ++ fprintf(stderr, "%s : 
returned %d\n", func, ret); \ ++ } \ ++ } while (0) ++#define LOG_IF_NO_ERR(func, ret) \ ++ do { \ ++ if (ret == 0) { \ ++ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \ ++ exit(1); \ ++ } else { \ ++ fprintf(stderr, "%s : returned %d\n", func, ret); \ ++ } \ ++ } while (0) ++int ++main(int argc, char *argv[]) ++{ ++ glfs_t *fs = NULL; ++ int ret = 0; ++ struct glfs_object *root = NULL, *leaf = NULL; ++ glfs_fd_t *fd = NULL; ++ char *filename = "/ro-file"; ++ struct stat sb = { ++ 0, ++ }; ++ char *logfile = NULL; ++ char *volname = NULL; ++ char *hostname = NULL; ++ char buf[32] = "abcdefghijklmnopqrstuvwxyz012345"; ++ ++ fprintf(stderr, "Starting glfs_h_creat_open\n"); ++ ++ if (argc != 4) { ++ fprintf(stderr, "Invalid argument\n"); ++ exit(1); ++ } ++ ++ hostname = argv[1]; ++ volname = argv[2]; ++ logfile = argv[3]; ++ ++ fs = glfs_new(volname); ++ if (!fs) { ++ fprintf(stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); ++ LOG_ERR("glfs_set_volfile_server", ret); ++ ++ ret = glfs_set_logging(fs, logfile, 7); ++ LOG_ERR("glfs_set_logging", ret); ++ ++ ret = glfs_init(fs); ++ LOG_ERR("glfs_init", ret); ++ ++ sleep(2); ++ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0); ++ if (!root) { ++ ret = -1; ++ LOG_ERR("glfs_h_lookupat root", ret); ++ } ++ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0); ++ if (!leaf) { ++ ret = -1; ++ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret); ++ } ++ ++ leaf = glfs_h_creat_open(fs, root, filename, O_RDONLY, 00444, &sb, &fd); ++ if (!leaf || !fd) { ++ ret = -1; ++ LOG_ERR("glfs_h_creat leaf", ret); ++ } ++ fprintf(stderr, "glfs_h_create_open leaf - %p\n", leaf); ++ ++ ret = glfs_write(fd, buf, 32, 0); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_write: error writing to file %s, %s\n", filename, ++ strerror(errno)); ++ goto out; ++ } ++ ++ ret = glfs_h_getattrs(fs, leaf, &sb); ++ LOG_ERR("glfs_h_getattrs", ret); ++ ++ if (sb.st_size != 32) { ++ 
fprintf(stderr, "glfs_write: post size mismatch\n"); ++ goto out; ++ } ++ ++ fprintf(stderr, "Successfully opened and written to a read-only file \n"); ++out: ++ if (fd) ++ glfs_close(fd); ++ ++ ret = glfs_fini(fs); ++ LOG_ERR("glfs_fini", ret); ++ ++ fprintf(stderr, "End of libgfapi_fini\n"); ++ ++ exit(0); ++} +diff --git a/tests/basic/gfapi/glfs_h_creat_open.t b/tests/basic/gfapi/glfs_h_creat_open.t +new file mode 100755 +index 0000000..f24ae73 +--- /dev/null ++++ b/tests/basic/gfapi/glfs_h_creat_open.t +@@ -0,0 +1,27 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/brick1; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/glfs_h_creat_open.c -lgfapi ++ ++TEST ./$(dirname $0)/glfs_h_creat_open $H0 $V0 $logdir/glfs.log ++ ++cleanup_tester $(dirname $0)/glfs_h_creat_open ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch b/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch new file mode 100644 index 0000000..00d29b9 --- /dev/null +++ b/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch @@ -0,0 +1,41 @@ +From 818025e467ea98b32a855c92ba6aef6e172e029f Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha +Date: Fri, 8 Jan 2021 13:12:46 +0530 +Subject: [PATCH 518/526] glusterd: Fix for shared storage in ipv6 env + +Issue: +Mounting shared storage volume was failing in ipv6 env if the hostnames were FQDNs. +The brickname for the volume was being cut off, as a result, volume creation was failing. 
+ +>Change-Id: Ib38993724c709b35b603f9ac666630c50c932c3e +>Fixes: #1406 +>Signed-off-by: nik-redhat +Upstream patch: https://github.com/gluster/glusterfs/pull/1972 + +BUG: 1856574 + +Change-Id: Ib38993724c709b35b603f9ac666630c50c932c3e +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/223248 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +index 9597503..e9261af 100755 +--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh ++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +@@ -46,7 +46,7 @@ do + + key=`echo $line | cut -d ':' -f 1` + if [ "$key" == "Hostname" ]; then +- hostname=`echo $line | cut -d ':' -f 2 | xargs` ++ hostname=`echo $line | cut -d ' ' -f 2 | xargs` + fi + + if [ "$key" == "State" ]; then +-- +1.8.3.1 + diff --git a/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch b/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch new file mode 100644 index 0000000..f37acfd --- /dev/null +++ b/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch @@ -0,0 +1,58 @@ +From 6ed227367b6eb7d6d7afde3859ad0a711a3adf36 Mon Sep 17 00:00:00 2001 +From: Leela Venkaiah G +Date: Wed, 13 Jan 2021 16:02:25 +0530 +Subject: [PATCH 519/526] glusterfs-events: Fix incorrect attribute access + (#2002) + +Issue: When GlusterCmdException is raised, current code try to access +message atrribute which doesn't exist and resulting in a malformed +error string on failure operations + +Code Change: Replace `message` with `args[0]` + +>Fixes: #2001 +>Change-Id: I65c9f0ee79310937a384025b8d454acda154e4bb +>Signed-off-by: Leela Venkaiah G +Upstream patch: 
https://github.com/gluster/glusterfs/pull/2002 + +BUG: 1600459 +Change-Id: I65c9f0ee79310937a384025b8d454acda154e4bb +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/223584 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + events/src/peer_eventsapi.py | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py +index 26b77a0..c388da4 100644 +--- a/events/src/peer_eventsapi.py ++++ b/events/src/peer_eventsapi.py +@@ -174,9 +174,9 @@ def sync_to_peers(args): + sync_file_to_peers(WEBHOOKS_FILE_TO_SYNC) + except GlusterCmdException as e: + # Print stdout if stderr is empty +- errmsg = e.message[2] if e.message[2] else e.message[1] ++ errmsg = e.args[0][2] if e.args[0][2] else e.args[0][1] + handle_output_error("Failed to sync Webhooks file: [Error: {0}]" +- "{1}".format(e.message[0], errmsg), ++ "{1}".format(e.args[0][0], errmsg), + errcode=ERROR_WEBHOOK_SYNC_FAILED, + json_output=args.json) + +@@ -185,9 +185,9 @@ def sync_to_peers(args): + sync_file_to_peers(CUSTOM_CONFIG_FILE_TO_SYNC) + except GlusterCmdException as e: + # Print stdout if stderr is empty +- errmsg = e.message[2] if e.message[2] else e.message[1] ++ errmsg = e.args[0][2] if e.args[0][2] else e.args[0][1] + handle_output_error("Failed to sync Config file: [Error: {0}]" +- "{1}".format(e.message[0], errmsg), ++ "{1}".format(e.args[0][0], errmsg), + errcode=ERROR_CONFIG_SYNC_FAILED, + json_output=args.json) + +-- +1.8.3.1 + diff --git a/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch b/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch new file mode 100644 index 0000000..c46a9ca --- /dev/null +++ b/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch @@ -0,0 +1,70 @@ +From a3fd2c9d85bbd23131c985599d9c9d74f66f32d2 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Thu, 10 Oct 2019 
10:50:59 +0530 +Subject: [PATCH 520/526] performance/open-behind: seek fop should + open_and_resume + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/23530 +> fixes: bz#1760187 +> Change-Id: I4c6ad13194d4fc5c7705e35bf9a27fce504b51f9 +> Signed-off-by: Pranith Kumar K + +BUG: 1830713 +Change-Id: I4c6ad13194d4fc5c7705e35bf9a27fce504b51f9 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/224484 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 27 +++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 268c717..3ee3c40 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -711,6 +711,32 @@ err: + } + + int ++ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ gf_seek_what_t what, dict_t *xdata) ++{ ++ call_stub_t *stub = NULL; ++ fd_t *wind_fd = NULL; ++ ++ wind_fd = ob_get_wind_fd(this, fd, NULL); ++ ++ stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what, ++ xdata); ++ ++ fd_unref(wind_fd); ++ ++ if (!stub) ++ goto err; ++ ++ open_and_resume(this, wind_fd, stub); ++ ++ return 0; ++err: ++ STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); ++ ++ return 0; ++} ++ ++int + ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { + call_stub_t *stub = NULL; +@@ -1276,6 +1302,7 @@ struct xlator_fops fops = { + .flush = ob_flush, + .fsync = ob_fsync, + .fstat = ob_fstat, ++ .seek = ob_seek, + .ftruncate = ob_ftruncate, + .fsetxattr = ob_fsetxattr, + .setxattr = ob_setxattr, +-- +1.8.3.1 + diff --git a/SOURCES/0521-open-behind-fix-missing-fd-reference.patch b/SOURCES/0521-open-behind-fix-missing-fd-reference.patch new file mode 100644 index 0000000..8e18af8 --- 
/dev/null +++ b/SOURCES/0521-open-behind-fix-missing-fd-reference.patch @@ -0,0 +1,121 @@ +From 211d0f7dbb4991b2191925973222ebc79f010e84 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Sun, 8 Mar 2020 18:36:45 +0100 +Subject: [PATCH 521/526] open-behind: fix missing fd reference + +Open behind was not keeping any reference on fd's pending to be +opened. This makes it possible that a concurrent close and en entry +fop (unlink, rename, ...) caused destruction of the fd while it +was still being used. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24204 +> Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9 +> Fixes: bz#1810934 +> Signed-off-by: Xavi Hernandez + +Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9 +BUG: 1830713 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/224485 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 27 ++++++++++++++--------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 3ee3c40..dd2f2fd 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -206,8 +206,13 @@ ob_fd_free(ob_fd_t *ob_fd) + if (ob_fd->xdata) + dict_unref(ob_fd->xdata); + +- if (ob_fd->open_frame) ++ if (ob_fd->open_frame) { ++ /* If we sill have a frame it means that background open has never ++ * been triggered. We need to release the pending reference. */ ++ fd_unref(ob_fd->fd); ++ + STACK_DESTROY(ob_fd->open_frame->root); ++ } + + GF_FREE(ob_fd); + } +@@ -297,6 +302,7 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + call_resume(stub); + } + ++ /* The background open is completed. We can release the 'fd' reference. 
*/ + fd_unref(fd); + + STACK_DESTROY(frame->root); +@@ -331,7 +337,9 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) + } + + if (frame) { +- frame->local = fd_ref(fd); ++ /* We don't need to take a reference here. We already have a reference ++ * while the open is pending. */ ++ frame->local = fd; + + STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +@@ -345,15 +353,12 @@ void + ob_inode_wake(xlator_t *this, struct list_head *ob_fds) + { + ob_fd_t *ob_fd = NULL, *tmp = NULL; +- fd_t *fd = NULL; + + if (!list_empty(ob_fds)) { + list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) + { + ob_fd_wake(this, ob_fd->fd, ob_fd); +- fd = ob_fd->fd; + ob_fd_free(ob_fd); +- fd_unref(fd); + } + } + } +@@ -365,7 +370,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) + if (!src || !dst) + goto out; + +- dst->fd = __fd_ref(src->fd); ++ dst->fd = src->fd; + dst->loc.inode = inode_ref(src->loc.inode); + gf_uuid_copy(dst->loc.gfid, src->loc.gfid); + dst->flags = src->flags; +@@ -509,7 +514,6 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + + ob_fd->ob_inode = ob_inode; + +- /* don't do fd_ref, it'll cause leaks */ + ob_fd->fd = fd; + + ob_fd->open_frame = copy_frame(frame); +@@ -539,15 +543,16 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + } + UNLOCK(&fd->inode->lock); + +- if (!open_in_progress && !unlinked) { +- fd_ref(fd); ++ /* We take a reference while the background open is pending or being ++ * processed. If we finally wind the request in the foreground, then ++ * ob_fd_free() will take care of this additional reference. 
*/ ++ fd_ref(fd); + ++ if (!open_in_progress && !unlinked) { + STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake(this, fd, NULL); +- +- fd_unref(fd); + } else { + ob_fd_free(ob_fd); + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +-- +1.8.3.1 + diff --git a/SOURCES/0522-lcov-improve-line-coverage.patch b/SOURCES/0522-lcov-improve-line-coverage.patch new file mode 100644 index 0000000..13ece12 --- /dev/null +++ b/SOURCES/0522-lcov-improve-line-coverage.patch @@ -0,0 +1,746 @@ +From 46e2bbd52d4427c1348fa38dcb5d2b5f125555f1 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Thu, 30 May 2019 15:25:01 +0530 +Subject: [PATCH 522/526] lcov: improve line coverage + +upcall: remove extra variable assignment and use just one + initialization. +open-behind: reduce the overall number of lines, in functions + not frequently called +selinux: reduce some lines in init failure cases + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/22789 +> updates: bz#1693692 +> Change-Id: I7c1de94f2ec76a5bfe1f48a9632879b18e5fbb95 +> Signed-off-by: Amar Tumballi + +BUG: 1830713 +Change-Id: I7c1de94f2ec76a5bfe1f48a9632879b18e5fbb95 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/224486 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/selinux/src/selinux.c | 6 +- + xlators/features/upcall/src/upcall.c | 108 +++++++--------------- + xlators/performance/open-behind/src/open-behind.c | 58 ++++-------- + 3 files changed, 55 insertions(+), 117 deletions(-) + +diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c +index 58b4c5d..e8e16cd 100644 +--- a/xlators/features/selinux/src/selinux.c ++++ b/xlators/features/selinux/src/selinux.c +@@ -234,7 +234,6 @@ init(xlator_t *this) + priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t); + if (!priv) { + gf_log(this->name, GF_LOG_ERROR, 
"out of memory"); +- ret = ENOMEM; + goto out; + } + +@@ -242,7 +241,6 @@ init(xlator_t *this) + + this->local_pool = mem_pool_new(selinux_priv_t, 64); + if (!this->local_pool) { +- ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM, + "Failed to create local_t's memory pool"); + goto out; +@@ -252,9 +250,7 @@ init(xlator_t *this) + ret = 0; + out: + if (ret) { +- if (priv) { +- GF_FREE(priv); +- } ++ GF_FREE(priv); + mem_pool_destroy(this->local_pool); + } + return ret; +diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c +index 2583c50..0795f58 100644 +--- a/xlators/features/upcall/src/upcall.c ++++ b/xlators/features/upcall/src/upcall.c +@@ -57,14 +57,13 @@ static int32_t + up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -111,14 +110,13 @@ up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -167,14 +165,13 @@ static int32_t + up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -220,14 +217,13 @@ static int32_t + 
up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -274,14 +270,13 @@ static int32_t + up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -343,14 +338,13 @@ static int32_t + up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -410,14 +404,13 @@ static int32_t + up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -472,14 +465,13 @@ static int32_t + up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -531,14 +523,13 @@ static int32_t + 
up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -592,14 +583,13 @@ static int32_t + up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -653,14 +643,13 @@ static int32_t + up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *params) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -717,15 +706,13 @@ static int32_t + up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); +- + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -774,14 +761,13 @@ out: + static int32_t + up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -826,14 +812,13 @@ out: + static int32_t + 
up_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -852,14 +837,13 @@ err: + static int32_t + up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -879,14 +863,13 @@ static int32_t + up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -932,14 +915,13 @@ static int32_t + up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -986,14 +968,13 @@ static int32_t + up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1047,14 +1028,13 @@ static int32_t + up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t 
umask, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1110,14 +1090,13 @@ static int32_t + up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1164,14 +1143,13 @@ static int32_t + up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1216,14 +1194,13 @@ out: + static int32_t + up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1270,14 +1247,13 @@ static int32_t + up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1334,14 +1310,13 @@ static int32_t + up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict) + { 
+- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1361,14 +1336,13 @@ static int32_t + up_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1415,14 +1389,13 @@ static int32_t + up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1470,14 +1443,13 @@ static int32_t + up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1524,14 +1496,13 @@ static int + up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1577,14 +1548,13 @@ static int32_t + up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t 
what, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1652,14 +1622,13 @@ static int32_t + up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1727,14 +1696,13 @@ static int32_t + up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, fd, fd->inode, dict); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1800,7 +1768,7 @@ static int32_t + up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + dict_t *xattr = NULL; + +@@ -1808,13 +1776,11 @@ up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + + xattr = dict_for_key_value(name, "", 1, _gf_true); + if (!xattr) { +- op_errno = ENOMEM; + goto err; + } + + local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1885,7 +1851,7 @@ static int32_t + up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + dict_t *xattr = NULL; + +@@ -1893,13 +1859,11 @@ up_removexattr(call_frame_t 
*frame, xlator_t *this, loc_t *loc, + + xattr = dict_for_key_value(name, "", 1, _gf_true); + if (!xattr) { +- op_errno = ENOMEM; + goto err; + } + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1950,14 +1914,13 @@ static int32_t + up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -2000,14 +1963,13 @@ static int32_t + up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index dd2f2fd..cbe89ec 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -581,7 +581,7 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + { + fd_t *old_fd = NULL; + int ret = -1; +- int op_errno = 0; ++ int op_errno = ENOMEM; + call_stub_t *stub = NULL; + + old_fd = fd_lookup(fd->inode, 0); +@@ -589,7 +589,6 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + /* open-behind only when this is the first FD */ + stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); + if (!stub) { +- op_errno = ENOMEM; + fd_unref(old_fd); + goto err; + } +@@ -603,7 +602,6 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + + ret = 
ob_open_behind(frame, this, loc, flags, fd, xdata); + if (ret) { +- op_errno = ENOMEM; + goto err; + } + +@@ -900,18 +898,12 @@ int + ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int cmd, struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_finodelk_stub(frame, default_finodelk_resume, volume, fd, cmd, +- flock, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); ++ call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, ++ volume, fd, cmd, flock, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); + + return 0; + } +@@ -921,18 +913,12 @@ ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fentrylk_stub(frame, default_fentrylk_resume, volume, fd, +- basename, cmd, type, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); ++ call_stub_t *stub = fop_fentrylk_stub( ++ frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); + + return 0; + } +@@ -941,18 +927,12 @@ int + ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, optype, xattr, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); ++ call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, ++ optype, 
xattr, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); + + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch b/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch new file mode 100644 index 0000000..621d5ae --- /dev/null +++ b/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch @@ -0,0 +1,2720 @@ +From b924c8ca8a133fc9413c8ed1407e63f1658c7e79 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Tue, 12 May 2020 23:54:54 +0200 +Subject: [PATCH 523/526] open-behind: rewrite of internal logic + +There was a critical flaw in the previous implementation of open-behind. + +When an open is done in the background, it's necessary to take a +reference on the fd_t object because once we "fake" the open answer, +the fd could be destroyed. However as long as there's a reference, +the release function won't be called. So, if the application closes +the file descriptor without having actually opened it, there will +always remain at least 1 reference, causing a leak. + +To avoid this problem, the previous implementation didn't take a +reference on the fd_t, so there were races where the fd could be +destroyed while it was still in use. + +To fix this, I've implemented a new xlator cbk that gets called from +fuse when the application closes a file descriptor. + +The whole logic of handling background opens have been simplified and +it's more efficient now. Only if the fop needs to be delayed until an +open completes, a stub is created. Otherwise no memory allocations are +needed. + +Correctly handling the close request while the open is still pending +has added a bit of complexity, but overall normal operation is simpler. 
+ +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24451 +> Change-Id: I6376a5491368e0e1c283cc452849032636261592 +> Fixes: #1225 +> Signed-off-by: Xavi Hernandez + +BUG: 1830713 +Change-Id: I6376a5491368e0e1c283cc452849032636261592 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/224487 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/fd.c | 26 + + libglusterfs/src/glusterfs/fd.h | 3 + + libglusterfs/src/glusterfs/xlator.h | 4 + + libglusterfs/src/libglusterfs.sym | 1 + + tests/basic/open-behind/open-behind.t | 183 +++ + tests/basic/open-behind/tester-fd.c | 99 ++ + tests/basic/open-behind/tester.c | 444 +++++++ + tests/basic/open-behind/tester.h | 145 +++ + tests/bugs/glusterfs/bug-873962-spb.t | 1 + + xlators/mount/fuse/src/fuse-bridge.c | 2 + + .../open-behind/src/open-behind-messages.h | 6 +- + xlators/performance/open-behind/src/open-behind.c | 1302 ++++++++------------ + 12 files changed, 1393 insertions(+), 823 deletions(-) + create mode 100644 tests/basic/open-behind/open-behind.t + create mode 100644 tests/basic/open-behind/tester-fd.c + create mode 100644 tests/basic/open-behind/tester.c + create mode 100644 tests/basic/open-behind/tester.h + +diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c +index 314546a..e4ec401 100644 +--- a/libglusterfs/src/fd.c ++++ b/libglusterfs/src/fd.c +@@ -501,6 +501,32 @@ out: + } + + void ++fd_close(fd_t *fd) ++{ ++ xlator_t *xl, *old_THIS; ++ ++ old_THIS = THIS; ++ ++ for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) { ++ if (!xl->call_cleanup) { ++ THIS = xl; ++ ++ if (IA_ISDIR(fd->inode->ia_type)) { ++ if (xl->cbks->fdclosedir != NULL) { ++ xl->cbks->fdclosedir(xl, fd); ++ } ++ } else { ++ if (xl->cbks->fdclose != NULL) { ++ xl->cbks->fdclose(xl, fd); ++ } ++ } ++ } ++ } ++ ++ THIS = old_THIS; ++} ++ ++void + fd_unref(fd_t *fd) + { + int32_t refcount = 0; +diff --git 
a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h +index cdbe289..4d157c4 100644 +--- a/libglusterfs/src/glusterfs/fd.h ++++ b/libglusterfs/src/glusterfs/fd.h +@@ -107,6 +107,9 @@ fd_ref(fd_t *fd); + void + fd_unref(fd_t *fd); + ++void ++fd_close(fd_t *fd); ++ + fd_t * + fd_create(struct _inode *inode, pid_t pid); + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 8650ccc..273039a 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -705,6 +705,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode); + + typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd); + ++typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd); ++ + struct xlator_cbks { + cbk_forget_t forget; + cbk_release_t release; +@@ -715,6 +717,8 @@ struct xlator_cbks { + cbk_ictxmerge_t ictxmerge; + cbk_inodectx_size_t ictxsize; + cbk_fdctx_size_t fdctxsize; ++ cbk_fdclose_t fdclose; ++ cbk_fdclose_t fdclosedir; + }; + + typedef int32_t (*dumpop_priv_t)(xlator_t *this); +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index bc770e2..0a0862e 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -456,6 +456,7 @@ event_unregister_close + fd_anonymous + fd_anonymous_with_flags + fd_bind ++fd_close + fd_create + fd_create_uint64 + __fd_ctx_del +diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t +new file mode 100644 +index 0000000..5e865d6 +--- /dev/null ++++ b/tests/basic/open-behind/open-behind.t +@@ -0,0 +1,183 @@ ++#!/bin/bash ++ ++WD="$(dirname "${0}")" ++ ++. ${WD}/../../include.rc ++. 
${WD}/../../volume.rc ++ ++function assign() { ++ local _assign_var="${1}" ++ local _assign_value="${2}" ++ ++ printf -v "${_assign_var}" "%s" "${_assign_value}" ++} ++ ++function pipe_create() { ++ local _pipe_create_var="${1}" ++ local _pipe_create_name ++ local _pipe_create_fd ++ ++ _pipe_create_name="$(mktemp -u)" ++ mkfifo "${_pipe_create_name}" ++ exec {_pipe_create_fd}<>"${_pipe_create_name}" ++ rm "${_pipe_create_name}" ++ ++ assign "${_pipe_create_var}" "${_pipe_create_fd}" ++} ++ ++function pipe_close() { ++ local _pipe_close_fd="${!1}" ++ ++ exec {_pipe_close_fd}>&- ++} ++ ++function tester_start() { ++ declare -ag tester ++ local tester_in ++ local tester_out ++ ++ pipe_create tester_in ++ pipe_create tester_out ++ ++ ${WD}/tester <&${tester_in} >&${tester_out} & ++ ++ tester=("$!" "${tester_in}" "${tester_out}") ++} ++ ++function tester_send() { ++ declare -ag tester ++ local tester_res ++ local tester_extra ++ ++ echo "${*}" >&${tester[1]} ++ ++ read -t 3 -u ${tester[2]} tester_res tester_extra ++ echo "${tester_res} ${tester_extra}" ++ if [[ "${tester_res}" == "OK" ]]; then ++ return 0 ++ fi ++ ++ return 1 ++} ++ ++function tester_stop() { ++ declare -ag tester ++ local tester_res ++ ++ tester_send "quit" ++ ++ tester_res=0 ++ if ! wait ${tester[0]}; then ++ tester_res=$? 
++ fi ++ ++ unset tester ++ ++ return ${tester_res} ++} ++ ++function count_open() { ++ local file="$(realpath "${B0}/${V0}/${1}")" ++ local count="0" ++ local inode ++ local ref ++ ++ inode="$(stat -c %i "${file}")" ++ ++ for fd in /proc/${BRICK_PID}/fd/*; do ++ ref="$(readlink "${fd}")" ++ if [[ "${ref}" == "${B0}/${V0}/"* ]]; then ++ if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then ++ count="$((${count} + 1))" ++ fi ++ fi ++ done ++ ++ echo "${count}" ++} ++ ++cleanup ++ ++TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c ++ ++TEST glusterd ++TEST pidof glusterd ++TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0} ++TEST ${CLI} volume set ${V0} flush-behind off ++TEST ${CLI} volume set ${V0} write-behind off ++TEST ${CLI} volume set ${V0} quick-read off ++TEST ${CLI} volume set ${V0} stat-prefetch on ++TEST ${CLI} volume set ${V0} io-cache off ++TEST ${CLI} volume set ${V0} open-behind on ++TEST ${CLI} volume set ${V0} lazy-open off ++TEST ${CLI} volume set ${V0} read-after-open off ++TEST ${CLI} volume start ${V0} ++ ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})" ++ ++TEST touch "${M0}/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_start ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT_WITHIN 5 "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${CLI} volume set ${V0} lazy-open on ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++sleep 2 ++EXPECT "0" count_open "/test" ++TEST tester_send fd write 0 "test" ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} 
${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++EXPECT "test" tester_send fd read 0 64 ++# Even though read-after-open is disabled, use-anonymous-fd is also disabled, ++# so reads need to open the file first. ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++TEST tester_send fd open 1 "${M0}/test" ++EXPECT "2" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "1" count_open "/test" ++TEST tester_send fd close 1 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${CLI} volume set ${V0} read-after-open on ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++EXPECT "test" tester_send fd read 0 64 ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++TEST tester_stop ++ ++cleanup +diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c +new file mode 100644 +index 0000000..00f02bc +--- /dev/null ++++ b/tests/basic/open-behind/tester-fd.c +@@ -0,0 +1,99 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++ ++#include "tester.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int32_t ++fd_open(context_t *ctx, command_t *cmd) ++{ ++ obj_t *obj; ++ int32_t fd; ++ ++ obj = cmd->args[0].obj.ref; ++ ++ fd = open(cmd->args[1].str.data, O_RDWR); ++ if (fd < 0) { ++ return error(errno, "open() failed"); ++ } ++ ++ obj->type = OBJ_TYPE_FD; ++ obj->fd = fd; ++ ++ out_ok("%d", fd); ++ ++ return 0; ++} ++ ++static int32_t ++fd_close(context_t *ctx, command_t *cmd) ++{ ++ obj_t *obj; ++ ++ obj = cmd->args[0].obj.ref; ++ obj->type = OBJ_TYPE_NONE; ++ ++ if (close(obj->fd) != 0) { ++ return error(errno, "close() failed"); ++ } ++ ++ out_ok(); ++ ++ return 0; ++} ++ ++static int32_t ++fd_write(context_t *ctx, command_t *cmd) ++{ ++ ssize_t len, ret; ++ ++ len = strlen(cmd->args[1].str.data); ++ ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len); ++ if (ret < 0) { ++ return error(errno, "write() failed"); ++ } ++ ++ out_ok("%zd", ret); ++ ++ return 0; ++} ++ ++static int32_t ++fd_read(context_t *ctx, command_t *cmd) ++{ ++ char data[cmd->args[1].num.value + 1]; ++ ssize_t ret; ++ ++ ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value); ++ if (ret < 0) { ++ return error(errno, "read() failed"); ++ } ++ ++ data[ret] = 0; ++ ++ out_ok("%zd %s", ret, data); ++ ++ return 0; ++} ++ ++command_t fd_commands[] = { ++ {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))}, ++ {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))}, ++ {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))}, ++ {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))}, ++ CMD_END}; +diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c +new file mode 100644 +index 0000000..b2da71c +--- /dev/null ++++ b/tests/basic/open-behind/tester.c +@@ -0,0 +1,444 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. ++ This file is part of GlusterFS. 
++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include "tester.h" ++ ++#include ++#include ++#include ++#include ++#include ++ ++static void * ++mem_alloc(size_t size) ++{ ++ void *ptr; ++ ++ ptr = malloc(size); ++ if (ptr == NULL) { ++ error(ENOMEM, "Failed to allocate memory (%zu bytes)", size); ++ } ++ ++ return ptr; ++} ++ ++static void ++mem_free(void *ptr) ++{ ++ free(ptr); ++} ++ ++static bool ++buffer_create(context_t *ctx, size_t size) ++{ ++ ctx->buffer.base = mem_alloc(size); ++ if (ctx->buffer.base == NULL) { ++ return false; ++ } ++ ++ ctx->buffer.size = size; ++ ctx->buffer.len = 0; ++ ctx->buffer.pos = 0; ++ ++ return true; ++} ++ ++static void ++buffer_destroy(context_t *ctx) ++{ ++ mem_free(ctx->buffer.base); ++ ctx->buffer.size = 0; ++ ctx->buffer.len = 0; ++} ++ ++static int32_t ++buffer_get(context_t *ctx) ++{ ++ ssize_t len; ++ ++ if (ctx->buffer.pos >= ctx->buffer.len) { ++ len = read(0, ctx->buffer.base, ctx->buffer.size); ++ if (len < 0) { ++ return error(errno, "read() failed"); ++ } ++ if (len == 0) { ++ return 0; ++ } ++ ++ ctx->buffer.len = len; ++ ctx->buffer.pos = 0; ++ } ++ ++ return ctx->buffer.base[ctx->buffer.pos++]; ++} ++ ++static int32_t ++str_skip_spaces(context_t *ctx, int32_t current) ++{ ++ while ((current > 0) && (current != '\n') && isspace(current)) { ++ current = buffer_get(ctx); ++ } ++ ++ return current; ++} ++ ++static int32_t ++str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current) ++{ ++ uint32_t len; ++ ++ current = str_skip_spaces(ctx, current); ++ ++ len = 0; ++ while ((size > 0) && (current > 0) && (current != '\n') && ++ !isspace(current)) { ++ len++; ++ *buffer++ = current; ++ size--; ++ current = buffer_get(ctx); ++ } ++ ++ if (len == 0) { ++ return error(ENODATA, 
"Expecting a token"); ++ } ++ ++ if (size == 0) { ++ return error(ENOBUFS, "Token too long"); ++ } ++ ++ *buffer = 0; ++ ++ return current; ++} ++ ++static int32_t ++str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value, ++ int32_t current) ++{ ++ char text[32], *ptr; ++ uint64_t num; ++ ++ current = str_token(ctx, text, sizeof(text), current); ++ if (current > 0) { ++ num = strtoul(text, &ptr, 0); ++ if ((*ptr != 0) || (num < min) || (num > max)) { ++ return error(ERANGE, "Invalid number"); ++ } ++ *value = num; ++ } ++ ++ return current; ++} ++ ++static int32_t ++str_eol(context_t *ctx, int32_t current) ++{ ++ current = str_skip_spaces(ctx, current); ++ if (current != '\n') { ++ return error(EINVAL, "Expecting end of command"); ++ } ++ ++ return current; ++} ++ ++static void ++str_skip(context_t *ctx, int32_t current) ++{ ++ while ((current > 0) && (current != '\n')) { ++ current = buffer_get(ctx); ++ } ++} ++ ++static int32_t ++cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ obj_t *obj; ++ uint64_t id; ++ ++ current = str_number(ctx, 0, ctx->obj_count, &id, current); ++ if (current <= 0) { ++ return current; ++ } ++ ++ obj = &ctx->objs[id]; ++ if (obj->type != arg->obj.type) { ++ if (obj->type != OBJ_TYPE_NONE) { ++ return error(EBUSY, "Object is in use"); ++ } ++ return error(ENOENT, "Object is not defined"); ++ } ++ ++ arg->obj.ref = obj; ++ ++ return current; ++} ++ ++static int32_t ++cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value, ++ current); ++} ++ ++static int32_t ++cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ return str_token(ctx, arg->str.data, arg->str.size, current); ++} ++ ++static int32_t ++cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current) ++{ ++ arg_t *arg; ++ ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_OBJ: ++ current = 
cmd_parse_obj(ctx, arg, current); ++ break; ++ case ARG_TYPE_NUM: ++ current = cmd_parse_num(ctx, arg, current); ++ break; ++ case ARG_TYPE_STR: ++ current = cmd_parse_str(ctx, arg, current); ++ break; ++ default: ++ return error(EINVAL, "Unknown argument type"); ++ } ++ } ++ ++ if (current < 0) { ++ return current; ++ } ++ ++ current = str_eol(ctx, current); ++ if (current <= 0) { ++ return error(EINVAL, "Syntax error"); ++ } ++ ++ return cmd->handler(ctx, cmd); ++} ++ ++static int32_t ++cmd_parse(context_t *ctx, command_t *cmds) ++{ ++ char text[32]; ++ command_t *cmd; ++ int32_t current; ++ ++ cmd = cmds; ++ do { ++ current = str_token(ctx, text, sizeof(text), buffer_get(ctx)); ++ if (current <= 0) { ++ return current; ++ } ++ ++ while (cmd->name != NULL) { ++ if (strcmp(cmd->name, text) == 0) { ++ if (cmd->handler != NULL) { ++ return cmd_parse_args(ctx, cmd, current); ++ } ++ cmd = cmd->cmds; ++ break; ++ } ++ cmd++; ++ } ++ } while (cmd->name != NULL); ++ ++ str_skip(ctx, current); ++ ++ return error(ENOTSUP, "Unknown command"); ++} ++ ++static void ++cmd_fini(context_t *ctx, command_t *cmds) ++{ ++ command_t *cmd; ++ arg_t *arg; ++ ++ for (cmd = cmds; cmd->name != NULL; cmd++) { ++ if (cmd->handler == NULL) { ++ cmd_fini(ctx, cmd->cmds); ++ } else { ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_STR: ++ mem_free(arg->str.data); ++ arg->str.data = NULL; ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ } ++} ++ ++static bool ++cmd_init(context_t *ctx, command_t *cmds) ++{ ++ command_t *cmd; ++ arg_t *arg; ++ ++ for (cmd = cmds; cmd->name != NULL; cmd++) { ++ if (cmd->handler == NULL) { ++ if (!cmd_init(ctx, cmd->cmds)) { ++ return false; ++ } ++ } else { ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_STR: ++ arg->str.data = mem_alloc(arg->str.size); ++ if (arg->str.data == NULL) { ++ return false; ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ } 
++ } ++ ++ return true; ++} ++ ++static bool ++objs_create(context_t *ctx, uint32_t count) ++{ ++ uint32_t i; ++ ++ ctx->objs = mem_alloc(sizeof(obj_t) * count); ++ if (ctx->objs == NULL) { ++ return false; ++ } ++ ctx->obj_count = count; ++ ++ for (i = 0; i < count; i++) { ++ ctx->objs[i].type = OBJ_TYPE_NONE; ++ } ++ ++ return true; ++} ++ ++static int32_t ++objs_destroy(context_t *ctx) ++{ ++ uint32_t i; ++ int32_t err; ++ ++ err = 0; ++ for (i = 0; i < ctx->obj_count; i++) { ++ if (ctx->objs[i].type != OBJ_TYPE_NONE) { ++ err = error(ENOTEMPTY, "Objects not destroyed"); ++ break; ++ } ++ } ++ ++ mem_free(ctx->objs); ++ ctx->objs = NULL; ++ ctx->obj_count = 0; ++ ++ return err; ++} ++ ++static context_t * ++init(size_t size, uint32_t objs, command_t *cmds) ++{ ++ context_t *ctx; ++ ++ ctx = mem_alloc(sizeof(context_t)); ++ if (ctx == NULL) { ++ goto failed; ++ } ++ ++ if (!buffer_create(ctx, size)) { ++ goto failed_ctx; ++ } ++ ++ if (!objs_create(ctx, objs)) { ++ goto failed_buffer; ++ } ++ ++ if (!cmd_init(ctx, cmds)) { ++ goto failed_objs; ++ } ++ ++ ctx->active = true; ++ ++ return ctx; ++ ++failed_objs: ++ cmd_fini(ctx, cmds); ++ objs_destroy(ctx); ++failed_buffer: ++ buffer_destroy(ctx); ++failed_ctx: ++ mem_free(ctx); ++failed: ++ return NULL; ++} ++ ++static int32_t ++fini(context_t *ctx, command_t *cmds) ++{ ++ int32_t ret; ++ ++ cmd_fini(ctx, cmds); ++ buffer_destroy(ctx); ++ ++ ret = objs_destroy(ctx); ++ ++ ctx->active = false; ++ ++ return ret; ++} ++ ++static int32_t ++exec_quit(context_t *ctx, command_t *cmd) ++{ ++ ctx->active = false; ++ ++ return 0; ++} ++ ++static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)}, ++ {"quit", exec_quit, CMD_ARGS()}, ++ CMD_END}; ++ ++int32_t ++main(int32_t argc, char *argv[]) ++{ ++ context_t *ctx; ++ int32_t res; ++ ++ ctx = init(1024, 16, commands); ++ if (ctx == NULL) { ++ return 1; ++ } ++ ++ do { ++ res = cmd_parse(ctx, commands); ++ if (res < 0) { ++ out_err(-res); ++ } ++ } while (ctx->active); 
++ ++ res = fini(ctx, commands); ++ if (res >= 0) { ++ out_ok(); ++ return 0; ++ } ++ ++ out_err(-res); ++ ++ return 1; ++} +diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h +new file mode 100644 +index 0000000..64e940c +--- /dev/null ++++ b/tests/basic/open-behind/tester.h +@@ -0,0 +1,145 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __TESTER_H__ ++#define __TESTER_H__ ++ ++#include ++#include ++#include ++ ++enum _obj_type; ++typedef enum _obj_type obj_type_t; ++ ++enum _arg_type; ++typedef enum _arg_type arg_type_t; ++ ++struct _buffer; ++typedef struct _buffer buffer_t; ++ ++struct _obj; ++typedef struct _obj obj_t; ++ ++struct _context; ++typedef struct _context context_t; ++ ++struct _arg; ++typedef struct _arg arg_t; ++ ++struct _command; ++typedef struct _command command_t; ++ ++enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD }; ++ ++enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR }; ++ ++struct _buffer { ++ char *base; ++ uint32_t size; ++ uint32_t len; ++ uint32_t pos; ++}; ++ ++struct _obj { ++ obj_type_t type; ++ union { ++ int32_t fd; ++ }; ++}; ++ ++struct _context { ++ obj_t *objs; ++ buffer_t buffer; ++ uint32_t obj_count; ++ bool active; ++}; ++ ++struct _arg { ++ arg_type_t type; ++ union { ++ struct { ++ obj_type_t type; ++ obj_t *ref; ++ } obj; ++ struct { ++ uint64_t value; ++ uint64_t min; ++ uint64_t max; ++ } num; ++ struct { ++ uint32_t size; ++ char *data; ++ } str; ++ }; ++}; ++ ++struct _command { ++ const char *name; ++ int32_t (*handler)(context_t *ctx, command_t *cmd); ++ union { ++ arg_t *args; ++ command_t *cmds; ++ }; ++}; ++ ++#define msg(_stream, _fmt, _args...) 
\ ++ do { \ ++ fprintf(_stream, _fmt "\n", ##_args); \ ++ fflush(_stream); \ ++ } while (0) ++ ++#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args) ++#define msg_err(_err, _fmt, _args...) \ ++ ({ \ ++ int32_t __msg_err = (_err); \ ++ msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err, \ ++ ##_args); \ ++ -__msg_err; \ ++ }) ++ ++#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args) ++#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args) ++#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args) ++ ++#define out_ok(_args...) msg_out("OK " _args) ++#define out_err(_err) msg_out("ERR %d", _err) ++ ++#define ARG_END \ ++ { \ ++ ARG_TYPE_NONE \ ++ } ++ ++#define CMD_ARGS1(_x, _args...) \ ++ .args = (arg_t[]) { _args } ++#define CMD_ARGS(_args...) CMD_ARGS1(, ##_args, ARG_END) ++ ++#define CMD_SUB(_cmds) .cmds = _cmds ++ ++#define CMD_END \ ++ { \ ++ NULL, NULL, CMD_SUB(NULL) \ ++ } ++ ++#define ARG_VAL(_type) \ ++ { \ ++ ARG_TYPE_OBJ, .obj = {.type = _type } \ ++ } ++#define ARG_NUM(_min, _max) \ ++ { \ ++ ARG_TYPE_NUM, .num = {.min = _min, .max = _max } \ ++ } ++#define ARG_STR(_size) \ ++ { \ ++ ARG_TYPE_STR, .str = {.size = _size } \ ++ } ++ ++extern command_t fd_commands[]; ++ ++#endif /* __TESTER_H__ */ +\ No newline at end of file +diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t +index db84a22..db71cc0 100644 +--- a/tests/bugs/glusterfs/bug-873962-spb.t ++++ b/tests/bugs/glusterfs/bug-873962-spb.t +@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.io-cache off + TEST $CLI volume set $V0 performance.write-behind off + TEST $CLI volume set $V0 performance.stat-prefetch off + TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.open-behind off + TEST $CLI volume set $V0 cluster.background-self-heal-count 0 + TEST $CLI volume start $V0 + TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 
--volfile-id=$V0 $M0 --direct-io-mode=enable +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 919eea3..76b5809 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -3398,6 +3398,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg, + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd); + ++ fd_close(state->fd); ++ + fuse_fd_ctx_destroy(this, state->fd); + fd_unref(fd); + +diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h +index f250824..0e78917 100644 +--- a/xlators/performance/open-behind/src/open-behind-messages.h ++++ b/xlators/performance/open-behind/src/open-behind-messages.h +@@ -23,6 +23,10 @@ + */ + + GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED, +- OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY); ++ OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY, ++ OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE); ++ ++#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop" ++#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state" + + #endif /* _OPEN_BEHIND_MESSAGES_H_ */ +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index cbe89ec..e43fe73 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -16,6 +16,18 @@ + #include "open-behind-messages.h" + #include + ++/* Note: The initial design of open-behind was made to cover the simple case ++ * of open, read, close for small files. This pattern combined with ++ * quick-read can do the whole operation without a single request to the ++ * bricks (except the initial lookup). ++ * ++ * The way to do this has been improved, but the logic remains the same. 
++ * Basically, this means that any operation sent to the fd or the inode ++ * that it's not a read, causes the open request to be sent to the ++ * bricks, and all future operations will be executed synchronously, ++ * including opens (it's reset once all fd's are closed). ++ */ ++ + typedef struct ob_conf { + gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe + e.g - fstat() readv() +@@ -32,1096 +44,754 @@ typedef struct ob_conf { + */ + } ob_conf_t; + +-typedef struct ob_inode { +- inode_t *inode; +- struct list_head resume_fops; +- struct list_head ob_fds; +- int count; +- int op_ret; +- int op_errno; +- gf_boolean_t open_in_progress; +- int unlinked; +-} ob_inode_t; ++/* A negative state represents an errno value negated. In this case the ++ * current operation cannot be processed. */ ++typedef enum _ob_state { ++ /* There are no opens on the inode or the first open is already ++ * completed. The current operation can be sent directly. */ ++ OB_STATE_READY = 0, + +-typedef struct ob_fd { +- call_frame_t *open_frame; +- loc_t loc; +- dict_t *xdata; +- int flags; +- int op_errno; +- ob_inode_t *ob_inode; +- fd_t *fd; +- gf_boolean_t opened; +- gf_boolean_t ob_inode_fops_waiting; +- struct list_head list; +- struct list_head ob_fds_on_inode; +-} ob_fd_t; ++ /* There's an open pending and it has been triggered. The current ++ * operation should be "stubbified" and processed with ++ * ob_stub_dispatch(). */ ++ OB_STATE_OPEN_TRIGGERED, + +-ob_inode_t * +-ob_inode_alloc(inode_t *inode) +-{ +- ob_inode_t *ob_inode = NULL; ++ /* There's an open pending but it has not been triggered. The current ++ * operation can be processed directly but using an anonymous fd. */ ++ OB_STATE_OPEN_PENDING, + +- ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); +- if (ob_inode == NULL) +- goto out; ++ /* The current operation is the first open on the inode. 
*/ ++ OB_STATE_FIRST_OPEN ++} ob_state_t; + +- ob_inode->inode = inode; +- INIT_LIST_HEAD(&ob_inode->resume_fops); +- INIT_LIST_HEAD(&ob_inode->ob_fds); +-out: +- return ob_inode; +-} +- +-void +-ob_inode_free(ob_inode_t *ob_inode) +-{ +- if (ob_inode == NULL) +- goto out; ++typedef struct ob_inode { ++ /* List of stubs pending on the first open. Once the first open is ++ * complete, all these stubs will be resubmitted, and dependencies ++ * will be checked again. */ ++ struct list_head resume_fops; + +- list_del_init(&ob_inode->resume_fops); +- list_del_init(&ob_inode->ob_fds); ++ /* The inode this object references. */ ++ inode_t *inode; + +- GF_FREE(ob_inode); +-out: +- return; +-} ++ /* The fd from the first open sent to this inode. It will be set ++ * from the moment the open is processed until the open if fully ++ * executed or closed before actually opened. It's NULL in all ++ * other cases. */ ++ fd_t *first_fd; ++ ++ /* The stub from the first open operation. When open fop starts ++ * being processed, it's assigned the OB_OPEN_PREPARING value ++ * until the actual stub is created. This is necessary to avoid ++ * creating the stub inside a locked region. Once the stub is ++ * successfully created, it's assigned here. This value is set ++ * to NULL once the stub is resumed. */ ++ call_stub_t *first_open; ++ ++ /* The total number of currently open fd's on this inode. */ ++ int32_t open_count; ++ ++ /* This flag is set as soon as we know that the open will be ++ * sent to the bricks, even before the stub is ready. */ ++ bool triggered; ++} ob_inode_t; + +-ob_inode_t * +-ob_inode_get(xlator_t *this, inode_t *inode) ++/* Dummy pointer used temporarily while the actual open stub is being created */ ++#define OB_OPEN_PREPARING ((call_stub_t *)-1) ++ ++#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) 
\ ++ case OB_STATE_FIRST_OPEN: \ ++ gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \ ++ "fop=%s", #_fop, "state=%d", __ob_state, NULL); \ ++ default_##_fop##_failure_cbk(_frame, EINVAL); \ ++ break; \ ++ case OB_STATE_READY: \ ++ default_##_fop(_frame, _xl, ##_args); \ ++ break; \ ++ case OB_STATE_OPEN_TRIGGERED: { \ ++ call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \ ++ ##_args); \ ++ if (__ob_stub != NULL) { \ ++ ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \ ++ break; \ ++ } \ ++ __ob_state = -ENOMEM; \ ++ } \ ++ default: \ ++ gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \ ++ OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \ ++ default_##_fop##_failure_cbk(_frame, -__ob_state) ++ ++#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_fd( \ ++ _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ if (!(_trigger)) { \ ++ fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \ ++ (_fd)->flags); \ ++ if (__ob_fd != NULL) { \ ++ default_##_fop(_frame, _xl, ##_args); \ ++ fd_unref(__ob_fd); \ ++ break; \ ++ } \ ++ __ob_state = -ENOMEM; \ ++ } \ ++ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_fd( \ ++ _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \ ++ break; \ ++ OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) 
\ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_inode( \ ++ _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++static ob_inode_t * ++ob_inode_get_locked(xlator_t *this, inode_t *inode) + { + ob_inode_t *ob_inode = NULL; + uint64_t value = 0; +- int ret = 0; + +- if (!inode) +- goto out; ++ if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) { ++ return (ob_inode_t *)(uintptr_t)value; ++ } + +- LOCK(&inode->lock); +- { +- __inode_ctx_get(inode, this, &value); +- if (value == 0) { +- ob_inode = ob_inode_alloc(inode); +- if (ob_inode == NULL) +- goto unlock; +- +- value = (uint64_t)(uintptr_t)ob_inode; +- ret = __inode_ctx_set(inode, this, &value); +- if (ret < 0) { +- ob_inode_free(ob_inode); +- ob_inode = NULL; +- } +- } else { +- ob_inode = (ob_inode_t *)(uintptr_t)value; ++ ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); ++ if (ob_inode != NULL) { ++ ob_inode->inode = inode; ++ INIT_LIST_HEAD(&ob_inode->resume_fops); ++ ++ value = (uint64_t)(uintptr_t)ob_inode; ++ if (__inode_ctx_set(inode, this, &value) < 0) { ++ GF_FREE(ob_inode); ++ ob_inode = NULL; + } + } +-unlock: +- UNLOCK(&inode->lock); + +-out: + return ob_inode; + } + +-ob_fd_t * +-__ob_fd_ctx_get(xlator_t *this, fd_t *fd) ++static ob_state_t ++ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd, ++ int32_t open_count, bool synchronous, bool trigger, ++ ob_inode_t **pob_inode, fd_t **pfd) + { +- uint64_t value = 0; +- int ret = -1; +- ob_fd_t *ob_fd = NULL; ++ ob_conf_t *conf; ++ ob_inode_t *ob_inode; ++ call_stub_t *open_stub; + +- ret = __fd_ctx_get(fd, this, &value); +- if (ret) +- return NULL; ++ if (inode == NULL) { ++ return OB_STATE_READY; ++ } + +- ob_fd = (void *)((long)value); ++ conf = xl->private; + +- return ob_fd; +-} ++ *pfd = 
NULL; + +-ob_fd_t * +-ob_fd_ctx_get(xlator_t *this, fd_t *fd) +-{ +- ob_fd_t *ob_fd = NULL; +- +- LOCK(&fd->lock); ++ LOCK(&inode->lock); + { +- ob_fd = __ob_fd_ctx_get(this, fd); +- } +- UNLOCK(&fd->lock); +- +- return ob_fd; +-} ++ ob_inode = ob_inode_get_locked(xl, inode); ++ if (ob_inode == NULL) { ++ UNLOCK(&inode->lock); + +-int +-__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +-{ +- uint64_t value = 0; +- int ret = -1; ++ return -ENOMEM; ++ } ++ *pob_inode = ob_inode; ++ ++ ob_inode->open_count += open_count; ++ ++ /* If first_fd is not NULL, it means that there's a previous open not ++ * yet completed. */ ++ if (ob_inode->first_fd != NULL) { ++ *pfd = ob_inode->first_fd; ++ /* If the current request doesn't trigger the open and it hasn't ++ * been triggered yet, we can continue without issuing the open ++ * only if the current request belongs to the same fd as the ++ * first one. */ ++ if (!trigger && !ob_inode->triggered && ++ (ob_inode->first_fd == fd)) { ++ UNLOCK(&inode->lock); ++ ++ return OB_STATE_OPEN_PENDING; ++ } + +- value = (long)((void *)ob_fd); ++ /* We need to issue the open. It could have already been triggered ++ * before. In this case open_stub will be NULL. Or the initial open ++ * may not be completely ready yet. In this case open_stub will be ++ * OB_OPEN_PREPARING. */ ++ open_stub = ob_inode->first_open; ++ ob_inode->first_open = NULL; ++ ob_inode->triggered = true; + +- ret = __fd_ctx_set(fd, this, value); ++ UNLOCK(&inode->lock); + +- return ret; +-} ++ if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) { ++ call_resume(open_stub); ++ } + +-int +-ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +-{ +- int ret = -1; ++ return OB_STATE_OPEN_TRIGGERED; ++ } + +- LOCK(&fd->lock); +- { +- ret = __ob_fd_ctx_set(this, fd, ob_fd); +- } +- UNLOCK(&fd->lock); ++ /* There's no pending open. Only opens can be non synchronous, so all ++ * regular fops will be processed directly. 
For non synchronous opens, ++ * we'll still process them normally (i.e. synchornous) if there are ++ * more file descriptors open. */ ++ if (synchronous || (ob_inode->open_count > open_count)) { ++ UNLOCK(&inode->lock); + +- return ret; +-} ++ return OB_STATE_READY; ++ } + +-ob_fd_t * +-ob_fd_new(void) +-{ +- ob_fd_t *ob_fd = NULL; ++ *pfd = fd; + +- ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t); ++ /* This is the first open. We keep a reference on the fd and set ++ * first_open stub to OB_OPEN_PREPARING until the actual stub can ++ * be assigned (we don't create the stub here to avoid doing memory ++ * allocations inside the mutex). */ ++ ob_inode->first_fd = __fd_ref(fd); ++ ob_inode->first_open = OB_OPEN_PREPARING; + +- INIT_LIST_HEAD(&ob_fd->list); +- INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode); ++ /* If lazy_open is not set, we'll need to immediately send the open, ++ * so we set triggered right now. */ ++ ob_inode->triggered = !conf->lazy_open; ++ } ++ UNLOCK(&inode->lock); + +- return ob_fd; ++ return OB_STATE_FIRST_OPEN; + } + +-void +-ob_fd_free(ob_fd_t *ob_fd) ++static ob_state_t ++ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count, ++ bool synchronous, bool trigger, ob_inode_t **pob_inode, ++ fd_t **pfd) + { +- LOCK(&ob_fd->fd->inode->lock); +- { +- list_del_init(&ob_fd->ob_fds_on_inode); +- } +- UNLOCK(&ob_fd->fd->inode->lock); +- +- loc_wipe(&ob_fd->loc); +- +- if (ob_fd->xdata) +- dict_unref(ob_fd->xdata); ++ uint64_t err; + +- if (ob_fd->open_frame) { +- /* If we sill have a frame it means that background open has never +- * been triggered. We need to release the pending reference. 
*/ +- fd_unref(ob_fd->fd); +- +- STACK_DESTROY(ob_fd->open_frame->root); ++ if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) { ++ return (ob_state_t)-err; + } + +- GF_FREE(ob_fd); ++ return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous, ++ trigger, pob_inode, pfd); + } + +-int +-ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, +- int op_errno, fd_t *fd_ret, dict_t *xdata) ++static ob_state_t ++ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode, ++ fd_t **pfd) + { +- fd_t *fd = NULL; +- int count = 0; +- int ob_inode_op_ret = 0; +- int ob_inode_op_errno = 0; +- ob_fd_t *ob_fd = NULL; +- call_stub_t *stub = NULL, *tmp = NULL; +- ob_inode_t *ob_inode = NULL; +- gf_boolean_t ob_inode_fops_waiting = _gf_false; +- struct list_head fops_waiting_on_fd, fops_waiting_on_inode; ++ bool synchronous; + +- fd = frame->local; +- frame->local = NULL; +- +- INIT_LIST_HEAD(&fops_waiting_on_fd); +- INIT_LIST_HEAD(&fops_waiting_on_inode); ++ /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't ++ * we also execute this open synchronously ? 
*/ ++ synchronous = (flags & O_TRUNC) != 0; + +- ob_inode = ob_inode_get(this, fd->inode); ++ return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd); ++} + +- LOCK(&fd->lock); ++static int32_t ++ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, ++ call_stub_t *stub) ++{ ++ LOCK(&ob_inode->inode->lock); + { +- ob_fd = __ob_fd_ctx_get(this, fd); +- ob_fd->opened = _gf_true; +- +- ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting; +- +- list_splice_init(&ob_fd->list, &fops_waiting_on_fd); +- +- if (op_ret < 0) { +- /* mark fd BAD for ever */ +- ob_fd->op_errno = op_errno; +- ob_fd = NULL; /*shouldn't be freed*/ +- } else { +- __fd_ctx_del(fd, this, NULL); +- } +- } +- UNLOCK(&fd->lock); +- +- if (ob_inode_fops_waiting) { +- LOCK(&fd->inode->lock); +- { +- count = --ob_inode->count; +- if (op_ret < 0) { +- /* TODO: when to reset the error? */ +- ob_inode->op_ret = -1; +- ob_inode->op_errno = op_errno; +- } +- +- if (count == 0) { +- ob_inode->open_in_progress = _gf_false; +- ob_inode_op_ret = ob_inode->op_ret; +- ob_inode_op_errno = ob_inode->op_errno; +- list_splice_init(&ob_inode->resume_fops, +- &fops_waiting_on_inode); +- } ++ /* We only queue a stub if the open has not been completed or ++ * cancelled. */ ++ if (ob_inode->first_fd == fd) { ++ list_add_tail(&stub->list, &ob_inode->resume_fops); ++ stub = NULL; + } +- UNLOCK(&fd->inode->lock); +- } +- +- if (ob_fd) +- ob_fd_free(ob_fd); +- +- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list) +- { +- list_del_init(&stub->list); +- +- if (op_ret < 0) +- call_unwind_error(stub, -1, op_errno); +- else +- call_resume(stub); + } ++ UNLOCK(&ob_inode->inode->lock); + +- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list) +- { +- list_del_init(&stub->list); +- +- if (ob_inode_op_ret < 0) +- call_unwind_error(stub, -1, ob_inode_op_errno); +- else +- call_resume(stub); ++ if (stub != NULL) { ++ call_resume(stub); + } + +- /* The background open is completed. 
We can release the 'fd' reference. */ +- fd_unref(fd); +- +- STACK_DESTROY(frame->root); +- + return 0; + } + +-int +-ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) ++static int32_t ++ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, ++ call_stub_t *stub) + { +- call_frame_t *frame = NULL; +- +- if (ob_fd == NULL) { +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) +- goto unlock; ++ bool closed; + +- frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; +- } +- unlock: +- UNLOCK(&fd->lock); +- } else { +- LOCK(&fd->lock); +- { +- frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; ++ LOCK(&ob_inode->inode->lock); ++ { ++ closed = ob_inode->first_fd != fd; ++ if (!closed) { ++ if (ob_inode->triggered) { ++ ob_inode->first_open = NULL; ++ } else { ++ ob_inode->first_open = stub; ++ stub = NULL; ++ } + } +- UNLOCK(&fd->lock); + } ++ UNLOCK(&ob_inode->inode->lock); + +- if (frame) { +- /* We don't need to take a reference here. We already have a reference +- * while the open is pending. 
*/ +- frame->local = fd; +- +- STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +- ob_fd->xdata); ++ if (stub != NULL) { ++ if (closed) { ++ call_stub_destroy(stub); ++ fd_unref(fd); ++ } else { ++ call_resume(stub); ++ } + } + + return 0; + } + +-void +-ob_inode_wake(xlator_t *this, struct list_head *ob_fds) ++static void ++ob_resume_pending(struct list_head *list) + { +- ob_fd_t *ob_fd = NULL, *tmp = NULL; ++ call_stub_t *stub; + +- if (!list_empty(ob_fds)) { +- list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) +- { +- ob_fd_wake(this, ob_fd->fd, ob_fd); +- ob_fd_free(ob_fd); +- } +- } +-} ++ while (!list_empty(list)) { ++ stub = list_first_entry(list, call_stub_t, list); ++ list_del_init(&stub->list); + +-/* called holding inode->lock and fd->lock */ +-void +-ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) +-{ +- if (!src || !dst) +- goto out; +- +- dst->fd = src->fd; +- dst->loc.inode = inode_ref(src->loc.inode); +- gf_uuid_copy(dst->loc.gfid, src->loc.gfid); +- dst->flags = src->flags; +- dst->xdata = dict_ref(src->xdata); +- dst->ob_inode = src->ob_inode; +-out: +- return; ++ call_resume(stub); ++ } + } + +-int +-open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode, +- call_stub_t *stub) ++static void ++ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret, ++ int32_t op_errno) + { +- ob_inode_t *ob_inode = NULL; +- ob_fd_t *ob_fd = NULL, *tmp = NULL; +- gf_boolean_t was_open_in_progress = _gf_false; +- gf_boolean_t wait_for_open = _gf_false; +- struct list_head ob_fds; ++ struct list_head list; + +- ob_inode = ob_inode_get(this, inode); +- if (ob_inode == NULL) +- goto out; ++ INIT_LIST_HEAD(&list); + +- INIT_LIST_HEAD(&ob_fds); ++ if (op_ret < 0) { ++ fd_ctx_set(fd, xl, op_errno <= 0 ? 
EIO : op_errno); ++ } + +- LOCK(&inode->lock); ++ LOCK(&ob_inode->inode->lock); + { +- was_open_in_progress = ob_inode->open_in_progress; +- ob_inode->unlinked = 1; +- +- if (was_open_in_progress) { +- list_add_tail(&stub->list, &ob_inode->resume_fops); +- goto inode_unlock; +- } +- +- list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode) +- { +- LOCK(&ob_fd->fd->lock); +- { +- if (ob_fd->opened) +- goto fd_unlock; +- +- ob_inode->count++; +- ob_fd->ob_inode_fops_waiting = _gf_true; +- +- if (ob_fd->open_frame == NULL) { +- /* open in progress no need of wake */ +- } else { +- tmp = ob_fd_new(); +- tmp->open_frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; +- +- ob_fd_copy(ob_fd, tmp); +- list_add_tail(&tmp->ob_fds_on_inode, &ob_fds); +- } +- } +- fd_unlock: +- UNLOCK(&ob_fd->fd->lock); +- } +- +- if (ob_inode->count) { +- wait_for_open = ob_inode->open_in_progress = _gf_true; +- list_add_tail(&stub->list, &ob_inode->resume_fops); ++ /* Only update the fields if the file has not been closed before ++ * getting here. 
*/ ++ if (ob_inode->first_fd == fd) { ++ list_splice_init(&ob_inode->resume_fops, &list); ++ ob_inode->first_fd = NULL; ++ ob_inode->first_open = NULL; ++ ob_inode->triggered = false; + } + } +-inode_unlock: +- UNLOCK(&inode->lock); ++ UNLOCK(&ob_inode->inode->lock); + +-out: +- if (!was_open_in_progress) { +- if (!wait_for_open) { +- call_resume(stub); +- } else { +- ob_inode_wake(this, &ob_fds); +- } +- } ++ ob_resume_pending(&list); + +- return 0; ++ fd_unref(fd); + } + +-int +-open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub) ++static int32_t ++ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, ++ int32_t op_errno, fd_t *fd, dict_t *xdata) + { +- ob_fd_t *ob_fd = NULL; +- int op_errno = 0; +- +- if (!fd) +- goto nofd; +- +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) +- goto unlock; ++ ob_inode_t *ob_inode; + +- if (ob_fd->op_errno) { +- op_errno = ob_fd->op_errno; +- goto unlock; +- } ++ ob_inode = frame->local; ++ frame->local = NULL; + +- list_add_tail(&stub->list, &ob_fd->list); +- } +-unlock: +- UNLOCK(&fd->lock); ++ ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno); + +-nofd: +- if (op_errno) +- call_unwind_error(stub, -1, op_errno); +- else if (ob_fd) +- ob_fd_wake(this, fd, NULL); +- else +- call_resume(stub); ++ STACK_DESTROY(frame->root); + + return 0; + } + +-int +-ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, ++static int32_t ++ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) + { +- ob_fd_t *ob_fd = NULL; +- int ret = -1; +- ob_conf_t *conf = NULL; +- ob_inode_t *ob_inode = NULL; +- gf_boolean_t open_in_progress = _gf_false; +- int unlinked = 0; +- +- conf = this->private; +- +- if (flags & O_TRUNC) { +- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- return 0; +- } +- +- ob_inode = ob_inode_get(this, fd->inode); +- +- ob_fd = 
ob_fd_new(); +- if (!ob_fd) +- goto enomem; +- +- ob_fd->ob_inode = ob_inode; +- +- ob_fd->fd = fd; +- +- ob_fd->open_frame = copy_frame(frame); +- if (!ob_fd->open_frame) +- goto enomem; +- ret = loc_copy(&ob_fd->loc, loc); +- if (ret) +- goto enomem; +- +- ob_fd->flags = flags; +- if (xdata) +- ob_fd->xdata = dict_ref(xdata); +- +- LOCK(&fd->inode->lock); +- { +- open_in_progress = ob_inode->open_in_progress; +- unlinked = ob_inode->unlinked; +- if (!open_in_progress && !unlinked) { +- ret = ob_fd_ctx_set(this, fd, ob_fd); +- if (ret) { +- UNLOCK(&fd->inode->lock); +- goto enomem; +- } +- +- list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds); +- } +- } +- UNLOCK(&fd->inode->lock); +- +- /* We take a reference while the background open is pending or being +- * processed. If we finally wind the request in the foreground, then +- * ob_fd_free() will take care of this additional reference. */ +- fd_ref(fd); +- +- if (!open_in_progress && !unlinked) { +- STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); +- +- if (!conf->lazy_open) +- ob_fd_wake(this, fd, NULL); +- } else { +- ob_fd_free(ob_fd); +- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- } ++ STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + + return 0; +-enomem: +- if (ob_fd) { +- if (ob_fd->open_frame) +- STACK_DESTROY(ob_fd->open_frame->root); +- +- loc_wipe(&ob_fd->loc); +- if (ob_fd->xdata) +- dict_unref(ob_fd->xdata); +- +- GF_FREE(ob_fd); +- } +- +- return -1; + } + +-int ++static int32_t + ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) + { +- fd_t *old_fd = NULL; +- int ret = -1; +- int op_errno = ENOMEM; +- call_stub_t *stub = NULL; +- +- old_fd = fd_lookup(fd->inode, 0); +- if (old_fd) { +- /* open-behind only when this is the first FD */ +- stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); 
+- if (!stub) { +- fd_unref(old_fd); +- goto err; +- } +- +- open_and_resume(this, old_fd, stub); ++ ob_inode_t *ob_inode; ++ call_frame_t *open_frame; ++ call_stub_t *stub; ++ fd_t *first_fd; ++ ob_state_t state; ++ ++ state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd); ++ if (state == OB_STATE_READY) { ++ /* There's no pending open, but there are other file descriptors opened ++ * or the current flags require a synchronous open. */ ++ return default_open(frame, this, loc, flags, fd, xdata); ++ } + +- fd_unref(old_fd); ++ if (state == OB_STATE_OPEN_TRIGGERED) { ++ /* The first open is in progress (either because it was already issued ++ * or because this request triggered it). We try to create a new stub ++ * to retry the operation once the initial open completes. */ ++ stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata); ++ if (stub != NULL) { ++ return ob_stub_dispatch(this, ob_inode, first_fd, stub); ++ } + +- return 0; ++ state = -ENOMEM; + } + +- ret = ob_open_behind(frame, this, loc, flags, fd, xdata); +- if (ret) { +- goto err; +- } ++ if (state == OB_STATE_FIRST_OPEN) { ++ /* We try to create a stub for the new open. A new frame needs to be ++ * used because the current one may be destroyed soon after sending ++ * the open's reply. */ ++ open_frame = copy_frame(frame); ++ if (open_frame != NULL) { ++ stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd, ++ xdata); ++ if (stub != NULL) { ++ open_frame->local = ob_inode; + +- return 0; +-err: +- gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s", +- loc->path); ++ /* TODO: Previous version passed xdata back to the caller, but ++ * probably this doesn't make sense since it won't contain ++ * any requested data. I think it would be better to pass ++ * NULL for xdata. 
*/ ++ default_open_cbk(frame, NULL, this, 0, 0, fd, xdata); + +- STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0); ++ return ob_open_dispatch(this, ob_inode, first_fd, stub); ++ } + +- return 0; +-} ++ STACK_DESTROY(open_frame->root); ++ } + +-fd_t * +-ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag) +-{ +- fd_t *wind_fd = NULL; +- ob_fd_t *ob_fd = NULL; +- ob_conf_t *conf = NULL; ++ /* In case of error, simulate a regular completion but with an error ++ * code. */ ++ ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM); + +- conf = this->private; ++ state = -ENOMEM; ++ } + +- ob_fd = ob_fd_ctx_get(this, fd); ++ /* In case of failure we need to decrement the number of open files because ++ * ob_fdclose() won't be called. */ + +- if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) { +- wind_fd = fd_anonymous(fd->inode); +- if ((ob_fd->flags & O_DIRECT) && (flag)) +- *flag = *flag | O_DIRECT; +- } else { +- wind_fd = fd_ref(fd); ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode->open_count--; + } ++ UNLOCK(&fd->inode->lock); + +- return wind_fd; ++ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", ++ "open", "path=%s", loc->path, NULL); ++ ++ return default_open_failure_cbk(frame, -state); + } + +-int ++static int32_t + ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- ob_conf_t *conf = NULL; ++ ob_conf_t *conf = this->private; ++ bool trigger = conf->read_after_open || !conf->use_anonymous_fd; + +- conf = this->private; +- +- if (!conf->read_after_open) +- wind_fd = ob_get_wind_fd(this, fd, &flags); +- else +- wind_fd = fd_ref(fd); +- +- stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset, +- flags, xdata); +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 
0, 0); ++ OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata); + + return 0; + } + +-int ++static int32_t + ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset, +- flags, iobref, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags, ++ iobref, xdata); + + return 0; + } + +-int ++static int32_t + ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- +- wind_fd = ob_get_wind_fd(this, fd, NULL); +- +- stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata); ++ ob_conf_t *conf = this->private; ++ bool trigger = !conf->use_anonymous_fd; + +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata); + + return 0; + } + +-int ++static int32_t + ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- +- wind_fd = ob_get_wind_fd(this, fd, NULL); ++ ob_conf_t *conf = this->private; ++ bool trigger = !conf->use_anonymous_fd; + +- stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what, +- xdata); +- +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata); + + return 0; + } + +-int ++static 
int32_t + ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- ob_fd_t *ob_fd = NULL; +- gf_boolean_t unwind = _gf_false; +- +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (ob_fd && ob_fd->open_frame) +- /* if open() was never wound to backend, +- no need to wind flush() either. +- */ +- unwind = _gf_true; +- } +- UNLOCK(&fd->lock); +- +- if (unwind) +- goto unwind; +- +- stub = fop_flush_stub(frame, default_flush_resume, fd, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0); +- +- return 0; +- +-unwind: +- STACK_UNWIND_STRICT(flush, frame, 0, 0, 0); ++ OB_POST_FLUSH(this, frame, fd, fd, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata); + + return 0; + } + +-int ++static int32_t + ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata); + + return 0; + } + +-int ++static int32_t + ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, 
fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, + int flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata); + + return 0; + } + +-int ++static int32_t + ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata); + + return 0; + } + +-int ++static int32_t + ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata); + + return 0; + } + +-int ++static int32_t + ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int cmd, struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, +- volume, fd, cmd, flock, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(finodelk, frame, 
-1, ENOMEM, 0); ++ OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata); + + return 0; + } + +-int ++static int32_t + ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) + { +- call_stub_t *stub = fop_fentrylk_stub( +- frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type, ++ xdata); + + return 0; + } + +-int ++static int32_t + ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { +- call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, +- optype, xattr, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt, + int valid, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata); + + return 0; + } + +-int ++static int32_t + ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset, +- len, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); ++ OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, 
len, xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); +- return 0; + } + +-int ++static int32_t + ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); ++ OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); +- return 0; + } + +-int ++static int32_t + ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len, +- xdata); +- if (!stub) +- goto err; ++ OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata); + +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + +-int ++static int32_t + ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, loc->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata); + + return 0; + } + +-int ++static int32_t + ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, dst->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 
0, 0); ++ OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata); + + return 0; + } + +-int32_t ++static int32_t + ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid, +- xdata); +- if (!stub) +- goto err; ++ OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid, ++ xdata); + +- open_all_pending_fds_and_resume(this, loc->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + +-int32_t ++static int32_t + ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- gf_boolean_t access_xattr = _gf_false; +- + if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) || + dict_get(dict, POSIX_ACL_ACCESS_XATTR) || +- dict_get(dict, GF_SELINUX_XATTR_KEY)) +- access_xattr = _gf_true; +- +- if (!access_xattr) ++ dict_get(dict, GF_SELINUX_XATTR_KEY)) { + return default_setxattr(frame, this, loc, dict, flags, xdata); ++ } + +- stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags, +- xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, loc->inode, stub); ++ OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags, ++ xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL); +- return 0; + } + +-int +-ob_release(xlator_t *this, fd_t *fd) ++static void ++ob_fdclose(xlator_t *this, fd_t *fd) + { +- ob_fd_t *ob_fd = NULL; ++ struct list_head list; ++ ob_inode_t *ob_inode; ++ call_stub_t *stub; ++ ++ INIT_LIST_HEAD(&list); ++ stub = NULL; + +- ob_fd = ob_fd_ctx_get(this, fd); ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode = ob_inode_get_locked(this, fd->inode); ++ if (ob_inode != NULL) { ++ ob_inode->open_count--; ++ ++ /* If this fd is the same as 
ob_inode->first_fd, it means that ++ * the initial open has not fully completed. We'll try to cancel ++ * it. */ ++ if (ob_inode->first_fd == fd) { ++ if (ob_inode->first_open == OB_OPEN_PREPARING) { ++ /* In this case ob_open_dispatch() has not been called yet. ++ * We clear first_fd and first_open to allow that function ++ * to know that the open is not really needed. This also ++ * allows other requests to work as expected if they ++ * arrive before the dispatch function is called. If there ++ * are pending fops, we can directly process them here. ++ * (note that there shouldn't be any fd related fops, but ++ * if there are, it's fine if they fail). */ ++ ob_inode->first_fd = NULL; ++ ob_inode->first_open = NULL; ++ ob_inode->triggered = false; ++ list_splice_init(&ob_inode->resume_fops, &list); ++ } else if (!ob_inode->triggered) { ++ /* If the open has already been dispatched, we can only ++ * cancel it if it has not been triggered. Otherwise we ++ * simply wait until it completes. While it's not triggered, ++ * first_open must be a valid stub and there can't be any ++ * pending fops. 
*/ ++ GF_ASSERT((ob_inode->first_open != NULL) && ++ list_empty(&ob_inode->resume_fops)); ++ ++ ob_inode->first_fd = NULL; ++ stub = ob_inode->first_open; ++ ob_inode->first_open = NULL; ++ } ++ } ++ } ++ } ++ UNLOCK(&fd->inode->lock); + +- ob_fd_free(ob_fd); ++ if (stub != NULL) { ++ call_stub_destroy(stub); ++ fd_unref(fd); ++ } + +- return 0; ++ ob_resume_pending(&list); + } + + int + ob_forget(xlator_t *this, inode_t *inode) + { +- ob_inode_t *ob_inode = NULL; ++ ob_inode_t *ob_inode; + uint64_t value = 0; + +- inode_ctx_del(inode, this, &value); +- +- if (value) { ++ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) { + ob_inode = (ob_inode_t *)(uintptr_t)value; +- ob_inode_free(ob_inode); ++ GF_FREE(ob_inode); + } + + return 0; +@@ -1153,20 +823,18 @@ ob_priv_dump(xlator_t *this) + int + ob_fdctx_dump(xlator_t *this, fd_t *fd) + { +- ob_fd_t *ob_fd = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; +- int ret = 0; ++ uint64_t value = 0; ++ int ret = 0, error = 0; + + ret = TRY_LOCK(&fd->lock); + if (ret) + return 0; + +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) { +- UNLOCK(&fd->lock); +- return 0; ++ if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) { ++ error = (int32_t)value; + } + + gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind", +@@ -1175,17 +843,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd) + + gf_proc_dump_write("fd", "%p", fd); + +- gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame); +- +- if (ob_fd->open_frame) +- gf_proc_dump_write("open_frame.root.unique", "%" PRIu64, +- ob_fd->open_frame->root->unique); +- +- gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path); +- +- gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid)); +- +- gf_proc_dump_write("flags", "%d", ob_fd->flags); ++ gf_proc_dump_write("error", "%d", error); + + UNLOCK(&fd->lock); + +@@ -1307,7 +965,7 @@ struct xlator_fops fops = { + }; + + struct xlator_cbks cbks = { +- .release = ob_release, ++ .fdclose = 
ob_fdclose, + .forget = ob_forget, + }; + +-- +1.8.3.1 + diff --git a/SOURCES/0524-open-behind-fix-call_frame-leak.patch b/SOURCES/0524-open-behind-fix-call_frame-leak.patch new file mode 100644 index 0000000..75a243d --- /dev/null +++ b/SOURCES/0524-open-behind-fix-call_frame-leak.patch @@ -0,0 +1,70 @@ +From 36dddf59a02d91d3db5b124be626ab6bc235ed5a Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 19 Aug 2020 23:27:38 +0200 +Subject: [PATCH 524/526] open-behind: fix call_frame leak + +When an open was delayed, a copy of the frame was created because the +current frame was used to unwind the "fake" open. When the open was +actually sent, the frame was correctly destroyed. However if the file +was closed before needing to send the open, the frame was not destroyed. + +This patch correctly destroys the frame in all cases. + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24892 +> Change-Id: I8c00fc7f15545c240e8151305d9e4cf06d653926 +> Signed-off-by: Xavi Hernandez +> Fixes: #1440 + +BUG: 1830713 +Change-Id: I8c00fc7f15545c240e8151305d9e4cf06d653926 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/224488 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index e43fe73..1ab635e 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -333,6 +333,14 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + return 0; + } + ++static void ++ob_open_destroy(call_stub_t *stub, fd_t *fd) ++{ ++ STACK_DESTROY(stub->frame->root); ++ call_stub_destroy(stub); ++ fd_unref(fd); ++} ++ + static int32_t + ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, 
fd_t *fd, + call_stub_t *stub) +@@ -355,8 +363,7 @@ ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + + if (stub != NULL) { + if (closed) { +- call_stub_destroy(stub); +- fd_unref(fd); ++ ob_open_destroy(stub, fd); + } else { + call_resume(stub); + } +@@ -776,8 +783,7 @@ ob_fdclose(xlator_t *this, fd_t *fd) + UNLOCK(&fd->inode->lock); + + if (stub != NULL) { +- call_stub_destroy(stub); +- fd_unref(fd); ++ ob_open_destroy(stub, fd); + } + + ob_resume_pending(&list); +-- +1.8.3.1 + diff --git a/SOURCES/0525-open-behind-implement-create-fop.patch b/SOURCES/0525-open-behind-implement-create-fop.patch new file mode 100644 index 0000000..c7a5329 --- /dev/null +++ b/SOURCES/0525-open-behind-implement-create-fop.patch @@ -0,0 +1,109 @@ +From 41aae052b5e3afe64d3e0668643726bab0e77265 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 4 Sep 2020 14:49:50 +0200 +Subject: [PATCH 525/526] open-behind: implement create fop + +Open behind didn't implement create fop. This caused that files created +were not accounted for the number of open fd's. This could cause future +opens to be delayed when they shouldn't. + +This patch implements the create fop. It also fixes a problem when +destroying the stack: when frame->local was not NULL, STACK_DESTROY() +tried to mem_put() it, which is not correct. 
+ +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24953 +> Fixes: #1440 +> Change-Id: Ic982bad07d4af30b915d7eb1fbcef7a847a45869 +> Signed-off-by: Xavi Hernandez + +BUG: 1830713 +Change-Id: Ic982bad07d4af30b915d7eb1fbcef7a847a45869 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/224489 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 52 +++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 1ab635e..600c3b6 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -336,6 +336,7 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + static void + ob_open_destroy(call_stub_t *stub, fd_t *fd) + { ++ stub->frame->local = NULL; + STACK_DESTROY(stub->frame->root); + call_stub_destroy(stub); + fd_unref(fd); +@@ -516,6 +517,56 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + } + + static int32_t ++ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) ++{ ++ ob_inode_t *ob_inode; ++ call_stub_t *stub; ++ fd_t *first_fd; ++ ob_state_t state; ++ ++ /* Create requests are never delayed. We always send them synchronously. */ ++ state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode, ++ &first_fd); ++ if (state == OB_STATE_READY) { ++ /* There's no pending open, but there are other file descriptors opened ++ * so we simply forward the request synchronously. */ ++ return default_create(frame, this, loc, flags, mode, umask, fd, xdata); ++ } ++ ++ if (state == OB_STATE_OPEN_TRIGGERED) { ++ /* The first open is in progress (either because it was already issued ++ * or because this request triggered it). 
We try to create a new stub ++ * to retry the operation once the initial open completes. */ ++ stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd, ++ xdata); ++ if (stub != NULL) { ++ return ob_stub_dispatch(this, ob_inode, first_fd, stub); ++ } ++ ++ state = -ENOMEM; ++ } ++ ++ /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never ++ * be returned by ob_open_and_resume_fd(). If we are here it can only be ++ * because there has been a problem. */ ++ ++ /* In case of failure we need to decrement the number of open files because ++ * ob_fdclose() won't be called. */ ++ ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode->open_count--; ++ } ++ UNLOCK(&fd->inode->lock); ++ ++ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", ++ "create", "path=%s", loc->path, NULL); ++ ++ return default_create_failure_cbk(frame, -state); ++} ++ ++static int32_t + ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +@@ -946,6 +997,7 @@ fini(xlator_t *this) + + struct xlator_fops fops = { + .open = ob_open, ++ .create = ob_create, + .readv = ob_readv, + .writev = ob_writev, + .flush = ob_flush, +-- +1.8.3.1 + diff --git a/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch b/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch new file mode 100644 index 0000000..fb74fd8 --- /dev/null +++ b/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch @@ -0,0 +1,44 @@ +From baeca3c9b70548463ceea0ae27e6f98cf06e96b7 Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar +Date: Tue, 28 Jul 2020 22:27:34 +0530 +Subject: [PATCH 526/526] Quota quota_fsck.py, converting byte string to string + +Issue: The quota_fsck.py script throws a TypeError +due to the fact that the data is read as bytes and then +the string operations are applied on it. Now, in python3 +string is unicode and hence we get the type error.
+ +Code Changes: +Decoding the bytes value into utf-8 format. + +>Change-Id: Ia1ff52a821d664a371c8166692ff506ae39f6e40 +>Signed-off-by: srijan-sivakumar +>Fixes: #1401 +Upstream patch: https://review.gluster.org/c/glusterfs/+/24785 + +BUG: 1719171 +Change-Id: Ia1ff52a821d664a371c8166692ff506ae39f6e40 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/224780 +Tested-by: RHGS Build Bot +Reviewed-by: Kshithij Iyer +Reviewed-by: Rinku Kothiya +--- + extras/quota/quota_fsck.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py +index 174f2a2..ea8d638 100755 +--- a/extras/quota/quota_fsck.py ++++ b/extras/quota/quota_fsck.py +@@ -157,6 +157,7 @@ def get_quota_xattr_brick(dpath): + xattr_dict['parents'] = {} + + for xattr in pairs: ++ xattr = xattr.decode("utf-8") + xattr_key = xattr.split("=")[0] + if re.search("# file:", xattr_key): + # skip the file comment +-- +1.8.3.1 + diff --git a/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch b/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch new file mode 100644 index 0000000..133a24e --- /dev/null +++ b/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch @@ -0,0 +1,200 @@ +From 4152c77defac24ace3b1b6b9cc81a4f614254e4f Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar +Date: Sat, 18 Jul 2020 05:59:09 +0530 +Subject: [PATCH 527/532] Events: Socket creation after getaddrinfo and IPv4 + and IPv6 packet capture + +Issue: Currently, the socket creation is done +prior to getaddrinfo function being invoked. This +can cause mismatch in the protocol and address +families of the created socket and the result +of the getaddrinfo api. Also, the glustereventsd +UDP server by default only captures IPv4 packets +hence IPv6 packets are not even captured. + +Code Changes: +1. 
Modified the socket creation in such a way that +the parameters taken in are dependent upon the +result of the getaddrinfo function. +2. Created a subclass for adding address family +in glustereventsd.py for both AF_INET and AF_INET6. +3. Modified addresses in the eventsapiconf.py.in + +Reasoning behind the approach: +1. If we are using getaddrinfo function then +socket creation should happen only after we +check if we received back valid addresses. +Hence socket creation should come after the call +to getaddrinfo +2. The listening server which pushes the events +to the webhook has to listen for both IPv4 +and IPv6 messages as we would not be sure as to +what address family is picked in _gf_event. + +>Fixes: #1377 +>Change-Id: I568dcd1a977c8832f0fef981e1f81cac7043c760 +>Signed-off-by: srijan-sivakumar +Upstream patch: https://review.gluster.org/c/glusterfs/+/24722 + +BUG: 1814744 +Change-Id: I568dcd1a977c8832f0fef981e1f81cac7043c760 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/225567 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +--- + events/src/eventsapiconf.py.in | 2 ++ + events/src/glustereventsd.py | 37 ++++++++++++++++++++++++++++++------- + libglusterfs/src/events.c | 27 +++++++++++++++++++-------- + 3 files changed, 51 insertions(+), 15 deletions(-) + +diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in +index 76b5954..700093b 100644 +--- a/events/src/eventsapiconf.py.in ++++ b/events/src/eventsapiconf.py.in +@@ -28,6 +28,8 @@ def get_glusterd_workdir(): + return glusterd_workdir + + SERVER_ADDRESS = "0.0.0.0" ++SERVER_ADDRESSv4 = "0.0.0.0" ++SERVER_ADDRESSv6 = "::1" + DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json" + CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json" + CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC +diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py +index c4c7b65..341a3b6 100644 +--- 
a/events/src/glustereventsd.py ++++ b/events/src/glustereventsd.py +@@ -13,6 +13,7 @@ + from __future__ import print_function + import sys + import signal ++import threading + try: + import socketserver + except ImportError: +@@ -23,10 +24,17 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter + from eventtypes import all_events + import handlers + import utils +-from eventsapiconf import SERVER_ADDRESS, PID_FILE ++from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE + from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES + from utils import logger, PidFile, PidFileLockFailed, boolify + ++# Subclass so that specifically IPv4 packets are captured ++class UDPServerv4(socketserver.ThreadingUDPServer): ++ address_family = socket.AF_INET ++ ++# Subclass so that specifically IPv6 packets are captured ++class UDPServerv6(socketserver.ThreadingUDPServer): ++ address_family = socket.AF_INET6 + + class GlusterEventsRequestHandler(socketserver.BaseRequestHandler): + +@@ -89,6 +97,10 @@ def signal_handler_sigusr2(sig, frame): + utils.restart_webhook_pool() + + ++def UDP_server_thread(sock): ++ sock.serve_forever() ++ ++ + def init_event_server(): + utils.setup_logger() + utils.load_all() +@@ -99,15 +111,26 @@ def init_event_server(): + sys.stderr.write("Unable to get Port details from Config\n") + sys.exit(1) + +- # Start the Eventing Server, UDP Server ++ # Creating the Eventing Server, UDP Server for IPv4 packets ++ try: ++ serverv4 = UDPServerv4((SERVER_ADDRESSv4, port), ++ GlusterEventsRequestHandler) ++ except socket.error as e: ++ sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e)) ++ sys.exit(1) ++ # Creating the Eventing Server, UDP Server for IPv6 packets + try: +- server = socketserver.ThreadingUDPServer( +- (SERVER_ADDRESS, port), +- GlusterEventsRequestHandler) ++ serverv6 = UDPServerv6((SERVER_ADDRESSv6, port), ++ GlusterEventsRequestHandler) + except socket.error as e: +- sys.stderr.write("Failed 
to start Eventsd: {0}\n".format(e)) ++ sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e)) + sys.exit(1) +- server.serve_forever() ++ server_thread1 = threading.Thread(target=UDP_server_thread, ++ args=(serverv4,)) ++ server_thread2 = threading.Thread(target=UDP_server_thread, ++ args=(serverv6,)) ++ server_thread1.start() ++ server_thread2.start() + + + def get_args(): +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 6d1e383..4d720ca 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -40,6 +40,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + char *host = NULL; + struct addrinfo hints; + struct addrinfo *result = NULL; ++ struct addrinfo *iter_result_ptr = NULL; + xlator_t *this = THIS; + char *volfile_server_transport = NULL; + +@@ -51,13 +52,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + goto out; + } + +- /* Initialize UDP socket */ +- sock = socket(AF_INET, SOCK_DGRAM, 0); +- if (sock < 0) { +- ret = EVENT_ERROR_SOCKET; +- goto out; +- } +- + if (ctx) { + volfile_server_transport = ctx->cmd_args.volfile_server_transport; + } +@@ -66,7 +60,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + } + + /* host = NULL returns localhost */ +- host = NULL; + if (ctx && ctx->cmd_args.volfile_server && + (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set +@@ -84,6 +77,24 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + goto out; + } + ++ // iterate over the result and break when socket creation is success. ++ for (iter_result_ptr = result; iter_result_ptr != NULL; ++ iter_result_ptr = iter_result_ptr->ai_next) { ++ sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype, ++ iter_result_ptr->ai_protocol); ++ if (sock != -1) { ++ break; ++ } ++ } ++ /* ++ * If none of the addrinfo structures lead to a successful socket ++ * creation, socket creation has failed. 
++ */ ++ if (sock < 0) { ++ ret = EVENT_ERROR_SOCKET; ++ goto out; ++ } ++ + va_start(arguments, fmt); + ret = gf_vasprintf(&msg, fmt, arguments); + va_end(arguments); +-- +1.8.3.1 + diff --git a/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch b/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch new file mode 100644 index 0000000..d04068d --- /dev/null +++ b/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch @@ -0,0 +1,134 @@ +From 3fc74ce6c282f0f43fdcfeda47b71a1b19945b6d Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar +Date: Wed, 3 Feb 2021 10:11:04 +0530 +Subject: [PATCH 528/532] Extras: Removing xattr_analysis script + +The xattr_analysis.py script is used rarely for +debugging and seeing that it has some dependencies, +removing it from the release. + +If need be, it would be directly shared with the cu. + +Label: DOWNSTREAM ONLY +BUG: 1719171 + +Change-Id: I4bb0df3ebfa7e43e13858b4b6e3efbb02ea79d5f +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/226301 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/quota/Makefile.am | 4 +-- + extras/quota/xattr_analysis.py | 73 ------------------------------------------ + glusterfs.spec.in | 1 - + 3 files changed, 2 insertions(+), 76 deletions(-) + delete mode 100755 extras/quota/xattr_analysis.py + +diff --git a/extras/quota/Makefile.am b/extras/quota/Makefile.am +index cdb6be1..e4d9322 100644 +--- a/extras/quota/Makefile.am ++++ b/extras/quota/Makefile.am +@@ -2,7 +2,7 @@ scriptsdir = $(datadir)/glusterfs/scripts + scripts_SCRIPTS = log_accounting.sh + + if WITH_SERVER +-scripts_SCRIPTS += xattr_analysis.py quota_fsck.py ++scripts_SCRIPTS += quota_fsck.py + endif + +-EXTRA_DIST = log_accounting.sh xattr_analysis.py quota_fsck.py ++EXTRA_DIST = log_accounting.sh quota_fsck.py +diff --git a/extras/quota/xattr_analysis.py b/extras/quota/xattr_analysis.py +deleted file mode 100755 +index 7bd7d96..0000000 +--- 
a/extras/quota/xattr_analysis.py ++++ /dev/null +@@ -1,73 +0,0 @@ +-#!/usr/bin/python3 +-# Below script has two purposes +-# 1. Display xattr of entire FS tree in a human readable form +-# 2. Display all the directory where contri and size mismatch. +-# (If there are any directory with contri and size mismatch that are not dirty +-# then that highlights a propagation issue) +-# The script takes only one input LOG _FILE generated from the command, +-# find | xargs getfattr -d -m. -e hex > log_gluster_xattr +- +-from __future__ import print_function +-import re +-import subprocess +-import sys +-from hurry.filesize import size +- +-if len(sys.argv) < 2: +- sys.exit('Usage: %s log_gluster_xattr \n' +- 'to generate log_gluster_xattr use: \n' +- 'find | xargs getfattr -d -m. -e hex > log_gluster_xattr' +- % sys.argv[0]) +-LOG_FILE=sys.argv[1] +- +-def get_quota_xattr_brick(): +- out = subprocess.check_output (["/usr/bin/cat", LOG_FILE]) +- pairs = out.splitlines() +- +- xdict = {} +- mismatch_size = [('====contri_size===', '====size====')] +- for xattr in pairs: +- k = xattr.split("=")[0] +- if re.search("# file:", k): +- print(xdict) +- filename=k +- print("=====" + filename + "=======") +- xdict = {} +- elif k is "": +- pass +- else: +- print(xattr) +- v = xattr.split("=")[1] +- if re.search("contri", k): +- if len(v) == 34: +- # for files size is obtained in iatt, file count should be 1, dir count=0 +- xdict['contri_file_count'] = int(v[18:34], 16) +- xdict['contri_dir_count'] = 0 +- else: +- xdict['contri_size'] = size(int(v[2:18], 16)) +- xdict['contri_file_count'] = int(v[18:34], 16) +- xdict['contri_dir_count'] = int(v[34:], 16) +- elif re.search("size", k): +- xdict['size'] = size(int(v[2:18], 16)) +- xdict['file_count'] = int(v[18:34], 16) +- xdict['dir_count'] = int(v[34:], 16) +- elif re.search("dirty", k): +- if v == '0x3000': +- xdict['dirty'] = False +- elif v == '0x3100': +- xdict['dirty'] = True +- elif re.search("limit_objects", k): +- 
xdict['limit_objects'] = int(v[2:18], 16) +- elif re.search("limit_set", k): +- xdict['limit_set'] = size(int(v[2:18], 16)) +- +- if 'size' in xdict and 'contri_size' in xdict and xdict['size'] != xdict['contri_size']: +- mismatch_size.append((xdict['contri_size'], xdict['size'], filename)) +- +- for values in mismatch_size: +- print(values) +- +- +-if __name__ == '__main__': +- get_quota_xattr_brick() +- +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 30d7162..2be7677 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1380,7 +1380,6 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %files server + %doc extras/clear_xattrs.sh +-%{_datadir}/glusterfs/scripts/xattr_analysis.py* + %{_datadir}/glusterfs/scripts/quota_fsck.py* + # sysconf + %config(noreplace) %{_sysconfdir}/glusterfs +-- +1.8.3.1 + diff --git a/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch b/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch new file mode 100644 index 0000000..671451d --- /dev/null +++ b/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch @@ -0,0 +1,75 @@ +From 1c7e96e73273b7891ea6ef0d768c2bf7ff5de7b0 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Thu, 4 Feb 2021 16:29:39 +0530 +Subject: [PATCH 529/532] geo-rep: prompt should work for ignore_deletes + +The python cli is intelligent enough to parse both "-" and "_" alike: + +Example: +geo-replication config updated successfully +sync_job 4 +geo-replication config updated successfully +gluster volume geo-replication primary 127.0.0.1::secondary config | grep sync_jobs +sync_jobs:5 + +Thus the prompt which appears after ignore-deletes true should +work for both ignore-deletes and ignore_deletes. 
+ +Label: DOWNSTREAM ONLY + +BUG: 1224906 +Change-Id: I89f854200a604d07d3ac6c374fe6d445ce9f22ca +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/226599 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-parser.c | 5 +++-- + tests/00-geo-rep/bug-1708603.t | 12 ++++++++++-- + 2 files changed, 13 insertions(+), 4 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 34f17c9..dda8979 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -3107,8 +3107,9 @@ cli_cmd_gsync_set_parse(struct cli_state *state, const char **words, + if (!ret) + ret = dict_set_int32(dict, "type", type); + if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) { +- if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") && +- !strcmp((char *)words[wordcount - 1], "true")) { ++ if ((((!strcmp((char *)words[wordcount - 2], "ignore_deletes")) || ++ (!strcmp((char *)words[wordcount - 2], "ignore-deletes")))) && ++ ((!strcmp((char *)words[wordcount - 1], "true")))) { + question = + "There exists ~15 seconds delay for the option to take" + " effect from stime of the corresponding brick. 
Please" +diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t +index 26913f1..edafb48 100644 +--- a/tests/00-geo-rep/bug-1708603.t ++++ b/tests/00-geo-rep/bug-1708603.t +@@ -44,11 +44,19 @@ TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 + #Create geo-rep session + TEST create_georep_session $master $slave + +-echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1 +-EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++echo n | $GEOREP_CLI $master $slave config ignore_deletes true >/dev/null 2>&1 ++EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore_deletes) ++ ++echo y | $GEOREP_CLI $master $slave config ignore_deletes true ++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore_deletes) ++ ++$GEOREP_CLI $master $slave config ignore_deletes false + echo y | $GEOREP_CLI $master $slave config ignore-deletes true + EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes) + ++echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1 ++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++ + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave stop + +-- +1.8.3.1 + diff --git a/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch b/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch new file mode 100644 index 0000000..aec73b7 --- /dev/null +++ b/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch @@ -0,0 +1,41 @@ +From 5a7348a266587704dae4f1ddda16b7c95f547251 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Sun, 7 Feb 2021 13:40:24 +0000 +Subject: [PATCH 530/532] gfapi: avoid crash while logging message. + +Breaking parameter into two different parameter +to avoid a crash. 
+ +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/2139 +> fixes: #2138 +> Change-Id: Idd5f3631488c1d892748f83e6847fb6fd2d0802a +> Signed-off-by: Rinku Kothiya + +BUG: 1691320 + +Change-Id: Ifd6a96982ffd4e5334f8be2297de2ad826f3145b +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/226851 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-fops.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index 051541f..6dc3b66 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -1529,7 +1529,7 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + ret = -1; + errno = EINVAL; + gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG, +- "size >= %llu is not allowed", GF_UNIT_GB, NULL); ++ "Data size too large", "size=%llu", GF_UNIT_GB, NULL); + goto out; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch b/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch new file mode 100644 index 0000000..8c2ecbf --- /dev/null +++ b/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch @@ -0,0 +1,69 @@ +From 058a853a1438b2a62586c545f71150ade3de23b7 Mon Sep 17 00:00:00 2001 +From: schaffung +Date: Wed, 10 Feb 2021 13:43:48 +0530 +Subject: [PATCH 531/532] Glustereventsd Default port change (#2091) + +Issue : The default port of glustereventsd is currently 24009 +which is preventing glustereventsd from binding to the UDP port +due to selinux policies. + +Fix: Changing the default port to be bound by chanding it to something +in the ephemeral range. 
+ +>Fixes: #2080 +>Change-Id: Ibdc87f83f82f69660dca95d6d14b226e10d8bd33 +>Signed-off-by: srijan-sivakumar +Upstream Patch : https://github.com/gluster/glusterfs/pull/2091 + +BUG: 1814744 +Change-Id: Ibdc87f83f82f69660dca95d6d14b226e10d8bd33 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/227249 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + events/src/eventsconfig.json | 2 +- + extras/firewalld/glusterfs.xml | 2 +- + libglusterfs/src/events.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/events/src/eventsconfig.json b/events/src/eventsconfig.json +index 89e5b9c..14d8f84 100644 +--- a/events/src/eventsconfig.json ++++ b/events/src/eventsconfig.json +@@ -1,5 +1,5 @@ + { + "log-level": "INFO", +- "port": 24009, ++ "port": 55555, + "disable-events-log": false + } +diff --git a/extras/firewalld/glusterfs.xml b/extras/firewalld/glusterfs.xml +index 7e17644..dc74b2e 100644 +--- a/extras/firewalld/glusterfs.xml ++++ b/extras/firewalld/glusterfs.xml +@@ -4,7 +4,7 @@ + Default ports for gluster-distributed storage + + +- ++ + + + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 4d720ca..3659606 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -26,7 +26,7 @@ + #include "glusterfs/events.h" + + #define EVENT_HOST "127.0.0.1" +-#define EVENT_PORT 24009 ++#define EVENT_PORT 55555 + + int + _gf_event(eventtypes_t event, const char *fmt, ...) 
+-- +1.8.3.1 + diff --git a/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch b/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch new file mode 100644 index 0000000..97e5aa7 --- /dev/null +++ b/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch @@ -0,0 +1,79 @@ +From 2dad17fdbaab2ab2cda6a05dec9dcd2d37ea32ff Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha +Date: Wed, 10 Feb 2021 15:07:32 +0530 +Subject: [PATCH 532/532] glusterd: fix for starting brick on new port + +The Errno set by the runner code was not correct when the bind() fails +to assign an already occupied port in the __socket_server_bind(). + +Fix: +Updated the code to return the correct errno from the +__socket_server_bind() if the bind() fails due to EADDRINUSE error. And, +use the returned errno from runner_run() to retry allocating a new port +to the brick process. + +>Fixes: #1101 + +>Change-Id: If124337f41344a04f050754e402490529ef4ecdc +>Signed-off-by: nik-redhat nladha@redhat.com + +Upstream patch: https://github.com/gluster/glusterfs/pull/2090 + +BUG: 1865796 + +Change-Id: If124337f41344a04f050754e402490529ef4ecdc +Signed-off-by: nik-redhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/227261 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez Juan +--- + rpc/rpc-transport/socket/src/socket.c | 3 +++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 6 ++---- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 1ee7320..96ed9f1 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -973,8 +973,11 @@ __socket_server_bind(rpc_transport_t *this) + this->myinfo.identifier, strerror(errno)); + if (errno == EADDRINUSE) { + gf_log(this->name, GF_LOG_ERROR, "Port is already in use"); ++ ret = -EADDRINUSE; ++ goto out; + } + } ++ + if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) { + if (getsockname(priv->sock, 
SA(&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len) != 0) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index cf32bd9..bc188a2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2151,6 +2151,7 @@ glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo, + ret = -1; + goto out; + } ++ + /* Build the exp_path, before starting the glusterfsd even in + valgrind mode. Otherwise all the glusterfsd processes start + writing the valgrind log to the same file. +@@ -2289,13 +2290,10 @@ retry: + + if (wait) { + synclock_unlock(&priv->big_lock); +- errno = 0; + ret = runner_run(&runner); +- if (errno != 0) +- ret = errno; + synclock_lock(&priv->big_lock); + +- if (ret == EADDRINUSE) { ++ if (ret == -EADDRINUSE) { + /* retry after getting a new port */ + gf_msg(this->name, GF_LOG_WARNING, -ret, + GD_MSG_SRC_BRICK_PORT_UNAVAIL, +-- +1.8.3.1 + diff --git a/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch b/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch new file mode 100644 index 0000000..158b4b7 --- /dev/null +++ b/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch @@ -0,0 +1,250 @@ +From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Thu, 18 Feb 2021 09:40:44 +0530 +Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status + after reboot (#2172) + +Rebalance cli is not showing correct status after reboot. + +The CLI is not correct status because defrag object is not +valid at the time of creating a rpc connection to show the status. +The defrag object is not valid because at the time of start a glusterd +glusterd_restart_rebalance can be call almost at the same time by two +different synctask and glusterd got a disconnect on rpc object and it +cleanup the defrag object. 
+ +Solution: To avoid the defrag object populate a reference count before + create a defrag rpc object. +>Fixes: #1339 +>Signed-off-by: Mohit Agrawal +>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba +Upstream Patch : https://github.com/gluster/glusterfs/pull/2172 + +BUG: 1832306 +Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/228249 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++----- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 59 +++++++++++++++++++++++++- + xlators/mgmt/glusterd/src/glusterd-utils.h | 5 +++ + xlators/mgmt/glusterd/src/glusterd.h | 1 + + 5 files changed, 90 insertions(+), 11 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index b419a89..fcd5318 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int pid = -1; ++ int refcnt = 0; + + this = THIS; + if (!this) +@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + } + + case RPC_CLNT_DISCONNECT: { +- if (!defrag->connected) +- return 0; +- + LOCK(&defrag->lock); + { ++ if (!defrag->connected) { ++ UNLOCK(&defrag->lock); ++ return 0; ++ } + defrag->connected = 0; + } + UNLOCK(&defrag->lock); +@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + glusterd_defrag_rpc_put(defrag); + if (defrag->cbk_fn) + defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status); +- +- GF_FREE(defrag); ++ refcnt = glusterd_defrag_unref(defrag); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED, +- "Rebalance process for 
volume %s has disconnected.", +- volinfo->volname); ++ "Rebalance process for volume %s has disconnected" ++ " and defrag refcnt is %d.", ++ volinfo->volname, refcnt); + break; + } + case RPC_CLNT_DESTROY: +@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, + gf_msg_debug("glusterd", 0, "rebalance command failed"); + goto out; + } +- ++ /* Take reference before sleep to save defrag object cleanup while ++ glusterd_restart_rebalance call for other bricks by syncktask ++ at the time of restart a glusterd. ++ */ ++ glusterd_defrag_ref(defrag); + sleep(5); + + ret = glusterd_rebalance_rpc_create(volinfo); +@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); ++ struct rpc_clnt *rpc = NULL; + + // rebalance process is not started + if (!defrag) +@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + } + + glusterd_volinfo_ref(volinfo); +- ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify, +- volinfo, _gf_true); ++ ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo, ++ _gf_false); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL, + "Glusterd RPC creation failed"); + goto out; + } ++ LOCK(&defrag->lock); ++ { ++ if (!defrag->rpc) ++ defrag->rpc = rpc; ++ else ++ rpc_clnt_unref(rpc); ++ } ++ UNLOCK(&defrag->lock); + ret = 0; + out: + if (options) +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index df78fef..05c9e11 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + if (!rpc) { + if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) { + volinfo = pending_node->node; ++ glusterd_defrag_ref(volinfo->rebal.defrag); + ret = 
glusterd_rebalance_rpc_create(volinfo); + if (ret) { + ret = 0; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index bc188a2..9fb8eab 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -93,6 +93,44 @@ + #define NLMV4_VERSION 4 + #define NLMV1_VERSION 1 + ++int ++glusterd_defrag_ref(glusterd_defrag_info_t *defrag) ++{ ++ int refcnt = 0; ++ ++ if (!defrag) ++ goto out; ++ ++ LOCK(&defrag->lock); ++ { ++ refcnt = ++defrag->refcnt; ++ } ++ UNLOCK(&defrag->lock); ++ ++out: ++ return refcnt; ++} ++ ++int ++glusterd_defrag_unref(glusterd_defrag_info_t *defrag) ++{ ++ int refcnt = -1; ++ ++ if (!defrag) ++ goto out; ++ ++ LOCK(&defrag->lock); ++ { ++ refcnt = --defrag->refcnt; ++ if (refcnt <= 0) ++ GF_FREE(defrag); ++ } ++ UNLOCK(&defrag->lock); ++ ++out: ++ return refcnt; ++} ++ + gf_boolean_t + is_brick_mx_enabled(void) + { +@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, + char pidfile[PATH_MAX] = ""; + int ret = -1; + pid_t pid = 0; ++ int refcnt = 0; + + this = THIS; + GF_ASSERT(this); +@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, + volinfo->volname); + goto out; + } +- ret = glusterd_rebalance_rpc_create(volinfo); ++ refcnt = glusterd_defrag_ref(volinfo->rebal.defrag); ++ /* If refcnt value is 1 it means either defrag object is ++ poulated by glusterd_rebalance_defrag_init or previous ++ rpc creation was failed.If it is not 1 it means it(defrag) ++ was populated at the time of start a rebalance daemon. 
++ We need to create a rpc object only while a previous ++ rpc connection was not established successfully at the ++ time of restart a rebalance daemon by ++ glusterd_handle_defrag_start otherwise rebalance cli ++ does not show correct status after just reboot a node and try ++ to print the rebalance status because defrag object has been ++ destroyed during handling of rpc disconnect. ++ */ ++ if (refcnt == 1) { ++ ret = glusterd_rebalance_rpc_create(volinfo); ++ } else { ++ ret = 0; ++ glusterd_defrag_unref(volinfo->rebal.defrag); ++ } + break; + } + case GF_DEFRAG_STATUS_NOT_STARTED: +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 02d85d2..4541471 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -886,4 +886,9 @@ int32_t + glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + int32_t sub_count); + ++int ++glusterd_defrag_ref(glusterd_defrag_info_t *defrag); ++ ++int ++glusterd_defrag_unref(glusterd_defrag_info_t *defrag); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index efe4d0e..9de3f28 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ { + uint64_t total_data; + uint64_t num_files_lookedup; + uint64_t total_failures; ++ int refcnt; + gf_lock_t lock; + int cmd; + pthread_t th; +-- +1.8.3.1 + diff --git a/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch b/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch new file mode 100644 index 0000000..2dc72c1 --- /dev/null +++ b/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch @@ -0,0 +1,47 @@ +From b3647eb5415b2e3d9e1a11ad6c4689e520f17b39 Mon Sep 17 00:00:00 2001 +From: mohit84 +Date: Mon, 22 Feb 2021 10:09:34 +0530 +Subject: [PATCH 534/538] glusterd: Resolve use after free bug (#2181) + +In the commit 
61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea +introduced a coverity bug use object after cleanup +the object. + +Cleanup memory after comeout from a critical section +>Fixes: #2180 + +>Change-Id: Iee2050c4883a0dd44b8523bb822b664462ab6041 +>Signed-off-by: Mohit Agrawal +Upstream Patch : https://github.com/gluster/glusterfs/pull/2181 + +BUG: 1832306 +Change-Id: Iee2050c4883a0dd44b8523bb822b664462ab6041 +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/228578 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 9fb8eab..6d40be5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -122,11 +122,10 @@ glusterd_defrag_unref(glusterd_defrag_info_t *defrag) + LOCK(&defrag->lock); + { + refcnt = --defrag->refcnt; +- if (refcnt <= 0) +- GF_FREE(defrag); + } + UNLOCK(&defrag->lock); +- ++ if (refcnt <= 0) ++ GF_FREE(defrag); + out: + return refcnt; + } +-- +1.8.3.1 + diff --git a/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch b/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch new file mode 100644 index 0000000..e1622de --- /dev/null +++ b/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch @@ -0,0 +1,270 @@ +From 775d500cd136bd8c940faaeffde1217c25a87e3d Mon Sep 17 00:00:00 2001 +From: Yaniv Kaul +Date: Sun, 2 Jun 2019 21:14:18 +0300 +Subject: [PATCH 535/538] (multiple files) use dict_allocate_and_serialize() + where applicable. + +This function does length, allocation and serialization for you. 
+ +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/22800 +> Change-Id: I142a259952a2fe83dd719442afaefe4a43a8e55e +> updates: bz#1193929 +> Signed-off-by: Yaniv Kaul + +Change-Id: I142a259952a2fe83dd719442afaefe4a43a8e55e +BUG: 1911292 +Signed-off-by: Yaniv Kaul +Reviewed-on: https://code.engineering.redhat.com/gerrit/228611 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-inode-read.c | 34 +++++--------------------- + xlators/cluster/ec/src/ec-combine.c | 16 +++--------- + xlators/features/locks/src/posix.c | 23 +++-------------- + xlators/protocol/client/src/client-handshake.c | 14 +++-------- + xlators/protocol/server/src/server-handshake.c | 24 +++++++----------- + xlators/protocol/server/src/server-helpers.c | 27 +++----------------- + 6 files changed, 28 insertions(+), 110 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index 523a5b4..cf305af 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -948,24 +948,13 @@ unlock: + goto unwind; + } + +- len = dict_serialized_length(local->dict); +- if (len <= 0) { +- goto unwind; +- } +- +- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char); +- if (!lockinfo_buf) { ++ op_ret = dict_allocate_and_serialize( ++ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); ++ if (op_ret != 0) { + local->op_ret = -1; +- local->op_errno = ENOMEM; + goto unwind; + } + +- op_ret = dict_serialize(local->dict, lockinfo_buf); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- } +- + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +@@ -1064,24 +1053,13 @@ unlock: + goto unwind; + } + +- len = dict_serialized_length(local->dict); +- if (len <= 0) { +- goto unwind; +- } +- +- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char); +- if 
(!lockinfo_buf) { ++ op_ret = dict_allocate_and_serialize( ++ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); ++ if (op_ret != 0) { + local->op_ret = -1; +- local->op_errno = ENOMEM; + goto unwind; + } + +- op_ret = dict_serialize(local->dict, lockinfo_buf); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- } +- + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c +index 99e5534..9d712b3 100644 +--- a/xlators/cluster/ec/src/ec-combine.c ++++ b/xlators/cluster/ec/src/ec-combine.c +@@ -486,22 +486,12 @@ ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key) + + tmp = NULL; + +- len = dict_serialized_length(lockinfo); +- if (len < 0) { +- err = len; +- +- goto out; +- } +- ptr = GF_MALLOC(len, gf_common_mt_char); +- if (ptr == NULL) { +- err = -ENOMEM; +- +- goto out; +- } +- err = dict_serialize(lockinfo, ptr); ++ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr, ++ (unsigned int *)&len); + if (err != 0) { + goto out; + } ++ + dict = (which == EC_COMBINE_XDATA) ? 
cbk->xdata : cbk->dict; + err = dict_set_dynptr(dict, key, ptr, len); + if (err != 0) { +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 5ae0125..cdd1ff7 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -1547,8 +1547,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, + goto out; + } + +- len = dict_serialized_length(tmp); +- if (len < 0) { ++ op_ret = dict_allocate_and_serialize(tmp, (char **)&buf, ++ (unsigned int *)&len); ++ if (op_ret != 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log(this->name, GF_LOG_WARNING, +@@ -1558,24 +1559,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, + goto out; + } + +- buf = GF_CALLOC(1, len, gf_common_mt_char); +- if (buf == NULL) { +- op_ret = -1; +- *op_errno = ENOMEM; +- goto out; +- } +- +- op_ret = dict_serialize(tmp, buf); +- if (op_ret < 0) { +- *op_errno = -op_ret; +- op_ret = -1; +- gf_log(this->name, GF_LOG_WARNING, +- "dict_serialize failed (%s) while handling lockinfo " +- "for fd (ptr: %p inode-gfid:%s)", +- strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid)); +- goto out; +- } +- + op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len); + if (op_ret < 0) { + *op_errno = -op_ret; +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index 0002361..6b20d92 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -1286,18 +1286,10 @@ client_setvolume(xlator_t *this, struct rpc_clnt *rpc) + "Failed to set client opversion in handshake message"); + } + +- ret = dict_serialized_length(options); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_ERROR, +- "failed to get serialized length of dict"); ++ ret = dict_allocate_and_serialize(options, (char **)&req.dict.dict_val, ++ &req.dict.dict_len); ++ if (ret != 0) { + ret = -1; +- goto fail; 
+- } +- req.dict.dict_len = ret; +- req.dict.dict_val = GF_CALLOC(1, req.dict.dict_len, +- gf_client_mt_clnt_req_buf_t); +- ret = dict_serialize(options, req.dict.dict_val); +- if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_SERIALIZE_FAIL, + "failed to serialize " + "dictionary"); +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index eeca73c..54dc030 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ b/xlators/protocol/server/src/server-handshake.c +@@ -676,22 +676,16 @@ fail: + GF_ASSERT(rsp); + + rsp->op_ret = 0; +- ret = dict_serialized_length(reply); +- if (ret > 0) { +- rsp->dict.dict_len = ret; +- rsp->dict.dict_val = GF_CALLOC(1, rsp->dict.dict_len, +- gf_server_mt_rsp_buf_t); +- if (rsp->dict.dict_val) { +- ret = dict_serialize(reply, rsp->dict.dict_val); +- if (ret < 0) { +- gf_msg_debug("server-handshake", 0, +- "failed " +- "to serialize reply dict"); +- op_ret = -1; +- op_errno = -ret; +- } +- } ++ ++ ret = dict_allocate_and_serialize(reply, (char **)&rsp->dict.dict_val, ++ &rsp->dict.dict_len); ++ if (ret != 0) { ++ ret = -1; ++ gf_msg_debug("server-handshake", 0, "failed to serialize reply dict"); ++ op_ret = -1; ++ op_errno = -ret; + } ++ + rsp->op_ret = op_ret; + rsp->op_errno = gf_errno_to_error(op_errno); + +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index e74a24d..33959b5 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -902,7 +902,6 @@ serialize_rsp_direntp(gf_dirent_t *entries, gfs3_readdirp_rsp *rsp) + gfs3_dirplist *trav = NULL; + gfs3_dirplist *prev = NULL; + int ret = -1; +- int temp = 0; + + GF_VALIDATE_OR_GOTO("server", entries, out); + GF_VALIDATE_OR_GOTO("server", rsp, out); +@@ -923,28 +922,10 @@ serialize_rsp_direntp(gf_dirent_t *entries, gfs3_readdirp_rsp *rsp) + + /* if 'dict' is present, pack it */ + if 
(entry->dict) { +- temp = dict_serialized_length(entry->dict); +- +- if (temp < 0) { +- gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, PS_MSG_INVALID_ENTRY, +- "failed to get " +- "serialized length of reply dict"); +- errno = EINVAL; +- trav->dict.dict_len = 0; +- goto out; +- } +- trav->dict.dict_len = temp; +- +- trav->dict.dict_val = GF_CALLOC(1, trav->dict.dict_len, +- gf_server_mt_rsp_buf_t); +- if (!trav->dict.dict_val) { +- errno = ENOMEM; +- trav->dict.dict_len = 0; +- goto out; +- } +- +- ret = dict_serialize(entry->dict, trav->dict.dict_val); +- if (ret < 0) { ++ ret = dict_allocate_and_serialize(entry->dict, ++ (char **)&trav->dict.dict_val, ++ &trav->dict.dict_len); ++ if (ret != 0) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, PS_MSG_DICT_SERIALIZE_FAIL, + "failed to serialize reply dict"); + errno = -ret; +-- +1.8.3.1 + diff --git a/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch b/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch new file mode 100644 index 0000000..94e0b64 --- /dev/null +++ b/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch @@ -0,0 +1,102 @@ +From 32281b4b5cf79d0ef6f0c65775bb81093e1ba479 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 24 Feb 2021 18:44:12 +0530 +Subject: [PATCH 536/538] dht: Ongoing IO is failed during volume shrink + operation (#2188) + +In the commit (c878174) we have introduced a check +to avoid stale layout issue.To avoid a stale layout +issue dht has set a key along with layout at the time +of wind a create fop and posix validates the parent +layout based on the key value. If layout does not match +it throw and error.In case of volume shrink layout has +been changed by reabalance daemon and if layout does not +matches dht is not able to wind a create fop successfully. + +Solution: To avoid the issue populate a key only while + dht has wind a fop first time. 
After got an + error in 2nd attempt dht takes a lock and then + reattempt to wind a fop again. + +> Fixes: #2187 +> Change-Id: Ie018386e7823a11eea415496bb226ca032453a55 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit da6ce622b722f7d12619c5860293faf03f7cd00c +> Reviewed on upstream link https://github.com/gluster/glusterfs/pull/2188 + +Bug: 1924044 +Change-Id: I7670dbe2d562b83db0af3753f994653ffdd49591 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/228941 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 41 ++++++++++++++++++++++++++---------- + 1 file changed, 30 insertions(+), 11 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index fe1d0ee..7425c1a 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -8526,15 +8526,32 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + { + dht_local_t *local = NULL; + xlator_t *avail_subvol = NULL; ++ int lk_count = 0; + + local = frame->local; + + if (!dht_is_subvol_filled(this, subvol)) { +- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, +- subvol->name); +- +- dht_set_parent_layout_in_dict(loc, this, local); +- ++ lk_count = local->lock[0].layout.parent_layout.lk_count; ++ gf_msg_debug(this->name, 0, "creating %s on %s with lock_count %d", ++ loc->path, subvol->name, lk_count); ++ /*The function dht_set_parent_layout_in_dict sets the layout ++ in dictionary and posix_create validates a layout before ++ creating a file.In case if parent layout does not match ++ with disk layout posix xlator throw an error but in case ++ if volume is shrunk layout has been changed by rebalance daemon ++ so we need to call this function only while a function is calling ++ without taking any lock otherwise we would not able to populate a ++ layout on disk in case if layout has changed. 
++ */ ++ if (!lk_count) { ++ dht_set_parent_layout_in_dict(loc, this, local); ++ } else { ++ /* Delete a key to avoid layout validate if it was set by ++ previous STACK_WIND attempt when a lock was not taken ++ by dht_create ++ */ ++ (void)dict_del_sizen(local->params, GF_PREOP_PARENT_KEY); ++ } + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +@@ -8554,12 +8571,14 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + + goto out; + } +- +- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, +- subvol->name); +- +- dht_set_parent_layout_in_dict(loc, this, local); +- ++ lk_count = local->lock[0].layout.parent_layout.lk_count; ++ gf_msg_debug(this->name, 0, "creating %s on %s with lk_count %d", ++ loc->path, subvol->name, lk_count); ++ if (!lk_count) { ++ dht_set_parent_layout_in_dict(loc, this, local); ++ } else { ++ (void)dict_del_sizen(local->params, GF_PREOP_PARENT_KEY); ++ } + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +-- +1.8.3.1 + diff --git a/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch b/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch new file mode 100644 index 0000000..dcf0940 --- /dev/null +++ b/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch @@ -0,0 +1,387 @@ +From 7b7ec67680415c22773ebb2a5daacf298b6b1e06 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Sat, 13 Feb 2021 18:37:32 +0100 +Subject: [PATCH 537/538] cluster/afr: Fix race in lockinfo (f)getxattr + +A shared dictionary was updated outside the lock after having updated +the number of remaining answers. This means that one thread may be +processing the last answer and unwinding the request before another +thread completes updating the dict. 
+ + Thread 1 Thread 2 + + LOCK() + call_cnt-- (=1) + UNLOCK() + LOCK() + call_cnt-- (=0) + UNLOCK() + update_dict(dict) + if (call_cnt == 0) { + STACK_UNWIND(dict); + } + update_dict(dict) + if (call_cnt == 0) { + STACK_UNWIND(dict); + } + +The updates from thread 1 are lost. + +This patch also reduces the work done inside the locked region and +reduces code duplication. + +Upstream-patch: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2162 +> Fixes: #2161 +> Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa +> Signed-off-by: Xavi Hernandez + +BUG: 1911292 +Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/228924 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-inode-read.c | 254 ++++++++++++++----------------- + 1 file changed, 112 insertions(+), 142 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index cf305af..98e195a 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -15,6 +15,8 @@ + #include + #include + ++#include ++ + #include + #include "afr.h" + #include +@@ -868,188 +870,121 @@ afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + +-int32_t +-afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) ++static int32_t ++afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata) + { +- int call_cnt = 0, len = 0; +- char *lockinfo_buf = NULL; +- dict_t *lockinfo = NULL, *newdict = NULL; +- afr_local_t *local = NULL; ++ afr_local_t *local; ++ dict_t *local_dict; ++ dict_t *local_xdata; ++ int32_t ret; + +- LOCK(&frame->lock); +- { +- local = frame->local; ++ local = frame->local; ++ local_dict = NULL; ++ local_xdata = NULL; + 
+- call_cnt = --local->call_count; ++ ret = -ENOMEM; + +- if ((op_ret < 0) || (!dict && !xdata)) { +- goto unlock; +- } +- +- if (xdata) { +- if (!local->xdata_rsp) { +- local->xdata_rsp = dict_new(); +- if (!local->xdata_rsp) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } +- } ++ if ((dict != NULL) && (local->dict == NULL)) { ++ local_dict = dict_new(); ++ if (local_dict == NULL) { ++ goto done; + } ++ } + +- if (!dict) { +- goto unlock; ++ if ((xdata != NULL) && (local->xdata_rsp == NULL)) { ++ local_xdata = dict_new(); ++ if (local_xdata == NULL) { ++ goto done; + } ++ } + +- op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, +- (void **)&lockinfo_buf, &len); ++ if ((local_dict != NULL) || (local_xdata != NULL)) { ++ /* TODO: Maybe it would be better to preallocate both dicts before ++ * sending the requests. This way we don't need to use a LOCK() ++ * here. */ ++ LOCK(&frame->lock); + +- if (!lockinfo_buf) { +- goto unlock; ++ if ((local_dict != NULL) && (local->dict == NULL)) { ++ local->dict = local_dict; ++ local_dict = NULL; + } + +- if (!local->dict) { +- local->dict = dict_new(); +- if (!local->dict) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } ++ if ((local_xdata != NULL) && (local->xdata_rsp == NULL)) { ++ local->xdata_rsp = local_xdata; ++ local_xdata = NULL; + } +- } +-unlock: +- UNLOCK(&frame->lock); + +- if (lockinfo_buf != NULL) { +- lockinfo = dict_new(); +- if (lockinfo == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- } else { +- op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); +- +- if (lockinfo && local->dict) { +- dict_copy(lockinfo, local->dict); +- } +- } +- } +- +- if (xdata && local->xdata_rsp) { +- dict_copy(xdata, local->xdata_rsp); ++ UNLOCK(&frame->lock); + } + +- if (!call_cnt) { +- newdict = dict_new(); +- if (!newdict) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unwind; ++ if (dict != NULL) { ++ if (dict_copy(dict, 
local->dict) < 0) { ++ goto done; + } ++ } + +- op_ret = dict_allocate_and_serialize( +- local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); +- if (op_ret != 0) { +- local->op_ret = -1; +- goto unwind; ++ if (xdata != NULL) { ++ if (dict_copy(xdata, local->xdata_rsp) < 0) { ++ goto done; + } ++ } + +- op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, +- (void *)lockinfo_buf, len); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- goto unwind; +- } ++ ret = 0; + +- unwind: +- AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict, +- local->xdata_rsp); ++done: ++ if (local_dict != NULL) { ++ dict_unref(local_dict); + } + +- dict_unref(lockinfo); ++ if (local_xdata != NULL) { ++ dict_unref(local_xdata); ++ } + +- return 0; ++ return ret; + } + +-int32_t +-afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) ++static void ++afr_getxattr_lockinfo_cbk_common(call_frame_t *frame, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, dict_t *xdata, ++ bool is_fgetxattr) + { +- int call_cnt = 0, len = 0; ++ int len = 0; + char *lockinfo_buf = NULL; + dict_t *lockinfo = NULL, *newdict = NULL; + afr_local_t *local = NULL; + +- LOCK(&frame->lock); +- { +- local = frame->local; +- +- call_cnt = --local->call_count; +- +- if ((op_ret < 0) || (!dict && !xdata)) { +- goto unlock; +- } +- +- if (xdata) { +- if (!local->xdata_rsp) { +- local->xdata_rsp = dict_new(); +- if (!local->xdata_rsp) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } +- } +- } +- +- if (!dict) { +- goto unlock; +- } ++ local = frame->local; + ++ if ((op_ret >= 0) && (dict != NULL)) { + op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, + (void **)&lockinfo_buf, &len); +- +- if (!lockinfo_buf) { +- goto unlock; +- } +- +- if (!local->dict) { +- local->dict = dict_new(); +- if (!local->dict) { +- local->op_ret = -1; +- local->op_errno = 
ENOMEM; +- goto unlock; ++ if (lockinfo_buf != NULL) { ++ lockinfo = dict_new(); ++ if (lockinfo == NULL) { ++ op_ret = -1; ++ } else { ++ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); + } + } + } +-unlock: +- UNLOCK(&frame->lock); + +- if (lockinfo_buf != NULL) { +- lockinfo = dict_new(); +- if (lockinfo == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- } else { +- op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); +- +- if (lockinfo && local->dict) { +- dict_copy(lockinfo, local->dict); +- } ++ if ((op_ret >= 0) && ((lockinfo != NULL) || (xdata != NULL))) { ++ op_ret = afr_update_local_dicts(frame, lockinfo, xdata); ++ if (lockinfo != NULL) { ++ dict_unref(lockinfo); + } + } + +- if (xdata && local->xdata_rsp) { +- dict_copy(xdata, local->xdata_rsp); ++ if (op_ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; + } + +- if (!call_cnt) { ++ if (uatomic_sub_return(&local->call_count, 1) == 0) { + newdict = dict_new(); + if (!newdict) { + local->op_ret = -1; +- local->op_errno = ENOMEM; ++ local->op_errno = op_errno = ENOMEM; + goto unwind; + } + +@@ -1057,23 +992,58 @@ unlock: + local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); + if (op_ret != 0) { + local->op_ret = -1; ++ local->op_errno = op_errno = ENOMEM; + goto unwind; + } + + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; ++ GF_FREE(lockinfo_buf); ++ local->op_ret = op_ret = -1; ++ local->op_errno = op_errno = -op_ret; + goto unwind; + } + + unwind: +- AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict, +- local->xdata_rsp); ++ /* TODO: These unwinds use op_ret and op_errno instead of local->op_ret ++ * and local->op_errno. This doesn't seem right because any ++ * failure during processing of each answer could be silently ++ * ignored. 
This is kept because this was the old behavior and because ++ * local->op_ret is initialized as -1 and local->op_errno is ++ * initialized as EUCLEAN, which makes these values useless. */ ++ if (is_fgetxattr) { ++ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict, ++ local->xdata_rsp); ++ } else { ++ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict, ++ local->xdata_rsp); ++ } ++ ++ if (newdict != NULL) { ++ dict_unref(newdict); ++ } + } ++} ++ ++static int32_t ++afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata, ++ false); + +- dict_unref(lockinfo); ++ return 0; ++} ++ ++static int32_t ++afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata, ++ true); + + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch b/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch new file mode 100644 index 0000000..de164a3 --- /dev/null +++ b/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch @@ -0,0 +1,46 @@ +From 31cd7627ff329a39691239322df3bc88e962ad02 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Mon, 1 Mar 2021 05:19:39 +0100 +Subject: [PATCH 538/538] afr: fix coverity issue introduced by 90cefde + +Fixes coverity issues 1447029 and 1447028.
+ +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2201 +> Updates: #2161 +> Change-Id: I6a564231d6aeb76de20675b7ced5d45eed8c377f +> Signed-off-by: Xavi Hernandez + +BUG: 1911292 +Change-Id: I6a564231d6aeb76de20675b7ced5d45eed8c377f +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/229200 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-inode-read.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index 98e195a..d874172 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -918,13 +918,13 @@ afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata) + } + + if (dict != NULL) { +- if (dict_copy(dict, local->dict) < 0) { ++ if (dict_copy(dict, local->dict) == NULL) { + goto done; + } + } + + if (xdata != NULL) { +- if (dict_copy(xdata, local->xdata_rsp) < 0) { ++ if (dict_copy(xdata, local->xdata_rsp) == NULL) { + goto done; + } + } +-- +1.8.3.1 + diff --git a/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch b/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch new file mode 100644 index 0000000..18f851f --- /dev/null +++ b/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch @@ -0,0 +1,62 @@ +From 88523814fe296c9cc9f7619e06210830f59c5edf Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 12 Mar 2021 10:32:09 +0100 +Subject: [PATCH 539/539] extras: disable lookup-optimize in virt and block + groups + +lookup-optimize doesn't provide any benefit for virtualized +environments and gluster-block workloads, but it's known to cause +corruption in some cases when sharding is also enabled and the volume +is expanded or shrunk. 
+ +For this reason, we disable lookup-optimize by default on those +environments. + +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2254 +> Fixes: #2253 +> Change-Id: I25861aa50b335556a995a9c33318dd3afb41bf71 +> Signed-off-by: Xavi Hernandez + +BUG: 1939372 +Change-Id: I25861aa50b335556a995a9c33318dd3afb41bf71 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/231173 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-distributed-virt | 1 + + extras/group-gluster-block | 1 + + extras/group-virt.example | 1 + + 3 files changed, 3 insertions(+) + +diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt +index a960b76..6da3de0 100644 +--- a/extras/group-distributed-virt ++++ b/extras/group-distributed-virt +@@ -8,3 +8,4 @@ user.cifs=off + client.event-threads=4 + server.event-threads=4 + performance.client-io-threads=on ++cluster.lookup-optimize=off +diff --git a/extras/group-gluster-block b/extras/group-gluster-block +index 1e39801..b8d3e8d 100644 +--- a/extras/group-gluster-block ++++ b/extras/group-gluster-block +@@ -25,3 +25,4 @@ features.shard-block-size=64MB + user.cifs=off + server.allow-insecure=on + cluster.choose-local=off ++cluster.lookup-optimize=off +diff --git a/extras/group-virt.example b/extras/group-virt.example +index 3a441eb..155f5f5 100644 +--- a/extras/group-virt.example ++++ b/extras/group-virt.example +@@ -21,3 +21,4 @@ server.tcp-user-timeout=20 + server.keepalive-time=10 + server.keepalive-interval=2 + server.keepalive-count=5 ++cluster.lookup-optimize=off +-- +1.8.3.1 + diff --git a/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch b/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch new file mode 100644 index 0000000..0a89c64 --- /dev/null +++ b/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch @@ -0,0 +1,37 @@ +From 6895b6c67e9c29af3f966b4d9ee5cb40da763d24 Mon Sep 17 
00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 14 Apr 2021 12:38:45 +0530 +Subject: [PATCH 540/540] extras: Disable write-behind for group samba. + +when write-behind is enabled with Samba it could be a +source of data corruption. The translator, while +processing a write call, immediately returns success but continues +writing the data to the server in the background. This can cause data +corruption when two clients relying on Samba to provide data consistency +are operating on the same file. + +> fixes: https://github.com/gluster/glusterfs/issues/2329 + +Change-Id: I5265056ff315a5f3cd97ea11b18db0831b1b901d +Solution: Disable write-behind for samba group +BUG: 1948547 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/235876 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-samba | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/group-samba b/extras/group-samba +index eeee6e0..9611a1f 100644 +--- a/extras/group-samba ++++ b/extras/group-samba +@@ -9,3 +9,4 @@ performance.nl-cache=on + performance.nl-cache-timeout=600 + performance.readdir-ahead=on + performance.parallel-readdir=on ++performance.write-behind=off +-- +1.8.3.1 + diff --git a/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch b/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch new file mode 100644 index 0000000..29135df --- /dev/null +++ b/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch @@ -0,0 +1,545 @@ +From 23ab7175e64ab4d75fbcb6874008843cc78b65b8 Mon Sep 17 00:00:00 2001 +From: Ashish Pandey +Date: Fri, 16 Apr 2021 18:48:56 +0530 +Subject: [PATCH 541/542] glusterd-volgen: Add functionality to accept any + custom xlator + +Add new function which allow users to insert any custom xlators. +It makes to provide a way to add any processing into file operations. + +Users can deploy the plugin(xlator shared object) and integrate it to glusterfsd. 
+ +If users want to enable a custom xlator, do the following: + +1. put the xlator object (.so file) into "XLATOR_DIR/user/" +2. set the option user.xlator.<xlator> to the name of an existing xlator to specify its position in the graph +3. restart the gluster volume + +Options for the custom xlator can be set via "user.xlator.<xlator>.<optkey>". + +Backport of : +>https://github.com/gluster/glusterfs/commit/ea86b664f3b1f54901ce1b7d7fba7d80456f2089 +>Fixes: https://github.com/gluster/glusterfs/issues/1943 +>Change-Id: Ife3ae1514ea474f5dae2897223012f9d04b64674 +>Signed-off-by:Ryo Furuhashi +>Co-authored-by: Yaniv Kaul +>Co-authored-by: Xavi Hernandez + +Change-Id: Ic8f28bfcfde67213eb1092b0ebf4822c874d37bb +BUG: 1927235 +Signed-off-by: Ashish Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/236830 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Xavi Hernandez Juan +--- + cli/src/cli-rpc-ops.c | 148 ++++++++++++++++++++------ + cli/src/cli.h | 2 - + tests/basic/user-xlator.t | 65 ++++++++++++ + tests/env.rc.in | 3 + + xlators/mgmt/glusterd/src/glusterd-volgen.c | 155 ++++++++++++++++++++++++++++ + 5 files changed, 342 insertions(+), 31 deletions(-) + create mode 100755 tests/basic/user-xlator.t + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 4e91265..51b5447 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -2269,49 +2269,131 @@ out: + return ret; + } + +-char * +-is_server_debug_xlator(void *myframe) ++/* ++ * returns ++ * 1 : is server debug xlator ++ * 0 : is not server debug xlator ++ * <0 : error ++ */ ++static int ++is_server_debug_xlator(char *key, char *value) ++{ ++ if (!key || !value) ++ return -1; ++ ++ if (strcmp("debug.trace", key) == 0 || ++ strcmp("debug.error-gen", key) == 0) { ++ if (strcmp("client", value) == 0) ++ return 0; ++ else ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * returns ++ * 1 : is user xlator ++ * 0 : is not user xlator ++ * <0 : error ++ */ ++static int ++is_server_user_xlator(char
*key, char *value) ++{ ++ int ret = 0; ++ ++ if (!key || !value) ++ return -1; ++ ++ ret = fnmatch("user.xlator.*", key, 0); ++ if (ret < 0) { ++ ret = -1; ++ goto out; ++ } else if (ret == FNM_NOMATCH) { ++ ret = 0; ++ goto out; ++ } ++ ++ ret = fnmatch("user.xlator.*.*", key, 0); ++ if (ret < 0) { ++ ret = -1; ++ goto out; ++ } else if (ret != FNM_NOMATCH) { // this is user xlator's option key ++ ret = 0; ++ goto out; ++ } ++ ++ ret = 1; ++ ++out: ++ return ret; ++} ++ ++static int ++added_server_xlator(void *myframe, char **added_xlator) + { + call_frame_t *frame = NULL; + cli_local_t *local = NULL; + char **words = NULL; + char *key = NULL; + char *value = NULL; +- char *debug_xlator = NULL; ++ int ret = 0; + + frame = myframe; + local = frame->local; + words = (char **)local->words; + + while (*words != NULL) { +- if (strstr(*words, "trace") == NULL && +- strstr(*words, "error-gen") == NULL) { +- words++; +- continue; +- } +- + key = *words; + words++; + value = *words; +- if (value == NULL) ++ ++ if (!value) { + break; +- if (strstr(value, "client")) { +- words++; +- continue; +- } else { +- if (!(strstr(value, "posix") || strstr(value, "acl") || +- strstr(value, "locks") || strstr(value, "io-threads") || +- strstr(value, "marker") || strstr(value, "index"))) { +- words++; +- continue; +- } else { +- debug_xlator = gf_strdup(key); +- break; ++ } ++ ++ ret = is_server_debug_xlator(key, value); ++ if (ret < 0) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "failed to check that debug xlator was added"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (ret) { ++ *added_xlator = gf_strdup(key); ++ if (!*added_xlator) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "Out of memory"); ++ ret = -1; ++ goto out; ++ } ++ break; ++ } ++ ++ ret = is_server_user_xlator(key, value); ++ if (ret < 0) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "failed to check that user xlator was added"); ++ ret = -1; ++ goto out; ++ 
} ++ ++ if (ret) { ++ *added_xlator = gf_strdup(key); ++ if (!*added_xlator) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "Out of memory"); ++ ret = -1; ++ goto out; + } ++ break; + } + } + +- return debug_xlator; ++out: ++ return ret; + } + + int +@@ -2327,7 +2409,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + char msg[1024] = { + 0, + }; +- char *debug_xlator = NULL; ++ char *added_xlator = NULL; + char tmp_str[512] = { + 0, + }; +@@ -2365,18 +2447,26 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + * The process has to be restarted. So this is a check from the + * volume set option such that if debug xlators such as trace/errorgen + * are provided in the set command, warn the user. ++ * volume set option such that if user custom xlators or debug ++ * xlators such as trace/errorgen are provided in the set command, ++ * warn the user. + */ +- debug_xlator = is_server_debug_xlator(myframe); ++ ret = added_server_xlator(myframe, &added_xlator); ++ if (ret < 0) { ++ gf_log("cli", GF_LOG_ERROR, ++ "failed to check that server graph has been changed"); ++ goto out; ++ } + + if (dict_get_str(dict, "help-str", &help_str) && !msg[0]) + snprintf(msg, sizeof(msg), "Set volume %s", + (rsp.op_ret) ? "unsuccessful" : "successful"); +- if (rsp.op_ret == 0 && debug_xlator) { ++ if (rsp.op_ret == 0 && added_xlator) { + snprintf(tmp_str, sizeof(tmp_str), + "\n%s translator has been " + "added to the server volume file. 
Please restart the" + " volume for enabling the translator", +- debug_xlator); ++ added_xlator); + } + + if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) { +@@ -2394,7 +2484,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + cli_err("volume set: failed"); + } else { + if (help_str == NULL) { +- if (debug_xlator == NULL) ++ if (added_xlator == NULL) + cli_out("volume set: success"); + else + cli_out("volume set: success%s", tmp_str); +@@ -2408,7 +2498,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + out: + if (dict) + dict_unref(dict); +- GF_FREE(debug_xlator); ++ GF_FREE(added_xlator); + cli_cmd_broadcast_response(ret); + gf_free_xdr_cli_rsp(rsp); + return ret; +diff --git a/cli/src/cli.h b/cli/src/cli.h +index 7b4f446..b5b69ea 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -502,8 +502,6 @@ cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno, + int + cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict, + char *key); +-char * +-is_server_debug_xlator(void *myframe); + + int32_t + cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options, +diff --git a/tests/basic/user-xlator.t b/tests/basic/user-xlator.t +new file mode 100755 +index 0000000..a711f9f +--- /dev/null ++++ b/tests/basic/user-xlator.t +@@ -0,0 +1,65 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++#### patchy.dev.d-backends-patchy1.vol ++brick=${B0//\//-} ++SERVER_VOLFILE="/var/lib/glusterd/vols/${V0}/${V0}.${H0}.${brick:1}-${V0}1.vol" ++ ++cleanup; ++ ++TEST mkdir -p $B0/single-brick ++TEST mkdir -p ${GLUSTER_XLATOR_DIR}/user ++ ++## deploy dummy user xlator ++TEST cp ${GLUSTER_XLATOR_DIR}/playground/template.so ${GLUSTER_XLATOR_DIR}/user/hoge.so ++ ++TEST glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6}; ++TEST $CLI volume set $V0 user.xlator.hoge posix ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++ ++TEST $CLI volume set $V0 user.xlator.hoge.opt1 10 ++TEST grep -q '"option opt1 10"' ${SERVER_VOLFILE} ++TEST $CLI volume set $V0 user.xlator.hoge.opt2 hogehoge ++TEST grep -q '"option opt2 hogehoge"' ${SERVER_VOLFILE} ++TEST $CLI volume set $V0 user.xlator.hoge.opt3 true ++TEST grep -q '"option opt3 true"' ${SERVER_VOLFILE} ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ ++TEST $CLI volume set $V0 user.xlator.hoge trash ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ 
++TEST ! $CLI volume set $V0 user.xlator.hoge unknown ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} # When the CLI fails, the volfile is not modified. ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ ++#### teardown ++ ++TEST rm -f ${GLUSTER_XLATOR_DIR}/user/hoge.so ++cleanup; +diff --git a/tests/env.rc.in b/tests/env.rc.in +index c7472a7..1f0ca88 100644 +--- a/tests/env.rc.in ++++ b/tests/env.rc.in +@@ -40,3 +40,6 @@ export GLUSTER_LIBEXECDIR + + RUN_NFS_TESTS=@BUILD_GNFS@ + export RUN_NFS_TESTS ++ ++GLUSTER_XLATOR_DIR=@libdir@/glusterfs/@PACKAGE_VERSION@/xlator ++export GLUSTER_XLATOR_DIR +\ No newline at end of file +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 1920284..a242b5c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -45,6 +45,11 @@ struct gd_validate_reconf_opts { + + extern struct volopt_map_entry glusterd_volopt_map[]; + ++struct check_and_add_user_xlator_t { ++ volgen_graph_t *graph; ++ char *volname; ++}; ++ + #define RPC_SET_OPT(XL, CLI_OPT, XLATOR_OPT, ERROR_CMD) \ + do { \ + char *_value = NULL; \ +@@ -2822,6 +2827,145 @@ out: + return ret; + } + ++static gf_boolean_t ++check_user_xlator_position(dict_t *dict, char *key, data_t *value, ++ void *prev_xlname) ++{ ++ if (strncmp(key, "user.xlator.", SLEN("user.xlator.")) != 0) { ++ return false; ++ } ++ ++ if (fnmatch("user.xlator.*.*", key, 0) == 0) { ++ return false; ++ } ++ ++ char *value_str = data_to_str(value); 
++ if (!value_str) { ++ return false; ++ } ++ ++ if (strcmp(value_str, prev_xlname) == 0) { ++ gf_log("glusterd", GF_LOG_INFO, ++ "found insert position of user-xlator(%s)", key); ++ return true; ++ } ++ ++ return false; ++} ++ ++static int ++set_user_xlator_option(dict_t *set_dict, char *key, data_t *value, void *data) ++{ ++ xlator_t *xl = data; ++ char *optname = strrchr(key, '.') + 1; ++ ++ gf_log("glusterd", GF_LOG_DEBUG, "set user xlator option %s = %s", key, ++ value->data); ++ ++ return xlator_set_option(xl, optname, strlen(optname), data_to_str(value)); ++} ++ ++static int ++insert_user_xlator_to_graph(dict_t *set_dict, char *key, data_t *value, ++ void *action_data) ++{ ++ int ret = -1; ++ ++ struct check_and_add_user_xlator_t *data = action_data; ++ ++ char *xlator_name = strrchr(key, '.') + 1; // user.xlator. ++ char *xlator_option_matcher = NULL; ++ char *type = NULL; ++ xlator_t *xl = NULL; ++ ++ // convert optkey to xlator type ++ if (gf_asprintf(&type, "user/%s", xlator_name) < 0) { ++ gf_log("glusterd", GF_LOG_ERROR, "failed to generate user-xlator type"); ++ goto out; ++ } ++ ++ gf_log("glusterd", GF_LOG_INFO, "add user xlator=%s to graph", type); ++ ++ xl = volgen_graph_add(data->graph, type, data->volname); ++ if (!xl) { ++ goto out; ++ } ++ ++ ret = gf_asprintf(&xlator_option_matcher, "user.xlator.%s.*", xlator_name); ++ if (ret < 0) { ++ gf_log("glusterd", GF_LOG_ERROR, ++ "failed to generate user-xlator option matcher"); ++ goto out; ++ } ++ ++ dict_foreach_fnmatch(set_dict, xlator_option_matcher, ++ set_user_xlator_option, xl); ++ ++out: ++ if (type) ++ GF_FREE(type); ++ if (xlator_option_matcher) ++ GF_FREE(xlator_option_matcher); ++ ++ return ret; ++} ++ ++static int ++validate_user_xlator_position(dict_t *this, char *key, data_t *value, ++ void *unused) ++{ ++ int ret = -1; ++ int i = 0; ++ ++ if (!value) ++ goto out; ++ ++ if (fnmatch("user.xlator.*.*", key, 0) == 0) { ++ ret = 0; ++ goto out; ++ } ++ ++ char *value_str = 
data_to_str(value); ++ if (!value_str) ++ goto out; ++ ++ int num_xlators = sizeof(server_graph_table) / ++ sizeof(server_graph_table[0]); ++ for (i = 0; i < num_xlators; i++) { ++ if (server_graph_table[i].dbg_key && ++ strcmp(value_str, server_graph_table[i].dbg_key) == 0) { ++ ret = 0; ++ goto out; ++ } ++ } ++ ++out: ++ if (ret == -1) ++ gf_log("glusterd", GF_LOG_ERROR, "invalid user xlator position %s = %s", ++ key, value->data); ++ ++ return ret; ++} ++ ++static int ++check_and_add_user_xl(volgen_graph_t *graph, dict_t *set_dict, char *volname, ++ char *prev_xlname) ++{ ++ if (!prev_xlname) ++ goto out; ++ ++ struct check_and_add_user_xlator_t data = {.graph = graph, ++ .volname = volname}; ++ ++ if (dict_foreach_match(set_dict, check_user_xlator_position, prev_xlname, ++ insert_user_xlator_to_graph, &data) < 0) { ++ return -1; ++ } ++ ++out: ++ return 0; ++} ++ + static int + server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +@@ -2831,6 +2975,12 @@ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + char *loglevel = NULL; + int i = 0; + ++ if (dict_foreach_fnmatch(set_dict, "user.xlator.*", ++ validate_user_xlator_position, NULL) < 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ + i = sizeof(server_graph_table) / sizeof(server_graph_table[0]) - 1; + + while (i >= 0) { +@@ -2848,6 +2998,11 @@ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + if (ret) + goto out; + ++ ret = check_and_add_user_xl(graph, set_dict, volinfo->volname, ++ server_graph_table[i].dbg_key); ++ if (ret) ++ goto out; ++ + i--; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch b/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch new file mode 100644 index 0000000..f6e0641 --- /dev/null +++ b/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch @@ -0,0 +1,64 @@ +From f3db0c99faf813e0f2e9ffcf599416555a59df1f Mon Sep 17 00:00:00 
2001 +From: Ashish Pandey +Date: Tue, 9 Feb 2021 16:43:35 +0530 +Subject: [PATCH 542/542] xlaotrs/mgmt: Fixing coverity issue 1445996 + +Backport of https://github.com/gluster/glusterfs/pull/2148/commits/9785e96e0bdf6e60896570fdf5e4a6976a6f60ba + +Fixing "Null pointer dereferences" + +BUG: 1927235 +Change-Id: Idbc014e1302d2450f97bccd028681198c0d97424 +Signed-off-by: Ashish Pandey +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/237433 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index a242b5c..71aed08 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -2916,21 +2916,23 @@ validate_user_xlator_position(dict_t *this, char *key, data_t *value, + { + int ret = -1; + int i = 0; ++ char *value_str = NULL; + + if (!value) + goto out; + ++ value_str = data_to_str(value); ++ if (!value_str) ++ goto out; ++ + if (fnmatch("user.xlator.*.*", key, 0) == 0) { + ret = 0; + goto out; + } + +- char *value_str = data_to_str(value); +- if (!value_str) +- goto out; +- + int num_xlators = sizeof(server_graph_table) / + sizeof(server_graph_table[0]); ++ + for (i = 0; i < num_xlators; i++) { + if (server_graph_table[i].dbg_key && + strcmp(value_str, server_graph_table[i].dbg_key) == 0) { +@@ -2942,7 +2944,7 @@ validate_user_xlator_position(dict_t *this, char *key, data_t *value, + out: + if (ret == -1) + gf_log("glusterd", GF_LOG_ERROR, "invalid user xlator position %s = %s", +- key, value->data); ++ key, value_str); + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch b/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch new file mode 100644 index 0000000..c6194c7 --- 
/dev/null +++ b/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch @@ -0,0 +1,162 @@ +From 71fc5b7949e00c4448f5ec1291e756b201a70082 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 29 Apr 2021 18:34:57 +0530 +Subject: [PATCH 543/543] glusterd: handle custom xlator failure cases + +Problem-1: +custom xlator insertion was failing for those xlators in the brick graph +whose dbg_key was NULL in the server_graph_table. Looking at the git log, +the dbg_key was added in commit d1397dbd7d6cdbd2d81d5d36d608b6175d449db4 +for inserting debug xlators. + +Fix: I think it is fine to define it for all brick xlators below server. + +Problem-2: +In the commit-op phase, glusterd_op_set_volume() updates the volinfo +dict with the key-value pairs and then proceeds to create the volfiles. +If any of the steps fail, the volinfo dict retains those key-values, +until glusterd is restarted or `gluster vol reset $VOLNAME` is issued. + +Fix: +Make a copy of the volinfo dict and if there are any failures in +proceeding with the set volume logic, restore the dict to its original +state. 
+ +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2371 +> Change-Id: I9010dab33d0139b8e6d603308e331b6d220a4849 +> Updates: #2370 +> Signed-off-by: Ravishankar N + +Change-Id: I9010dab33d0139b8e6d603308e331b6d220a4849 +BUG: 1953901 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/239889 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/user-xlator.t | 16 ++++++++++++++-- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 16 ++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 14 +++++++------- + 3 files changed, 37 insertions(+), 9 deletions(-) + +diff --git a/tests/basic/user-xlator.t b/tests/basic/user-xlator.t +index a711f9f..ed2d831 100755 +--- a/tests/basic/user-xlator.t ++++ b/tests/basic/user-xlator.t +@@ -35,8 +35,18 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 + +-TEST $CLI volume set $V0 user.xlator.hoge trash +-TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++# Test that the insertion at all positions between server and posix is successful. ++# It is not guaranteed that the brick process will start/work in all positions though. ++TESTS_EXPECTED_IN_LOOP=34 ++declare -a brick_side_xlators=("decompounder" "io-stats" "quota" "index" "barrier" ++ "marker" "selinux" "io-threads" "upcall" "leases" ++ "read-only" "worm" "locks" "access-control" ++ "bitrot-stub" "changelog" "trash") ++for xlator in "${brick_side_xlators[@]}" ++ do ++ TEST_IN_LOOP $CLI volume set $V0 user.xlator.hoge $xlator ++ TEST_IN_LOOP grep -q 'user/hoge' ${SERVER_VOLFILE} ++ done + + TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 +@@ -49,6 +59,8 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 + + TEST ! 
$CLI volume set $V0 user.xlator.hoge unknown + TEST grep -q 'user/hoge' ${SERVER_VOLFILE} # When the CLI fails, the volfile is not modified. ++# User xlator insert failures must not prevent setting other volume options. ++TEST $CLI volume set $V0 storage.reserve 10% + + TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 1e84f5f..893af29 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2911,6 +2911,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + uint32_t new_op_version = 0; + gf_boolean_t quorum_action = _gf_false; + glusterd_svc_t *svc = NULL; ++ dict_t *volinfo_dict_orig = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -2918,6 +2919,10 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + priv = this->private; + GF_ASSERT(priv); + ++ volinfo_dict_orig = dict_new(); ++ if (!volinfo_dict_orig) ++ goto out; ++ + ret = dict_get_int32n(dict, "count", SLEN("count"), &dict_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +@@ -2949,6 +2954,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + } + ++ if (dict_copy(volinfo->dict, volinfo_dict_orig) == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ + /* TODO: Remove this once v3.3 compatibility is not required */ + check_op_version = dict_get_str_boolean(dict, "check-op-version", + _gf_false); +@@ -3171,6 +3181,12 @@ out: + gf_msg_debug(this->name, 0, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action(); ++ if (ret < 0 && count > 1) { ++ if (dict_reset(volinfo->dict) == 0) ++ dict_copy(volinfo_dict_orig, volinfo->dict); ++ } ++ if (volinfo_dict_orig) ++ dict_unref(volinfo_dict_orig); + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 71aed08..aa85bdb 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -2706,24 +2706,24 @@ out: + static volgen_brick_xlator_t server_graph_table[] = { + {brick_graph_add_server, NULL}, + {brick_graph_add_decompounder, "decompounder"}, +- {brick_graph_add_io_stats, "NULL"}, ++ {brick_graph_add_io_stats, "io-stats"}, + {brick_graph_add_sdfs, "sdfs"}, + {brick_graph_add_namespace, "namespace"}, +- {brick_graph_add_cdc, NULL}, ++ {brick_graph_add_cdc, "cdc" }, + {brick_graph_add_quota, "quota"}, + {brick_graph_add_index, "index"}, +- {brick_graph_add_barrier, NULL}, ++ {brick_graph_add_barrier, "barrier" }, + {brick_graph_add_marker, "marker"}, + {brick_graph_add_selinux, "selinux"}, + {brick_graph_add_fdl, "fdl"}, + {brick_graph_add_iot, "io-threads"}, + {brick_graph_add_upcall, "upcall"}, + {brick_graph_add_leases, "leases"}, +- {brick_graph_add_pump, NULL}, +- {brick_graph_add_ro, NULL}, +- {brick_graph_add_worm, NULL}, ++ {brick_graph_add_pump, "pump" }, ++ {brick_graph_add_ro, "read-only" }, ++ {brick_graph_add_worm, "worm" }, + {brick_graph_add_locks, "locks"}, +- {brick_graph_add_acl, "acl"}, ++ {brick_graph_add_acl, "access-control"}, + {brick_graph_add_bitrot_stub, "bitrot-stub"}, + {brick_graph_add_changelog, "changelog"}, + #if USE_GFDB /* changetimerecorder depends on gfdb */ +-- +1.8.3.1 + diff --git a/SOURCES/0544-RHGS-3.5.4-rebuild-to-ship-with-RHEL-8.5.patch b/SOURCES/0544-RHGS-3.5.4-rebuild-to-ship-with-RHEL-8.5.patch new file mode 100644 index 0000000..171ed10 --- /dev/null +++ b/SOURCES/0544-RHGS-3.5.4-rebuild-to-ship-with-RHEL-8.5.patch @@ -0,0 +1,47 @@ +From 840f437d232fbafac9f4448b0f8d0e9976ea1e1d Mon Sep 17 00:00:00 2001 +From: Tamar Shacked +Date: Mon, 23 Aug 2021 20:46:13 +0300 +Subject: [PATCH 544/544] RHGS-3.5.4: rebuild to ship with RHEL-8.5 + +Label: DOWNSTREAM ONLY +BUG: 1996984 + +Signed-off-by: Tamar Shacked +Change-Id: Idafc64b8ee5da165c87428b8a5166cf319ef7660 +Reviewed-on: 
https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/267350 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 2 ++ + rfc.sh | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 2be7677..4511979 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1982,6 +1982,8 @@ fi + %endif + + %changelog ++* Tue Aug 24 2021 Tamar Shacked ++- build RGHS client for RHEL-8.5 (#1996984) + + * Mon May 11 2020 Sunny Kumar + - added requires policycoreutils-python-utils on rhel8 for geo-replication +diff --git a/rfc.sh b/rfc.sh +index c0559b9..b1153be 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -315,7 +315,7 @@ main() + if [ -z "${reference}" ]; then + $drier git push $ORIGIN HEAD:refs/for/$branch/rfc; + else +- $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference}; ++ $drier git push $ORIGIN HEAD:refs/for/$branch; + fi + } + +-- +1.8.3.1 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec index 905084f..c0e2ed4 100644 --- a/SPECS/glusterfs.spec +++ b/SPECS/glusterfs.spec @@ -237,7 +237,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 49.1%{?dist} +Release: 56.4%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -795,7 +795,70 @@ Patch0477: 0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch Patch0478: 0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch Patch0479: 0479-ganesha-ha-revised-regex-exprs-for-status.patch Patch0480: 0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch -Patch0481: 0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch +Patch0481: 0481-Update-rfc.sh-to-rhgs-3.5.4.patch +Patch0482: 0482-logger-Always-print-errors-in-english.patch +Patch0483: 0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch +Patch0484: 0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch +Patch0485: 
0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch +Patch0486: 0486-glusterd-brick-sock-file-deleted-log-error-1560.patch +Patch0487: 0487-Events-Log-file-not-re-opened-after-logrotate.patch +Patch0488: 0488-glusterd-afr-enable-granular-entry-heal-by-default.patch +Patch0489: 0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch +Patch0490: 0490-Segmentation-fault-occurs-during-truncate.patch +Patch0491: 0491-glusterd-mount-directory-getting-truncated-on-mounti.patch +Patch0492: 0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch +Patch0493: 0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch +Patch0494: 0494-glusterd-start-the-brick-on-a-different-port.patch +Patch0495: 0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch +Patch0496: 0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch +Patch0497: 0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch +Patch0498: 0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch +Patch0499: 0499-gfapi-give-appropriate-error-when-size-exceeds.patch +Patch0500: 0500-features-shard-Convert-shard-block-indices-to-uint64.patch +Patch0501: 0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch +Patch0502: 0502-dht-fixing-a-permission-update-issue.patch +Patch0503: 0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch +Patch0504: 0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch +Patch0505: 0505-trash-Create-inode_table-only-while-feature-is-enabl.patch +Patch0506: 0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch +Patch0507: 0507-inode-make-critical-section-smaller.patch +Patch0508: 0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch +Patch0509: 0509-core-configure-optimum-inode-table-hash_size-for-shd.patch +Patch0510: 0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch +Patch0511: 0511-features-shard-Missing-format-specifier.patch +Patch0512: 
0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch +Patch0513: 0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch +Patch0514: 0514-afr-event-gen-changes.patch +Patch0515: 0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch +Patch0516: 0516-afr-return-EIO-for-gfid-split-brains.patch +Patch0517: 0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch +Patch0518: 0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch +Patch0519: 0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch +Patch0520: 0520-performance-open-behind-seek-fop-should-open_and_res.patch +Patch0521: 0521-open-behind-fix-missing-fd-reference.patch +Patch0522: 0522-lcov-improve-line-coverage.patch +Patch0523: 0523-open-behind-rewrite-of-internal-logic.patch +Patch0524: 0524-open-behind-fix-call_frame-leak.patch +Patch0525: 0525-open-behind-implement-create-fop.patch +Patch0526: 0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch +Patch0527: 0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch +Patch0528: 0528-Extras-Removing-xattr_analysis-script.patch +Patch0529: 0529-geo-rep-prompt-should-work-for-ignore_deletes.patch +Patch0530: 0530-gfapi-avoid-crash-while-logging-message.patch +Patch0531: 0531-Glustereventsd-Default-port-change-2091.patch +Patch0532: 0532-glusterd-fix-for-starting-brick-on-new-port.patch +Patch0533: 0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch +Patch0534: 0534-glusterd-Resolve-use-after-free-bug-2181.patch +Patch0535: 0535-multiple-files-use-dict_allocate_and_serialize-where.patch +Patch0536: 0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch +Patch0537: 0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch +Patch0538: 0538-afr-fix-coverity-issue-introduced-by-90cefde.patch +Patch0539: 0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch +Patch0540: 0540-extras-Disable-write-behind-for-group-samba.patch +Patch0541: 
0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch +Patch0542: 0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch +Patch0543: 0543-glusterd-handle-custom-xlator-failure-cases.patch +Patch0544: 0544-RHGS-3.5.4-rebuild-to-ship-with-RHEL-8.5.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -1936,7 +1999,6 @@ exit 0 %if ( 0%{!?_without_server:1} ) %files server %doc extras/clear_xattrs.sh -%{_datadir}/glusterfs/scripts/xattr_analysis.py* %{_datadir}/glusterfs/scripts/quota_fsck.py* # sysconf %config(noreplace) %{_sysconfdir}/glusterfs @@ -2539,8 +2601,42 @@ fi %endif %changelog -* Fri Feb 19 2021 Gluster Jenkins - 6.0-49.1 -- fixes bugs bz#1930561 +* Mon Aug 30 2021 Gluster Jenkins - 6.0-56.4 +- Add gating.yaml, fixes bugs bz#1996984 + +* Tue Aug 24 2021 Gluster Jenkins - 6.0-56.3 +- fixes bugs bz#1996984 + +* Thu May 06 2021 Gluster Jenkins - 6.0-56.2 +- fixes bugs bz#1953901 + +* Thu Apr 22 2021 Gluster Jenkins - 6.0-56.1 +- fixes bugs bz#1927235 + +* Wed Apr 14 2021 Gluster Jenkins - 6.0-56 +- fixes bugs bz#1948547 + +* Fri Mar 19 2021 Gluster Jenkins - 6.0-55 +- fixes bugs bz#1939372 + +* Wed Mar 03 2021 Gluster Jenkins - 6.0-54 +- fixes bugs bz#1832306 bz#1911292 bz#1924044 + +* Thu Feb 11 2021 Gluster Jenkins - 6.0-53 +- fixes bugs bz#1224906 bz#1691320 bz#1719171 bz#1814744 bz#1865796 + +* Thu Jan 28 2021 Gluster Jenkins - 6.0-52 +- fixes bugs bz#1600459 bz#1719171 bz#1830713 bz#1856574 + +* Mon Dec 28 2020 Gluster Jenkins - 6.0-51 +- fixes bugs bz#1640148 bz#1856574 bz#1910119 + +* Tue Dec 15 2020 Gluster Jenkins - 6.0-50 +- fixes bugs bz#1224906 bz#1412494 bz#1612973 bz#1663821 bz#1691320 + bz#1726673 bz#1749304 bz#1752739 bz#1779238 bz#1813866 bz#1814744 bz#1821599 + bz#1832306 bz#1835229 bz#1842449 bz#1865796 bz#1878077 bz#1882923 bz#1885966 + bz#1890506 bz#1896425 bz#1898776 bz#1898777 bz#1898778 bz#1898781 bz#1898784 + bz#1903468 * Wed Nov 25 2020 Gluster Jenkins - 6.0-49 - fixes bugs bz#1286171