diff --git a/README.debrand b/README.debrand deleted file mode 100644 index 01c46d2..0000000 --- a/README.debrand +++ /dev/null @@ -1,2 +0,0 @@ -Warning: This package was configured for automatic debranding, but the changes -failed to apply. diff --git a/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch b/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch new file mode 100644 index 0000000..adde426 --- /dev/null +++ b/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch @@ -0,0 +1,38 @@ +From 2a4f19df70276ba41db19938507297f7580286fa Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Fri, 25 Oct 2019 18:07:27 +0530 +Subject: [PATCH 314/314] glusterd/tier: is_tier_enabled inserted causing + checksum mismatch + +the volfile entry is_tier_enabled is checked for version 3.7.6 while it was +supposed to check for 3.10. this is to fix it downstream only but changing the +version of check to 3.13.1 + +Label: DOWNSTREAM ONLY +BUG: 1765555 +Change-Id: Id631f3ba520b3e7b126c7607dca1bb7874532e81 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/183932 +Reviewed-by: Sanju Rakonde +Tested-by: Sanju Rakonde +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 4889217..8a10eb8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -1036,7 +1036,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo) + if (ret) + goto out; + } +- if (conf->op_version >= GD_OP_VERSION_3_10_0) { ++ if (conf->op_version >= GD_OP_VERSION_3_13_1) { + snprintf(buf, sizeof(buf), "%d", volinfo->is_tier_enabled); + ret = gf_store_save_value(fd, GF_TIER_ENABLED, buf); + if (ret) +-- +1.8.3.1 + diff --git 
a/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch b/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch new file mode 100644 index 0000000..a0448cc --- /dev/null +++ b/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch @@ -0,0 +1,52 @@ +From 4a04e1b5540921db22f1894f71eb30342127192d Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 12 Nov 2019 21:53:20 +0530 +Subject: [PATCH 315/316] geo-rep: Fix py2/py3 compatibility in repce + +Geo-rep fails to start on python2 only machine like +centos6. It fails with "ImportError no module named _io". +This patch fixes the same. + +Backport of: + > Patch: https://review.gluster.org/23702 + > fixes: bz#1771577 + > Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec + > Signed-off-by: Kotresh HR + +BUG: 1771524 +Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/185377 +Tested-by: RHGS Build Bot +Reviewed-by: Sunny Kumar +--- + geo-replication/syncdaemon/repce.py | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py +index 6065b82..c622afa 100644 +--- a/geo-replication/syncdaemon/repce.py ++++ b/geo-replication/syncdaemon/repce.py +@@ -8,7 +8,6 @@ + # cases as published by the Free Software Foundation. + # + +-import _io + import os + import sys + import time +@@ -58,9 +57,9 @@ def recv(inf): + """load an object from input stream + python2 and python3 compatibility, inf is sys.stdin + and is opened as text stream by default. 
Hence using the +- buffer attribute ++ buffer attribute in python3 + """ +- if isinstance(inf, _io.TextIOWrapper): ++ if hasattr(inf, "buffer"): + return pickle.load(inf.buffer) + else: + return pickle.load(inf) +-- +1.8.3.1 + diff --git a/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch b/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch new file mode 100644 index 0000000..c2045a0 --- /dev/null +++ b/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch @@ -0,0 +1,51 @@ +From b9a19aef5de94eb91162448ad687f2d2d194f82c Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 14 Nov 2019 09:55:15 +0000 +Subject: [PATCH 316/316] spec: fixed python-prettytable dependency for rhel6 + +Installing glusterfs on rhel6 was failing with python-prettytable +dependency as it required python2-prettytable for glusterfs-events. +This patch conditionally sets the python version for rhel7 and +fixes the problem. + +Label: DOWNSTREAM ONLY + +BUG: 1771614 + +Change-Id: I6288daa5d8c2d82a6d73a0d9722786a2a99b9db5 +fixes: bz#1771614 +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185385 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 3c2e2dc..eeadb65 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -706,7 +706,7 @@ This package provides the translators needed on any GlusterFS client. 
+ %package events + Summary: GlusterFS Events + Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: python%{_pythonver} python%{_pythonver}-prettytable ++Requires: python%{_pythonver} + Requires: python%{_pythonver}-gluster = %{version}-%{release} + %if ( 0%{?rhel} && 0%{?rhel} < 8 ) + Requires: python-requests +@@ -714,7 +714,10 @@ Requires: python-requests + Requires: python%{_pythonver}-requests + %endif + %if ( 0%{?rhel} && 0%{?rhel} < 7 ) ++Requires: python-prettytable + Requires: python-argparse ++%else ++Requires: python%{_pythonver}-prettytable + %endif + %if ( 0%{?_with_systemd:1} ) + %{?systemd_requires} +-- +1.8.3.1 + diff --git a/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch b/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch new file mode 100644 index 0000000..eccf2e3 --- /dev/null +++ b/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch @@ -0,0 +1,43 @@ +From 985ef94c63859907339c11b158e4540a5568d638 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Mon, 18 Nov 2019 02:25:25 -0500 +Subject: [PATCH 317/335] Update rfc.sh to rhgs-3.5.1 + +Signed-off-by: Rinku Kothiya +--- + README | 9 +++++++++ + rfc.sh | 2 +- + 2 files changed, 10 insertions(+), 1 deletion(-) + create mode 100644 README + +diff --git a/README b/README +new file mode 100644 +index 0000000..44a118b +--- /dev/null ++++ b/README +@@ -0,0 +1,9 @@ ++ ++'master' branch is just dummy branch in downstream. Any reference to 'upstream' ++will point to http://git.gluster.org. ++ ++You can checkout the release specific branch by running below command ++ bash$ git checkout -t -b rhs-x.y origin/rhs-x.y ++ ++Happy Hacking!! 
++ +diff --git a/rfc.sh b/rfc.sh +index 94c92ef..69ddd2b 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.0"; ++branch="rhgs-3.5.1"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch b/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch new file mode 100644 index 0000000..e65ae38 --- /dev/null +++ b/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch @@ -0,0 +1,114 @@ +From 1f03327887645be2500cd29f69f7a77a4f5d0164 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Mon, 18 Nov 2019 14:25:12 -0500 +Subject: [PATCH 318/335] Update rfc.sh to rhgs-3.5.1 + +Removed the checks for updates and fixes from rfc.sh + +Label: DOWNSTREAM ONLY + +Change-Id: I436c959aa3b3366cd313b29f41c2466c4072efd7 +Signed-off-by: Rinku Kothiya +--- + rfc.sh | 47 ++++++++--------------------------------------- + 1 file changed, 8 insertions(+), 39 deletions(-) + +diff --git a/rfc.sh b/rfc.sh +index 69ddd2b..918fb11 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -129,13 +129,8 @@ editor_mode() + + if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then + # see note above function warn_reference_missing for regex elaboration +- # Lets first check for github issues +- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?#[[:digit:]]+" | awk -F '#' '{print $2}'); +- if [ "x${ref}" = "x" ]; then +- # if not found, check for bugs +- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+bz#[[:digit:]]+" | awk -F '#' '{print $2}'); +- fi + ++ ref=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+") + if [ "x${ref}" != "x" ]; then + return; + fi +@@ -157,16 +152,6 @@ editor_mode() + bz_string="" + fi + +- echo "Select yes '(y)' if this patch fixes the bug/feature completely," +- echo -n "or is the last of the patchset which brings feature (Y/n): " 
+- read fixes +- fixes_string="fixes" +- if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then +- fixes_string="updates" +- fi +- +- sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: ${bz_string}#${bug}/;}" $1 > $1.new && \ +- mv $1.new $1; + return; + done + fi +@@ -234,8 +219,8 @@ check_patches_for_coding_style() + # IOW, the above helps us find the pattern with leading or training spaces + # or non word consituents like , or ; + # +-# [fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS]) +-# Finds 'fixes' OR 'updates' in any case combination ++# [bB][uU][gG] ++# Finds 'bug' in any case + # + # (:)? + # Followed by an optional : (colon) +@@ -256,28 +241,11 @@ warn_reference_missing() + echo "" + echo "=== Missing a reference in commit! ===" + echo "" +- echo "Gluster commits are made with a reference to a bug or a github issue" +- echo "" +- echo "Submissions that are enhancements (IOW, not functional" +- echo "bug fixes, but improvements of any nature to the code) are tracked" +- echo "using github issues [1]." ++ echo "You must give BUG: " + echo "" +- echo "Submissions that are bug fixes are tracked using Bugzilla [2]." 
++ echo "for example:" + echo "" +- echo "A check on the commit message, reveals that there is no bug or" +- echo "github issue referenced in the commit message" +- echo "" +- echo "[1] https://github.com/gluster/glusterfs/issues/new" +- echo "[2] https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS" +- echo "" +- echo "Please file an issue or a bug report and reference the same in the" +- echo "commit message using the following tags:" +- echo "GitHub Issues:" +- echo "\"Fixes: gluster/glusterfs#n\" OR \"Updates: gluster/glusterfs#n\"," +- echo "\"Fixes: #n\" OR \"Updates: #n\"," +- echo "Bugzilla ID:" +- echo "\"Fixes: bz#n\" OR \"Updates: bz#n\"," +- echo "where n is the issue or bug number" ++ echo "BUG: 1234567" + echo "" + echo "You may abort the submission choosing 'N' below and use" + echo "'git commit --amend' to add the issue reference before posting" +@@ -312,7 +280,7 @@ main() + assert_diverge; + + # see note above function warn_reference_missing for regex elaboration +- reference=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?(bz)?#[[:digit:]]+" | awk -F '#' '{print $2}'); ++ reference=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+" | awk '{print $2}') + + # If this is a commit against master and does not have a bug ID or a github + # issue reference. Warn the contributor that one of the 2 is required +@@ -320,6 +288,7 @@ main() + warn_reference_missing; + fi + ++ + # TODO: add clang-format command here. It will after the changes are done everywhere else + clang_format=$(clang-format --version) + if [ ! 
-z "${clang_format}" ]; then +-- +1.8.3.1 + diff --git a/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch b/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch new file mode 100644 index 0000000..d37efaf --- /dev/null +++ b/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch @@ -0,0 +1,48 @@ +From 659bd2a0fde9ba0cb8fc3905bcdb63d91e3dfa9d Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat +Date: Tue, 2 Jul 2019 16:50:23 -0400 +Subject: [PATCH 319/335] features/snapview-server: obtain the list of + snapshots inside the lock + +The current list of snapshots from priv->dirents is obtained outside +the lock. + +Upstream patch: +> Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177 +> Fixes: bz#1726783 +> Signed-off-by: Raghavendra Bhat +> patch: https://review.gluster.org/#/c/glusterfs/+/22990/ + +BUG: 1731513 +Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177 +Signed-off-by: Raghavendra Bhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/185838 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/snapview-server/src/snapview-server-mgmt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c +index bc415ef..3d64383 100644 +--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c ++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c +@@ -256,7 +256,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + this = frame->this; + ctx = frame->this->ctx; + priv = this->private; +- old_dirents = priv->dirents; + + if (!ctx) { + errno = EINVAL; +@@ -388,6 +387,7 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + LOCK(&priv->snaplist_lock); + { + oldcount = priv->num_snaps; ++ old_dirents = priv->dirents; + for (i = 0; i < priv->num_snaps; i++) { + 
for (j = 0; j < snapcount; j++) { + if ((!strcmp(old_dirents[i].name, dirents[j].name)) && +-- +1.8.3.1 + diff --git a/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch b/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch new file mode 100644 index 0000000..48a9cad --- /dev/null +++ b/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch @@ -0,0 +1,58 @@ +From 7e5d8dcb4f557eaca259e8d81cf34d651907396c Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Thu, 24 Oct 2019 12:24:35 +0530 +Subject: [PATCH 320/335] gf-event: Handle unix volfile-servers + +Problem: +glfsheal program uses unix-socket-based volfile server. +volfile server will be the path to socket in this case. +gf_event expects this to be hostname in all cases. So getaddrinfo +will fail on the unix-socket path, events won't be sent in this case. + +Fix: +In case of unix sockets, default to localhost + +upstream-patch: https://review.gluster.org/c/glusterfs/+/23606 +BUG: 1758923 +Change-Id: I60d27608792c29d83fb82beb5fde5ef4754bece8 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/185851 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/events.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 9d33783..4e2f8f9 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -43,6 +43,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + struct addrinfo *result = NULL; + xlator_t *this = THIS; + int sin_family = AF_INET; ++ char *volfile_server_transport = NULL; + + /* Global context */ + ctx = THIS->ctx; +@@ -62,8 +63,16 @@ _gf_event(eventtypes_t event, const char *fmt, ...) 
+ memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + ++ if (ctx) { ++ volfile_server_transport = ctx->cmd_args.volfile_server_transport; ++ } ++ ++ if (!volfile_server_transport) { ++ volfile_server_transport = "tcp"; ++ } + /* Get Host name to send message */ +- if (ctx && ctx->cmd_args.volfile_server) { ++ if (ctx && ctx->cmd_args.volfile_server && ++ (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set + use that information to push the events. */ + if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints, +-- +1.8.3.1 + diff --git a/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch b/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch new file mode 100644 index 0000000..8dec96f --- /dev/null +++ b/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch @@ -0,0 +1,55 @@ +From 5e7a2ad35a174d6d0ee5ed58a3e27955e85aa47c Mon Sep 17 00:00:00 2001 +From: kshithijiyer +Date: Mon, 24 Jun 2019 20:08:48 +0530 +Subject: [PATCH 321/335] Adding white spaces to description of set group. 
+ +The description of set group is missing spaces which +leads to the description look like: +volume set group - This option can be used for +setting multiple pre-defined volume optionswhere group_name is a +file under /var/lib/glusterd/groups containing onekey, value pair +per line + +Instead of: +volume set group - This option can be used for +setting multiple pre-defined volume options where group_name is a +file under /var/lib/glusterd/groups containing one key value +pair per line + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22934/ +> Fixes: bz#1723455 +> Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f +> Signed-off-by: kshithijiyer + +BUG: 1724021 +Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185756 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 6b958bd..66beb1b 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3393,10 +3393,10 @@ struct cli_cmd volume_cmds[] = { + {"volume set ", cli_cmd_volume_set_cbk, + "set options for volume "}, + +- {"volume set group ", cli_cmd_volume_set_cbk, +- "This option can be used for setting multiple pre-defined volume options" +- "where group_name is a file under /var/lib/glusterd/groups containing one" +- "key, value pair per line"}, ++ {"volume set group ", cli_cmd_volume_set_cbk, ++ "This option can be used for setting multiple pre-defined volume options " ++ "where group_name is a file under /var/lib/glusterd/groups containing one " ++ "key value pair per line"}, + + {"volume log rotate [BRICK]", cli_cmd_log_rotate_cbk, + "rotate the log file for corresponding volume/brick"}, +-- +1.8.3.1 + diff --git a/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch 
b/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch new file mode 100644 index 0000000..35a234b --- /dev/null +++ b/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch @@ -0,0 +1,65 @@ +From 9be255f76c78fcbbda1e3a72eb2e99d3aface53e Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 16 Oct 2019 23:26:03 +0530 +Subject: [PATCH 322/335] glusterd: display correct rebalance data size after + glusterd restart + +Problem: After completion of rebalance, if glusterd is restarted, +rebalance status displays wrong rebalance data size in its output. + +Cause: While glusterd restoring the information from /var/lib/glusterd/ +into its memory, glusterd fetches rebalance_data from +/var/lib/glusterd/vols/volname/node_state.info. This value is +converted into an integer using atoi(), which is returning +incorrect value for larger values. + +Solution: use sscanf() instead of atoi() to convert string to +integer(in this case it is unsigned long) + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23560/ +> fixes: bz#1762438 +> Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e +> Signed-off-by: Sanju Rakonde + +BUG: 1761486 +Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185752 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 8a10eb8..b3b5ee9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -2974,19 +2974,19 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo) + volinfo->rebal.op = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, + 
SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) { +- volinfo->rebal.rebalance_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) { +- volinfo->rebal.rebalance_data = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) { +- volinfo->rebal.lookedup_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) { +- volinfo->rebal.rebalance_failures = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) { +- volinfo->rebal.skipped_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) { + volinfo->rebal.rebalance_time = atoi(value); +-- +1.8.3.1 + diff --git a/SOURCES/0323-cli-display-detailed-rebalance-info.patch b/SOURCES/0323-cli-display-detailed-rebalance-info.patch new file mode 100644 index 0000000..a00faf8 --- /dev/null +++ b/SOURCES/0323-cli-display-detailed-rebalance-info.patch @@ -0,0 +1,101 @@ +From 852c475040a599ed35798dbb388c6b59c1d0a820 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 22 Oct 2019 15:06:29 +0530 +Subject: [PATCH 323/335] cli: display detailed rebalance info + +Problem: When one of the node is down in cluster, +rebalance status is not displaying detailed +information. + +Cause: In glusterd_volume_rebalance_use_rsp_dict() +we are aggregating rsp from all the nodes into a +dictionary and sending it to cli for printing. 
While +assigning a index to keys we are considering all the +peers instead of considering only the peers which are +up. Because of which, index is not reaching till 1. +while parsing the rsp cli unable to find status-1 +key in dictionary and going out without printing +any information. + +Solution: The simplest fix for this without much +code change is to continue to look for other keys +when status-1 key is not found. + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23588 +> fixes: bz#1764119 +> Change-Id: I0062839933c9706119eb85416256eade97e976dc +> Signed-off-by: Sanju Rakonde + +BUG: 1761326 +Change-Id: I0062839933c9706119eb85416256eade97e976dc +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185749 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-rpc-ops.c | 21 ++++++++++++++------- + tests/bugs/glusterd/rebalance-in-cluster.t | 9 +++++++++ + 2 files changed, 23 insertions(+), 7 deletions(-) + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index b167e26..4e91265 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -1597,13 +1597,20 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type, + goto out; + } + +- snprintf(key, sizeof(key), "status-1"); +- +- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd); +- if (ret) { +- gf_log("cli", GF_LOG_TRACE, "count %d %d", count, 1); +- gf_log("cli", GF_LOG_TRACE, "failed to get status"); +- goto out; ++ for (i = 1; i <= count; i++) { ++ snprintf(key, sizeof(key), "status-%d", i); ++ ret = dict_get_int32(dict, key, (int32_t *)&status_rcd); ++ /* If information from a node is missing we should skip ++ * the node and try to fetch information of other nodes. ++ * If information is not found for all nodes, we should ++ * error out. 
++ */ ++ if (!ret) ++ break; ++ if (ret && i == count) { ++ gf_log("cli", GF_LOG_TRACE, "failed to get status"); ++ goto out; ++ } + } + + /* Fix layout will be sent to all nodes for the volume +diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t +index 9565fae..469ec6c 100644 +--- a/tests/bugs/glusterd/rebalance-in-cluster.t ++++ b/tests/bugs/glusterd/rebalance-in-cluster.t +@@ -4,6 +4,10 @@ + . $(dirname $0)/../../cluster.rc + . $(dirname $0)/../../volume.rc + ++function rebalance_status_field_1 { ++ $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p ++} ++ + cleanup; + TEST launch_cluster 2; + TEST $CLI_1 peer probe $H2; +@@ -29,6 +33,11 @@ TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 + TEST $CLI_1 volume rebalance $V0 start + EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0 + ++#bug - 1764119 - rebalance status should display detailed info when any of the node is dowm ++TEST kill_glusterd 2 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0 ++ ++TEST start_glusterd 2 + #bug-1245142 + + $CLI_1 volume rebalance $V0 start & +-- +1.8.3.1 + diff --git a/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch b/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch new file mode 100644 index 0000000..26e1577 --- /dev/null +++ b/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch @@ -0,0 +1,128 @@ +From dcf3f74fa7e812dfe89667bd6219f70a8457f755 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Thu, 6 Jun 2019 18:33:19 +0530 +Subject: [PATCH 324/335] extras/hooks: Add SELinux label on new bricks during + add-brick + +Backport of https://review.gluster.org/c/glusterfs/+/22834 + +Change-Id: Ifd8ae5eeb91b968cc1a9a9b5d15844c5233d56db +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/185855 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil 
Kumar Heggodu Gopala Acharya +--- + .../add-brick/post/S10selinux-label-brick.sh | 100 +++++++++++++++++++++ + 1 file changed, 100 insertions(+) + create mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh + +diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +new file mode 100755 +index 0000000..4a17c99 +--- /dev/null ++++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +@@ -0,0 +1,100 @@ ++#!/bin/bash ++# ++# Install to hooks//add-brick/post ++# ++# Add an SELinux file context for each brick using the glusterd_brick_t type. ++# This ensures that the brick is relabeled correctly on an SELinux restart or ++# restore. Subsequently, run a restore on the brick path to set the selinux ++# labels. ++# ++### ++ ++PROGNAME="Sselinux" ++OPTSPEC="volname:,version:,gd-workdir:,volume-op:" ++VOL= ++ ++parse_args () { ++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") ++ eval set -- "${ARGS}" ++ ++ while true; do ++ case ${1} in ++ --volname) ++ shift ++ VOL=${1} ++ ;; ++ --gd-workdir) ++ shift ++ GLUSTERD_WORKDIR=$1 ++ ;; ++ --version) ++ shift ++ ;; ++ --volume-op) ++ shift ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac ++ shift ++ done ++} ++ ++set_brick_labels() ++{ ++ local volname="${1}" ++ local fctx ++ local list=() ++ ++ fctx="$(semanage fcontext --list -C)" ++ ++ # wait for new brick path to be updated under ++ # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/ ++ sleep 5 ++ ++ # grab the path for each local brick ++ brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) ++ ++ # create a list of bricks for which custom SELinux ++ # label doesn't exist ++ for b in ${brickdirs}; do ++ pattern="${b}(/.*)?" ++ echo "${fctx}" | grep "^${pattern}\s" >/dev/null ++ if [[ $? 
-ne 0 ]]; then ++ list+=("${pattern}") ++ fi ++ done ++ ++ # Add a file context for each brick path in the list and associate with the ++ # glusterd_brick_t SELinux type. ++ for p in ${list[@]} ++ do ++ semanage fcontext --add -t glusterd_brick_t -r s0 "${p}" ++ done ++ ++ # Set the labels for which SELinux label was added above ++ for b in ${brickdirs} ++ do ++ echo "${list[@]}" | grep "${b}" >/dev/null ++ if [[ $? -eq 0 ]]; then ++ restorecon -R "${b}" ++ fi ++ done ++} ++ ++SELINUX_STATE=$(which getenforce && getenforce) ++[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 ++ ++parse_args "$@" ++[ -z "${VOL}" ] && exit 1 ++ ++set_brick_labels "${VOL}" ++ ++exit 0 +-- +1.8.3.1 + diff --git a/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch b/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch new file mode 100644 index 0000000..8e5a5fa --- /dev/null +++ b/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch @@ -0,0 +1,52 @@ +From 27d69d8927a946562aef08a6edfee38b9998f96d Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Wed, 12 Jun 2019 15:41:27 +0530 +Subject: [PATCH 325/335] extras/hooks: Install and package newly added post + add-brick hook script + +Previously a new SELinux hook script was added as a post add-brick +operation to label new brick paths. But the change failed to install +and package new script. Therefore making necessary changes to Makefile +and spec file to get it installed and packaged. 
+ +Backport of https://review.gluster.org/c/glusterfs/+/22856 + +Change-Id: I67b8f4982c2783c34a4bc749fb4387c19a038225 +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/185856 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/hook-scripts/add-brick/post/Makefile.am | 4 ++-- + glusterfs.spec.in | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am +index bfc0c1c..9b236df 100644 +--- a/extras/hook-scripts/add-brick/post/Makefile.am ++++ b/extras/hook-scripts/add-brick/post/Makefile.am +@@ -1,6 +1,6 @@ +-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh ++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/ + if WITH_SERVER +-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh ++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh + endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index eeadb65..91180db 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1447,6 +1447,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh +-- +1.8.3.1 + diff --git 
a/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch b/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch new file mode 100644 index 0000000..b0afcc7 --- /dev/null +++ b/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch @@ -0,0 +1,51 @@ +From a4f01ad90a0c0dfd0655da509c5ed2a11a507cc3 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 17 Jun 2019 11:10:42 +0530 +Subject: [PATCH 326/335] tests: subdir-mount.t is failing for brick_mux + regrssion + +To avoid the failure wait to run hook script S13create-subdir-mounts.sh +after executed add-brick command by test case. + +This is required as a dependency for the bz referenced below. + +Backport of https://review.gluster.org/c/glusterfs/+/22877 + +Change-Id: I063b6d0f86a550ed0a0527255e4dfbe8f0a8c02e +BUG: 1686800 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/185857 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/subdir-mount.t | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t +index 8401946..a02bd6b 100644 +--- a/tests/features/subdir-mount.t ++++ b/tests/features/subdir-mount.t +@@ -85,12 +85,17 @@ TEST $CLI volume start $V0 + TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2 + TEST stat $M2 + ++initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` + # mount shouldn't fail even after add-brick + TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6}; + +-# Give time for client process to get notified and use the new +-# volfile after add-brick +-sleep 1 ++# Wait to execute create-subdir-mounts.sh script by glusterd ++newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` ++while [ $newcnt -eq $initcnt ] ++do ++ newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc 
-l` ++ sleep 1 ++done + + # Existing mount should still be active + mount_inode=$(stat --format "%i" "$M2") +-- +1.8.3.1 + diff --git a/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch b/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch new file mode 100644 index 0000000..e3e42fa --- /dev/null +++ b/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch @@ -0,0 +1,93 @@ +From f89242132dc4756c827113154cc6ad18ad6bde88 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Tue, 19 Feb 2019 12:49:12 +0530 +Subject: [PATCH 327/335] glusterfind: integrate with gfid2path + +Integration with gfid2path helps avoid file-system crawl and saves +precious time. Extended attributes starting with "trusted.gfid2path." +are read and the / values are extracted and the is +iteratively resolved from the brick backend to arrive at the full path. + +>Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0 +>fixes: #529 +>Signed-off-by: Milind Changire +>Signed-off-by: Shwetha K Acharya + +backport of https://review.gluster.org/#/c/glusterfs/+/22225/ +BUG: 1599802 +Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0 +Signed-off-by: Milind Changire +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185706 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tools/glusterfind/src/changelog.py | 45 ++++++++++++++++++++++++++++++++++---- + 1 file changed, 41 insertions(+), 4 deletions(-) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index ef982db..d8f97e0 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -114,6 +114,43 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): + continue + + ++def enum_hard_links_using_gfid2path(brick, gfid, args): ++ hardlinks = [] ++ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid) ++ if not os.path.isdir(p): ++ # we have a symlink or a normal file ++ try: ++ file_xattrs = xattr.list(p) ++ for x in 
file_xattrs: ++ if x.startswith("trusted.gfid2path."): ++ # get the value for the xattr i.e. / ++ v = xattr.getxattr(p, x) ++ pgfid, bn = v.split(os.sep) ++ try: ++ path = symlink_gfid_to_path(brick, pgfid) ++ fullpath = os.path.join(path, bn) ++ fullpath = output_path_prepare(fullpath, args) ++ hardlinks.append(fullpath) ++ except (IOError, OSError) as e: ++ logger.warn("Error converting to path: %s" % e) ++ continue ++ except (IOError, OSError): ++ pass ++ return hardlinks ++ ++ ++def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args): ++ path = "" ++ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}): ++ gfid = row[3].strip() ++ logger.debug("Processing gfid %s" % gfid) ++ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args) ++ ++ path = ",".join(hardlinks) ++ ++ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid}) ++ ++ + def gfid_to_path_using_pgfid(brick, changelog_data, args): + """ + For all the pgfids collected, Converts to Path and +@@ -314,11 +351,11 @@ def get_changes(brick, hash_dir, log_file, start, end, args): + changelog_data.commit() + logger.info("[2/4] Finished 'pgfid to path' conversions.") + +- # Convert all GFIDs for which no other additional details available +- logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...") +- gfid_to_path_using_pgfid(brick, changelog_data, args) ++ # Convert all gfids recorded for data and metadata to all hardlink paths ++ logger.info("[3/4] Starting 'gfid2path' conversions ...") ++ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args) + changelog_data.commit() +- logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.") ++ logger.info("[3/4] Finished 'gfid2path' conversions.") + + # If some GFIDs fail to get converted from previous step, + # convert using find +-- +1.8.3.1 + diff --git a/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch 
b/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch new file mode 100644 index 0000000..0d12daa --- /dev/null +++ b/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch @@ -0,0 +1,55 @@ +From a8d8fc91af226fbf49e9dd1d7d91ad287707c4fe Mon Sep 17 00:00:00 2001 +From: Vishal Pandey +Date: Wed, 7 Aug 2019 12:53:06 +0530 +Subject: [PATCH 328/335] glusterd: Add warning and abort in case of failures + in migration during remove-brick commit + +Problem - +Currently remove-brick commit goes through even though there were files +that failed to migrate or were skipped. There is no warning raised to the user. +Solution- +Add a check in the remove brick staging phase to verify if the status of the +rebalnce process is complete but there has been failures or some skipped files +while migration, In this case user will be given a warning and remove-brick +commit. User will need to use the force option to remove the bricks. + +> Upstream Path Link: https://review.gluster.org/#/c/glusterfs/+/23171/ +> Fixes: bz#1514683 +> Signed-offby- Vishal Pandey +> Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5 + +BUG: 1344758 +Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5 +Signed-off-by: Vishal Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/185831 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index ad9a572..c5141de 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2191,6 +2191,17 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + goto out; + } + ++ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) { ++ if (volinfo->rebal.rebalance_failures > 0 || ++ volinfo->rebal.skipped_files > 0) 
{ ++ errstr = gf_strdup( ++ "use 'force' option as migration " ++ "of some files might have been skipped or " ++ "has failed"); ++ goto out; ++ } ++ } ++ + ret = glusterd_remove_brick_validate_bricks( + cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE); + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch b/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch new file mode 100644 index 0000000..935824d --- /dev/null +++ b/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch @@ -0,0 +1,165 @@ +From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 20 Nov 2019 12:26:11 +0530 +Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no + healed_sinks + +Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/ + +Problem: +In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame +anything for entry heal, heal will not complete even though we have +clear source and sinks. This will happen because while doing +afr_selfheal_find_direction() only the bricks which are blamed by +non-accused bricks are considered as sinks. Later in +__afr_selfheal_entry_finalize_source() when it tries to mark all the +non-sources as sinks it fails to do so because there won't be any +healed_sinks marked, no witness present and there will be a source. + +Fix: +If there is a source and no healed_sinks, then reset all the locked +sources to 0 and healed sinks to 1 to do conservative merge. 
+ +Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57 +Signed-off-by: karthik-us +BUG: 1764095 +Reviewed-on: https://code.engineering.redhat.com/gerrit/185834 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bug-1749322-entry-heal-not-happening.t | 89 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-entry.c | 15 ++++ + 2 files changed, 104 insertions(+) + create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t + +diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +new file mode 100644 +index 0000000..9627908 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +@@ -0,0 +1,89 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++function check_gfid_and_link_count ++{ ++ local file=$1 ++ ++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) ++ TEST [ ! 
-z $file_gfid_b0 ] ++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) ++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) ++ EXPECT $file_gfid_b0 echo $file_gfid_b1 ++ EXPECT $file_gfid_b0 echo $file_gfid_b2 ++ ++ EXPECT "2" stat -c %h $B0/${V0}0/$file ++ EXPECT "2" stat -c %h $B0/${V0}1/$file ++ EXPECT "2" stat -c %h $B0/${V0}2/$file ++} ++TESTS_EXPECTED_IN_LOOP=18 ++ ++################################################################################ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++TEST `echo "File 1 " > $M0/dir/file1` ++TEST touch $M0/dir/file{2..4} ++ ++# Remove file2 from 1st & 3rd bricks ++TEST rm -f $B0/$V0"0"/dir/file2 ++TEST rm -f $B0/$V0"2"/dir/file2 ++ ++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks ++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3) ++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3) ++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm -f $B0/$V0"0"/dir/file3 ++TEST rm -f $B0/$V0"1"/dir/file3 ++ ++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick ++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4) ++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4) ++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4 ++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 ++ ++# B0 and B2 blame each other ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 
$B0/$V0"0"/dir ++ ++# Add entry to xattrop dir on first brick. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# All the files must be present on all the bricks after conservative merge and ++# should have the gfid xattr and the .glusterfs hardlink. ++check_gfid_and_link_count dir/file1 ++check_gfid_and_link_count dir/file2 ++check_gfid_and_link_count dir/file3 ++check_gfid_and_link_count dir/file4 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 35b600f..3ce882e 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + afr_private_t *priv = NULL; + int source = -1; + int sources_count = 0; ++ int i = 0; + + priv = this->private; + +@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + } + + source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION); ++ ++ /*If the selected source does not blame any other brick, then mark ++ * everything as sink to trigger conservative merge. 
++ */ ++ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) { ++ for (i = 0; i < priv->child_count; i++) { ++ if (locked_on[i]) { ++ sources[i] = 0; ++ healed_sinks[i] = 1; ++ } ++ } ++ return -1; ++ } ++ + return source; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0330-mount.glusterfs-change-the-error-message.patch b/SOURCES/0330-mount.glusterfs-change-the-error-message.patch new file mode 100644 index 0000000..b64f0c6 --- /dev/null +++ b/SOURCES/0330-mount.glusterfs-change-the-error-message.patch @@ -0,0 +1,59 @@ +From 72168245761592a2cd0ebec05dd9bd9bc00745ca Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Wed, 13 Mar 2019 08:51:31 +0530 +Subject: [PATCH 330/335] mount.glusterfs: change the error message + +In scenarios where a mount fails before creating log file, doesn't +make sense to give message to 'check log file'. See below: + +``` +ERROR: failed to create logfile "/var/log/glusterfs/mnt.log" (No space left on device) +ERROR: failed to open logfile /var/log/glusterfs/mnt.log +Mount failed. Please check the log file for more details. +``` + +>upstream patch: https://review.gluster.org/#/c/glusterfs/+/22346/ +>Fixes: bz#1688068 +>Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f +>Signed-off-by: Amar Tumballi + +BUG: 1685406 +Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f +Signed-off-by: Sheetal Pamecha +Reviewed-on: https://code.engineering.redhat.com/gerrit/185828 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mount/fuse/utils/mount.glusterfs.in | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 3f5d76d..cbde42d 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -361,7 +361,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line $mount_point"); + $cmd_line; + if [ $? -ne 0 ]; then +- warn "Mount failed. 
Please check the log file for more details." ++ # If this is true, then glusterfs process returned error without ++ # getting daemonized. We have made sure the logs are posted to ++ # 'stderr', so no need to point them to logfile. ++ warn "Mounting glusterfs on $mount_point failed." + exit 1; + fi + +@@ -369,7 +372,9 @@ start_glusterfs () + inode=$( ${getinode} $mount_point 2>/dev/null); + # this is required if the stat returns error + if [ $? -ne 0 ]; then +- warn "Mount failed. Please check the log file for more details." ++ # At this time, glusterfs got daemonized, and then later exited. ++ # These failures are only logged in log file. ++ warn "Mount failed. Check the log file ${log_file} for more details." + umount $mount_point > /dev/null 2>&1; + exit 1; + fi +-- +1.8.3.1 + diff --git a/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch b/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch new file mode 100644 index 0000000..6eb15b0 --- /dev/null +++ b/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch @@ -0,0 +1,44 @@ +From 147cff762b307bf60519bae4cdefc62f655119a7 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Wed, 30 Oct 2019 10:47:17 +0530 +Subject: [PATCH 331/335] features/locks: Do special handling for op-version < + 3.12.0 + +Problem: +Patch https://code.engineering.redhat.com/gerrit/#/c/140080/ diverges from +its upstream patch(https://review.gluster.org/c/glusterfs/+/20031) in op-version. +On upstream special-handling happens for version < 3.10.0 whereas for downstream +special-handling happens for version < 3.12.0. + When rebase happened for 3.5.0 from upstream, this downstream specific change +is missed as there was no special downstream-only patch tracking this difference. 
+This leads to I/O errors on upgrade from 3.3.1->3.5.0 + +Fix: +Do special handling for op-version < 3.12.0 as in 3.4.x + +Change-Id: I72fec058bdfb3cd30d017d205c90aa61aec86c5d +Label: DOWNSTREAM ONLY +BUG: 1766640 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/185835 +Reviewed-by: Xavi Hernandez Juan +--- + xlators/features/locks/src/posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 9db5ac6..4592240 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -57,7 +57,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + do { \ + pl_local_t *__local = NULL; \ + if (frame->root->client && \ +- (frame->root->client->opversion < GD_OP_VERSION_3_10_0)) { \ ++ (frame->root->client->opversion < GD_OP_VERSION_3_12_0)) { \ + __local = frame->local; \ + PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \ + } else { \ +-- +1.8.3.1 + diff --git a/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch b/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch new file mode 100644 index 0000000..c9b2b56 --- /dev/null +++ b/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch @@ -0,0 +1,57 @@ +From 808f311bd4f38f06b8afc49fc8d2c65fc4797431 Mon Sep 17 00:00:00 2001 +From: kshithijiyer +Date: Fri, 28 Jun 2019 15:32:31 +0530 +Subject: [PATCH 332/335] Removing one top command from gluster v help + +The current help show 2 different top commands +intead of one single top command which can be +easily observed when "# gluster v help" command +is issued. Removing one "volume top " +and clubbing into them into a single command. 
+ +Current help: +volume top {open|read|write|opendir|readdir|clear} +[nfs|brick ] [list-cnt ] | +volume top {read-perf|write-perf} +[bs count ] [brick ] +[list-cnt ] - volume top operations + +Expected help: +volume top {open|read|write|opendir|readdir|clear} +[nfs|brick ] [list-cnt ] | {read-perf|write-perf} +[bs count ] [brick ] [list-cnt ] +- volume top operations + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22972/ +> fixes: bz#1725034 +> Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d +> Signed-off-by: kshithijiyer + +BUG: 1726058 +Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185757 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 66beb1b..754d333 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3427,8 +3427,8 @@ struct cli_cmd volume_cmds[] = { + cli_cmd_volume_profile_cbk, "volume profile operations"}, + + {"volume top {open|read|write|opendir|readdir|clear} [nfs|brick " +- "] [list-cnt ] |\n" +- "volume top {read-perf|write-perf} [bs count ] " ++ "] [list-cnt ] | " ++ "{read-perf|write-perf} [bs count ] " + "[brick ] [list-cnt ]", + cli_cmd_volume_top_cbk, "volume top operations"}, + +-- +1.8.3.1 + diff --git a/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch b/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch new file mode 100644 index 0000000..b1d94b4 --- /dev/null +++ b/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch @@ -0,0 +1,195 @@ +From f199094cb61341a47c98a8ed91b293446182b5a9 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 3 Oct 2019 14:06:52 +0530 +Subject: [PATCH 333/335] rpc: Synchronize slot allocation code + +Problem: Current slot allocation/deallocation code path is not + 
synchronized.There are scenario when due to race condition + in slot allocation/deallocation code path brick is crashed. + +Solution: Synchronize slot allocation/deallocation code path to + avoid the issue + +> Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25 +> Fixes: bz#1763036 +> Signed-off-by: Mohit Agrawal +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23508/) +> (Cherry pick from commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2) + +Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25 +BUG: 1741193 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/185827 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/event-epoll.c | 74 +++++++++++++++++++++++------------------- + 1 file changed, 41 insertions(+), 33 deletions(-) + +diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 0cec47e..65f5efd 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -69,15 +69,27 @@ __event_newtable(struct event_pool *event_pool, int table_idx) + } + + static int ++event_slot_ref(struct event_slot_epoll *slot) ++{ ++ if (!slot) ++ return -1; ++ ++ return GF_ATOMIC_INC(slot->ref); ++} ++ ++static int + __event_slot_alloc(struct event_pool *event_pool, int fd, +- char notify_poller_death) ++ char notify_poller_death, struct event_slot_epoll **slot) + { + int i = 0; ++ int j = 0; + int table_idx = -1; + int gen = -1; + struct event_slot_epoll *table = NULL; + +- for (i = 0; i < EVENT_EPOLL_TABLES; i++) { ++retry: ++ ++ while (i < EVENT_EPOLL_TABLES) { + switch (event_pool->slots_used[i]) { + case EVENT_EPOLL_SLOTS: + continue; +@@ -98,6 +110,7 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + if (table) + /* break out of the loop */ + break; ++ i++; + } + + if (!table) +@@ -105,20 +118,20 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + + table_idx = i; + +- for (i = 0; i < 
EVENT_EPOLL_SLOTS; i++) { +- if (table[i].fd == -1) { ++ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) { ++ if (table[j].fd == -1) { + /* wipe everything except bump the generation */ +- gen = table[i].gen; +- memset(&table[i], 0, sizeof(table[i])); +- table[i].gen = gen + 1; ++ gen = table[j].gen; ++ memset(&table[j], 0, sizeof(table[j])); ++ table[j].gen = gen + 1; + +- LOCK_INIT(&table[i].lock); +- INIT_LIST_HEAD(&table[i].poller_death); ++ LOCK_INIT(&table[j].lock); ++ INIT_LIST_HEAD(&table[j].poller_death); + +- table[i].fd = fd; ++ table[j].fd = fd; + if (notify_poller_death) { +- table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i; +- list_add_tail(&table[i].poller_death, ++ table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j; ++ list_add_tail(&table[j].poller_death, + &event_pool->poller_death); + } + +@@ -128,18 +141,26 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + } + } + +- return table_idx * EVENT_EPOLL_SLOTS + i; ++ if (j == EVENT_EPOLL_SLOTS) { ++ table = NULL; ++ i++; ++ goto retry; ++ } else { ++ (*slot) = &table[j]; ++ event_slot_ref(*slot); ++ return table_idx * EVENT_EPOLL_SLOTS + j; ++ } + } + + static int + event_slot_alloc(struct event_pool *event_pool, int fd, +- char notify_poller_death) ++ char notify_poller_death, struct event_slot_epoll **slot) + { + int idx = -1; + + pthread_mutex_lock(&event_pool->mutex); + { +- idx = __event_slot_alloc(event_pool, fd, notify_poller_death); ++ idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot); + } + pthread_mutex_unlock(&event_pool->mutex); + +@@ -153,6 +174,7 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) + int offset = 0; + struct event_slot_epoll *table = NULL; + struct event_slot_epoll *slot = NULL; ++ int fd = -1; + + table_idx = idx / EVENT_EPOLL_SLOTS; + offset = idx % EVENT_EPOLL_SLOTS; +@@ -164,11 +186,13 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) + slot = &table[offset]; + slot->gen++; + ++ fd = slot->fd; + slot->fd = -1; + 
slot->handled_error = 0; + slot->in_handler = 0; + list_del_init(&slot->poller_death); +- event_pool->slots_used[table_idx]--; ++ if (fd != -1) ++ event_pool->slots_used[table_idx]--; + + return; + } +@@ -185,15 +209,6 @@ event_slot_dealloc(struct event_pool *event_pool, int idx) + return; + } + +-static int +-event_slot_ref(struct event_slot_epoll *slot) +-{ +- if (!slot) +- return -1; +- +- return GF_ATOMIC_INC(slot->ref); +-} +- + static struct event_slot_epoll * + event_slot_get(struct event_pool *event_pool, int idx) + { +@@ -379,20 +394,13 @@ event_register_epoll(struct event_pool *event_pool, int fd, + if (destroy == 1) + goto out; + +- idx = event_slot_alloc(event_pool, fd, notify_poller_death); ++ idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot); + if (idx == -1) { + gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, + "could not find slot for fd=%d", fd); + return -1; + } + +- slot = event_slot_get(event_pool, idx); +- if (!slot) { +- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, +- "could not find slot for fd=%d idx=%d", fd, idx); +- return -1; +- } +- + assert(slot->fd == fd); + + LOCK(&slot->lock); +-- +1.8.3.1 + diff --git a/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch b/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch new file mode 100644 index 0000000..48f927f --- /dev/null +++ b/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch @@ -0,0 +1,54 @@ +From 17940583c4d991a568582581f68dcbf08463ccaf Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Tue, 16 Jul 2019 10:31:46 +0530 +Subject: [PATCH 334/335] dht: log getxattr failure for node-uuid at "DEBUG" + +There are two ways to fetch node-uuid information from dht. + +1 - #define GF_XATTR_LIST_NODE_UUIDS_KEY "trusted.glusterfs.list-node-uuids" +This key is used by AFR. + +2 - #define GF_REBAL_FIND_LOCAL_SUBVOL "glusterfs.find-local-subvol" +This key is used for non-afr volume type. + +We do two getxattr operations. 
First on the #1 key followed by on #2 if +getxattr on #1 key fails. + +Since the parent function "dht_init_local_subvols_and_nodeuuids" logs failure, +moving the log-level to DEBUG in dht_find_local_subvol_cbk. + +>fixes: bz#1730175 +>Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14 +>Signed-off-by: Susant Palai +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23053/ + +BUG: 1727755 +Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14 +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185876 +Tested-by: RHGS Build Bot +--- + xlators/cluster/dht/src/dht-common.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 37952ba..d0b5287 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -4253,8 +4253,11 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + local->op_ret = -1; + local->op_errno = op_errno; + UNLOCK(&frame->lock); +- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, +- "getxattr err for dir"); ++ if (op_errno == ENODATA) ++ gf_msg_debug(this->name, 0, "failed to get node-uuid"); ++ else ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid"); + goto post_unlock; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch b/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch new file mode 100644 index 0000000..c3341df --- /dev/null +++ b/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch @@ -0,0 +1,15991 @@ +From 39523fd6c1b4789b12c8db81f4e08a3eb0c6a65c Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya +Date: Thu, 17 Oct 2019 13:03:56 +0530 +Subject: [PATCH 335/335] tests: RHEL8 test failure fixes for RHGS + +- tests/bugs/shard/bug-1272986.t + https://review.gluster.org/#/c/glusterfs/+/23499/ + 
https://review.gluster.org/#/c/glusterfs/+/23551/ + +- tests/basic/posix/shared-statfs.t + https://review.gluster.org/c/glusterfs/+/23550 + +- tests/basic/fops-sanity.t + https://review.gluster.org/c/glusterfs/+/22210/ + +- tests/bugs/transport/bug-873367.t +- tests/features/ssl-authz.t +- tests/bugs/snapshot/bug-1399598-uss-with-ssl.t + https://review.gluster.org/#/c/glusterfs/+/23587/ + +- remove gnfs relatedtests + +- tests/bugs/shard/unlinks-and-renames.t + https://review.gluster.org/#/c/glusterfs/+/23585/ + +- tests/bugs/rpc/bug-954057.t +- tests/bugs/glusterfs-server/bug-887145.t + https://review.gluster.org/#/c/glusterfs/+/23710/ + +- tests/features/ssl-ciphers.t + https://review.gluster.org/#/c/glusterfs/+/23703/ + +- tests/bugs/fuse/bug-985074.t + https://review.gluster.org/#/c/glusterfs/+/23734/ + +BUG: 1762180 +Change-Id: I97b344a632b49ca9ca332a5a463756b160aee5bd +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185716 +Tested-by: RHGS Build Bot +--- + tests/basic/fops-sanity.c | 1862 ++-- + tests/basic/posix/shared-statfs.t | 11 +- + tests/bugs/cli/bug-1320388.t | 2 +- + tests/bugs/fuse/bug-985074.t | 4 +- + tests/bugs/glusterd/quorum-value-check.t | 35 - + tests/bugs/glusterfs-server/bug-887145.t | 14 +- + tests/bugs/nfs/bug-1053579.t | 114 - + tests/bugs/nfs/bug-1116503.t | 47 - + tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t | 24 - + tests/bugs/nfs/bug-1157223-symlink-mounting.t | 126 - + tests/bugs/nfs/bug-1161092-nfs-acls.t | 39 - + tests/bugs/nfs/bug-1166862.t | 69 - + tests/bugs/nfs/bug-1210338.c | 31 - + tests/bugs/nfs/bug-1210338.t | 30 - + tests/bugs/nfs/bug-1302948.t | 13 - + tests/bugs/nfs/bug-847622.t | 39 - + tests/bugs/nfs/bug-877885.t | 39 - + tests/bugs/nfs/bug-904065.t | 100 - + tests/bugs/nfs/bug-915280.t | 54 - + tests/bugs/nfs/bug-970070.t | 13 - + tests/bugs/nfs/bug-974972.t | 41 - + tests/bugs/nfs/showmount-many-clients.t | 41 - + tests/bugs/nfs/socket-as-fifo.py | 33 - + 
tests/bugs/nfs/socket-as-fifo.t | 25 - + tests/bugs/nfs/subdir-trailing-slash.t | 32 - + tests/bugs/nfs/zero-atime.t | 33 - + tests/bugs/rpc/bug-954057.t | 10 +- + tests/bugs/shard/bug-1272986.t | 6 +- + tests/bugs/transport/bug-873367.t | 2 +- + tests/features/ssl-authz.t | 2 +- + tests/features/ssl-ciphers.t | 61 +- + tests/ssl.rc | 2 +- + xlators/features/shard/src/shard.c | 11754 ++++++++++---------- + 33 files changed, 6638 insertions(+), 8070 deletions(-) + delete mode 100755 tests/bugs/glusterd/quorum-value-check.t + delete mode 100755 tests/bugs/nfs/bug-1053579.t + delete mode 100644 tests/bugs/nfs/bug-1116503.t + delete mode 100644 tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t + delete mode 100644 tests/bugs/nfs/bug-1157223-symlink-mounting.t + delete mode 100644 tests/bugs/nfs/bug-1161092-nfs-acls.t + delete mode 100755 tests/bugs/nfs/bug-1166862.t + delete mode 100644 tests/bugs/nfs/bug-1210338.c + delete mode 100644 tests/bugs/nfs/bug-1210338.t + delete mode 100755 tests/bugs/nfs/bug-1302948.t + delete mode 100755 tests/bugs/nfs/bug-847622.t + delete mode 100755 tests/bugs/nfs/bug-877885.t + delete mode 100755 tests/bugs/nfs/bug-904065.t + delete mode 100755 tests/bugs/nfs/bug-915280.t + delete mode 100755 tests/bugs/nfs/bug-970070.t + delete mode 100755 tests/bugs/nfs/bug-974972.t + delete mode 100644 tests/bugs/nfs/showmount-many-clients.t + delete mode 100755 tests/bugs/nfs/socket-as-fifo.py + delete mode 100644 tests/bugs/nfs/socket-as-fifo.t + delete mode 100644 tests/bugs/nfs/subdir-trailing-slash.t + delete mode 100755 tests/bugs/nfs/zero-atime.t + +diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c +index aff72d8..171d003 100644 +--- a/tests/basic/fops-sanity.c ++++ b/tests/basic/fops-sanity.c +@@ -17,15 +17,16 @@ + + /* Filesystem basic sanity check, tests all (almost) fops. 
*/ + +-#include ++#include ++#include + #include +-#include +-#include ++#include ++#include + #include ++#include ++#include + #include +-#include +-#include +-#include ++#include + + #ifndef linux + #include +@@ -34,904 +35,880 @@ + #endif + + /* for fd based fops after unlink */ +-int +-fd_based_fops_1(char *filename); ++int fd_based_fops_1(char *filename); + /* for fd based fops before unlink */ +-int +-fd_based_fops_2(char *filename); ++int fd_based_fops_2(char *filename); + /* fops based on fd after dup */ +-int +-dup_fd_based_fops(char *filename); ++int dup_fd_based_fops(char *filename); + /* for fops based on path */ +-int +-path_based_fops(char *filename); ++int path_based_fops(char *filename); + /* for fops which operate on directory */ +-int +-dir_based_fops(char *filename); ++int dir_based_fops(char *filename); + /* for fops which operate in link files (symlinks) */ +-int +-link_based_fops(char *filename); ++int link_based_fops(char *filename); + /* to test open syscall with open modes available. */ +-int +-test_open_modes(char *filename); ++int test_open_modes(char *filename); + /* generic function which does open write and read. 
*/ +-int +-generic_open_read_write(char *filename, int flag, mode_t mode); ++int generic_open_read_write(char *filename, int flag, mode_t mode); + + #define OPEN_MODE 0666 + +-int +-main(int argc, char *argv[]) +-{ +- int ret = -1; +- int result = 0; +- char filename[255] = { +- 0, +- }; +- +- if (argc > 1) +- strcpy(filename, argv[1]); +- else +- strcpy(filename, "temp-xattr-test-file"); +- +- ret = fd_based_fops_1(strcat(filename, "_1")); +- if (ret < 0) { +- fprintf(stderr, "fd based file operation 1 failed\n"); +- result |= ret; +- } else { +- fprintf(stdout, "fd based file operation 1 passed\n"); +- } +- +- ret = fd_based_fops_2(strcat(filename, "_2")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "fd based file operation 2 failed\n"); +- } else { +- fprintf(stdout, "fd based file operation 2 passed\n"); +- } +- +- ret = dup_fd_based_fops(strcat(filename, "_3")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "dup fd based file operation failed\n"); +- } else { +- fprintf(stdout, "dup fd based file operation passed\n"); +- } +- +- ret = path_based_fops(strcat(filename, "_4")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "path based file operation failed\n"); +- } else { +- fprintf(stdout, "path based file operation passed\n"); +- } +- +- ret = dir_based_fops(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "directory based file operation failed\n"); +- } else { +- fprintf(stdout, "directory based file operation passed\n"); +- } +- +- ret = link_based_fops(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "link based file operation failed\n"); +- } else { +- fprintf(stdout, "link based file operation passed\n"); +- } +- +- ret = test_open_modes(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "testing modes of `open' call failed\n"); +- } else { +- fprintf(stdout, "testing modes of `open' call passed\n"); +- } +- return result; ++int main(int 
argc, char *argv[]) { ++ int ret = -1; ++ int result = 0; ++ char filename[255] = { ++ 0, ++ }; ++ ++ if (argc > 1) ++ strcpy(filename, argv[1]); ++ else ++ strcpy(filename, "temp-xattr-test-file"); ++ ++ ret = fd_based_fops_1(strcat(filename, "_1")); ++ if (ret < 0) { ++ fprintf(stderr, "fd based file operation 1 failed\n"); ++ result |= ret; ++ } else { ++ fprintf(stdout, "fd based file operation 1 passed\n"); ++ } ++ ++ ret = fd_based_fops_2(strcat(filename, "_2")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "fd based file operation 2 failed\n"); ++ } else { ++ fprintf(stdout, "fd based file operation 2 passed\n"); ++ } ++ ++ ret = dup_fd_based_fops(strcat(filename, "_3")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "dup fd based file operation failed\n"); ++ } else { ++ fprintf(stdout, "dup fd based file operation passed\n"); ++ } ++ ++ ret = path_based_fops(strcat(filename, "_4")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "path based file operation failed\n"); ++ } else { ++ fprintf(stdout, "path based file operation passed\n"); ++ } ++ ++ ret = dir_based_fops(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "directory based file operation failed\n"); ++ } else { ++ fprintf(stdout, "directory based file operation passed\n"); ++ } ++ ++ ret = link_based_fops(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "link based file operation failed\n"); ++ } else { ++ fprintf(stdout, "link based file operation passed\n"); ++ } ++ ++ ret = test_open_modes(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "testing modes of `open' call failed\n"); ++ } else { ++ fprintf(stdout, "testing modes of `open' call passed\n"); ++ } ++ return result; + } + + /* Execute all possible fops on a fd which is unlinked */ +-int +-fd_based_fops_1(char *filename) +-{ +- int fd = 0; +- int ret = -1; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char 
wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(wstr, "This is my string\n"); +- ret = write(fd, wstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(fd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = ftruncate(fd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fstat(fd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(fd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(fd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- /* +- * These metadata operations fail at the moment because kernel doesn't +- * pass the client fd in the operation. +- * The following bug tracks this change. 
+- * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 +- * ret = fchmod (fd, 0640); +- * if (ret < 0) { +- * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); +- * result |= ret; +- * } +- +- * ret = fchown (fd, 10001, 10001); +- * if (ret < 0) { +- * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); +- * result |= ret; +- * } +- +- * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); +- * if (ret < 0) { +- * fprintf (stderr, "fsetxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = flistxattr (fd, NULL, 0); +- * if (ret <= 0) { +- * fprintf (stderr, "flistxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); +- * if (ret <= 0) { +- * fprintf (stderr, "fgetxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = fremovexattr (fd, "trusted.xattr-test"); +- * if (ret < 0) { +- * fprintf (stderr, "fremovexattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- */ +- +- if (fd) +- close(fd); +- return result; ++int fd_based_fops_1(char *filename) { ++ int fd = 0; ++ int ret = -1; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(wstr, "This is my string\n"); ++ ret = write(fd, wstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(fd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: 
%s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = ftruncate(fd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fstat(fd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ /* ++ * These metadata operations fail at the moment because kernel doesn't ++ * pass the client fd in the operation. ++ * The following bug tracks this change. ++ * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 ++ * ret = fchmod (fd, 0640); ++ * if (ret < 0) { ++ * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fchown (fd, 10001, 10001); ++ * if (ret < 0) { ++ * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); ++ * if (ret < 0) { ++ * fprintf (stderr, "fsetxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = flistxattr (fd, NULL, 0); ++ * if (ret <= 0) { ++ * fprintf (stderr, "flistxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); ++ * if (ret <= 0) { ++ * fprintf (stderr, "fgetxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fremovexattr (fd, "trusted.xattr-test"); ++ * if (ret < 0) { ++ * fprintf (stderr, "fremovexattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ */ ++ ++ if (fd) ++ close(fd); ++ return result; + } + +-int 
+-fd_based_fops_2(char *filename) +-{ +- int fd = 0; +- int ret = -1; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- ret = ftruncate(fd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(wstr, "This is my second string\n"); +- ret = write(fd, wstr, strlen(wstr)); +- if (ret < 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(fd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = fstat(fd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchmod(fd, 0640); +- if (ret < 0) { +- fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchown(fd, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "fchown failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(fd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(fd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = flistxattr(fd, NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "flistxattr failed 
: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fremovexattr(fd, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (fd) +- close(fd); +- unlink(filename); ++int fd_based_fops_2(char *filename) { ++ int fd = 0; ++ int ret = -1; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ ret = ftruncate(fd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(wstr, "This is my second string\n"); ++ ret = write(fd, wstr, strlen(wstr)); ++ if (ret < 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(fd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = fstat(fd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchmod(fd, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchown(fd, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "fchown failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", 
strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = flistxattr(fd, NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fremovexattr(fd, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (fd) ++ close(fd); ++ unlink(filename); + +- return result; ++ return result; + } + +-int +-path_based_fops(char *filename) +-{ +- int ret = -1; +- int fd = 0; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char newfilename[255] = { +- 0, +- }; +- char *hardlink = "linkfile-hard.txt"; +- char *symlnk = "linkfile-soft.txt"; +- char buf[1024] = { +- 0, +- }; +- +- fd = creat(filename, 0644); +- if (fd < 0) { +- fprintf(stderr, "creat failed: %s\n", strerror(errno)); +- return ret; +- } +- +- ret = truncate(filename, 0); +- if (ret < 0) { +- fprintf(stderr, "truncate failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = stat(filename, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "stat failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chmod(filename, 0640); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chown(filename, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "chown failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "setxattr failed: 
%s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = listxattr(filename, NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = getxattr(filename, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = removexattr(filename, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = access(filename, R_OK | W_OK); +- if (ret < 0) { +- fprintf(stderr, "access failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = link(filename, hardlink); +- if (ret < 0) { +- fprintf(stderr, "link failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink(hardlink); +- +- ret = symlink(filename, symlnk); +- if (ret < 0) { +- fprintf(stderr, "symlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = readlink(symlnk, buf, sizeof(buf)); +- if (ret < 0) { +- fprintf(stderr, "readlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink(symlnk); +- +- /* Create a character special file */ +- ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); +- if (ret < 0) { +- fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("cspecial"); +- +- ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); +- if (ret < 0) { +- fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("bspecial"); ++int path_based_fops(char *filename) { ++ int ret = -1; ++ int fd = 0; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char newfilename[255] = { ++ 0, ++ }; ++ char *hardlink = "linkfile-hard.txt"; ++ char *symlnk = "linkfile-soft.txt"; ++ char buf[1024] = { ++ 0, ++ }; ++ ++ fd = creat(filename, 0644); ++ if (fd < 0) { ++ fprintf(stderr, "creat failed: %s\n", strerror(errno)); 
++ return ret; ++ } ++ ++ ret = truncate(filename, 0); ++ if (ret < 0) { ++ fprintf(stderr, "truncate failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = stat(filename, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "stat failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chmod(filename, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chown(filename, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "chown failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = listxattr(filename, NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = getxattr(filename, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = removexattr(filename, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = access(filename, R_OK | W_OK); ++ if (ret < 0) { ++ fprintf(stderr, "access failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = link(filename, hardlink); ++ if (ret < 0) { ++ fprintf(stderr, "link failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(hardlink); ++ ++ ret = symlink(filename, symlnk); ++ if (ret < 0) { ++ fprintf(stderr, "symlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = readlink(symlnk, buf, sizeof(buf)); ++ if (ret < 0) { ++ fprintf(stderr, "readlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(symlnk); ++ ++ /* Create a character special file */ ++ ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); ++ if (ret < 0) { ++ 
fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("cspecial"); ++ ++ ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); ++ if (ret < 0) { ++ fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("bspecial"); + + #ifdef linux +- ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0); ++ ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0); + #else +- ret = mkfifo("fifo", 0); ++ ret = mkfifo("fifo", 0); + #endif +- if (ret < 0) { +- fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("fifo"); ++ if (ret < 0) { ++ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("fifo"); + + #ifdef linux +- ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0); +- if (ret < 0) { +- fprintf(stderr, "sock mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } ++ ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0); ++ if (ret < 0) { ++ fprintf(stderr, "sock mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } + #else +- { +- int s; +- const char *pathname = "sock"; +- struct sockaddr_un addr; +- +- s = socket(PF_LOCAL, SOCK_STREAM, 0); +- memset(&addr, 0, sizeof(addr)); +- strncpy(addr.sun_path, pathname, sizeof(addr.sun_path)); +- ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr)); +- if (ret < 0) { +- fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- close(s); +- } +-#endif +- unlink("sock"); ++ { ++ int s; ++ const char *pathname = "sock"; ++ struct sockaddr_un addr; + +- strcpy(newfilename, filename); +- strcat(newfilename, "_new"); +- ret = rename(filename, newfilename); ++ s = socket(PF_LOCAL, SOCK_STREAM, 0); ++ memset(&addr, 0, sizeof(addr)); ++ strncpy(addr.sun_path, pathname, sizeof(addr.sun_path)); ++ ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr)); + if (ret < 0) { +- fprintf(stderr, "rename failed: %s\n", 
strerror(errno)); +- result |= ret; +- } +- unlink(newfilename); +- +- if (fd) +- close(fd); +- +- unlink(filename); +- return result; +-} +- +-int +-dup_fd_based_fops(char *filename) +-{ +- int fd = 0; +- int result = 0; +- int newfd = 0; +- int ret = -1; +- struct stat stbuf = { +- 0, +- }; +- char wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- newfd = dup(fd); +- if (newfd < 0) { +- fprintf(stderr, "dup failed: %s\n", strerror(errno)); +- result |= ret; ++ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); ++ result |= ret; + } +- ++ close(s); ++ } ++#endif ++ unlink("sock"); ++ ++ strcpy(newfilename, filename); ++ strcat(newfilename, "_new"); ++ ret = rename(filename, newfilename); ++ if (ret < 0) { ++ fprintf(stderr, "rename failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(newfilename); ++ ++ if (fd) + close(fd); + +- strcpy(wstr, "This is my string\n"); +- ret = write(newfd, wstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lseek(newfd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(newfd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = ftruncate(newfd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fstat(newfd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchmod(newfd, 0640); +- if (ret < 0) { +- fprintf(stderr, "fchmod failed : %s\n", 
strerror(errno)); +- result |= ret; +- } +- +- ret = fchown(newfd, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "fchown failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(newfd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(newfd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = flistxattr(newfd, NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fremovexattr(newfd, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (newfd) +- close(newfd); +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed : %s\n", strerror(errno)); +- result |= ret; +- } +- return result; ++ unlink(filename); ++ return result; + } + +-int +-dir_based_fops(char *dirname) +-{ +- int ret = -1; +- int result = 0; +- DIR *dp = NULL; +- char buff[255] = { +- 0, +- }; +- struct dirent *dbuff = { +- 0, +- }; +- struct stat stbuff = { +- 0, +- }; +- char newdname[255] = { +- 0, +- }; +- char *cwd = NULL; +- +- ret = mkdir(dirname, 0755); +- if (ret < 0) { +- fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); +- return ret; +- } +- +- dp = opendir(dirname); +- if (dp == NULL) { +- fprintf(stderr, "opendir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- dbuff = readdir(dp); +- if (NULL == dbuff) { +- fprintf(stderr, "readdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- 
ret = closedir(dp); +- if (ret < 0) { +- fprintf(stderr, "closedir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = stat(dirname, &stbuff); +- if (ret < 0) { +- fprintf(stderr, "stat failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chmod(dirname, 0744); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chown(dirname, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = listxattr(dirname, NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = removexattr(dirname, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(newdname, dirname); +- strcat(newdname, "/../"); +- ret = chdir(newdname); +- if (ret < 0) { +- fprintf(stderr, "chdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- cwd = getcwd(buff, 255); +- if (NULL == cwd) { +- fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(newdname, dirname); +- strcat(newdname, "new"); +- ret = rename(dirname, newdname); +- if (ret < 0) { +- fprintf(stderr, "rename failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = rmdir(newdname); +- if (ret < 0) { +- fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- rmdir(dirname); +- return result; ++int dup_fd_based_fops(char *filename) { ++ int fd = 0; ++ int result = 0; ++ int newfd = 0; ++ int 
ret = -1; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ newfd = dup(fd); ++ if (newfd < 0) { ++ fprintf(stderr, "dup failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ close(fd); ++ ++ strcpy(wstr, "This is my string\n"); ++ ret = write(newfd, wstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lseek(newfd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(newfd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = ftruncate(newfd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fstat(newfd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchmod(newfd, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchown(newfd, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "fchown failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(newfd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(newfd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ 
++ ret = flistxattr(newfd, NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fremovexattr(newfd, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (newfd) ++ close(newfd); ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ return result; + } + +-int +-link_based_fops(char *filename) +-{ +- int ret = -1; +- int result = 0; +- int fd = 0; +- char newname[255] = { +- 0, +- }; +- char linkname[255] = { +- 0, +- }; +- struct stat lstbuf = { +- 0, +- }; +- +- fd = creat(filename, 0644); +- if (fd < 0) { +- fd = 0; +- fprintf(stderr, "creat failed: %s\n", strerror(errno)); +- return ret; +- } +- +- strcpy(newname, filename); +- strcat(newname, "_hlink"); +- ret = link(filename, newname); +- if (ret < 0) { +- fprintf(stderr, "link failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(linkname, filename); +- strcat(linkname, "_slink"); +- ret = symlink(newname, linkname); +- if (ret < 0) { +- fprintf(stderr, "symlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lstat(linkname, &lstbuf); +- if (ret < 0) { +- fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lchown(linkname, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "lchown failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); +- result |= ret; +- 
} +- +- ret = llistxattr(linkname, NULL, 0); +- if (ret < 0) { +- ret = -1; +- fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); +- if (ret < 0) { +- ret = -1; +- fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lremovexattr(linkname, "trusted.lxattr-test"); +- if (ret < 0) { +- fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (fd) +- close(fd); +- unlink(linkname); +- unlink(newname); +- return result; ++int dir_based_fops(char *dirname) { ++ int ret = -1; ++ int result = 0; ++ DIR *dp = NULL; ++ char buff[255] = { ++ 0, ++ }; ++ struct dirent *dbuff = { ++ 0, ++ }; ++ struct stat stbuff = { ++ 0, ++ }; ++ char newdname[255] = { ++ 0, ++ }; ++ char *cwd = NULL; ++ ++ ret = mkdir(dirname, 0755); ++ if (ret < 0) { ++ fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ dp = opendir(dirname); ++ if (dp == NULL) { ++ fprintf(stderr, "opendir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ dbuff = readdir(dp); ++ if (NULL == dbuff) { ++ fprintf(stderr, "readdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = closedir(dp); ++ if (ret < 0) { ++ fprintf(stderr, "closedir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = stat(dirname, &stbuff); ++ if (ret < 0) { ++ fprintf(stderr, "stat failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chmod(dirname, 0744); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chown(dirname, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = 
listxattr(dirname, NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = removexattr(dirname, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(newdname, dirname); ++ strcat(newdname, "/../"); ++ ret = chdir(newdname); ++ if (ret < 0) { ++ fprintf(stderr, "chdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ cwd = getcwd(buff, 255); ++ if (NULL == cwd) { ++ fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(newdname, dirname); ++ strcat(newdname, "new"); ++ ret = rename(dirname, newdname); ++ if (ret < 0) { ++ fprintf(stderr, "rename failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = rmdir(newdname); ++ if (ret < 0) { ++ fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ rmdir(dirname); ++ return result; + } + +-int +-test_open_modes(char *filename) +-{ +- int ret = -1; +- int result = 0; +- +- ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); +- result |= ret; +- } +- +- ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); +- result |= ret; +- } +- +- ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_WRONLY, 0); +- if (ret != 0) { +- fprintf(stderr, "flag O_WRONLY failed\n"); +- result |= ret; +- } +- +- ret = 
creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_RDWR, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_RDWR failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_RDONLY, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_RDONLY failed\n"); +- result |= ret; +- } ++int link_based_fops(char *filename) { ++ int ret = -1; ++ int result = 0; ++ int fd = 0; ++ char newname[255] = { ++ 0, ++ }; ++ char linkname[255] = { ++ 0, ++ }; ++ struct stat lstbuf = { ++ 0, ++ }; ++ ++ fd = creat(filename, 0644); ++ if (fd < 0) { ++ fd = 0; ++ fprintf(stderr, "creat failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ strcpy(newname, filename); ++ strcat(newname, "_hlink"); ++ ret = link(filename, newname); ++ if (ret < 0) { ++ fprintf(stderr, "link failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(linkname, filename); ++ strcat(linkname, "_slink"); ++ ret = symlink(newname, linkname); ++ if (ret < 0) { ++ fprintf(stderr, "symlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lstat(linkname, &lstbuf); ++ if (ret < 0) { ++ fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lchown(linkname, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "lchown failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = llistxattr(linkname, NULL, 0); ++ if (ret < 0) { ++ ret = -1; ++ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); ++ if (ret < 0) { ++ ret = -1; ++ fprintf(stderr, "lgetxattr 
failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lremovexattr(linkname, "trusted.lxattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (fd) ++ close(fd); ++ unlink(linkname); ++ unlink(newname); ++ return result; ++} + +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); +- result |= ret; +- } ++int test_open_modes(char *filename) { ++ int ret = -1; ++ int result = 0; ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); ++ result |= ret; ++ } ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); ++ result |= ret; ++ } ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_WRONLY, 0); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_WRONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_RDWR, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_RDWR failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_RDONLY, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_RDONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); ++ result |= ret; ++ } + + #if 0 /* undefined behaviour, unable to reliably test */ + ret = creat 
(filename, 0644); +@@ -943,90 +920,87 @@ test_open_modes(char *filename) + } + #endif + +- ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, +- OPEN_MODE); +- if (0 != ret) { +- fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); +- if (0 != ret) { +- fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); +- result |= ret; +- } +- +- return result; ++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, OPEN_MODE); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); ++ result |= ret; ++ } ++ ++ return result; + } + +-int +-generic_open_read_write(char *filename, int flag, mode_t mode) +-{ +- int fd = 0; +- int ret = -1; +- char wstring[50] = { +- 0, +- }; +- char rstring[50] = { +- 0, +- }; +- +- fd = open(filename, flag, mode); +- if (fd < 0) { +- if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) { +- unlink(filename); +- return 0; +- } else { +- fprintf(stderr, "open failed: %s\n", strerror(errno)); +- return -1; +- } +- } +- +- strcpy(wstring, "My string to write\n"); +- ret = write(fd, wstring, strlen(wstring)); +- if (ret <= 0) { +- if (errno != EBADF) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- close(fd); +- unlink(filename); +- return ret; +- } +- } +- +- ret = lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- close(fd); +- unlink(filename); +- return ret; ++int generic_open_read_write(char *filename, int flag, mode_t mode) { ++ int fd = 0; ++ int ret = -1; ++ char wstring[50] = { ++ 0, ++ }; ++ char rstring[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, flag, mode); ++ if (fd < 0) { ++ if (flag == (O_CREAT | O_EXCL) && errno == 
EEXIST) { ++ unlink(filename); ++ return 0; ++ } else { ++ fprintf(stderr, "open failed: %s\n", strerror(errno)); ++ return -1; + } ++ } + +- ret = read(fd, rstring, strlen(wstring)); +- if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && +- flag != (O_TRUNC | O_WRONLY)) { +- close(fd); +- unlink(filename); +- return ret; ++ strcpy(wstring, "My string to write\n"); ++ ret = write(fd, wstring, strlen(wstring)); ++ if (ret <= 0) { ++ if (errno != EBADF) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ close(fd); ++ unlink(filename); ++ return ret; + } ++ } + +- /* Compare the rstring with wstring. But we do not want to return +- * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or +- * O_TRUNC|O_RDONLY. Because in that case we are not writing +- * anything to the file.*/ +- +- ret = memcmp(wstring, rstring, strlen(wstring)); +- if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && +- flag != (O_CREAT | O_WRONLY) && +- !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || +- flag == (O_TRUNC | O_RDONLY))) { +- fprintf(stderr, "read is returning junk\n"); +- close(fd); +- unlink(filename); +- return ret; +- } ++ ret = lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ close(fd); ++ unlink(filename); ++ return ret; ++ } + ++ ret = read(fd, rstring, strlen(wstring)); ++ if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && ++ flag != (O_TRUNC | O_WRONLY)) { ++ close(fd); ++ unlink(filename); ++ return ret; ++ } ++ ++ /* Compare the rstring with wstring. But we do not want to return ++ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or ++ * O_TRUNC|O_RDONLY. 
Because in that case we are not writing ++ * anything to the file.*/ ++ ++ ret = memcmp(wstring, rstring, strlen(wstring)); ++ if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && ++ flag != (O_CREAT | O_WRONLY) && ++ !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || ++ flag == (O_TRUNC | O_RDONLY))) { ++ fprintf(stderr, "read is returning junk\n"); + close(fd); + unlink(filename); +- return 0; ++ return ret; ++ } ++ ++ close(fd); ++ unlink(filename); ++ return 0; + } +diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t +index 3343956..0e4a1bb 100644 +--- a/tests/basic/posix/shared-statfs.t ++++ b/tests/basic/posix/shared-statfs.t +@@ -20,15 +20,18 @@ TEST mkdir -p $B0/${V0}1 $B0/${V0}2 + TEST MOUNT_LOOP $LO1 $B0/${V0}1 + TEST MOUNT_LOOP $LO2 $B0/${V0}2 + ++total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}') ++#Account for rounding error ++brick_blocks_two_percent_less=$((total_brick_blocks*98/100)) + # Create a subdir in mountpoint and use that for volume. + TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') ++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') + # Keeping the size less than 200M mainly because XFS will use + # some storage in brick to keep its own metadata. 
+-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] ++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] + + + TEST force_umount $M0 +@@ -41,8 +44,8 @@ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/ + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] ++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') ++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] + + TEST force_umount $M0 + TEST $CLI volume stop $V0 +diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t +index 8e5d77b..e719fc5 100755 +--- a/tests/bugs/cli/bug-1320388.t ++++ b/tests/bugs/cli/bug-1320388.t +@@ -21,7 +21,7 @@ cleanup; + rm -f $SSL_BASE/glusterfs.* + touch "$GLUSTERD_WORKDIR"/secure-access + +-TEST openssl genrsa -out $SSL_KEY 3072 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t +index d10fd9f..26d196e 100644 +--- a/tests/bugs/fuse/bug-985074.t ++++ b/tests/bugs/fuse/bug-985074.t +@@ -30,7 +30,7 @@ TEST glusterd + + TEST $CLI volume create $V0 $H0:$B0/$V0 + TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 md-cache-timeout 3 ++TEST $CLI volume set $V0 performance.stat-prefetch off + + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0 + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0 +@@ -40,8 +40,6 @@ TEST ln $M0/file $M0/file.link + TEST ls -ali $M0 $M1 + TEST rm -f $M1/file.link + TEST ls -ali $M0 $M1 +-# expire the md-cache timeout +-sleep 3 + TEST mv 
$M0/file $M0/file.link + TEST stat $M0/file.link + TEST ! stat $M0/file +diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/bugs/glusterd/quorum-value-check.t +deleted file mode 100755 +index aaf6362..0000000 +--- a/tests/bugs/glusterd/quorum-value-check.t ++++ /dev/null +@@ -1,35 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +- +-function check_quorum_nfs() { +- local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')" +- local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')" +- +- if [ $qnfs = $qinfo ]; then +- echo "Y" +- else +- echo "N" +- fi +-} +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +-TEST $CLI volume set $V0 nfs.disable off +-TEST $CLI volume set $V0 performance.write-behind off +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +-TEST $CLI volume set $V0 cluster.quorum-type fixed +-TEST $CLI volume start $V0 +- +-TEST $CLI volume set $V0 cluster.quorum-count 1 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +-TEST $CLI volume set $V0 cluster.quorum-count 2 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +-TEST $CLI volume set $V0 cluster.quorum-count 3 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +- +-cleanup; +diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t +index 82f7cca..f65b1bd 100755 +--- a/tests/bugs/glusterfs-server/bug-887145.t ++++ b/tests/bugs/glusterfs-server/bug-887145.t +@@ -29,7 +29,15 @@ chmod 600 $M0/file; + + TEST mount_nfs $H0:/$V0 $N0 nolock; + +-chown -R nfsnobody:nfsnobody $M0/dir; ++grep nfsnobody /etc/passwd > /dev/nul ++if [ $? 
-eq 1 ]; then ++usr=nobody ++grp=nobody ++else ++usr=nfsnobody ++grp=nfsnobody ++fi ++chown -R $usr:$grp $M0/dir; + chown -R tmp_user:tmp_user $M0/other; + + TEST $CLI volume set $V0 server.root-squash on; +@@ -38,7 +46,7 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; + + # create files and directories in the root of the glusterfs and nfs mount + # which is owned by root and hence the right behavior is getting EACCESS +-# as the fops are executed as nfsnobody. ++# as the fops are executed as nfsnobody/nobody. + touch $M0/foo 2>/dev/null; + TEST [ $? -ne 0 ] + touch $N0/foo 2>/dev/null; +@@ -61,7 +69,7 @@ cat $N0/passwd 1>/dev/null; + TEST [ $? -eq 0 ] + + # create files and directories should succeed as the fops are being executed +-# inside the directory owned by nfsnobody ++# inside the directory owned by nfsnobody/nobody + TEST touch $M0/dir/file; + TEST touch $N0/dir/foo; + TEST mkdir $M0/dir/new; +diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t +deleted file mode 100755 +index 2f53172..0000000 +--- a/tests/bugs/nfs/bug-1053579.t ++++ /dev/null +@@ -1,114 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup +- +-# prepare the users and groups +-NEW_USER=bug1053579 +-NEW_UID=1053579 +-NEW_GID=1053579 +-LAST_GID=1053779 +-NEW_GIDS=${NEW_GID} +- +-# OS-specific overrides +-case $OSTYPE in +-NetBSD|Darwin) +- # only NGROUPS_MAX=16 secondary groups are supported +- LAST_GID=1053593 +- ;; +-FreeBSD) +- # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups +- ;; +-Linux) +- # NGROUPS_MAX=65536, we can afford 200 groups +- ;; +-*) +- ;; +-esac +- +-# create a user that belongs to many groups +-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) +-do +- groupadd -o -g ${GID} ${NEW_USER}-${GID} +- NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}" +-done +-TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} +- +-# preparation done, start the tests +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 $H0:$B0/${V0}1 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 nfs.server-aux-gids on +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available +- +-# mount the volume +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +- +-# the actual test, this used to crash +-su -m ${NEW_USER} -c "stat $N0/. > /dev/null" +-TEST [ $? -eq 0 ] +- +-# create a file that only a user in a high-group can access +-echo 'Hello World!' > $N0/README +-chgrp ${LAST_GID} $N0/README +-chmod 0640 $N0/README +- +-#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" +-su -m ${NEW_USER} -c "cat $N0/README" +-ret=$? +- +-case $OSTYPE in +-Linux) # Linux NFS fails with big GID +- if [ $ret -ne 0 ] ; then +- res="Y" +- else +- res="N" +- fi +- ;; +-*) # Other systems should cope better +- if [ $ret -eq 0 ] ; then +- res="Y" +- else +- res="N" +- fi +- ;; +-esac +-TEST [ "x$res" = "xY" ] +- +-# This passes only on build.gluster.org, not reproducible on other machines?! 
+-#su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" +-#TEST [ $? -ne 0 ] +- +-# enable server.manage-gids and things should work +-TEST $CLI volume set $V0 server.manage-gids on +- +-su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" +-TEST [ $? -eq 0 ] +-su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" +-TEST [ $? -eq 0 ] +- +-# cleanup +-userdel --force ${NEW_USER} +-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) +-do +- groupdel ${NEW_USER}-${GID} +-done +- +-rm -f $N0/README +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +- +-TEST $CLI volume stop $V0 +-TEST $CLI volume delete $V0 +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/bugs/nfs/bug-1116503.t +deleted file mode 100644 +index dd3998d..0000000 +--- a/tests/bugs/nfs/bug-1116503.t ++++ /dev/null +@@ -1,47 +0,0 @@ +-#!/bin/bash +-# +-# Verify that mounting NFS over UDP (MOUNT service only) works. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 nfs.mount-udp on +- +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp; +-TEST mkdir -p $N0/foo/bar +-TEST ls $N0/foo +-TEST ls $N0/foo/bar +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST $CLI volume set $V0 nfs.addr-namelookup on +-TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +- +-cleanup; +diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t +deleted file mode 100644 +index c360db4..0000000 +--- a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t ++++ /dev/null +@@ -1,24 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 performance.open-behind off +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST mkdir -p $N0/foo +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +-TEST mount_nfs $H0:/$V0/foo $N0 nolock +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +-cleanup +diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t +deleted file mode 100644 +index dea609e..0000000 +--- a/tests/bugs/nfs/bug-1157223-symlink-mounting.t ++++ /dev/null +@@ -1,126 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-## Start and create a volume +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume info; +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0; +- +-## Wait for volume to register with rpc.mountd +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-## Mount NFS +-TEST mount_nfs $H0:/$V0 $N0 nolock; +- +-mkdir $N0/dir1; +-mkdir $N0/dir2; +-pushd $N0/ ; +- +-##link created using relative path +-ln -s dir1 symlink1; +- +-##relative path contains ".." +-ln -s ../dir1 dir2/symlink2; +- +-##link created using absolute path +-ln -s $N0/dir1 symlink3; +- +-##link pointing to another symlinks +-ln -s symlink1 symlink4 +-ln -s symlink3 symlink5 +- +-##dead links +-ln -s does/not/exist symlink6 +- +-##link which contains ".." 
points out of glusterfs +-ln -s ../../ symlink7 +- +-##links pointing to unauthorized area +-ln -s .glusterfs symlink8 +- +-popd ; +- +-##Umount the volume +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via directory +-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink1 +-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink2 +-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink3 should fail +-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock; +- +-## Mount and umount NFS via symlink4 +-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink5 should fail +-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock; +- +-## Mount NFS via symlink6 should fail +-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock; +- +-## Mount NFS via symlink7 should fail +-TEST ! mount_nfs $H0:/$V0/symlink7 $N0 nolock; +- +-## Mount NFS via symlink8 should fail +-TEST ! 
mount_nfs $H0:/$V0/symlink8 $N0 nolock; +- +-##Similar check for udp mount +-$CLI volume stop $V0 +-TEST $CLI volume set $V0 nfs.mount-udp on +-$CLI volume start $V0 +- +-## Wait for volume to register with rpc.mountd +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-## Mount and umount NFS via directory +-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink1 +-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink2 +-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink3 should fail +-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp; +- +-## Mount and umount NFS via symlink4 +-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink5 should fail +-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp; +- +-## Mount NFS via symlink6 should fail +-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp; +- +-##symlink7 is not check here, because in udp mount ../../ resolves into root '/' +- +-## Mount NFS via symlink8 should fail +-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp; +- +-rm -rf $H0:$B0/ +-cleanup; +diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t +deleted file mode 100644 +index 45a22e7..0000000 +--- a/tests/bugs/nfs/bug-1161092-nfs-acls.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume info +- +-TEST $CLI volume create $V0 $H0:$B0/brick1; +-EXPECT 'Created' volinfo_field $V0 'Status'; +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +-TEST mount_nfs $H0:/$V0 $N0 +- +-TEST touch $N0/file1 +-TEST chmod 700 $N0/file1 +-TEST getfacl $N0/file1 +- +-TEST $CLI volume set $V0 root-squash on +-TEST getfacl $N0/file1 +- +-TEST umount_nfs $H0:/$V0 $N0 +-TEST mount_nfs $H0:/$V0 $N0 +-TEST getfacl $N0/file1 +- +-## Before killing daemon to avoid deadlocks +-umount_nfs $N0 +- +-cleanup; +- +diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t +deleted file mode 100755 +index c4f51a2..0000000 +--- a/tests/bugs/nfs/bug-1166862.t ++++ /dev/null +@@ -1,69 +0,0 @@ +-#!/bin/bash +-# +-# When nfs.mount-rmtab is disabled, it should not get updated. +-# +-# Based on: bug-904065.t +-# +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-# count the lines of a file, return 0 if the file does not exist +-function count_lines() +-{ +- if [ -n "$1" ] +- then +- $@ 2>/dev/null | wc -l +- else +- echo 0 +- fi +-} +- +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. 
$(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# disable the rmtab by settting it to the magic "/-" value +-TEST $CLI volume set $V0 nfs.mount-rmtab /- +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-# showmount should list one client +-EXPECT '1' count_lines showmount --no-headers $H0 +- +-# unmount +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-# after resetting the option, the rmtab should get updated again +-TEST $CLI volume reset $V0 nfs.mount-rmtab +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-# removing a mount +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c +deleted file mode 100644 +index d409924..0000000 +--- a/tests/bugs/nfs/bug-1210338.c ++++ /dev/null +@@ -1,31 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-int +-main(int argc, char *argv[]) +-{ +- int ret = -1; +- int fd = -1; +- +- fd = open(argv[1], O_CREAT | O_EXCL, 0644); +- +- if (fd == -1) { +- fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1], +- strerror(errno)); +- goto out; +- } +- +- ret = 0; +- +-out: +- if (fd > 0) +- close(fd); +- +- return ret; +-} +diff --git 
a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t +deleted file mode 100644 +index b5c9245..0000000 +--- a/tests/bugs/nfs/bug-1210338.t ++++ /dev/null +@@ -1,30 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-NFS_SOURCE=$(dirname $0)/bug-1210338.c +-NFS_EXEC=$(dirname $0)/excl_create +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-build_tester $NFS_SOURCE -o $NFS_EXEC +-TEST [ -e $NFS_EXEC ] +- +-TEST $NFS_EXEC $N0/my_file +- +-rm -f $NFS_EXEC; +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t +deleted file mode 100755 +index a2fb0e6..0000000 +--- a/tests/bugs/nfs/bug-1302948.t ++++ /dev/null +@@ -1,13 +0,0 @@ +-#!/bin/bash +-# TEST the nfs.rdirplus option +-. $(dirname $0)/../../include.rc +- +-cleanup +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 nfs.rdirplus off +-TEST $CLI volume set $V0 nfs.rdirplus on +-cleanup +diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t +deleted file mode 100755 +index 5ccee72..0000000 +--- a/tests/bugs/nfs/bug-847622.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. 
$(dirname $0)/../../volume.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-case $OSTYPE in +-NetBSD) +- echo "Skip test on ACL which are not available on NetBSD" >&2 +- SKIP_TESTS +- exit 0 +- ;; +-*) +- ;; +-esac +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 $H0:$B0/brick0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +-cd $N0 +- +-# simple getfacl setfacl commands +-TEST touch testfile +-TEST setfacl -m u:14:r testfile +-TEST getfacl testfile +- +-cd +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-cleanup +- +diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t +deleted file mode 100755 +index dca315a..0000000 +--- a/tests/bugs/nfs/bug-877885.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +- +-## Mount FUSE with caching disabled +-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \ +-$M0; +- +-TEST touch $M0/file +-TEST mkdir $M0/dir +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +-cd $N0 +- +-rm -rf * & +- +-TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock; +- +-cd; +- +-kill %1; +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 +- +-cleanup +diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t +deleted file mode 100755 +index 0eba86e..0000000 +--- a/tests/bugs/nfs/bug-904065.t ++++ /dev/null +@@ -1,100 +0,0 @@ +-#!/bin/bash +-# +-# This test does not use 'showmount' 
from the nfs-utils package, it would +-# require setting up a portmapper (either rpcbind or portmap, depending on the +-# Linux distribution used for testing). The persistancy of the rmtab should not +-# affect the current showmount outputs, so existing regression tests should be +-# sufficient. +-# +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-# count the lines of a file, return 0 if the file does not exist +-function count_lines() +-{ +- if [ -e "$1" ] +- then +- wc -l < $1 +- else +- echo 0 +- fi +-} +- +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-# the output would looks similar to: +-# +-# hostname-0=172.31.122.104 +-# mountpoint-0=/ufo +-# +-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# duplicate mounts should not be recorded (client could have crashed) +-TEST mount_nfs $H0:/$V0 $N1 nolock +-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# removing a mount should (even if there are two) should remove the entry +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# unmounting the other mount should work flawlessly +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0 +- +-# we'll create a fake rmtab here, similar to how an other storage 
server would do +-# using an invalid IP address to prevent (unlikely) collisions on the test-machine +-cat << EOF > $M0/rmtab +-hostname-0=127.0.0.256 +-mountpoint-0=/ufo +-EOF +-EXPECT '2' count_lines $M0/rmtab +- +-# reconfigure merges the rmtab with the one on the volume +-TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab +- +-# glusterfs/nfs needs some time to restart +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# Apparently "is_nfs_export_available" might return even if the export is +-# not, in fact, available. (eyeroll) Give it a bit of extra time. +-# +-# TBD: fix the broken shell function instead of working around it here +-sleep 5 +- +-# a new mount should be added to the rmtab, not overwrite exiting ones +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '2' count_lines $M0/rmtab +- +-# TODO: nfs/reconfigure() is never called and is therefor disabled. When the +-# NFS-server supports reloading and does not get restarted anymore, we should +-# add a test that includes the merging of entries in the old rmtab with the new +-# rmtab. +- +-cleanup +diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t +deleted file mode 100755 +index bd27915..0000000 +--- a/tests/bugs/nfs/bug-915280.t ++++ /dev/null +@@ -1,54 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2; +-EXPECT 'Created' volinfo_field $V0 'Status'; +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-MOUNTDIR=$N0; +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1 +-TEST touch $N0/testfile +- +-TEST $CLI volume set $V0 debug.error-gen client +-TEST $CLI volume set $V0 debug.error-fops stat +-TEST $CLI volume set $V0 debug.error-failure 100 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-pid_file=$(read_nfs_pidfile); +- +-getfacl $N0/testfile 2>/dev/null +- +-nfs_pid=$(get_nfs_pid); +-if [ ! $nfs_pid ] +-then +- nfs_pid=0; +-fi +- +-TEST [ $nfs_pid -eq $pid_file ] +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR +- +-cleanup; +diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t +deleted file mode 100755 +index 61be484..0000000 +--- a/tests/bugs/nfs/bug-970070.t ++++ /dev/null +@@ -1,13 +0,0 @@ +-#!/bin/bash +-# TEST the nfs.acl option +-. $(dirname $0)/../../include.rc +- +-cleanup +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 nfs.acl off +-TEST $CLI volume set $V0 nfs.acl on +-cleanup +diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t +deleted file mode 100755 +index 975c46f..0000000 +--- a/tests/bugs/nfs/bug-974972.t ++++ /dev/null +@@ -1,41 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-#This script checks that nfs mount does not fail lookup on files with split-brain +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +-TEST $CLI volume set $V0 self-heal-daemon off +-TEST $CLI volume set $V0 cluster.eager-lock off +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 +-TEST touch $N0/1 +-TEST kill_brick ${V0} ${H0} ${B0}/${V0}1 +-echo abc > $N0/1 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 +- +-TEST kill_brick ${V0} ${H0} ${B0}/${V0}0 +-echo def > $N0/1 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 +- +-#Lookup should not fail +-TEST ls $N0/1 +-TEST ! cat $N0/1 +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-cleanup +diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t +deleted file mode 100644 +index f1b6859..0000000 +--- a/tests/bugs/nfs/showmount-many-clients.t ++++ /dev/null +@@ -1,41 +0,0 @@ +-#!/bin/bash +-# +-# The nfs.rpc-auth-allow volume option is used to generate the list of clients +-# that are displayed as able to mount the export. The "group" in the export +-# should be a list of all clients, identified by "name". In previous versions, +-# the "name" was the copied string from nfs.rpc-auth-allow. This is not +-# correct, as the volume option should be parsed and split into different +-# groups. 
+-# +-# When the single string is passed, this testcase fails when the +-# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting +-# the groups into their own structures, this testcase passes. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,) +-TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS} +-TEST $CLI volume set $V0 nfs.rpc-auth-reject all +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# showmount should not timeout (no reply is sent on error) +-TEST showmount -e $H0 +- +-cleanup +diff --git a/tests/bugs/nfs/socket-as-fifo.py b/tests/bugs/nfs/socket-as-fifo.py +deleted file mode 100755 +index eb507e1..0000000 +--- a/tests/bugs/nfs/socket-as-fifo.py ++++ /dev/null +@@ -1,33 +0,0 @@ +-# +-# Create a unix domain socket and test if it is a socket (and not a fifo/pipe). +-# +-# Author: Niels de Vos +-# +- +-from __future__ import print_function +-import os +-import stat +-import sys +-import socket +- +-ret = 1 +- +-if len(sys.argv) != 2: +- print('Usage: %s ' % (sys.argv[0])) +- sys.exit(ret) +- +-path = sys.argv[1] +- +-sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +-sock.bind(path) +- +-stbuf = os.stat(path) +-mode = stbuf.st_mode +- +-if stat.S_ISSOCK(mode): +- ret = 0 +- +-sock.close() +-os.unlink(path) +- +-sys.exit(ret) +diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t +deleted file mode 100644 +index d9b9e95..0000000 +--- a/tests/bugs/nfs/socket-as-fifo.t ++++ /dev/null +@@ -1,25 +0,0 @@ +-#!/bin/bash +- +-. 
$(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-# this is the actual test +-TEST $PYTHON $(dirname $0)/socket-as-fifo.py $N0/not-a-fifo.socket +- +-TEST umount_nfs $N0 +- +-cleanup +diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t +deleted file mode 100644 +index 6a11487..0000000 +--- a/tests/bugs/nfs/subdir-trailing-slash.t ++++ /dev/null +@@ -1,32 +0,0 @@ +-#!/bin/bash +-# +-# Verify that mounting a subdir over NFS works, even with a trailing / +-# +-# For example: +-# mount -t nfs server.example.com:/volume/subdir/ +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST mkdir -p $N0/subdir +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-cleanup +diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t +deleted file mode 100755 +index 2a94009..0000000 +--- a/tests/bugs/nfs/zero-atime.t ++++ /dev/null +@@ -1,33 +0,0 @@ +-#!/bin/bash +-# +-# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If +-# not set, these values are 0 and cause a atime/mtime set to the Epoch. +-# +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-# create a file for testing +-TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k +- +-# timezone in UTC results in atime=0 if not set correctly +-TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat +-TEST [ "$(stat --format=%X $M0/small)" != "0" ] +- +-TEST rm $M0/small +- +-cleanup +diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t +index 65af274..9ad0ab2 100755 +--- a/tests/bugs/rpc/bug-954057.t ++++ b/tests/bugs/rpc/bug-954057.t +@@ -25,7 +25,15 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + + TEST mkdir $M0/dir + TEST mkdir $M0/nobody +-TEST chown nfsnobody:nfsnobody $M0/nobody ++grep nfsnobody /etc/passwd > /dev/nul ++if [ $? -eq 1 ]; then ++usr=nobody ++grp=nobody ++else ++usr=nfsnobody ++grp=nfsnobody ++fi ++TEST chown $usr:$grp $M0/nobody + TEST `echo "file" >> $M0/file` + TEST cp $M0/file $M0/new + TEST chmod 700 $M0/new +diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t +index 7628870..66e896a 100644 +--- a/tests/bugs/shard/bug-1272986.t ++++ b/tests/bugs/shard/bug-1272986.t +@@ -16,16 +16,16 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 + + # Write some data into a file, such that its size crosses the shard block size. +-TEST dd if=/dev/zero of=$M1/file bs=1M count=5 conv=notrunc ++TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct + + md5sum1_reader=$(md5sum $M0/file | awk '{print $1}') + + EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'` + + # Append some more data into the file. 
+-TEST `echo "abcdefg" >> $M1/file` ++TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct + +-md5sum2_reader=$(md5sum $M0/file | awk '{print $1}') ++md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}') + + # Test to see if the reader refreshes its cache correctly as part of the reads + # triggered through md5sum. If it does, then the md5sum on the reader and writer +diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t +index d4c0702..8070bc1 100755 +--- a/tests/bugs/transport/bug-873367.t ++++ b/tests/bugs/transport/bug-873367.t +@@ -13,7 +13,7 @@ rm -f $SSL_BASE/glusterfs.* + mkdir -p $B0/1 + mkdir -p $M0 + +-TEST openssl genrsa -out $SSL_KEY 1024 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index 3cb45b5..cae010c 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -41,7 +41,7 @@ function valid_ciphers { + -e '/:$/s///' + } + +-TEST openssl genrsa -out $SSL_KEY 1024 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t +index 7e1e199..e4bcdf5 100644 +--- a/tests/features/ssl-ciphers.t ++++ b/tests/features/ssl-ciphers.t +@@ -33,18 +33,26 @@ wait_mount() { + openssl_connect() { + ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA" + ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR" +- #echo openssl s_client $ssl_opt $@ > /dev/tty +- #read -p "Continue? 
" nothing +- CIPHER=`echo "" | +- openssl s_client $ssl_opt $@ 2>/dev/null | +- awk '/^ Cipher/{print $3}'` +- if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" ] ; then ++ cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null" ++ CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}') ++ if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then + echo "N" + else + echo "Y" + fi + } + ++#Validate the cipher to pass EXPECT test case before call openssl_connect ++check_cipher() { ++ cmd="echo "" | openssl s_client $@ 2> /dev/null" ++ cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}') ++ if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then ++ echo "N" ++ else ++ echo "Y" ++ fi ++} ++ + cleanup; + mkdir -p $B0 + mkdir -p $M0 +@@ -65,7 +73,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume info; + +-TEST openssl genrsa -out $SSL_KEY 1024 2>/dev/null ++TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null + TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \ + -subj /CN=CA -out $SSL_CA + TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \ +@@ -106,28 +114,36 @@ EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT + EXPECT "N" openssl_connect -tls1 -connect $H0:$BRICK_PORT + + # Test a HIGH CBC cipher +-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT + + # Test EECDH +-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + # test MD5 fails +-EXPECT "N" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT` ++EXPECT 
"$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT + + # test RC4 fails +-EXPECT "N" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT + + # test eNULL fails +-EXPECT "N" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT + + # test SHA2 +-EXPECT "Y" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT + + # test GCM +-EXPECT "Y" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT + + # Test DH fails without DH params +-EXPECT "N" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT + + # Test DH with DH params + TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem +@@ -145,8 +161,10 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +-EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT + + # Test the ec-curve option + TEST $CLI volume set 
$V0 ssl.cipher-list EECDH:EDH:!TLSv1 +@@ -155,8 +173,10 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + TEST $CLI volume set $V0 ssl.ec-curve invalid + EXPECT invalid volume_option $V0 ssl.ec-curve +@@ -164,7 +184,8 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + TEST $CLI volume set $V0 ssl.ec-curve secp521r1 + EXPECT secp521r1 volume_option $V0 ssl.ec-curve +diff --git a/tests/ssl.rc b/tests/ssl.rc +index 127f83f..b1ccc4c 100644 +--- a/tests/ssl.rc ++++ b/tests/ssl.rc +@@ -20,7 +20,7 @@ SSL_CA=$SSL_BASE/glusterfs.ca + + # Create self-signed certificates + function create_self_signed_certs (){ +- openssl genrsa -out $SSL_KEY 1024 ++ openssl genrsa -out $SSL_KEY 2048 + openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + return $? 
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index b248767..b224abd 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -10,6883 +10,6417 @@ + + #include + +-#include "shard.h" + #include "shard-mem-types.h" ++#include "shard.h" + #include + #include + #include + +-static gf_boolean_t +-__is_shard_dir(uuid_t gfid) +-{ +- shard_priv_t *priv = THIS->private; ++static gf_boolean_t __is_shard_dir(uuid_t gfid) { ++ shard_priv_t *priv = THIS->private; + +- if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) +- return _gf_true; ++ if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-static gf_boolean_t +-__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) +-{ +- if (frame->root->pid == GF_CLIENT_PID_GSYNCD && +- (__is_shard_dir(loc->pargfid) || +- (loc->parent && __is_shard_dir(loc->parent->gfid)))) +- return _gf_true; ++static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) { ++ if (frame->root->pid == GF_CLIENT_PID_GSYNCD && ++ (__is_shard_dir(loc->pargfid) || ++ (loc->parent && __is_shard_dir(loc->parent->gfid)))) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-void +-shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(buf, len, "%s.%d", gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(buf, len, "%s.%d", gfid_str, block_num); + } + +-void +-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, ++ size_t len) { ++ char 
gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); + } + +-int +-__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx_p = NULL; ++int __shard_inode_ctx_get(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t **ctx) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx_p = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret == 0) { +- *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; +- return ret; +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret == 0) { ++ *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ return ret; ++ } + +- ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); +- if (!ctx_p) +- return ret; ++ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); ++ if (!ctx_p) ++ return ret; + +- INIT_LIST_HEAD(&ctx_p->ilist); +- INIT_LIST_HEAD(&ctx_p->to_fsync_list); ++ INIT_LIST_HEAD(&ctx_p->ilist); ++ INIT_LIST_HEAD(&ctx_p->to_fsync_list); + +- ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); +- if (ret < 0) { +- GF_FREE(ctx_p); +- return ret; +- } ++ ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); ++ if (ret < 0) { ++ GF_FREE(ctx_p); ++ return ret; ++ } + +- *ctx = ctx_p; ++ *ctx = ctx_p; + +- return ret; ++ return ret; + } + +-int +-shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +-{ +- int ret = 0; ++int shard_inode_ctx_get(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t **ctx) { ++ int ret = 0; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get(inode, this, ctx); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get(inode, this, ctx); } ++ UNLOCK(&inode->lock); + +- return ret; ++ 
return ret; + } + +-int +-__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if (valid & SHARD_MASK_BLOCK_SIZE) +- ctx->block_size = block_size; ++ if (valid & SHARD_MASK_BLOCK_SIZE) ++ ctx->block_size = block_size; + +- if (valid & SHARD_MASK_PROT) +- ctx->stat.ia_prot = stbuf->ia_prot; ++ if (valid & SHARD_MASK_PROT) ++ ctx->stat.ia_prot = stbuf->ia_prot; + +- if (valid & SHARD_MASK_NLINK) +- ctx->stat.ia_nlink = stbuf->ia_nlink; ++ if (valid & SHARD_MASK_NLINK) ++ ctx->stat.ia_nlink = stbuf->ia_nlink; + +- if (valid & SHARD_MASK_UID) +- ctx->stat.ia_uid = stbuf->ia_uid; ++ if (valid & SHARD_MASK_UID) ++ ctx->stat.ia_uid = stbuf->ia_uid; + +- if (valid & SHARD_MASK_GID) +- ctx->stat.ia_gid = stbuf->ia_gid; ++ if (valid & SHARD_MASK_GID) ++ ctx->stat.ia_gid = stbuf->ia_gid; + +- if (valid & SHARD_MASK_SIZE) +- ctx->stat.ia_size = stbuf->ia_size; ++ if (valid & SHARD_MASK_SIZE) ++ ctx->stat.ia_size = stbuf->ia_size; + +- if (valid & SHARD_MASK_BLOCKS) +- ctx->stat.ia_blocks = stbuf->ia_blocks; ++ if (valid & SHARD_MASK_BLOCKS) ++ ctx->stat.ia_blocks = stbuf->ia_blocks; + +- if (valid & SHARD_MASK_TIMES) { +- SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, +- stbuf->ia_mtime, stbuf->ia_mtime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, +- stbuf->ia_ctime, stbuf->ia_ctime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, +- stbuf->ia_atime, stbuf->ia_atime_nsec); +- } ++ if (valid & SHARD_MASK_TIMES) { ++ SHARD_TIME_UPDATE(ctx->stat.ia_mtime, 
ctx->stat.ia_mtime_nsec, ++ stbuf->ia_mtime, stbuf->ia_mtime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, ++ stbuf->ia_ctime, stbuf->ia_ctime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, ++ stbuf->ia_atime, stbuf->ia_atime_nsec); ++ } + +- if (valid & SHARD_MASK_OTHERS) { +- ctx->stat.ia_ino = stbuf->ia_ino; +- gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); +- ctx->stat.ia_dev = stbuf->ia_dev; +- ctx->stat.ia_type = stbuf->ia_type; +- ctx->stat.ia_rdev = stbuf->ia_rdev; +- ctx->stat.ia_blksize = stbuf->ia_blksize; +- } ++ if (valid & SHARD_MASK_OTHERS) { ++ ctx->stat.ia_ino = stbuf->ia_ino; ++ gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); ++ ctx->stat.ia_dev = stbuf->ia_dev; ++ ctx->stat.ia_type = stbuf->ia_type; ++ ctx->stat.ia_rdev = stbuf->ia_rdev; ++ ctx->stat.ia_blksize = stbuf->ia_blksize; ++ } + +- if (valid & SHARD_MASK_REFRESH_RESET) +- ctx->refresh = _gf_false; ++ if (valid & SHARD_MASK_REFRESH_RESET) ++ ctx->refresh = _gf_false; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) +-{ +- int ret = -1; ++int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, 
&ctx); ++ if (ret) ++ return ret; + +- ctx->refresh = _gf_true; ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } +-int +-shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; ++int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_set_refresh_flag(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_set_refresh_flag(inode, this); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- ctx->refreshed = _gf_true; +- return 0; ++ ctx->refreshed = _gf_true; ++ return 0; + } + +-int +-shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; ++int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) +-{ +- int ret = -1; +- shard_inode_ctx_t *base_ictx = NULL; +- shard_inode_ctx_t *shard_ictx = NULL; +- +- ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (ret) +- return ret; ++int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) { ++ int ret = -1; ++ shard_inode_ctx_t *base_ictx 
= NULL; ++ shard_inode_ctx_t *shard_ictx = NULL; + +- ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (ret) ++ return ret; + +- if (shard_ictx->fsync_needed) { +- shard_ictx->fsync_needed++; +- return 1; +- } ++ ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); ++ if (ret) ++ return ret; + +- list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); +- shard_ictx->inode = shard_inode; ++ if (shard_ictx->fsync_needed) { + shard_ictx->fsync_needed++; +- base_ictx->fsync_count++; +- shard_ictx->base_inode = base_inode; ++ return 1; ++ } + +- return 0; ++ list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); ++ shard_ictx->inode = shard_inode; ++ shard_ictx->fsync_needed++; ++ base_ictx->fsync_count++; ++ shard_ictx->base_inode = base_inode; ++ ++ return 0; + } + +-int +-shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) +-{ +- int ret = -1; ++int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) { ++ int ret = -1; + +- /* This ref acts as a refkeepr on the base inode. We +- * need to keep this inode alive as it holds the head +- * of the to_fsync_list. +- */ +- inode_ref(base_inode); +- inode_ref(shard_inode); ++ /* This ref acts as a refkeepr on the base inode. We ++ * need to keep this inode alive as it holds the head ++ * of the to_fsync_list. 
++ */ ++ inode_ref(base_inode); ++ inode_ref(shard_inode); + +- LOCK(&base_inode->lock); +- LOCK(&shard_inode->lock); +- { +- ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, +- shard_inode); +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&base_inode->lock); ++ LOCK(&base_inode->lock); ++ LOCK(&shard_inode->lock); ++ { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&base_inode->lock); + +- /* Unref the base inode corresponding to the ref above, if the shard is +- * found to be already part of the fsync list. +- */ +- if (ret != 0) { +- inode_unref(base_inode); +- inode_unref(shard_inode); +- } +- return ret; ++ /* Unref the base inode corresponding to the ref above, if the shard is ++ * found to be already part of the fsync list. ++ */ ++ if (ret != 0) { ++ inode_unref(base_inode); ++ inode_unref(shard_inode); ++ } ++ return ret; + } + +-gf_boolean_t +-__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- /* If inode ctx get fails, better to err on the side of caution and +- * try again? Unless the failure is due to mem-allocation. +- */ +- if (ret) +- return _gf_true; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ /* If inode ctx get fails, better to err on the side of caution and ++ * try again? Unless the failure is due to mem-allocation. 
++ */ ++ if (ret) ++ return _gf_true; + +- return !ctx->refreshed; ++ return !ctx->refreshed; + } + +-gf_boolean_t +-shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +-{ +- gf_boolean_t flag = _gf_false; ++gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { ++ gf_boolean_t flag = _gf_false; + +- LOCK(&inode->lock); +- { +- flag = __shard_inode_ctx_needs_lookup(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { flag = __shard_inode_ctx_needs_lookup(inode, this); } ++ UNLOCK(&inode->lock); + +- return flag; ++ return flag; + } +-int +-__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, ++ struct iatt *stbuf) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if ((stbuf->ia_size != ctx->stat.ia_size) || +- (stbuf->ia_blocks != ctx->stat.ia_blocks)) +- ctx->refresh = _gf_true; ++ if ((stbuf->ia_size != ctx->stat.ia_size) || ++ (stbuf->ia_blocks != ctx->stat.ia_blocks)) ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +-{ +- int ret = -1; ++int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, ++ struct iatt *stbuf) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_invalidate(inode, this, stbuf); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int 
__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *block_size = ctx->block_size; ++ *block_size = ctx->block_size; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) +-{ +- int ret = -1; ++int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_block_size(inode, this, block_size); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *fsync_count = ctx->fsync_needed; ++ *fsync_count = ctx->fsync_needed; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) +-{ +- int ret = -1; ++int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) { ++ int 
ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } +-int +-__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); +- return 0; ++ memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); ++ return 0; + } + +-int +-shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) +-{ +- int ret = -1; ++int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_all(inode, this, ctx_out); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = 
__inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- if (ctx->refresh == _gf_false) +- *buf = ctx->stat; +- else +- *need_refresh = _gf_true; ++ if (ctx->refresh == _gf_false) ++ *buf = ctx->stat; ++ else ++ *need_refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) +-{ +- int ret = -1; ++int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, +- need_refresh); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = ++ __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-void +-shard_local_wipe(shard_local_t *local) +-{ +- int i = 0; +- int count = 0; +- +- count = local->num_blocks; +- +- syncbarrier_destroy(&local->barrier); +- loc_wipe(&local->loc); +- loc_wipe(&local->dot_shard_loc); +- loc_wipe(&local->dot_shard_rm_loc); +- loc_wipe(&local->loc2); +- loc_wipe(&local->tmp_loc); +- loc_wipe(&local->int_inodelk.loc); +- loc_wipe(&local->int_entrylk.loc); +- loc_wipe(&local->newloc); +- +- if (local->int_entrylk.basename) +- GF_FREE(local->int_entrylk.basename); +- if (local->fd) +- fd_unref(local->fd); +- +- if (local->xattr_req) +- dict_unref(local->xattr_req); +- if (local->xattr_rsp) +- dict_unref(local->xattr_rsp); +- +- for (i = 0; i < count; i++) { +- if (!local->inode_list) +- break; +- +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- +- GF_FREE(local->inode_list); +- +- GF_FREE(local->vector); +- if (local->iobref) +- 
iobref_unref(local->iobref); +- if (local->list_inited) +- gf_dirent_free(&local->entries_head); +- if (local->inodelk_frame) +- SHARD_STACK_DESTROY(local->inodelk_frame); +- if (local->entrylk_frame) +- SHARD_STACK_DESTROY(local->entrylk_frame); +-} +- +-int +-shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) +-{ +- int ret = -1; +- void *size_attr = NULL; +- uint64_t size_array[4]; +- +- ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, +- SHARD_MSG_INTERNAL_XATTR_MISSING, +- "Failed to " +- "get " GF_XATTR_SHARD_FILE_SIZE " for %s", +- uuid_utoa(stbuf->ia_gfid)); +- return ret; +- } ++void shard_local_wipe(shard_local_t *local) { ++ int i = 0; ++ int count = 0; + +- memcpy(size_array, size_attr, sizeof(size_array)); ++ count = local->num_blocks; + +- stbuf->ia_size = ntoh64(size_array[0]); +- stbuf->ia_blocks = ntoh64(size_array[2]); ++ syncbarrier_destroy(&local->barrier); ++ loc_wipe(&local->loc); ++ loc_wipe(&local->dot_shard_loc); ++ loc_wipe(&local->dot_shard_rm_loc); ++ loc_wipe(&local->loc2); ++ loc_wipe(&local->tmp_loc); ++ loc_wipe(&local->int_inodelk.loc); ++ loc_wipe(&local->int_entrylk.loc); ++ loc_wipe(&local->newloc); + +- return 0; +-} ++ if (local->int_entrylk.basename) ++ GF_FREE(local->int_entrylk.basename); ++ if (local->fd) ++ fd_unref(local->fd); + +-int +-shard_call_count_return(call_frame_t *frame) +-{ +- int call_count = 0; +- shard_local_t *local = NULL; ++ if (local->xattr_req) ++ dict_unref(local->xattr_req); ++ if (local->xattr_rsp) ++ dict_unref(local->xattr_rsp); + +- local = frame->local; ++ for (i = 0; i < count; i++) { ++ if (!local->inode_list) ++ break; ++ ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ ++ GF_FREE(local->inode_list); ++ ++ GF_FREE(local->vector); ++ if (local->iobref) ++ iobref_unref(local->iobref); ++ if (local->list_inited) ++ gf_dirent_free(&local->entries_head); ++ if 
(local->inodelk_frame) ++ SHARD_STACK_DESTROY(local->inodelk_frame); ++ if (local->entrylk_frame) ++ SHARD_STACK_DESTROY(local->entrylk_frame); ++} ++ ++int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) { ++ int ret = -1; ++ void *size_attr = NULL; ++ uint64_t size_array[4]; ++ ++ ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_INTERNAL_XATTR_MISSING, ++ "Failed to " ++ "get " GF_XATTR_SHARD_FILE_SIZE " for %s", ++ uuid_utoa(stbuf->ia_gfid)); ++ return ret; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ ++ stbuf->ia_size = ntoh64(size_array[0]); ++ stbuf->ia_blocks = ntoh64(size_array[2]); ++ ++ return 0; ++} ++ ++int shard_call_count_return(call_frame_t *frame) { ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ LOCK(&frame->lock); ++ { call_count = --local->call_count; } ++ UNLOCK(&frame->lock); ++ ++ return call_count; ++} ++ ++static char *shard_internal_dir_string(shard_internal_dir_type_t type) { ++ char *str = NULL; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ str = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ str = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ return str; ++} ++ ++static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ char *bname = NULL; ++ inode_t *parent = NULL; ++ loc_t *internal_dir_loc = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ if (!local) ++ return -1; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ internal_dir_loc = &local->dot_shard_loc; ++ bname = GF_SHARD_DIR; ++ parent = inode_ref(this->itable->root); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ internal_dir_loc = &local->dot_shard_rm_loc; ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ parent = 
inode_ref(priv->dot_shard_inode); ++ break; ++ default: ++ break; ++ } ++ ++ internal_dir_loc->inode = inode_new(this->itable); ++ internal_dir_loc->parent = parent; ++ ret = inode_path(internal_dir_loc->parent, bname, ++ (char **)&internal_dir_loc->path); ++ if (ret < 0 || !(internal_dir_loc->inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", bname); ++ goto out; ++ } ++ ++ internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); ++ if (internal_dir_loc->name) ++ internal_dir_loc->name++; ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, ++ inode_t *base_inode, int block_num, ++ uuid_t gfid) { ++ char block_bname[256] = { ++ 0, ++ }; ++ inode_t *lru_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *lru_inode_ctx = NULL; ++ shard_inode_ctx_t *lru_base_inode_ctx = NULL; ++ inode_t *fsync_inode = NULL; ++ inode_t *lru_base_inode = NULL; ++ gf_boolean_t do_fsync = _gf_false; ++ ++ priv = this->private; ++ ++ shard_inode_ctx_get(linked_inode, this, &ctx); ++ ++ if (list_empty(&ctx->ilist)) { ++ if (priv->inode_count + 1 <= priv->lru_limit) { ++ /* If this inode was linked here for the first time (indicated ++ * by empty list), and if there is still space in the priv list, ++ * add this ctx to the tail of the list. ++ */ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ priv->inode_count++; ++ ctx->base_inode = inode_ref(base_inode); ++ } else { ++ /*If on the other hand there is no available slot for this inode ++ * in the list, delete the lru inode from the head of the list, ++ * unlink it. 
And in its place add this new inode into the list. ++ */ ++ lru_inode_ctx = ++ list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist); ++ GF_ASSERT(lru_inode_ctx->block_num > 0); ++ lru_base_inode = lru_inode_ctx->base_inode; ++ list_del_init(&lru_inode_ctx->ilist); ++ lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid); ++ /* If the lru inode was part of the pending-fsync list, ++ * the base inode needs to be unref'd, the lru inode ++ * deleted from fsync list and fsync'd in a new frame, ++ * and then unlinked in memory and forgotten. ++ */ ++ if (!lru_base_inode) ++ goto after_fsync_check; ++ LOCK(&lru_base_inode->lock); ++ LOCK(&lru_inode->lock); ++ { ++ if (!list_empty(&lru_inode_ctx->to_fsync_list)) { ++ list_del_init(&lru_inode_ctx->to_fsync_list); ++ lru_inode_ctx->fsync_needed = 0; ++ do_fsync = _gf_true; ++ __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx); ++ lru_base_inode_ctx->fsync_count--; ++ } ++ } ++ UNLOCK(&lru_inode->lock); ++ UNLOCK(&lru_base_inode->lock); ++ ++ after_fsync_check: ++ if (!do_fsync) { ++ shard_make_block_bname(lru_inode_ctx->block_num, ++ lru_inode_ctx->base_gfid, block_bname, ++ sizeof(block_bname)); ++ /* The following unref corresponds to the ref held at ++ * the time the shard was added to the lru list. ++ */ ++ inode_unref(lru_inode); ++ inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); ++ inode_forget(lru_inode, 0); ++ } else { ++ /* The following unref corresponds to the ref ++ * held when the shard was added to fsync list. ++ */ ++ inode_unref(lru_inode); ++ fsync_inode = lru_inode; ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ } ++ /* The following unref corresponds to the ref ++ * held by inode_find() above. 
++ */ ++ inode_unref(lru_inode); ++ ++ /* The following unref corresponds to the ref held on the base shard ++ * at the time of adding shard inode to lru list ++ */ ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ ctx->base_inode = inode_ref(base_inode); ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ } ++ } else { ++ /* If this is not the first time this inode is being operated on, move ++ * it to the most recently used end of the list. ++ */ ++ list_move_tail(&ctx->ilist, &priv->ilist_head); ++ } ++ return fsync_inode; ++} ++ ++int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, ++ int32_t op_ret, int32_t op_errno) { ++ switch (fop) { ++ case GF_FOP_LOOKUP: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_STAT: ++ SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSTAT: ++ SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_TRUNCATE: ++ SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FTRUNCATE: ++ SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_MKNOD: ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_LINK: ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_CREATE: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_UNLINK: ++ SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_RENAME: ++ 
SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_READ: ++ SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_FSYNC: ++ SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_REMOVEXATTR: ++ SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FREMOVEXATTR: ++ SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FGETXATTR: ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_GETXATTR: ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSETXATTR: ++ SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETXATTR: ++ SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETATTR: ++ SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FSETATTR: ++ SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_SEEK: ++ SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, ++ call_frame_t *frame, ++ int32_t op_ret) { ++ shard_local_t *local = NULL; ++ ++ local 
= frame->local; ++ ++ switch (fop) { ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) { ++ char block_bname[256] = { ++ 0, ++ }; ++ fd_t *anon_fd = cookie; ++ inode_t *shard_inode = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (anon_fd == NULL || op_ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, ++ "fsync failed on shard"); ++ goto out; ++ } ++ shard_inode = anon_fd->inode; ++ ++ LOCK(&priv->lock); ++ LOCK(&shard_inode->lock); ++ { ++ __shard_inode_ctx_get(shard_inode, this, &ctx); ++ if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { ++ shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname, ++ sizeof(block_bname)); ++ inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); ++ /* The following unref corresponds to the ref held by ++ * inode_link() at the time the shard was created or ++ * looked up ++ */ ++ inode_unref(shard_inode); ++ inode_forget(shard_inode, 0); ++ } ++ } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&priv->lock); + +- LOCK(&frame->lock); +- { +- call_count = --local->call_count; ++out: ++ 
if (anon_fd) ++ fd_unref(anon_fd); ++ STACK_DESTROY(frame->root); ++ return 0; ++} ++ ++int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) { ++ fd_t *anon_fd = NULL; ++ call_frame_t *fsync_frame = NULL; ++ ++ fsync_frame = create_frame(this, this->ctx->pool); ++ if (!fsync_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to fsync shard"); ++ return -1; ++ } ++ ++ anon_fd = fd_anonymous(inode); ++ if (!anon_fd) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create anon fd to" ++ " fsync shard"); ++ STACK_DESTROY(fsync_frame->root); ++ return -1; ++ } ++ ++ STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, ++ 1, NULL); ++ return 0; ++} ++ ++int shard_common_resolve_shards( ++ call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler) { ++ int i = -1; ++ uint32_t shard_idx_iter = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *res_inode = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ local->call_count = 0; ++ shard_idx_iter = local->first_block; ++ res_inode = local->resolver_base_inode; ++ if (res_inode) ++ gf_uuid_copy(gfid, res_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if ((local->op_ret < 0) || (local->resolve_not)) ++ goto out; ++ ++ while (shard_idx_iter <= local->last_block) { ++ i++; ++ if (shard_idx_iter == 0) { ++ local->inode_list[i] = inode_ref(res_inode); ++ shard_idx_iter++; ++ continue; ++ } ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ inode = NULL; ++ inode = inode_resolve(this->itable, path); ++ if (inode) { ++ gf_msg_debug(this->name, 0, "Shard %d already " ++ "present. 
gfid=%s. Saving inode for future.", ++ shard_idx_iter, uuid_utoa(inode->gfid)); ++ local->inode_list[i] = inode; ++ /* Let the ref on the inodes that are already present ++ * in inode table still be held so that they don't get ++ * forgotten by the time the fop reaches the actual ++ * write stage. ++ */ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode, ++ shard_idx_iter, gfid); ++ } ++ UNLOCK(&priv->lock); ++ shard_idx_iter++; ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++ continue; ++ } else { ++ local->call_count++; ++ shard_idx_iter++; + } +- UNLOCK(&frame->lock); ++ } ++out: ++ post_res_handler(frame, this); ++ return 0; ++} ++ ++int shard_update_file_size_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ dict_t *dict, dict_t *xdata) { ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if ((local->fd) && (local->fd->inode)) ++ inode = local->fd->inode; ++ else if (local->loc.inode) ++ inode = local->loc.inode; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size" ++ " xattr failed on %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } + +- return call_count; ++ if (shard_modify_size_and_block_count(&local->postbuf, dict)) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++err: ++ local->post_update_size_handler(frame, this); ++ return 0; + } + +-static char * +-shard_internal_dir_string(shard_internal_dir_type_t type) +-{ +- char *str = NULL; ++int shard_set_size_attrs(int64_t size, int64_t block_count, ++ int64_t **size_attr_p) { ++ int ret = -1; ++ int64_t *size_attr = NULL; + +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- str = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- str = 
GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- return str; +-} +- +-static int +-shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- char *bname = NULL; +- inode_t *parent = NULL; +- loc_t *internal_dir_loc = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- if (!local) +- return -1; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- internal_dir_loc = &local->dot_shard_loc; +- bname = GF_SHARD_DIR; +- parent = inode_ref(this->itable->root); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- internal_dir_loc = &local->dot_shard_rm_loc; +- bname = GF_SHARD_REMOVE_ME_DIR; +- parent = inode_ref(priv->dot_shard_inode); +- break; +- default: +- break; +- } ++ if (!size_attr_p) ++ goto out; + +- internal_dir_loc->inode = inode_new(this->itable); +- internal_dir_loc->parent = parent; +- ret = inode_path(internal_dir_loc->parent, bname, +- (char **)&internal_dir_loc->path); +- if (ret < 0 || !(internal_dir_loc->inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", bname); +- goto out; +- } ++ size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); ++ if (!size_attr) ++ goto out; + +- internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); +- if (internal_dir_loc->name) +- internal_dir_loc->name++; ++ size_attr[0] = hton64(size); ++ /* As sharding evolves, it _may_ be necessary to embed more pieces of ++ * information within the same xattr. So allocating slots for them in ++ * advance. For now, only bytes 0-63 and 128-191 which would make up the ++ * current size and block count respectively of the file are valid. 
++ */ ++ size_attr[2] = hton64(block_count); + +- ret = 0; ++ *size_attr_p = size_attr; ++ ++ ret = 0; + out: +- return ret; ++ return ret; + } + +-inode_t * +-__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, +- inode_t *base_inode, int block_num, +- uuid_t gfid) +-{ +- char block_bname[256] = { +- 0, +- }; +- inode_t *lru_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *lru_inode_ctx = NULL; +- shard_inode_ctx_t *lru_base_inode_ctx = NULL; +- inode_t *fsync_inode = NULL; +- inode_t *lru_base_inode = NULL; +- gf_boolean_t do_fsync = _gf_false; +- +- priv = this->private; +- +- shard_inode_ctx_get(linked_inode, this, &ctx); +- +- if (list_empty(&ctx->ilist)) { +- if (priv->inode_count + 1 <= priv->lru_limit) { +- /* If this inode was linked here for the first time (indicated +- * by empty list), and if there is still space in the priv list, +- * add this ctx to the tail of the list. +- */ +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- priv->inode_count++; +- ctx->base_inode = inode_ref(base_inode); +- } else { +- /*If on the other hand there is no available slot for this inode +- * in the list, delete the lru inode from the head of the list, +- * unlink it. And in its place add this new inode into the list. 
+- */ +- lru_inode_ctx = list_first_entry(&priv->ilist_head, +- shard_inode_ctx_t, ilist); +- GF_ASSERT(lru_inode_ctx->block_num > 0); +- lru_base_inode = lru_inode_ctx->base_inode; +- list_del_init(&lru_inode_ctx->ilist); +- lru_inode = inode_find(linked_inode->table, +- lru_inode_ctx->stat.ia_gfid); +- /* If the lru inode was part of the pending-fsync list, +- * the base inode needs to be unref'd, the lru inode +- * deleted from fsync list and fsync'd in a new frame, +- * and then unlinked in memory and forgotten. +- */ +- if (!lru_base_inode) +- goto after_fsync_check; +- LOCK(&lru_base_inode->lock); +- LOCK(&lru_inode->lock); +- { +- if (!list_empty(&lru_inode_ctx->to_fsync_list)) { +- list_del_init(&lru_inode_ctx->to_fsync_list); +- lru_inode_ctx->fsync_needed = 0; +- do_fsync = _gf_true; +- __shard_inode_ctx_get(lru_base_inode, this, +- &lru_base_inode_ctx); +- lru_base_inode_ctx->fsync_count--; +- } +- } +- UNLOCK(&lru_inode->lock); +- UNLOCK(&lru_base_inode->lock); +- +- after_fsync_check: +- if (!do_fsync) { +- shard_make_block_bname(lru_inode_ctx->block_num, +- lru_inode_ctx->base_gfid, block_bname, +- sizeof(block_bname)); +- /* The following unref corresponds to the ref held at +- * the time the shard was added to the lru list. +- */ +- inode_unref(lru_inode); +- inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); +- inode_forget(lru_inode, 0); +- } else { +- /* The following unref corresponds to the ref +- * held when the shard was added to fsync list. +- */ +- inode_unref(lru_inode); +- fsync_inode = lru_inode; +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- } +- /* The following unref corresponds to the ref +- * held by inode_find() above. 
+- */ +- inode_unref(lru_inode); +- +- /* The following unref corresponds to the ref held on the base shard +- * at the time of adding shard inode to lru list +- */ +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- ctx->base_inode = inode_ref(base_inode); +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- } +- } else { +- /* If this is not the first time this inode is being operated on, move +- * it to the most recently used end of the list. +- */ +- list_move_tail(&ctx->ilist, &priv->ilist_head); +- } +- return fsync_inode; +-} ++int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ loc_t *loc, ++ shard_post_update_size_fop_handler_t handler) { ++ int ret = -1; ++ int64_t *size_attr = NULL; ++ int64_t delta_blocks = 0; ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; + +-int +-shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, +- int32_t op_ret, int32_t op_errno) +-{ +- switch (fop) { +- case GF_FOP_LOOKUP: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_STAT: +- SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSTAT: +- SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_TRUNCATE: +- SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FTRUNCATE: +- SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_MKNOD: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_LINK: +- SHARD_STACK_UNWIND(link, frame, op_ret, 
op_errno, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_CREATE: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_UNLINK: +- SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_RENAME: +- SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_READ: +- SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, +- NULL, NULL); +- break; +- case GF_FOP_FSYNC: +- SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_REMOVEXATTR: +- SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FREMOVEXATTR: +- SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FGETXATTR: +- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_GETXATTR: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSETXATTR: +- SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETXATTR: +- SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETATTR: +- SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FSETATTR: +- SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_SEEK: +- SHARD_STACK_UNWIND(seek, 
frame, op_ret, op_errno, 0, NULL); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} ++ local = frame->local; ++ local->post_update_size_handler = handler; + +-int +-shard_common_inode_write_success_unwind(glusterfs_fop_t fop, +- call_frame_t *frame, int32_t op_ret) +-{ +- shard_local_t *local = NULL; ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ if (fd) ++ inode = fd->inode; ++ else ++ inode = loc->inode; ++ ++ /* If both size and block count have not changed, then skip the xattrop. ++ */ ++ delta_blocks = GF_ATOMIC_GET(local->delta_blocks); ++ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { ++ goto out; ++ } ++ ++ ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks, ++ &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, ++ "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict. 
gfid=%s", GF_XATTR_SHARD_FILE_SIZE, ++ uuid_utoa(inode->gfid)); ++ GF_FREE(size_attr); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } + +- local = frame->local; ++ if (fd) ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64, ++ xattr_req, NULL); ++ else ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64, ++ xattr_req, NULL); + +- switch (fop) { +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} ++ dict_unref(xattr_req); ++ return 0; + +-int +-shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) +-{ +- char block_bname[256] = { +- 0, +- }; +- fd_t *anon_fd = cookie; +- inode_t *shard_inode = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++out: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; ++} ++ ++static inode_t *shard_link_internal_dir_inode(shard_local_t *local, ++ inode_t *inode, struct iatt *buf, ++ shard_internal_dir_type_t type) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ char *bname = NULL; ++ inode_t **priv_inode = NULL; ++ inode_t 
*parent = NULL; ++ ++ priv = THIS->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ bname = GF_SHARD_DIR; ++ priv_inode = &priv->dot_shard_inode; ++ parent = inode->table->root; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ priv_inode = &priv->dot_shard_rm_inode; ++ parent = priv->dot_shard_inode; ++ break; ++ default: ++ break; ++ } ++ ++ linked_inode = inode_link(inode, parent, bname, buf); ++ inode_lookup(linked_inode); ++ *priv_inode = linked_inode; ++ return linked_inode; ++} ++ ++int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ shard_local_t *local = NULL; ++ inode_t *linked_inode = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ ++ local = frame->local; ++ ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ /* To-Do: Fix refcount increment per call to ++ * shard_link_internal_dir_inode(). 
++ */ ++ linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ shard_inode_ctx_mark_dir_refreshed(linked_inode, this); ++out: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; ++} ++ ++int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_internal_dir_type_t type) { ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ break; ++ default: ++ break; ++ } ++ ++ inode = inode_find(this->itable, gfid); ++ ++ if (!shard_inode_ctx_needs_lookup(inode, this)) { ++ local->op_ret = 0; ++ goto out; ++ } + +- priv = this->private; ++ /* Plain assignment because the ref is already taken above through ++ * call to inode_find() ++ */ ++ loc.inode = inode; ++ gf_uuid_copy(loc.gfid, gfid); + +- if (anon_fd == NULL || op_ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, +- "fsync failed on shard"); +- goto out; +- } +- shard_inode = anon_fd->inode; ++ STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, ++ NULL); ++ loc_wipe(&loc); + +- LOCK(&priv->lock); +- LOCK(&shard_inode->lock); +- { +- __shard_inode_ctx_get(shard_inode, this, &ctx); +- if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { +- shard_make_block_bname(ctx->block_num, shard_inode->gfid, +- block_bname, sizeof(block_bname)); +- inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); +- /* The following unref corresponds to the ref held by +- * inode_link() at the time the shard was created or +- * looked up +- */ +- inode_unref(shard_inode); +- 
inode_forget(shard_inode, 0); +- } +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&priv->lock); ++ return 0; + + out: +- if (anon_fd) +- fd_unref(anon_fd); +- STACK_DESTROY(frame->root); +- return 0; ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; + } + +-int +-shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) +-{ +- fd_t *anon_fd = NULL; +- call_frame_t *fsync_frame = NULL; ++int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + +- fsync_frame = create_frame(this, this->ctx->pool); +- if (!fsync_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to fsync shard"); +- return -1; +- } ++ local = frame->local; + +- anon_fd = fd_anonymous(inode); +- if (!anon_fd) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create anon fd to" +- " fsync shard"); +- STACK_DESTROY(fsync_frame->root); +- return -1; +- } ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } ++ ++ if (!IA_ISDIR(buf->ia_type)) { ++ gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, ++ "%s already exists and " ++ "is not a directory. 
Please remove it from all bricks " ++ "and try again", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = EIO; ++ goto unwind; ++ } ++ ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); ++ } else { ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; + +- STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, +- anon_fd, 1, NULL); +- return 0; +-} ++unwind: ++ local->post_res_handler(frame, this); ++ return 0; ++} ++ ++int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; ++ ++ local = frame->local; ++ priv = this->private; ++ local->post_res_handler = post_res_handler; ++ ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } ++ ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid of %s into dict", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; 
++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } + +-int +-shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler) +-{ +- int i = -1; +- uint32_t shard_idx_iter = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *res_inode = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- local->call_count = 0; +- shard_idx_iter = local->first_block; +- res_inode = local->resolver_base_inode; +- if (res_inode) +- gf_uuid_copy(gfid, res_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); ++ STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, ++ xattr_req); + +- if ((local->op_ret < 0) || (local->resolve_not)) +- goto out; ++ dict_unref(xattr_req); ++ return 0; + +- while (shard_idx_iter <= local->last_block) { +- i++; +- if (shard_idx_iter == 0) { +- local->inode_list[i] = inode_ref(res_inode); +- shard_idx_iter++; +- continue; +- } ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ if (free_gfid) ++ GF_FREE(gfid); ++ post_res_handler(frame, this); ++ return 0; ++} ++ ++static void shard_inode_ctx_update(inode_t *inode, xlator_t *this, ++ dict_t *xdata, struct iatt *buf) { ++ int ret = 0; ++ uint64_t size = 0; ++ void *bsize = NULL; ++ ++ if (shard_inode_ctx_get_block_size(inode, this, &size)) { ++ /* Fresh lookup */ ++ ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (!ret) ++ size = ntoh64(*((uint64_t *)bsize)); ++ /* If the file is sharded, set its block size, otherwise just ++ * set 0. ++ */ + +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- inode = NULL; +- inode = inode_resolve(this->itable, path); +- if (inode) { +- gf_msg_debug(this->name, 0, +- "Shard %d already " +- "present. gfid=%s. 
Saving inode for future.", +- shard_idx_iter, uuid_utoa(inode->gfid)); +- local->inode_list[i] = inode; +- /* Let the ref on the inodes that are already present +- * in inode table still be held so that they don't get +- * forgotten by the time the fop reaches the actual +- * write stage. +- */ +- LOCK(&priv->lock); +- { +- fsync_inode = __shard_update_shards_inode_list( +- inode, this, res_inode, shard_idx_iter, gfid); +- } +- UNLOCK(&priv->lock); +- shard_idx_iter++; +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +- continue; +- } else { +- local->call_count++; +- shard_idx_iter++; +- } +- } +-out: +- post_res_handler(frame, this); +- return 0; ++ shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); ++ } ++ /* If the file is sharded, also set the remaining attributes, ++ * except for ia_size and ia_blocks. ++ */ ++ if (size) { ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ (void)shard_inode_ctx_invalidate(inode, this, buf); ++ } ++} ++ ++int shard_delete_shards(void *opaque); ++ ++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); ++ ++int shard_start_background_deletion(xlator_t *this) { ++ int ret = 0; ++ gf_boolean_t i_cleanup = _gf_true; ++ shard_priv_t *priv = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ ++ priv = this->private; ++ ++ LOCK(&priv->lock); ++ { ++ switch (priv->bg_del_state) { ++ case SHARD_BG_DELETION_NONE: ++ i_cleanup = _gf_true; ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ break; ++ case SHARD_BG_DELETION_LAUNCHING: ++ i_cleanup = _gf_false; ++ break; ++ case SHARD_BG_DELETION_IN_PROGRESS: ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ i_cleanup = _gf_false; ++ break; ++ default: ++ break; ++ } ++ } ++ UNLOCK(&priv->lock); ++ if (!i_cleanup) ++ return 0; ++ ++ cleanup_frame = create_frame(this, this->ctx->pool); ++ if (!cleanup_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ 
"new frame to delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ ++ ret = synctask_new(this->ctx->env, shard_delete_shards, ++ shard_delete_shards_cbk, cleanup_frame, cleanup_frame); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "failed to create task to do background " ++ "cleanup of shards"); ++ STACK_DESTROY(cleanup_frame->root); ++ goto err; ++ } ++ return 0; ++ ++err: ++ LOCK(&priv->lock); ++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; } ++ UNLOCK(&priv->lock); ++ return ret; + } + +-int +-shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- shard_local_t *local = NULL; ++int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, struct iatt *postparent) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t i_start_cleanup = _gf_false; + +- local = frame->local; ++ priv = this->private; + +- if ((local->fd) && (local->fd->inode)) +- inode = local->fd->inode; +- else if (local->loc.inode) +- inode = local->loc.inode; ++ if (op_ret < 0) ++ goto unwind; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_UPDATE_FILE_SIZE_FAILED, +- "Update to file size" +- " xattr failed on %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } ++ if (IA_ISDIR(buf->ia_type)) ++ goto unwind; + +- if (shard_modify_size_and_block_count(&local->postbuf, dict)) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +-err: +- local->post_update_size_handler(frame, this); +- return 0; +-} ++ /* Also, if the file is sharded, get the file size and block cnt xattr, ++ * and store them in the stbuf appropriately. 
++ */ + +-int +-shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p) +-{ +- int ret = -1; +- int64_t *size_attr = NULL; ++ if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(buf, xdata); + +- if (!size_attr_p) +- goto out; ++ /* If this was a fresh lookup, there are two possibilities: ++ * 1) If the file is sharded (indicated by the presence of block size ++ * xattr), store this block size, along with rdev and mode in its ++ * inode ctx. ++ * 2) If the file is not sharded, store size along with rdev and mode ++ * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is ++ * already initialised to all zeroes, nothing more needs to be done. ++ */ + +- size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); +- if (!size_attr) +- goto out; ++ (void)shard_inode_ctx_update(inode, this, xdata, buf); + +- size_attr[0] = hton64(size); +- /* As sharding evolves, it _may_ be necessary to embed more pieces of +- * information within the same xattr. So allocating slots for them in +- * advance. For now, only bytes 0-63 and 128-191 which would make up the +- * current size and block count respectively of the file are valid. 
+- */ +- size_attr[2] = hton64(block_count); ++ LOCK(&priv->lock); ++ { ++ if (priv->first_lookup_done == _gf_false) { ++ priv->first_lookup_done = _gf_true; ++ i_start_cleanup = _gf_true; ++ } ++ } ++ UNLOCK(&priv->lock); + +- *size_attr_p = size_attr; ++ if (!i_start_cleanup) ++ goto unwind; + +- ret = 0; +-out: +- return ret; ++ ret = shard_start_background_deletion(this); ++ if (ret < 0) { ++ LOCK(&priv->lock); ++ { priv->first_lookup_done = _gf_false; } ++ UNLOCK(&priv->lock); ++ } ++ ++unwind: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, ++ postparent); ++ return 0; + } + +-int +-shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, +- loc_t *loc, shard_post_update_size_fop_handler_t handler) +-{ +- int ret = -1; +- int64_t *size_attr = NULL; +- int64_t delta_blocks = 0; +- inode_t *inode = NULL; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; ++int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *xattr_req) { ++ int ret = -1; ++ int32_t op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- local = frame->local; +- local->post_update_size_handler = handler; ++ this->itable = loc->inode->table; ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); ++ } + +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; +- } ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (fd) +- inode = fd->inode; +- else +- inode = loc->inode; ++ frame->local = local; + +- /* If both size and block count have not changed, then skip the xattrop. 
+- */ +- delta_blocks = GF_ATOMIC_GET(local->delta_blocks); +- if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { +- goto out; +- } ++ loc_copy(&local->loc, loc); + +- ret = shard_set_size_attrs(local->delta_size + local->hole_size, +- delta_blocks, &size_attr); ++ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, +- "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict" ++ " value: key:%s for path %s", ++ GF_XATTR_SHARD_BLOCK_SIZE, loc->path); ++ goto err; + } ++ } + +- ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict. 
gfid=%s", +- GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid)); +- GF_FREE(size_attr); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s for path %s.", ++ GF_XATTR_SHARD_FILE_SIZE, loc->path); ++ goto err; + } ++ } + +- if (fd) +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fxattrop, fd, +- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); +- else +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->xattrop, loc, +- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); +- +- dict_unref(xattr_req); +- return 0; +- +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; +-} +- +-static inode_t * +-shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, +- struct iatt *buf, shard_internal_dir_type_t type) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- char *bname = NULL; +- inode_t **priv_inode = NULL; +- inode_t *parent = NULL; +- +- priv = THIS->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- bname = GF_SHARD_DIR; +- priv_inode = &priv->dot_shard_inode; +- parent = inode->table->root; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- bname = GF_SHARD_REMOVE_ME_DIR; +- priv_inode = &priv->dot_shard_rm_inode; +- parent = priv->dot_shard_inode; +- break; +- default: +- break; +- } ++ if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) ++ dict_del(xattr_req, GF_CONTENT_KEY); + +- linked_inode = inode_link(inode, parent, bname, buf); +- inode_lookup(linked_inode); +- *priv_inode = linked_inode; +- return linked_inode; ++ STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); ++ return 0; + } + +-int 
+-shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- shard_local_t *local = NULL; +- inode_t *linked_inode = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; +- +- local = frame->local; +- +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } ++ struct iatt *postparent) { ++ int ret = -1; ++ int32_t mask = SHARD_INODE_WRITE_MASK; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t ctx = { ++ 0, ++ }; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" ++ " failed : %s", ++ loc_gfid_utoa(&(local->loc))); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } ++ ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ ++ if (shard_inode_ctx_get_all(inode, this, &ctx)) ++ mask = SHARD_ALL_MASK; ++ ++ ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, ++ (mask | SHARD_MASK_REFRESH_RESET)); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, ++ "Failed to set inode" ++ " write params into inode ctx for %s", ++ uuid_utoa(buf->ia_gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto unwind; ++ } ++ ++unwind: ++ local->handler(frame, this); ++ return 0; ++} ++ ++int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ shard_post_fop_handler_t handler) { ++ int ret = -1; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; ++ gf_boolean_t need_refresh = _gf_false; ++ ++ local = frame->local; ++ local->handler = handler; ++ ++ ret = 
shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, ++ &need_refresh); ++ /* By this time, inode ctx should have been created either in create, ++ * mknod, readdirp or lookup. If not it is a bug! ++ */ ++ if ((ret == 0) && (need_refresh == _gf_false)) { ++ gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" ++ "Serving prebuf off the inode ctx cache", ++ uuid_utoa(loc->gfid)); ++ goto out; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ ++ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ ++ dict_unref(xattr_req); ++ return 0; + +- /* To-Do: Fix refcount increment per call to +- * shard_link_internal_dir_inode(). +- */ +- linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- shard_inode_ctx_mark_dir_refreshed(linked_inode, this); + out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; + } + +-int +-shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_internal_dir_type_t type) +-{ +- loc_t loc = { +- 0, +- }; +- inode_t *inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; ++int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- break; +- default: +- break; +- } ++ local = frame->local; + +- inode = inode_find(this->itable, gfid); ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, 
this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +- if (!shard_inode_ctx_needs_lookup(inode, this)) { +- local->op_ret = 0; +- goto out; +- } ++ SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; ++} + +- /* Plain assignment because the ref is already taken above through +- * call to inode_find() +- */ +- loc.inode = inode; +- gf_uuid_copy(loc.gfid, gfid); ++int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, +- NULL); +- loc_wipe(&loc); ++ local = frame->local; + +- return 0; ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +-out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; + } + +-int +-shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ dict_t *xdata) { ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, ++ "stat failed: %s", local->fd ? 
uuid_utoa(local->fd->inode->gfid) ++ : uuid_utoa((local->loc.inode)->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- if (!IA_ISDIR(buf->ia_type)) { +- gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, +- "%s already exists and " +- "is not a directory. Please remove it from all bricks " +- "and try again", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = EIO; +- goto unwind; +- } ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ local->xattr_rsp = dict_ref(xdata); + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; ++ if (local->loc.inode) ++ inode = local->loc.inode; ++ else ++ inode = local->fd->inode; ++ ++ shard_inode_ctx_invalidate(inode, this, &local->prebuf); + + unwind: +- local->post_res_handler(frame, this); +- return 0; ++ local->handler(frame, this); ++ return 0; + } + +-int +-shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- local->post_res_handler = post_res_handler; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- 
gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } ++int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid of %s into dict", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } else { +- free_gfid = _gf_false; +- } ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); ++ return 0; ++ } + +- STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, +- xattr_req); ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- dict_unref(xattr_req); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + ++ frame->local = local; ++ ++ local->handler = shard_post_stat_handler; ++ loc_copy(&local->loc, loc); ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); ++ ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); ++ return 0; + err: +- if (xattr_req) +- dict_unref(xattr_req); +- if (free_gfid) +- GF_FREE(gfid); +- post_res_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); ++ return 0; + } + +-static void +-shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata, +- struct iatt *buf) +-{ +- int ret = 0; +- uint64_t size = 0; +- void *bsize = NULL; +- +- if (shard_inode_ctx_get_block_size(inode, this, &size)) { +- /* Fresh lookup */ +- ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (!ret) +- size = ntoh64(*((uint64_t *)bsize)); +- /* If the file is sharded, set its block size, otherwise just +- * set 0. +- */ ++int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); +- } +- /* If the file is sharded, also set the remaining attributes, +- * except for ia_size and ia_blocks. 
+- */ +- if (size) { +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- (void)shard_inode_ctx_invalidate(inode, this, buf); +- } +-} ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int +-shard_delete_shards(void *opaque); ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +-int +-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int +-shard_start_background_deletion(xlator_t *this) +-{ +- int ret = 0; +- gf_boolean_t i_cleanup = _gf_true; +- shard_priv_t *priv = NULL; +- call_frame_t *cleanup_frame = NULL; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- LOCK(&priv->lock); +- { +- switch (priv->bg_del_state) { +- case SHARD_BG_DELETION_NONE: +- i_cleanup = _gf_true; +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- break; +- case SHARD_BG_DELETION_LAUNCHING: +- i_cleanup = _gf_false; +- break; +- case SHARD_BG_DELETION_IN_PROGRESS: +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- i_cleanup = _gf_false; +- break; +- default: +- break; +- } +- } +- UNLOCK(&priv->lock); +- if (!i_cleanup) +- return 0; +- +- cleanup_frame = create_frame(this, this->ctx->pool); +- if (!cleanup_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "new frame to delete shards"); +- ret = -ENOMEM; +- goto err; +- } ++ frame->local = 
local; + +- set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ local->handler = shard_post_fstat_handler; ++ local->fd = fd_ref(fd); ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- ret = synctask_new(this->ctx->env, shard_delete_shards, +- shard_delete_shards_cbk, cleanup_frame, cleanup_frame); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "failed to create task to do background " +- "cleanup of shards"); +- STACK_DESTROY(cleanup_frame->root); +- goto err; +- } +- return 0; ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); ++ return 0; + err: +- LOCK(&priv->lock); +- { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- } +- UNLOCK(&priv->lock); +- return ret; ++ shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, struct iatt *postparent) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; +- gf_boolean_t i_start_cleanup = _gf_false; ++int shard_post_update_size_truncate_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- priv = this->private; ++ local = frame->local; + +- if (op_ret < 0) +- goto unwind; ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); ++ return 0; ++} + +- if (IA_ISDIR(buf->ia_type)) +- goto unwind; ++int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t 
op_ret, ++ int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) { ++ inode_t *inode = NULL; ++ int64_t delta_blocks = 0; ++ shard_local_t *local = NULL; + +- /* Also, if the file is sharded, get the file size and block cnt xattr, +- * and store them in the stbuf appropriately. +- */ ++ local = frame->local; + +- if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(buf, xdata); +- +- /* If this was a fresh lookup, there are two possibilities: +- * 1) If the file is sharded (indicated by the presence of block size +- * xattr), store this block size, along with rdev and mode in its +- * inode ctx. +- * 2) If the file is not sharded, store size along with rdev and mode +- * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is +- * already initialised to all zeroes, nothing more needs to be done. +- */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + +- (void)shard_inode_ctx_update(inode, this, xdata, buf); ++ inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last" ++ " shard failed : %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ ++ local->postbuf.ia_size = local->offset; ++ /* Let the delta be negative. 
We want xattrop to do subtraction */ ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, ++ postbuf->ia_blocks - prebuf->ia_blocks); ++ GF_ASSERT(delta_blocks <= 0); ++ local->postbuf.ia_blocks += delta_blocks; ++ local->hole_size = 0; ++ ++ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, ++ inode_t *inode) { ++ size_t last_shard_size_after = 0; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ /* A NULL inode could be due to the fact that the last shard which ++ * needs to be truncated does not exist due to it lying in a hole ++ * region. So the only thing left to do in that case would be an ++ * update to file size xattr. ++ */ ++ if (!inode) { ++ gf_msg_debug(this->name, 0, ++ "Last shard to be truncated absent" ++ " in backend: %s. 
Directly proceeding to update " ++ "file size", ++ uuid_utoa(inode->gfid)); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- LOCK(&priv->lock); +- { +- if (priv->first_lookup_done == _gf_false) { +- priv->first_lookup_done = _gf_true; +- i_start_cleanup = _gf_true; +- } +- } +- UNLOCK(&priv->lock); ++ SHARD_SET_ROOT_FS_ID(frame, local); + +- if (!i_start_cleanup) +- goto unwind; ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, inode->gfid); + +- ret = shard_start_background_deletion(this); +- if (ret < 0) { +- LOCK(&priv->lock); +- { +- priv->first_lookup_done = _gf_false; +- } +- UNLOCK(&priv->lock); +- } ++ last_shard_size_after = (local->offset % local->block_size); + +-unwind: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, +- postparent); +- return 0; ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, ++ NULL); ++ loc_wipe(&loc); ++ return 0; + } + +-int +-shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +-{ +- int ret = -1; +- int32_t op_errno = ENOMEM; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- this->itable = loc->inode->table; +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); +- } ++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num); + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int ret = 0; ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uint64_t block_count = 0; ++ shard_local_t *local = NULL; + +- frame->local = local; ++ local = frame->local; + +- loc_copy(&local->loc, loc); ++ if (op_ret < 0) { ++ 
local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } ++ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); ++ if (!ret) { ++ GF_ATOMIC_SUB(local->delta_blocks, block_count); ++ } else { ++ /* dict_get failed possibly due to a heterogeneous cluster? */ ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get key %s from dict during truncate of gfid %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ } ++ ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); ++ } ++ return 0; ++} ++ ++int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) { ++ int i = 1; ++ int ret = -1; ++ int call_count = 0; ++ uint32_t cur_block = 0; ++ uint32_t last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xdata_req = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ cur_block = local->first_block + 1; ++ last_block = local->last_block; ++ ++ /* Determine call count */ ++ for (i = 1; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ call_count++; ++ } ++ ++ if (!call_count) { ++ /* Call count = 0 implies that all of the shards that need to be ++ * unlinked do not exist. So shard xlator would now proceed to ++ * do the final truncate + size updates. ++ */ ++ gf_msg_debug(this->name, 0, "Shards to be unlinked as part of " ++ "truncate absent in backend: %s. 
Directly " ++ "proceeding to update file size", ++ uuid_utoa(inode->gfid)); ++ local->postbuf.ia_size = local->offset; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->hole_size = 0; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local->call_count = call_count; ++ i = 1; ++ xdata_req = dict_new(); ++ if (!xdata_req) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } ++ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict during truncate of %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ dict_unref(xdata_req); ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + +- if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict" +- " value: key:%s for path %s", +- GF_XATTR_SHARD_BLOCK_SIZE, loc->path); +- goto err; +- } ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ while (cur_block <= last_block) { ++ if (!local->inode_list[i]) { ++ cur_block++; ++ i++; ++ continue; ++ } ++ if (wind_failed) { ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, +- 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: 
key:%s for path %s.", +- GF_XATTR_SHARD_FILE_SIZE, loc->path); +- goto err; +- } ++ shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s. Base file gfid = %s", ++ bname, uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[i]); + +- if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) +- dict_del(xattr_req, GF_CONTENT_KEY); ++ STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc, ++ 0, xdata_req); ++ loc_wipe(&loc); ++ next: ++ i++; ++ cur_block++; ++ if (!--call_count) ++ break; ++ } ++ dict_unref(xdata_req); ++ return 0; ++} + +- STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); ++int shard_truncate_do(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->num_blocks == 1) { ++ /* This means that there are no shards to be unlinked. ++ * The fop boils down to truncating the last shard, updating ++ * the size and unwinding. 
++ */ ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); + return 0; ++ } else { ++ shard_truncate_htol(frame, this, local->loc.inode); ++ } ++ return 0; + } + +-int +-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- int ret = -1; +- int32_t mask = SHARD_INODE_WRITE_MASK; +- shard_local_t *local = NULL; +- shard_inode_ctx_t ctx = { +- 0, +- }; +- +- local = frame->local; ++int shard_post_lookup_shards_truncate_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_BASE_FILE_LOOKUP_FAILED, +- "Lookup on base file" +- " failed : %s", +- loc_gfid_utoa(&(local->loc))); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ local = frame->local; + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ shard_truncate_do(frame, this); ++ return 0; ++} ++ ++void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, ++ struct iatt *buf) { ++ int list_index = 0; ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *linked_inode = NULL; ++ xlator_t *this = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ inode_t *base_inode = NULL; ++ ++ this = THIS; ++ priv = this->private; ++ if (local->loc.inode) { ++ gf_uuid_copy(gfid, local->loc.inode->gfid); ++ base_inode = local->loc.inode; ++ } else if (local->resolver_base_inode) { ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ base_inode = local->resolver_base_inode; ++ } else { ++ gf_uuid_copy(gfid, 
local->base_gfid); ++ } ++ ++ shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); ++ ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); ++ inode_lookup(linked_inode); ++ list_index = block_num - local->first_block; ++ local->inode_list[list_index] = linked_inode; ++ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list(linked_inode, this, ++ base_inode, block_num, gfid); ++ } ++ UNLOCK(&priv->lock); ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++} ++ ++int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uuid_t gfid = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ if (local->resolver_base_inode) ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if (op_ret < 0) { ++ /* Ignore absence of shards in the backend in truncate fop. */ ++ switch (local->fop) { ++ case GF_FOP_TRUNCATE: ++ case GF_FOP_FTRUNCATE: ++ case GF_FOP_RENAME: ++ case GF_FOP_UNLINK: ++ if (op_errno == ENOENT) ++ goto done; ++ break; ++ case GF_FOP_WRITE: ++ case GF_FOP_READ: ++ case GF_FOP_ZEROFILL: ++ case GF_FOP_DISCARD: ++ case GF_FOP_FALLOCATE: ++ if ((!local->first_lookup_done) && (op_errno == ENOENT)) { ++ LOCK(&frame->lock); ++ { local->create_count++; } ++ UNLOCK(&frame->lock); ++ goto done; ++ } ++ break; ++ default: ++ break; + } + +- if (shard_inode_ctx_get_all(inode, this, &ctx)) +- mask = SHARD_ALL_MASK; ++ /* else */ ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED, ++ "Lookup on shard %d " ++ "failed. 
Base file gfid = %s", ++ shard_block_num, uuid_utoa(gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } + +- ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, +- (mask | SHARD_MASK_REFRESH_RESET)); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, +- "Failed to set inode" +- " write params into inode ctx for %s", +- uuid_utoa(buf->ia_gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unwind; +- } ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +-unwind: +- local->handler(frame, this); ++done: ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wake(&local->barrier); + return 0; ++ } else { ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ if (!local->first_lookup_done) ++ local->first_lookup_done = _gf_true; ++ local->pls_fop_handler(frame, this); ++ } ++ } ++ return 0; + } + +-int +-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, +- shard_post_fop_handler_t handler) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; +- gf_boolean_t need_refresh = _gf_false; ++dict_t *shard_create_gfid_dict(dict_t *dict) { ++ int ret = 0; ++ dict_t *new = NULL; ++ unsigned char *gfid = NULL; + +- local = frame->local; +- local->handler = handler; ++ new = dict_copy_with_ref(dict, NULL); ++ if (!new) ++ return NULL; + +- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, +- &need_refresh); +- /* By this time, inode ctx should have been created either in create, +- * mknod, readdirp or lookup. If not it is a bug! 
+- */ +- if ((ret == 0) && (need_refresh == _gf_false)) { +- gf_msg_debug(this->name, 0, +- "Skipping lookup on base file: %s" +- "Serving prebuf off the inode ctx cache", +- uuid_utoa(loc->gfid)); +- goto out; ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); ++ if (!gfid) { ++ ret = -1; ++ goto out; ++ } ++ ++ gf_uuid_generate(gfid); ++ ++ ret = dict_set_gfuuid(new, "gfid-req", gfid, false); ++ ++out: ++ if (ret) { ++ dict_unref(new); ++ new = NULL; ++ GF_FREE(gfid); ++ } ++ ++ return new; ++} ++ ++int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, ++ inode_t *inode, ++ shard_post_lookup_shards_fop_handler_t handler) { ++ int i = 0; ++ int ret = 0; ++ int count = 0; ++ int call_count = 0; ++ int32_t shard_idx_iter = 0; ++ int last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ dict_t *xattr_req = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ count = call_count = local->call_count; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ local->pls_fop_handler = handler; ++ if (local->lookup_shards_barriered) ++ local->barrier.waitfor = local->call_count; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ i++; ++ shard_idx_iter++; ++ continue; ++ } ++ ++ if (wind_failed) { ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); ++ ret = inode_path(loc.parent, 
bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; + } + +- xattr_req = dict_new(); ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ next: ++ shard_idx_iter++; ++ i++; ++ ++ if (!--call_count) ++ break; ++ } ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wait(&local->barrier, count); ++ local->pls_fop_handler(frame, this); ++ } ++ return 0; ++} ++ ++int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ if (local->op_errno == ENOENT) { ++ /* If lookup on /.shard fails with ENOENT, it means that ++ * the file was 0-byte in size but truncated sometime in ++ * the past to a higher size which is reflected in the ++ * size xattr, and now being truncated to a lower size. ++ * In this case, the only thing that needs to be done is ++ * to update the size xattr of the file and unwind. 
++ */ ++ local->first_block = local->last_block = 0; ++ local->num_blocks = 1; ++ local->call_count = 0; ++ local->op_ret = 0; ++ local->postbuf.ia_size = local->offset; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } else { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } ++ } + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ if (!local->call_count) ++ shard_truncate_do(frame, this); ++ else ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_truncate_handler); ++ ++ return 0; ++} ++ ++int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ /* First participant block here is the lowest numbered block that would ++ * hold the last byte of the file post successful truncation. ++ * Last participant block is the block that contains the last byte in ++ * the current state of the file. ++ * If (first block == last_block): ++ * then that means that the file only needs truncation of the ++ * first (or last since both are same) block. ++ * Else ++ * if (new_size % block_size == 0) ++ * then that means there is no truncate to be done with ++ * only shards from first_block + 1 through the last ++ * block needing to be unlinked. ++ * else ++ * both truncate of the first block and unlink of the ++ * remaining shards until end of file is required. ++ */ ++ local->first_block = ++ (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1, ++ local->block_size); ++ local->last_block = ++ get_highest_block(0, local->prebuf.ia_size, local->block_size); ++ ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->resolver_base_inode = ++ (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; ++ ++ if ((local->first_block == 0) && (local->num_blocks == 1)) { ++ if (local->fop == GF_FOP_TRUNCATE) ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset, ++ local->xattr_req); ++ else ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset, ++ local->xattr_req); ++ return 0; ++ } + +- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- dict_unref(xattr_req); +- return 0; ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = ++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_post_resolve_truncate_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; + +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ struct iatt tmp_stbuf = { ++ 0, ++ }; + +- local = frame->local; +- +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ local = frame->local; + +- SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, +- &local->prebuf, 
local->xattr_rsp); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ local->postbuf = tmp_stbuf = local->prebuf; ++ ++ if (local->prebuf.ia_size == local->offset) { ++ /* If the file size is same as requested size, unwind the call ++ * immediately. ++ */ ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf, ++ NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, ++ &local->postbuf, NULL); ++ } else if (local->offset > local->prebuf.ia_size) { ++ /* If the truncate is from a lower to a higher size, set the ++ * new size xattr and unwind. ++ */ ++ local->hole_size = local->offset - local->prebuf.ia_size; ++ local->delta_size = 0; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->postbuf.ia_size = local->offset; ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ } else { ++ /* ... else ++ * i. unlink all shards that need to be unlinked. ++ * ii. truncate the last of the shards. ++ * iii. update the new size using setxattr. ++ * and unwind the fop. ++ */ ++ local->hole_size = 0; ++ local->delta_size = (local->offset - local->prebuf.ia_size); ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_truncate_begin(frame, this); ++ } ++ return 0; + } + +-int +-shard_post_stat_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++/* TO-DO: ++ * Fix updates to size and block count with racing write(s) and truncate(s). 
++ */ + +- local = frame->local; ++int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ off_t offset, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, +- &local->prebuf, local->xattr_rsp); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +-} ++ } + +-int +-shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- shard_local_t *local = NULL; ++ if (!this->itable) ++ this->itable = loc->inode->table; + +- local = frame->local; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ loc_copy(&local->loc, loc); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->fop = GF_FOP_TRUNCATE; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->resolver_base_inode = loc->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, +- "stat failed: %s", +- local->fd ? 
uuid_utoa(local->fd->inode->gfid) +- : uuid_utoa((local->loc.inode)->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++err: ++ shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FTRUNCATE; ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->resolver_base_inode = fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- local->xattr_rsp = dict_ref(xdata); ++int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int ret = -1; ++ shard_local_t *local = NULL; + +- if (local->loc.inode) +- inode = local->loc.inode; +- else +- inode = local->fd->inode; ++ local = frame->local; + +- shard_inode_ctx_invalidate(inode, this, &local->prebuf); ++ if (op_ret == -1) ++ goto unwind; ++ ++ ret = ++ shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); + + unwind: +- local->handler(frame, this); +- return 0; +-} ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); + +-int +-shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++ return 0; ++} + +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++int shard_mknod(call_frame_t 
*frame, xlator_t *this, loc_t *loc, mode_t mode, ++ dev_t rdev, mode_t umask, dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++ frame->local = local; ++ local->block_size = priv->block_size; ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); ++ return 0; ++} + +- frame->local = local; ++int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ shard_local_t *local = NULL; + +- local->handler = shard_post_stat_handler; +- loc_copy(&local->loc, loc); +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local = frame->local; ++ if (op_ret < 0) ++ goto err; + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES); ++ buf->ia_size = local->prebuf.ia_size; ++ buf->ia_blocks = local->prebuf.ia_blocks; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); +- return 0; ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); ++ return 0; + } + +-int +-shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++ local = frame->local; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ if (local->op_ret < 0) { ++ SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL, ++ NULL, NULL, NULL); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++ STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), ++ 
FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, ++ local->xattr_req); ++ return 0; ++} + +- if (!this->itable) +- this->itable = fd->inode->table; ++int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } + +- frame->local = local; ++ if (!block_size) { ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, ++ oldloc, newloc, xdata); ++ return 0; ++ } + +- local->handler = shard_post_fstat_handler; +- local->fd = fd_ref(fd); +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ if (!this->itable) ++ this->itable = oldloc->inode->table; + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); +- return 0; ++ frame->local = local; ++ ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_link_handler); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); + +- local = frame->local; ++int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); ++ local = frame->local; ++ ++ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { ++ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, ++ "failed to delete shards of %s", ++ uuid_utoa(local->resolver_base_inode->gfid)); + return 0; +-} ++ } ++ local->op_ret = 0; ++ local->op_errno = 0; + +-int +-shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- int64_t delta_blocks = 0; +- shard_local_t *local = NULL; ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ return 0; ++} + +- local = frame->local; ++int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local = frame->local; ++ local->lookup_shards_barriered = _gf_true; + +- inode = (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode +- : local->fd->inode; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, +- "truncate on last" +- " shard failed : %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } ++ if (!local->call_count) ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ else ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_unlink_handler); ++ return 0; ++} ++ ++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) { ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *base_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ int unref_base_inode = 0; ++ int unref_shard_inode = 0; ++ ++ this = THIS; ++ priv = this->private; ++ ++ inode = local->inode_list[shard_block_num - local->first_block]; ++ shard_inode_ctx_get(inode, this, &ctx); ++ base_inode = ctx->base_inode; ++ if (base_inode) ++ gf_uuid_copy(gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, ctx->base_gfid); ++ shard_make_block_bname(shard_block_num, gfid, block_bname, ++ sizeof(block_bname)); ++ ++ LOCK(&priv->lock); ++ if (base_inode) ++ LOCK(&base_inode->lock); ++ LOCK(&inode->lock); ++ { ++ __shard_inode_ctx_get(inode, this, &ctx); ++ if (!list_empty(&ctx->ilist)) { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; ++ unref_base_inode++; ++ unref_shard_inode++; ++ GF_ASSERT(priv->inode_count >= 0); ++ } ++ if (ctx->fsync_needed) { ++ unref_base_inode++; ++ unref_shard_inode++; ++ list_del_init(&ctx->to_fsync_list); ++ if (base_inode) { ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ base_ictx->fsync_count--; ++ } ++ } ++ } ++ UNLOCK(&inode->lock); ++ if (base_inode) ++ UNLOCK(&base_inode->lock); + +- local->postbuf.ia_size = local->offset; +- 
/* Let the delta be negative. We want xattrop to do subtraction */ +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, +- postbuf->ia_blocks - prebuf->ia_blocks); +- GF_ASSERT(delta_blocks <= 0); +- local->postbuf.ia_blocks += delta_blocks; +- local->hole_size = 0; ++ inode_unlink(inode, priv->dot_shard_inode, block_bname); ++ inode_ref_reduce_by_n(inode, unref_shard_inode); ++ inode_forget(inode, 0); + +- shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ if (base_inode && unref_base_inode) ++ inode_ref_reduce_by_n(base_inode, unref_base_inode); ++ UNLOCK(&priv->lock); + } + +-int +-shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- size_t last_shard_size_after = 0; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; ++int shard_rename_cbk(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- /* A NULL inode could be due to the fact that the last shard which +- * needs to be truncated does not exist due to it lying in a hole +- * region. So the only thing left to do in that case would be an +- * update to file size xattr. +- */ +- if (!inode) { +- gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent" +- " in backend: %s. 
Directly proceeding to update " +- "file size", +- uuid_utoa(inode->gfid)); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } ++ SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->preoldparent, ++ &local->postoldparent, &local->prenewparent, ++ &local->postnewparent, local->xattr_rsp); ++ return 0; ++} + +- SHARD_SET_ROOT_FS_ID(frame, local); ++int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = frame->local; + +- loc.inode = inode_ref(inode); +- gf_uuid_copy(loc.gfid, inode->gfid); ++ SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preoldparent, &local->postoldparent, ++ local->xattr_rsp); ++ return 0; ++} + +- last_shard_size_after = (local->offset % local->block_size); ++int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int shard_block_num = (long)cookie; ++ shard_local_t *local = NULL; + +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, +- NULL); +- loc_wipe(&loc); +- return 0; +-} ++ local = frame->local; + +-void +-shard_unlink_block_inode(shard_local_t *local, int shard_block_num); ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } + +-int +-shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uint64_t block_count = 0; +- shard_local_t *local = NULL; ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ syncbarrier_wake(&local->barrier); ++ return 0; ++} ++ ++int 
shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, ++ inode_t *inode) { ++ int i = 0; ++ int ret = -1; ++ int count = 0; ++ uint32_t cur_block = 0; ++ uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ ++ char *bname = NULL; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ count++; ++ } ++ ++ if (!count) { ++ /* callcount = 0 implies that all of the shards that need to be ++ * unlinked are non-existent (in other words the file is full of ++ * holes). ++ */ ++ gf_msg_debug(this->name, 0, "All shards that need to be " ++ "unlinked are non-existent: %s", ++ uuid_utoa(gfid)); ++ return 0; ++ } + +- local = frame->local; ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ local->barrier.waitfor = count; ++ cur_block = cur_block_idx + local->first_block; + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); +- if (!ret) { +- GF_ATOMIC_SUB(local->delta_blocks, block_count); +- } else { +- /* dict_get failed possibly due to a heterogeneous cluster? 
*/ +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get key %s from dict during truncate of gfid %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- } +- return 0; +-} +- +-int +-shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- int i = 1; +- int ret = -1; +- int call_count = 0; +- uint32_t cur_block = 0; +- uint32_t last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xdata_req = NULL; +- +- local = frame->local; +- priv = this->private; +- +- cur_block = local->first_block + 1; +- last_block = local->last_block; +- +- /* Determine call count */ +- for (i = 1; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- call_count++; +- } ++ while (cur_block_idx < local->num_blocks) { ++ if (!local->inode_list[cur_block_idx]) ++ goto next; + +- if (!call_count) { +- /* Call count = 0 implies that all of the shards that need to be +- * unlinked do not exist. So shard xlator would now proceed to +- * do the final truncate + size updates. +- */ +- gf_msg_debug(this->name, 0, +- "Shards to be unlinked as part of " +- "truncate absent in backend: %s. 
Directly " +- "proceeding to update file size", +- uuid_utoa(inode->gfid)); +- local->postbuf.ia_size = local->offset; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->hole_size = 0; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; ++ if (wind_failed) { ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; + } + +- local->call_count = call_count; +- i = 1; +- xdata_req = dict_new(); +- if (!xdata_req) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } +- ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict during truncate of %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- dict_unref(xdata_req); +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; + } + +- SHARD_SET_ROOT_FS_ID(frame, local); +- while (cur_block <= last_block) { +- if (!local->inode_list[i]) { +- cur_block++; +- i++; +- continue; +- } +- if (wind_failed) { +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, 
NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s. Base file gfid = %s", +- bname, uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[i]); +- +- STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); +- loc_wipe(&loc); +- next: +- i++; +- cur_block++; +- if (!--call_count) +- break; +- } +- dict_unref(xdata_req); +- return 0; +-} ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[cur_block_idx]); + +-int +-shard_truncate_do(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, ++ (void *)(long)cur_block, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, ++ local->xattr_req); ++ loc_wipe(&loc); ++ next: ++ cur_block++; ++ cur_block_idx++; ++ } ++ syncbarrier_wait(&local->barrier, count); ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ return 0; ++} ++ ++int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, ++ int now, int first_block, ++ gf_dirent_t *entry) { ++ int i = 0; ++ int ret = 0; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = cleanup_frame->local; ++ ++ local->inode_list = GF_CALLOC(now, sizeof(inode_t *), 
gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ return -ENOMEM; ++ ++ local->first_block = first_block; ++ local->last_block = first_block + now - 1; ++ local->num_blocks = now; ++ gf_uuid_parse(entry->d_name, gfid); ++ gf_uuid_copy(local->base_gfid, gfid); ++ local->resolver_base_inode = inode_find(this->itable, gfid); ++ local->call_count = 0; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) { ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ return -errno; ++ } ++ shard_common_resolve_shards(cleanup_frame, this, ++ shard_post_resolve_unlink_handler); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ if (local->op_ret) ++ ret = -local->op_errno; ++ syncbarrier_destroy(&local->barrier); ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ STACK_RESET(cleanup_frame->root); ++ return ret; ++} ++ ++int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) { ++ int ret = 0; ++ int shard_count = 0; ++ int first_block = 0; ++ int now = 0; ++ uint64_t size = 0; ++ uint64_t block_size = 0; ++ uint64_t size_array[4] = { ++ 0, ++ }; ++ void *bsize = NULL; ++ void *size_attr = NULL; ++ dict_t *xattr_rsp = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = cleanup_frame->local; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", 
GF_XATTR_SHARD_BLOCK_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, ++ &xattr_rsp); ++ if (ret) ++ goto err; ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); ++ goto err; ++ } ++ block_size = ntoh64(*((uint64_t *)bsize)); ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ goto err; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ size = ntoh64(size_array[0]); ++ ++ shard_count = (size / block_size) - 1; ++ if (shard_count < 0) { ++ gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", ++ entry->d_name); ++ /* File size < shard-block-size, so nothing to delete */ ++ ret = 0; ++ goto delete_marker; ++ } ++ if ((size % block_size) > 0) ++ shard_count++; ++ ++ if (shard_count == 0) { ++ gf_msg_debug(this->name, 0, "Size of %s is exactly equal to " ++ "its shard-block-size. Nothing to delete. 
" ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } ++ gf_msg_debug(this->name, 0, ++ "base file = %s, " ++ "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", " ++ "shard_count=%d", ++ entry->d_name, block_size, size, shard_count); ++ ++ /* Perform a gfid-based lookup to see if gfid corresponding to marker ++ * file's base name exists. ++ */ ++ loc_wipe(&loc); ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ gf_uuid_parse(entry->d_name, loc.gfid); ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (!ret) { ++ gf_msg_debug(this->name, 0, "Base shard corresponding to gfid " ++ "%s is present. Skipping shard deletion. " ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } + +- local = frame->local; ++ first_block = 1; + +- if (local->num_blocks == 1) { +- /* This means that there are no shards to be unlinked. +- * The fop boils down to truncating the last shard, updating +- * the size and unwinding. 
+- */ +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- return 0; ++ while (shard_count) { ++ if (shard_count < local->deletion_rate) { ++ now = shard_count; ++ shard_count = 0; + } else { +- shard_truncate_htol(frame, this, local->loc.inode); +- } +- return 0; +-} +- +-int +-shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ now = local->deletion_rate; ++ shard_count -= local->deletion_rate; + } + +- shard_truncate_do(frame, this); +- return 0; +-} ++ gf_msg_debug(this->name, 0, "deleting %d shards starting from " ++ "block %d of gfid %s", ++ now, first_block, entry->d_name); ++ ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block, ++ entry); ++ if (ret) ++ goto err; ++ first_block += now; ++ } + +-void +-shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, +- struct iatt *buf) +-{ +- int list_index = 0; +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *linked_inode = NULL; +- xlator_t *this = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- inode_t *base_inode = NULL; +- +- this = THIS; +- priv = this->private; +- if (local->loc.inode) { +- gf_uuid_copy(gfid, local->loc.inode->gfid); +- base_inode = local->loc.inode; +- } else if (local->resolver_base_inode) { +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- base_inode = local->resolver_base_inode; ++delete_marker: ++ loc_wipe(&loc); ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.name 
= strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to delete %s " ++ "from /%s", ++ entry->d_name, GF_SHARD_REMOVE_ME_DIR); ++err: ++ if (xattr_rsp) ++ dict_unref(xattr_rsp); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) { ++ int ret = -1; ++ loc_t loc = { ++ 0, ++ }; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ loc.inode = inode_ref(priv->dot_shard_rm_inode); ++ ++ ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); ++ if (ret < 0) { ++ if (ret == -EAGAIN) { ++ ret = 0; ++ } ++ goto out; ++ } ++ { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } ++ syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); ++out: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) { ++ int ret = 0; ++ char *bname = NULL; ++ loc_t *loc = NULL; ++ shard_priv_t *priv = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ struct iatt stbuf = { ++ 0, ++ }; ++ ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ loc = &local->dot_shard_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ bname = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ loc = &local->dot_shard_rm_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ ++ loc->inode = inode_find(this->itable, gfid); ++ if (!loc->inode) { ++ ret = 
shard_init_internal_dir_loc(this, local, type); ++ if (ret) ++ goto err; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset " ++ "dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); ++ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req, ++ NULL); ++ if (ret < 0) { ++ if (ret != -ENOENT) ++ gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Lookup on %s failed, exiting", bname); ++ goto err; + } else { +- gf_uuid_copy(gfid, local->base_gfid); ++ shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); + } ++ } ++ ret = 0; ++err: ++ return ret; ++} ++ ++int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, ++ gf_dirent_t *entry) { ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.parent = inode_ref(local->fd->inode); ++ ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (ret < 0) { ++ goto err; ++ } ++ entry->inode = inode_ref(loc.inode); ++ ret = 0; ++err: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards(void *opaque) { ++ int ret = 0; ++ off_t offset = 0; ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *link_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ gf_dirent_t entries; ++ gf_dirent_t *entry = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ gf_boolean_t done = _gf_false; ++ ++ this = THIS; ++ priv = this->private; ++ 
INIT_LIST_HEAD(&entries.list); ++ ++ cleanup_frame = opaque; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create local to " ++ "delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ cleanup_frame->local = local; ++ local->fop = GF_FOP_UNLINK; ++ ++ local->xattr_req = dict_new(); ++ if (!local->xattr_req) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ local->deletion_rate = priv->deletion_rate; ++ ++ ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ".shard absent. Nothing to" ++ " delete. Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } + +- shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); +- +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); +- inode_lookup(linked_inode); +- list_index = block_num - local->first_block; +- local->inode_list[list_index] = linked_inode; +- ++ ret = shard_resolve_internal_dir(this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ".remove_me absent. " ++ "Nothing to delete. 
Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } ++ ++ local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); ++ if (!local->fd) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ for (;;) { ++ offset = 0; + LOCK(&priv->lock); + { +- fsync_inode = __shard_update_shards_inode_list( +- linked_inode, this, base_inode, block_num, gfid); ++ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { ++ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; ++ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ done = _gf_true; ++ } + } + UNLOCK(&priv->lock); +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +-} +- +-int +-shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uuid_t gfid = { +- 0, +- }; +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (local->resolver_base_inode) +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- if (op_ret < 0) { +- /* Ignore absence of shards in the backend in truncate fop. */ +- switch (local->fop) { +- case GF_FOP_TRUNCATE: +- case GF_FOP_FTRUNCATE: +- case GF_FOP_RENAME: +- case GF_FOP_UNLINK: +- if (op_errno == ENOENT) +- goto done; +- break; +- case GF_FOP_WRITE: +- case GF_FOP_READ: +- case GF_FOP_ZEROFILL: +- case GF_FOP_DISCARD: +- case GF_FOP_FALLOCATE: +- if ((!local->first_lookup_done) && (op_errno == ENOENT)) { +- LOCK(&frame->lock); +- { +- local->create_count++; +- } +- UNLOCK(&frame->lock); +- goto done; +- } +- break; +- default: +- break; +- } +- +- /* else */ +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_LOOKUP_SHARD_FAILED, +- "Lookup on shard %d " +- "failed. 
Base file gfid = %s", +- shard_block_num, uuid_utoa(gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- +- shard_link_block_inode(local, shard_block_num, inode, buf); +- +-done: +- if (local->lookup_shards_barriered) { +- syncbarrier_wake(&local->barrier); +- return 0; +- } else { +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- if (!local->first_lookup_done) +- local->first_lookup_done = _gf_true; +- local->pls_fop_handler(frame, this); +- } +- } +- return 0; +-} +- +-dict_t * +-shard_create_gfid_dict(dict_t *dict) +-{ +- int ret = 0; +- dict_t *new = NULL; +- unsigned char *gfid = NULL; +- +- new = dict_copy_with_ref(dict, NULL); +- if (!new) +- return NULL; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); +- if (!gfid) { +- ret = -1; +- goto out; +- } +- +- gf_uuid_generate(gfid); +- +- ret = dict_set_gfuuid(new, "gfid-req", gfid, false); +- +-out: +- if (ret) { +- dict_unref(new); +- new = NULL; +- GF_FREE(gfid); +- } +- +- return new; +-} ++ if (done) ++ break; ++ while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, ++ &entries, local->xattr_req, NULL))) { ++ if (ret > 0) ++ ret = 0; ++ list_for_each_entry(entry, &entries.list, list) { ++ offset = entry->d_off; + +-int +-shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, +- shard_post_lookup_shards_fop_handler_t handler) +-{ +- int i = 0; +- int ret = 0; +- int count = 0; +- int call_count = 0; +- int32_t shard_idx_iter = 0; +- int last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- gf_boolean_t wind_failed = _gf_false; +- dict_t *xattr_req = NULL; +- +- priv = this->private; +- local = frame->local; +- count = call_count = local->call_count; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- 
local->pls_fop_handler = handler; +- if (local->lookup_shards_barriered) +- local->barrier.waitfor = local->call_count; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); ++ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) ++ continue; + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- i++; +- shard_idx_iter++; ++ if (!entry->inode) { ++ ret = shard_lookup_marker_entry(this, local, entry); ++ if (ret < 0) + continue; + } ++ link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name, ++ &entry->d_stat); + +- if (wind_failed) { +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; +- } +- +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- loc_wipe(&loc); +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; ++ gf_msg_debug(this->name, 0, "Initiating deletion of " ++ "shards of gfid %s", ++ 
entry->d_name); ++ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, ++ link_inode); ++ inode_unlink(link_inode, local->fd->inode, entry->d_name); ++ inode_unref(link_inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to clean up shards of gfid %s", entry->d_name); ++ continue; + } ++ gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED, ++ "Deleted " ++ "shards of gfid=%s from backend", ++ entry->d_name); ++ } ++ gf_dirent_free(&entries); ++ if (ret) ++ break; ++ } ++ } ++ ret = 0; ++ loc_wipe(&loc); ++ return ret; + +- STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- next: +- shard_idx_iter++; +- i++; +- +- if (!--call_count) +- break; +- } +- if (local->lookup_shards_barriered) { +- syncbarrier_wait(&local->barrier, count); +- local->pls_fop_handler(frame, this); +- } +- return 0; ++err: ++ LOCK(&priv->lock); ++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; } ++ UNLOCK(&priv->lock); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->inodelk_frame; ++ lk_local = lk_frame->local; ++ local->inodelk_frame = NULL; ++ loc = &local->int_inodelk.loc; ++ lock = &lk_local->int_inodelk; ++ lock->flock.l_type = F_UNLCK; ++ ++ STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, ++ &lock->flock, NULL); ++ local->int_inodelk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata); ++int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ loc_t *dst_loc = NULL; ++ loc_t tmp_loc = { ++ 0, ++ }; ++ shard_local_t *local = frame->local; ++ ++ if (local->dst_block_size) { ++ tmp_loc.parent = inode_ref(local->loc2.parent); ++ ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on pargfid=%s bname=%s", ++ uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ tmp_loc.name = strrchr(tmp_loc.path, '/'); ++ if (tmp_loc.name) ++ tmp_loc.name++; ++ dst_loc = &tmp_loc; ++ } else { ++ dst_loc = &local->loc2; ++ } ++ ++ /* To-Do: Request open-fd count on dst base file */ ++ STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, 
++ local->xattr_req); ++ loc_wipe(&tmp_loc); ++ return 0; ++err: ++ loc_wipe(&tmp_loc); ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this); ++ ++int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Xattrop on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, ++ local->newloc.name); ++ ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; ++} ++ ++int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) { ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ dict_t *xdata = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ xdata = dict_new(); ++ if (!xdata) ++ goto err; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, &local->newloc, ++ GF_XATTROP_GET_AND_SET, xdata, NULL); ++ dict_unref(xdata); ++ return 0; ++err: ++ if (xdata) ++ dict_unref(xdata); ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; + } + +-int 
+-shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- if (local->op_errno == ENOENT) { +- /* If lookup on /.shard fails with ENOENT, it means that +- * the file was 0-byte in size but truncated sometime in +- * the past to a higher size which is reflected in the +- * size xattr, and now being truncated to a lower size. +- * In this case, the only thing that needs to be done is +- * to update the size xattr of the file and unwind. +- */ +- local->first_block = local->last_block = 0; +- local->num_blocks = 1; +- local->call_count = 0; +- local->op_ret = 0; +- local->postbuf.ia_size = local->offset; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } else { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- } +- +- if (!local->call_count) +- shard_truncate_do(frame, this); +- else +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_truncate_handler); +- +- return 0; ++int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Lookup on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ linked_inode = ++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; ++ shard_set_size_attrs_on_marker_file(frame, this); ++ return 0; ++err: 
++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; + } + +-int +-shard_truncate_begin(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- /* First participant block here is the lowest numbered block that would +- * hold the last byte of the file post successful truncation. +- * Last participant block is the block that contains the last byte in +- * the current state of the file. +- * If (first block == last_block): +- * then that means that the file only needs truncation of the +- * first (or last since both are same) block. +- * Else +- * if (new_size % block_size == 0) +- * then that means there is no truncate to be done with +- * only shards from first_block + 1 through the last +- * block needing to be unlinked. +- * else +- * both truncate of the first block and unlink of the +- * remaining shards until end of file is required. +- */ +- local->first_block = (local->offset == 0) +- ? 0 +- : get_lowest_block(local->offset - 1, +- local->block_size); +- local->last_block = get_highest_block(0, local->prebuf.ia_size, +- local->block_size); +- +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) +- ? 
local->loc.inode +- : local->fd->inode; +- +- if ((local->first_block == 0) && (local->num_blocks == 1)) { +- if (local->fop == GF_FOP_TRUNCATE) +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &local->loc, +- local->offset, local->xattr_req); +- else +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, local->fd, +- local->offset, local->xattr_req); +- return 0; +- } ++int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) { ++ int op_errno = ENOMEM; ++ dict_t *xattr_req = NULL; ++ shard_local_t *local = NULL; + +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ local = frame->local; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, +- shard_post_resolve_truncate_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_truncate_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; + ++ STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- struct iatt tmp_stbuf = { +- 0, +- }; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- 
shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- local->postbuf = tmp_stbuf = local->prebuf; +- +- if (local->prebuf.ia_size == local->offset) { +- /* If the file size is same as requested size, unwind the call +- * immediately. +- */ +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- } else if (local->offset > local->prebuf.ia_size) { +- /* If the truncate is from a lower to a higher size, set the +- * new size xattr and unwind. +- */ +- local->hole_size = local->offset - local->prebuf.ia_size; +- local->delta_size = 0; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->postbuf.ia_size = local->offset; +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); ++int shard_create_marker_file_under_remove_me_cbk( ++ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (op_ret < 0) { ++ if ((op_errno != EEXIST) && (op_errno != ENODATA)) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Marker file creation " ++ "failed while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; + } else { +- /* ... else +- * i. unlink all shards that need to be unlinked. +- * ii. truncate the last of the shards. +- * iii. 
update the new size using setxattr. +- * and unwind the fop. +- */ +- local->hole_size = 0; +- local->delta_size = (local->offset - local->prebuf.ia_size); +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_truncate_begin(frame, this); +- } +- return 0; +-} +- +-/* TO-DO: +- * Fix updates to size and block count with racing write(s) and truncate(s). +- */ +- +-int +-shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; ++ shard_lookup_marker_file(frame, this); ++ return 0; + } ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = loc->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- loc_copy(&local->loc, loc); +- local->offset = offset; +- local->block_size = block_size; +- local->fop = GF_FOP_TRUNCATE; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->resolver_base_inode = loc->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- return 0; ++ linked_inode = ++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; + ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = fd->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->offset = offset; +- local->block_size = block_size; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FTRUNCATE; ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++} ++ ++int shard_create_marker_file_under_remove_me(call_frame_t *frame, ++ xlator_t *this, loc_t *loc) { ++ int ret = 0; ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ char g1[64] = { ++ 0, ++ }; ++ char g2[64] = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; ++ ++ local->newloc.inode = inode_new(this->itable); ++ local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), ++ (char **)&local->newloc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on " ++ "pargfid=%s bname=%s", ++ uuid_utoa_r(priv->dot_shard_rm_gfid, g1), ++ uuid_utoa_r(loc->inode->gfid, g2)); ++ goto err; ++ } ++ local->newloc.name = strrchr(local->newloc.path, '/'); ++ if (local->newloc.name) ++ local->newloc.name++; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ ++ SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ ++ STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc, ++ 0, 0, 0644, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); +- local->resolver_base_inode = fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- 
return 0; + err: +- shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, ++ NULL, NULL, NULL, NULL, NULL); ++ return 0; + } + +-int +-shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret == -1) +- goto unwind; +- +- ret = shard_inode_ctx_set(inode, this, buf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); +- +-unwind: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); ++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); + +- return 0; +-} ++int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; + +-int +-shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, +- dev_t rdev, mode_t umask, dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; ++ local = frame->local; + +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } else { ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ local->preoldparent = *preparent; ++ local->postoldparent = *postparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } + +- frame->local = 
local; +- local->block_size = priv->block_size; +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; + } ++ } + +- STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); +- return 0; +-} +- +-int32_t +-shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (op_ret < 0) +- goto err; +- +- shard_inode_ctx_set(inode, this, buf, 0, +- SHARD_MASK_NLINK | SHARD_MASK_TIMES); +- buf->ia_size = local->prebuf.ia_size; +- buf->ia_blocks = local->prebuf.ia_blocks; +- +- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); +- return 0; ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ ++ shard_unlink_cbk(frame, this); ++ return 0; ++} ++ ++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = frame->local; ++ ++ /* To-Do: Request open-fd count on base file */ ++ STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, ++ local->xattr_req); ++ return 0; ++} ++ ++int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_entrylk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->entrylk_frame; ++ lk_local = lk_frame->local; ++ local->entrylk_frame = NULL; ++ lock = &lk_local->int_entrylk; ++ loc = &lock->loc; ++ ++ STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, loc, ++ lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, ++ NULL); ++ local->int_entrylk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_create_marker_file_under_remove_me(frame, this, ++ &local->int_inodelk.loc); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-entrylk handler not defined. 
This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_entrylk.acquired_lock = _gf_true; ++ shard_post_entrylk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, ++ uuid_t gfid) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_local_t *entrylk_local = NULL; ++ shard_entrylk_t *int_entrylk = NULL; ++ call_frame_t *entrylk_frame = NULL; ++ ++ local = frame->local; ++ entrylk_frame = create_frame(this, this->ctx->pool); ++ if (!entrylk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock marker file"); ++ goto err; ++ } ++ ++ entrylk_local = mem_get0(this->local_pool); ++ if (!entrylk_local) { ++ STACK_DESTROY(entrylk_frame->root); ++ goto err; ++ } ++ ++ entrylk_frame->local = entrylk_local; ++ entrylk_local->main_frame = frame; ++ int_entrylk = &entrylk_local->int_entrylk; ++ ++ int_entrylk->loc.inode = inode_ref(inode); ++ set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); ++ local->entrylk_frame = entrylk_frame; ++ gf_uuid_unparse(gfid, gfid_str); ++ int_entrylk->basename = gf_strdup(gfid_str); ++ ++ STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, ++ int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); ++ return 0; + err: +- 
shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, +- NULL, NULL, NULL, NULL); +- return 0; +- } +- +- STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, +- local->xattr_req); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int32_t +-shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (!block_size) { +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, +- oldloc, newloc, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = oldloc->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_link_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); +- +-int +-shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { +- gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, +- "failed to delete shards of %s", +- uuid_utoa(local->resolver_base_inode->gfid)); +- return 0; +- } +- local->op_ret = 0; +- local->op_errno = 0; +- +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- return 0; +-} +- +-int +-shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- local->lookup_shards_barriered = _gf_true; +- +- if (!local->call_count) +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- else +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_unlink_handler); +- return 0; +-} +- +-void +-shard_unlink_block_inode(shard_local_t *local, int shard_block_num) +-{ +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *base_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- int unref_base_inode = 0; +- int unref_shard_inode = 0; +- +- this = THIS; +- priv = this->private; +- +- inode = local->inode_list[shard_block_num - local->first_block]; +- shard_inode_ctx_get(inode, this, &ctx); +- base_inode = ctx->base_inode; +- if (base_inode) +- gf_uuid_copy(gfid, base_inode->gfid); +- else +- gf_uuid_copy(gfid, 
ctx->base_gfid); +- shard_make_block_bname(shard_block_num, gfid, block_bname, +- sizeof(block_bname)); +- +- LOCK(&priv->lock); +- if (base_inode) +- LOCK(&base_inode->lock); +- LOCK(&inode->lock); +- { +- __shard_inode_ctx_get(inode, this, &ctx); +- if (!list_empty(&ctx->ilist)) { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- unref_base_inode++; +- unref_shard_inode++; +- GF_ASSERT(priv->inode_count >= 0); +- } +- if (ctx->fsync_needed) { +- unref_base_inode++; +- unref_shard_inode++; +- list_del_init(&ctx->to_fsync_list); +- if (base_inode) { +- __shard_inode_ctx_get(base_inode, this, &base_ictx); +- base_ictx->fsync_count--; +- } +- } +- } +- UNLOCK(&inode->lock); +- if (base_inode) +- UNLOCK(&base_inode->lock); +- +- inode_unlink(inode, priv->dot_shard_inode, block_bname); +- inode_ref_reduce_by_n(inode, unref_shard_inode); +- inode_forget(inode, 0); +- +- if (base_inode && unref_base_inode) +- inode_ref_reduce_by_n(base_inode, unref_base_inode); +- UNLOCK(&priv->lock); +-} +- +-int +-shard_rename_cbk(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->preoldparent, +- &local->postoldparent, &local->prenewparent, +- &local->postnewparent, local->xattr_rsp); +- return 0; +-} +- +-int32_t +-shard_unlink_cbk(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- +- SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, +- &local->preoldparent, &local->postoldparent, +- local->xattr_rsp); +- return 0; +-} +- +-int +-shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int shard_block_num = (long)cookie; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno 
= op_errno; +- goto done; +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- syncbarrier_wake(&local->barrier); +- return 0; +-} +- +-int +-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- int i = 0; +- int ret = -1; +- int count = 0; +- uint32_t cur_block = 0; +- uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ +- char *bname = NULL; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- count++; +- } +- +- if (!count) { +- /* callcount = 0 implies that all of the shards that need to be +- * unlinked are non-existent (in other words the file is full of +- * holes). 
+- */ +- gf_msg_debug(this->name, 0, +- "All shards that need to be " +- "unlinked are non-existent: %s", +- uuid_utoa(gfid)); +- return 0; +- } +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- local->barrier.waitfor = count; +- cur_block = cur_block_idx + local->first_block; +- +- while (cur_block_idx < local->num_blocks) { +- if (!local->inode_list[cur_block_idx]) +- goto next; +- +- if (wind_failed) { +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[cur_block_idx]); +- +- STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, +- local->xattr_req); +- loc_wipe(&loc); +- next: +- cur_block++; +- cur_block_idx++; +- } +- syncbarrier_wait(&local->barrier, count); +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- return 0; +-} +- +-int +-shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, +- int now, int first_block, gf_dirent_t *entry) +-{ +- int i = 0; +- int ret = 0; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; +- +- local = cleanup_frame->local; +- +- local->inode_list = GF_CALLOC(now, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if 
(!local->inode_list) +- return -ENOMEM; +- +- local->first_block = first_block; +- local->last_block = first_block + now - 1; +- local->num_blocks = now; +- gf_uuid_parse(entry->d_name, gfid); +- gf_uuid_copy(local->base_gfid, gfid); +- local->resolver_base_inode = inode_find(this->itable, gfid); +- local->call_count = 0; +- ret = syncbarrier_init(&local->barrier); +- if (ret) { +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- return -errno; +- } +- shard_common_resolve_shards(cleanup_frame, this, +- shard_post_resolve_unlink_handler); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- if (local->op_ret) +- ret = -local->op_errno; +- syncbarrier_destroy(&local->barrier); +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- STACK_RESET(cleanup_frame->root); +- return ret; +-} +- +-int +-__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) +-{ +- int ret = 0; +- int shard_count = 0; +- int first_block = 0; +- int now = 0; +- uint64_t size = 0; +- uint64_t block_size = 0; +- uint64_t size_array[4] = { +- 0, +- }; +- void *bsize = NULL; +- void *size_attr = NULL; +- dict_t *xattr_rsp = NULL; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = cleanup_frame->local; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset dict"); +- ret = -ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- ret = 
-ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, +- &xattr_rsp); +- if (ret) +- goto err; +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- goto err; +- } +- block_size = ntoh64(*((uint64_t *)bsize)); +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- goto err; +- } +- +- memcpy(size_array, size_attr, sizeof(size_array)); +- size = ntoh64(size_array[0]); +- +- shard_count = (size / block_size) - 1; +- if (shard_count < 0) { +- gf_msg_debug(this->name, 0, +- "Size of %s hasn't grown beyond " +- "its shard-block-size. Nothing to delete. " +- "Returning", +- entry->d_name); +- /* File size < shard-block-size, so nothing to delete */ +- ret = 0; +- goto delete_marker; +- } +- if ((size % block_size) > 0) +- shard_count++; +- +- if (shard_count == 0) { +- gf_msg_debug(this->name, 0, +- "Size of %s is exactly equal to " +- "its shard-block-size. Nothing to delete. 
" +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- gf_msg_debug(this->name, 0, +- "base file = %s, " +- "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 +- ", " +- "shard_count=%d", +- entry->d_name, block_size, size, shard_count); +- +- /* Perform a gfid-based lookup to see if gfid corresponding to marker +- * file's base name exists. +- */ +- loc_wipe(&loc); +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- gf_uuid_parse(entry->d_name, loc.gfid); +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (!ret) { +- gf_msg_debug(this->name, 0, +- "Base shard corresponding to gfid " +- "%s is present. Skipping shard deletion. " +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- +- first_block = 1; +- +- while (shard_count) { +- if (shard_count < local->deletion_rate) { +- now = shard_count; +- shard_count = 0; +- } else { +- now = local->deletion_rate; +- shard_count -= local->deletion_rate; +- } +- +- gf_msg_debug(this->name, 0, +- "deleting %d shards starting from " +- "block %d of gfid %s", +- now, first_block, entry->d_name); +- ret = shard_regulated_shards_deletion(cleanup_frame, this, now, +- first_block, entry); +- if (ret) +- goto err; +- first_block += now; +- } +- +-delete_marker: +- loc_wipe(&loc); +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to delete %s " +- "from /%s", +- entry->d_name, GF_SHARD_REMOVE_ME_DIR); +-err: +- if 
(xattr_rsp) +- dict_unref(xattr_rsp); +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) +-{ +- int ret = -1; +- loc_t loc = { +- 0, +- }; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- loc.inode = inode_ref(priv->dot_shard_rm_inode); +- +- ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); +- if (ret < 0) { +- if (ret == -EAGAIN) { +- ret = 0; +- } +- goto out; +- } +- { +- ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); +- } +- syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); +-out: +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) +-{ +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) +-{ +- int ret = 0; +- char *bname = NULL; +- loc_t *loc = NULL; +- shard_priv_t *priv = NULL; +- uuid_t gfid = { +- 0, +- }; +- struct iatt stbuf = { +- 0, +- }; +- +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- loc = &local->dot_shard_loc; +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- bname = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- loc = &local->dot_shard_rm_loc; +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- bname = GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- +- loc->inode = inode_find(this->itable, gfid); +- if (!loc->inode) { +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset " +- "dict"); +- ret = -ENOMEM; +- goto err; +- } +- ret = 
dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); +- ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, +- local->xattr_req, NULL); +- if (ret < 0) { +- if (ret != -ENOENT) +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Lookup on %s failed, exiting", bname); +- goto err; +- } else { +- shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); +- } +- } +- ret = 0; +-err: +- return ret; +-} +- +-int +-shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, +- gf_dirent_t *entry) +-{ +- int ret = 0; +- loc_t loc = { +- 0, +- }; +- +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- loc.parent = inode_ref(local->fd->inode); +- +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (ret < 0) { +- goto err; +- } +- entry->inode = inode_ref(loc.inode); +- ret = 0; +-err: +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards(void *opaque) +-{ +- int ret = 0; +- off_t offset = 0; +- loc_t loc = { +- 0, +- }; +- inode_t *link_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- gf_dirent_t entries; +- gf_dirent_t *entry = NULL; +- call_frame_t *cleanup_frame = NULL; +- gf_boolean_t done = _gf_false; +- +- this = THIS; +- priv = this->private; +- INIT_LIST_HEAD(&entries.list); +- +- cleanup_frame = opaque; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create local to " +- "delete shards"); +- ret = -ENOMEM; +- goto err; +- } +- cleanup_frame->local = local; +- 
local->fop = GF_FOP_UNLINK; +- +- local->xattr_req = dict_new(); +- if (!local->xattr_req) { +- ret = -ENOMEM; +- goto err; +- } +- local->deletion_rate = priv->deletion_rate; +- +- ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, +- ".shard absent. Nothing to" +- " delete. Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- ret = shard_resolve_internal_dir(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, +- ".remove_me absent. " +- "Nothing to delete. Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); +- if (!local->fd) { +- ret = -ENOMEM; +- goto err; +- } +- +- for (;;) { +- offset = 0; +- LOCK(&priv->lock); +- { +- if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { +- priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; +- } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- done = _gf_true; +- } +- } +- UNLOCK(&priv->lock); +- if (done) +- break; +- while ( +- (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, +- &entries, local->xattr_req, NULL))) { +- if (ret > 0) +- ret = 0; +- list_for_each_entry(entry, &entries.list, list) +- { +- offset = entry->d_off; +- +- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) +- continue; +- +- if (!entry->inode) { +- ret = shard_lookup_marker_entry(this, local, entry); +- if (ret < 0) +- continue; +- } +- link_inode = inode_link(entry->inode, local->fd->inode, +- entry->d_name, &entry->d_stat); +- +- gf_msg_debug(this->name, 0, +- "Initiating deletion of " +- "shards of gfid %s", +- entry->d_name); +- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, +- link_inode); +- inode_unlink(link_inode, local->fd->inode, entry->d_name); +- 
inode_unref(link_inode); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to clean up shards of gfid %s", +- entry->d_name); +- continue; +- } +- gf_msg(this->name, GF_LOG_INFO, 0, +- SHARD_MSG_SHARD_DELETION_COMPLETED, +- "Deleted " +- "shards of gfid=%s from backend", +- entry->d_name); +- } +- gf_dirent_free(&entries); +- if (ret) +- break; +- } +- } +- ret = 0; +- loc_wipe(&loc); +- return ret; +- +-err: +- LOCK(&priv->lock); +- { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- } +- UNLOCK(&priv->lock); +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->inodelk_frame; +- lk_local = lk_frame->local; +- local->inodelk_frame = NULL; +- loc = &local->int_inodelk.loc; +- lock = &lk_local->int_inodelk; +- lock->flock.l_type = F_UNLCK; +- +- STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, +- &lock->flock, NULL); +- local->int_inodelk.acquired_lock = _gf_false; +- return 0; +-} +- +-int +-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata); +-int +-shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 
0; +- loc_t *dst_loc = NULL; +- loc_t tmp_loc = { +- 0, +- }; +- shard_local_t *local = frame->local; +- +- if (local->dst_block_size) { +- tmp_loc.parent = inode_ref(local->loc2.parent); +- ret = inode_path(tmp_loc.parent, local->loc2.name, +- (char **)&tmp_loc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on pargfid=%s bname=%s", +- uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- tmp_loc.name = strrchr(tmp_loc.path, '/'); +- if (tmp_loc.name) +- tmp_loc.name++; +- dst_loc = &tmp_loc; +- } else { +- dst_loc = &local->loc2; +- } +- +- /* To-Do: Request open-fd count on dst base file */ +- STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, +- local->xattr_req); +- loc_wipe(&tmp_loc); +- return 0; +-err: +- loc_wipe(&tmp_loc); +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int +-shard_unlink_base_file(call_frame_t *frame, xlator_t *this); +- +-int +-shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Xattrop on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, +- local->newloc.name); +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, 
op_errno); +- return 0; +-} +- +-int +-shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) +-{ +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- dict_t *xdata = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- xdata = dict_new(); +- if (!xdata) +- goto err; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, +- local->prebuf.ia_size, 0, err); +- STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, +- &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); +- dict_unref(xdata); +- return 0; +-err: +- if (xdata) +- dict_unref(xdata); +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; +-} +- +-int +-shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Lookup on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- linked_inode = inode_link(inode, priv->dot_shard_rm_inode, +- local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- shard_set_size_attrs_on_marker_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) +-{ +- int op_errno = ENOMEM; +- dict_t *xattr_req = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- xattr_req 
= shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); +- dict_unref(xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; +-} +- +-int +-shard_create_marker_file_under_remove_me_cbk( +- call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (op_ret < 0) { +- if ((op_errno != EEXIST) && (op_errno != ENODATA)) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Marker file creation " +- "failed while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } else { +- shard_lookup_marker_file(frame, this); +- return 0; +- } +- } +- +- linked_inode = inode_link(inode, priv->dot_shard_rm_inode, +- local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +-} +- +-int +-shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, +- loc_t *loc) +-{ +- int ret = 0; +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- char g1[64] = { +- 0, +- }; +- char g2[64] = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; 
+- local = frame->local; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- local->newloc.inode = inode_new(this->itable); +- local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), +- (char **)&local->newloc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on " +- "pargfid=%s bname=%s", +- uuid_utoa_r(priv->dot_shard_rm_gfid, g1), +- uuid_utoa_r(loc->inode->gfid, g2)); +- goto err; +- } +- local->newloc.name = strrchr(local->newloc.path, '/'); +- if (local->newloc.name) +- local->newloc.name++; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- +- SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, +- local->prebuf.ia_size, 0, err); +- +- STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, +- &local->newloc, 0, 0, 0644, xattr_req); +- dict_unref(xattr_req); +- return 0; +- +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, +- NULL, NULL, NULL, NULL, NULL); +- return 0; +-} +- +-int +-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); +- +-int +-shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->preoldparent = *preparent; +- local->postoldparent = *postparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- if (local->cleanup_required) +- 
shard_start_background_deletion(this); +- } +- +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- } +- +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- +- shard_unlink_cbk(frame, this); +- return 0; +-} +- +-int +-shard_unlink_base_file(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- +- /* To-Do: Request open-fd count on base file */ +- STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, +- local->xattr_req); +- return 0; +-} +- +-int +-shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_entrylk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->entrylk_frame; +- lk_local = lk_frame->local; +- local->entrylk_frame = NULL; +- lock = &lk_local->int_entrylk; +- loc = &lock->loc; +- +- STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, loc, +- lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, +- NULL); +- local->int_entrylk.acquired_lock = _gf_false; +- return 0; +-} +- +-int +-shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- 
shard_create_marker_file_under_remove_me(frame, this, +- &local->int_inodelk.loc); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-entrylk handler not defined. This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int +-shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, +- op_errno); +- return 0; +- } +- main_local->int_entrylk.acquired_lock = _gf_true; +- shard_post_entrylk_fop_handler(main_frame, this); +- return 0; +-} +- +-int +-shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, +- uuid_t gfid) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_local_t *entrylk_local = NULL; +- shard_entrylk_t *int_entrylk = NULL; +- call_frame_t *entrylk_frame = NULL; +- +- local = frame->local; +- entrylk_frame = create_frame(this, this->ctx->pool); +- if (!entrylk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock marker file"); +- goto err; +- } +- +- entrylk_local = mem_get0(this->local_pool); +- if (!entrylk_local) { +- STACK_DESTROY(entrylk_frame->root); +- goto err; +- } +- +- entrylk_frame->local = entrylk_local; +- entrylk_local->main_frame = frame; +- int_entrylk = &entrylk_local->int_entrylk; +- +- int_entrylk->loc.inode = inode_ref(inode); +- set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); +- local->entrylk_frame = entrylk_frame; +- gf_uuid_unparse(gfid, gfid_str); +- int_entrylk->basename = gf_strdup(gfid_str); +- +- STACK_WIND(entrylk_frame, 
shard_acquire_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, +- int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- +- if (local->prebuf.ia_nlink > 1) { +- gf_msg_debug(this->name, 0, +- "link count on %s > 1:%d, " +- "performing rename()/unlink()", +- local->int_inodelk.loc.path, local->prebuf.ia_nlink); +- if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- else if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- } else { +- gf_msg_debug(this->name, 0, +- "link count on %s = 1, creating " +- "file under .remove_me", +- local->int_inodelk.loc.path); +- local->cleanup_required = _gf_true; +- shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, +- local->prebuf.ia_gfid); +- } +- return 0; +-} +- +-int +-shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- shard_lookup_base_file(frame, this, &local->int_inodelk.loc, +- shard_post_lookup_base_shard_rm_handler); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-inodelk handler not defined. 
This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int +-shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, +- op_errno); +- return 0; +- } +- main_local->int_inodelk.acquired_lock = _gf_true; +- shard_post_inodelk_fop_handler(main_frame, this); +- return 0; +-} +- +-int +-shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) +-{ +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *int_inodelk = NULL; +- +- local = frame->local; +- lk_frame = create_frame(this, this->ctx->pool); +- if (!lk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock base shard"); +- goto err; +- } +- lk_local = mem_get0(this->local_pool); +- if (!lk_local) { +- STACK_DESTROY(lk_frame->root); +- goto err; +- } +- +- lk_frame->local = lk_local; +- lk_local->main_frame = frame; +- int_inodelk = &lk_local->int_inodelk; +- +- int_inodelk->flock.l_len = 0; +- int_inodelk->flock.l_start = 0; +- int_inodelk->domain = this->name; +- int_inodelk->flock.l_type = F_WRLCK; +- loc_copy(&local->int_inodelk.loc, loc); +- set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); +- local->inodelk_frame = lk_frame; +- +- STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, +- &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} +- +-int 
+-shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- if (local->fop == GF_FOP_UNLINK) +- loc = &local->loc; +- else if (local->fop == GF_FOP_RENAME) +- loc = &local->loc2; +- shard_acquire_inodelk(frame, this, loc); +- return 0; +-} +- +-int +-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type); +-int +-shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- return 0; +-} +- +-void +-shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- +- local->dot_shard_rm_loc.inode = inode_find(this->itable, +- priv->dot_shard_rm_gfid); +- if (!local->dot_shard_rm_loc.inode) { +- local->dot_shard_loc.inode = inode_find(this->itable, +- priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_pre_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- } else { +- local->post_res_handler = shard_post_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- } +-} +- +-int +-shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, +- dict_t *xdata) +-{ +- int ret = -1; +- 
uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, loc); +- local->xflag = xflag; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- local->block_size = block_size; +- local->resolver_base_inode = loc->inode; +- local->fop = GF_FOP_UNLINK; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- +- local->resolve_not = _gf_true; +- shard_begin_rm_resolution(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_rename_cbk(frame, this); +- return 0; +-} +- +-int +-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } +- /* Set ctx->refresh to TRUE to force a lookup on disk when +- * shard_lookup_base_file() is called next to refresh the hard link +- * count in ctx. Note that this is applicable only to the case where +- * the rename dst is already existent and sharded. 
+- */ +- if ((local->dst_block_size) && (!local->cleanup_required)) +- shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); +- +- local->prebuf = *buf; +- local->preoldparent = *preoldparent; +- local->postoldparent = *postoldparent; +- local->prenewparent = *prenewparent; +- local->postnewparent = *postnewparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- +- if (local->dst_block_size) { +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- } +- +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- goto err; +- } +- if (local->cleanup_required) +- shard_start_background_deletion(this); +- } +- +- /* Now the base file of src, if sharded, is looked up to gather ia_size +- * and ia_blocks.*/ +- if (local->block_size) { +- local->tmp_loc.inode = inode_new(this->itable); +- gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); +- shard_lookup_base_file(frame, this, &local->tmp_loc, +- shard_post_rename_lookup_handler); +- } else { +- shard_rename_cbk(frame, this); +- } +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int +-shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- /* Save dst base file attributes into postbuf so the information is not +- * lost when it is overwritten after lookup on base file of src in +- * shard_lookup_base_file_cbk(). 
+- */ +- local->postbuf = local->prebuf; +- shard_rename_src_base_file(frame, this); +- return 0; +-} +- +-int +-shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- uint64_t dst_block_size = 0; +- shard_local_t *local = NULL; +- +- if (IA_ISDIR(oldloc->inode->ia_type)) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (newloc->inode) +- ret = shard_inode_ctx_get_block_size(newloc->inode, this, +- &dst_block_size); +- +- /* The following stack_wind covers the case where: +- * a. the src file is not sharded and dst doesn't exist, OR +- * b. the src and dst both exist but are not sharded. +- */ +- if (((!block_size) && (!dst_block_size)) || +- frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->resolver_base_inode = newloc->inode; +- local->fop = GF_FOP_RENAME; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- local->block_size = block_size; +- local->dst_block_size = dst_block_size; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- local->resolve_not = _gf_true; +- +- /* The following if-block covers the case where the dst file exists +- * and is sharded. 
+- */ +- if (local->dst_block_size) { +- shard_begin_rm_resolution(frame, this); +- } else { +- /* The following block covers the case where the dst either doesn't +- * exist or is NOT sharded but the src is sharded. In this case, shard +- * xlator would go ahead and rename src to dst. Once done, it would also +- * lookup the base shard of src to get the ia_size and ia_blocks xattr +- * values. +- */ +- shard_rename_src_base_file(frame, this); +- } +- return 0; +- +-err: +- shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, +- struct iatt *stbuf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret == -1) +- goto unwind; +- +- ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); +- +-unwind: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, +- preparent, postparent, xdata); +- return 0; +-} +- +-int +-shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- local->block_size = priv->block_size; +- +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); +- } +- +- STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, +- xdata); +- return 0; +-err: +- 
shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +-{ +- /* To-Do: Handle open with O_TRUNC under locks */ +- SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); +- return 0; +-} +- +-int +-shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- fd_t *fd, dict_t *xdata) +-{ +- STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- return 0; +-} +- +-int +-shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iovec *vector, +- int32_t count, struct iatt *stbuf, struct iobref *iobref, +- dict_t *xdata) +-{ +- int i = 0; +- int call_count = 0; +- void *address = NULL; +- uint64_t block_num = 0; +- off_t off = 0; +- struct iovec vec = { +- 0, +- }; +- shard_local_t *local = NULL; +- fd_t *anon_fd = cookie; +- shard_inode_ctx_t *ctx = NULL; +- +- local = frame->local; +- +- /* If shard has already seen a failure here before, there is no point +- * in aggregating subsequent reads, so just go to out. 
+- */ +- if (local->op_ret < 0) +- goto out; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } +- +- if (local->op_ret >= 0) +- local->op_ret += op_ret; +- +- shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- block_num = ctx->block_num; +- +- if (block_num == local->first_block) { +- address = local->iobuf->ptr; +- } else { +- /* else +- * address to start writing to = beginning of buffer + +- * number of bytes until end of first block + +- * + block_size times number of blocks +- * between the current block and the first +- */ +- address = (char *)local->iobuf->ptr + +- (local->block_size - (local->offset % local->block_size)) + +- ((block_num - local->first_block - 1) * local->block_size); +- } +- +- for (i = 0; i < count; i++) { +- address = (char *)address + off; +- memcpy(address, vector[i].iov_base, vector[i].iov_len); +- off += vector[i].iov_len; +- } +- +-out: +- if (anon_fd) +- fd_unref(anon_fd); +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- } else { +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, +- &vec, 1, &local->prebuf, local->iobref, +- local->xattr_rsp); +- return 0; +- } +- } +- +- return 0; +-} +- +-int +-shard_readv_do(call_frame_t *frame, xlator_t *this) +-{ +- int i = 0; +- int call_count = 0; +- int last_block = 0; +- int cur_block = 0; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- size_t read_size = 0; +- size_t remaining_size = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = 
local->offset; +- cur_block = local->first_block; +- last_block = local->last_block; +- remaining_size = local->total_size; +- local->call_count = call_count = local->num_blocks; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (fd->flags & O_DIRECT) +- local->flags = O_DIRECT; +- +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, +- 0, NULL, NULL, NULL); +- goto next; +- } +- +- shard_offset = orig_offset % local->block_size; +- read_size = local->block_size - shard_offset; +- if (read_size > remaining_size) +- read_size = remaining_size; +- +- remaining_size -= read_size; +- +- if (cur_block == 0) { +- anon_fd = fd_ref(fd); +- } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, 0, NULL, NULL, NULL); +- goto next; +- } +- } ++int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; + +- STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, anon_fd, read_size, +- shard_offset, local->flags, local->xattr_req); ++ priv = this->private; ++ local = frame->local; + +- orig_offset += read_size; +- next: +- cur_block++; +- i++; +- call_count--; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; ++ } ++ ++ if (local->prebuf.ia_nlink > 1) { ++ gf_msg_debug(this->name, 0, "link count on %s > 1:%d, " ++ "performing rename()/unlink()", ++ local->int_inodelk.loc.path, local->prebuf.ia_nlink); ++ if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ else if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ } else { ++ gf_msg_debug(this->name, 0, "link count on %s = 
1, creating " ++ "file under .remove_me", ++ local->int_inodelk.loc.path); ++ local->cleanup_required = _gf_true; ++ shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, ++ local->prebuf.ia_gfid); ++ } ++ return 0; ++} ++ ++int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_lookup_base_file(frame, this, &local->int_inodelk.loc, ++ shard_post_lookup_base_shard_rm_handler); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-inodelk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_inodelk.acquired_lock = _gf_true; ++ shard_post_inodelk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) { ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *int_inodelk = NULL; ++ ++ local = frame->local; ++ lk_frame = create_frame(this, this->ctx->pool); ++ if (!lk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock base shard"); ++ goto err; ++ } ++ lk_local = mem_get0(this->local_pool); ++ if (!lk_local) { ++ STACK_DESTROY(lk_frame->root); ++ goto err; ++ } ++ ++ lk_frame->local = lk_local; ++ lk_local->main_frame = frame; ++ 
int_inodelk = &lk_local->int_inodelk; ++ ++ int_inodelk->flock.l_len = 0; ++ int_inodelk->flock.l_start = 0; ++ int_inodelk->domain = this->name; ++ int_inodelk->flock.l_type = F_WRLCK; ++ loc_copy(&local->int_inodelk.loc, loc); ++ set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); ++ local->inodelk_frame = lk_frame; ++ ++ STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, ++ &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int shard_block_num = (long)cookie; +- int call_count = 0; +- shard_local_t *local = NULL; ++int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret < 0) { +- if (op_errno == EEXIST) { +- LOCK(&frame->lock); +- { +- local->eexist_count++; +- } +- UNLOCK(&frame->lock); +- } else { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } +- gf_msg_debug(this->name, 0, +- "mknod of shard %d " +- "failed: %s", +- shard_block_num, strerror(op_errno)); +- goto done; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++ } ++ if (local->fop == GF_FOP_UNLINK) ++ loc = &local->loc; ++ else if (local->fop == GF_FOP_RENAME) ++ loc = &local->loc2; ++ shard_acquire_inodelk(frame, this, loc); ++ return 0; ++} + +- shard_link_block_inode(local, shard_block_num, inode, buf); ++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type); ++int 
shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- local->create_count = 0; +- local->post_mknod_handler(frame, this); +- } ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; ++ } ++ shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ return 0; + } + +-int +-shard_common_resume_mknod(call_frame_t *frame, xlator_t *this, +- shard_post_mknod_fop_handler_t post_mknod_handler) +-{ +- int i = 0; +- int shard_idx_iter = 0; +- int last_block = 0; +- int ret = 0; +- int call_count = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- mode_t mode = 0; +- char *bname = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t ctx_tmp = { +- 0, +- }; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- fd_t *fd = NULL; +- loc_t loc = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- +- local = frame->local; +- priv = this->private; +- fd = local->fd; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- call_count = local->call_count = local->create_count; +- local->post_mknod_handler = post_mknod_handler; ++void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- SHARD_SET_ROOT_FS_ID(frame, local); ++ priv = this->private; ++ local = frame->local; + +- ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get inode " +- "ctx for %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); ++ local->dot_shard_rm_loc.inode = ++ 
inode_find(this->itable, priv->dot_shard_rm_gfid); ++ if (!local->dot_shard_rm_loc.inode) { ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_pre_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ } else { ++ local->post_res_handler = shard_post_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ } ++} ++ ++int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ loc_copy(&local->loc, loc); ++ local->xflag = xflag; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ local->block_size = block_size; ++ local->resolver_base_inode = loc->inode; ++ local->fop = GF_FOP_UNLINK; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ ++ local->resolve_not = _gf_true; ++ shard_begin_rm_resolution(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); ++ return 0; ++} + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- shard_idx_iter++; +- i++; +- continue; +- } ++int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) { ++ shard_rename_cbk(frame, this); ++ return 0; ++} + +- if (wind_failed) { +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; + +- shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, +- sizeof(path)); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ local = frame->local; + +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- "on %s, base file gfid = %s", +- bname, uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- 
loc_wipe(&loc); +- dict_unref(xattr_req); +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ /* Set ctx->refresh to TRUE to force a lookup on disk when ++ * shard_lookup_base_file() is called next to refresh the hard link ++ * count in ctx. Note that this is applicable only to the case where ++ * the rename dst is already existent and sharded. ++ */ ++ if ((local->dst_block_size) && (!local->cleanup_required)) ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ ++ local->prebuf = *buf; ++ local->preoldparent = *preoldparent; ++ local->postoldparent = *postoldparent; ++ local->prenewparent = *prenewparent; ++ local->postnewparent = *postnewparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); + +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, &loc, mode, +- ctx_tmp.stat.ia_rdev, 0, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- +- next: +- shard_idx_iter++; +- i++; +- if (!--call_count) +- break; ++ if (local->dst_block_size) { ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } + } + +- return 0; ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ goto err; ++ } ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } ++ ++ /* Now the base file of src, if sharded, is looked up to gather ia_size ++ * and ia_blocks.*/ ++ if (local->block_size) { ++ local->tmp_loc.inode = inode_new(this->itable); ++ gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); ++ shard_lookup_base_file(frame, this, 
&local->tmp_loc, ++ shard_post_rename_lookup_handler); ++ } else { ++ shard_rename_cbk(frame, this); ++ } ++ return 0; + err: +- /* +- * This block is for handling failure in shard_inode_ctx_get_all(). +- * Failures in the while-loop are handled within the loop. +- */ +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- post_mknod_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } + +-int +-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); +- +-int +-shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++int shard_post_lookup_dst_base_file_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ /* Save dst base file attributes into postbuf so the information is not ++ * lost when it is overwritten after lookup on base file of src in ++ * shard_lookup_base_file_cbk(). 
++ */ ++ local->postbuf = local->prebuf; ++ shard_rename_src_base_file(frame, this); ++ return 0; ++} ++ ++int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ uint64_t dst_block_size = 0; ++ shard_local_t *local = NULL; ++ ++ if (IA_ISDIR(oldloc->inode->ia_type)) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } ++ ++ if (newloc->inode) ++ ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size); ++ ++ /* The following stack_wind covers the case where: ++ * a. the src file is not sharded and dst doesn't exist, OR ++ * b. the src and dst both exist but are not sharded. ++ */ ++ if (((!block_size) && (!dst_block_size)) || ++ frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->resolver_base_inode = newloc->inode; ++ local->fop = GF_FOP_RENAME; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ local->block_size = block_size; ++ local->dst_block_size = dst_block_size; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ local->resolve_not = _gf_true; ++ ++ /* The following if-block covers the case where the dst file exists ++ * and is sharded. 
++ */ ++ if (local->dst_block_size) { ++ shard_begin_rm_resolution(frame, this); ++ } else { ++ /* The following block covers the case where the dst either doesn't ++ * exist or is NOT sharded but the src is sharded. In this case, shard ++ * xlator would go ahead and rename src to dst. Once done, it would also ++ * lookup the base shard of src to get the ia_size and ia_blocks xattr ++ * values. ++ */ ++ shard_rename_src_base_file(frame, this); ++ } ++ return 0; ++ ++err: ++ shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int ret = -1; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ if (op_ret == -1) ++ goto unwind; + +- if (!local->eexist_count) { +- shard_readv_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_readv_handler); +- } +- return 0; ++ ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, ++ SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); ++ ++unwind: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ return 0; + } + +-int +-shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, 
++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (local->op_ret < 0) { +- if (local->op_errno != ENOENT) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } else { +- struct iovec vec = { +- 0, +- }; +- +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, +- &local->prebuf, local->iobref, NULL); +- return 0; +- } +- } ++ frame->local = local; ++ local->block_size = priv->block_size; + +- if (local->call_count) { +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } + +- return 0; +-} ++ STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, ++ xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { ++ /* To-Do: Handle open with O_TRUNC under locks */ ++ SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); ++ return 0; ++} ++ ++int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ fd_t *fd, dict_t *xdata) { ++ STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); ++ return 0; ++} ++ ++int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iovec *vector, ++ 
int32_t count, struct iatt *stbuf, struct iobref *iobref, ++ dict_t *xdata) { ++ int i = 0; ++ int call_count = 0; ++ void *address = NULL; ++ uint64_t block_num = 0; ++ off_t off = 0; ++ struct iovec vec = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ fd_t *anon_fd = cookie; ++ shard_inode_ctx_t *ctx = NULL; ++ ++ local = frame->local; ++ ++ /* If shard has already seen a failure here before, there is no point ++ * in aggregating subsequent reads, so just go to out. ++ */ ++ if (local->op_ret < 0) ++ goto out; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ if (local->op_ret >= 0) ++ local->op_ret += op_ret; + +-int +-shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- struct iobuf *iobuf = NULL; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; ++ shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ block_num = ctx->block_num; ++ ++ if (block_num == local->first_block) { ++ address = local->iobuf->ptr; ++ } else { ++ /* else ++ * address to start writing to = beginning of buffer + ++ * number of bytes until end of first block + ++ * + block_size times number of blocks ++ * between the current block and the first ++ */ ++ address = (char *)local->iobuf->ptr + ++ (local->block_size - (local->offset % local->block_size)) + ++ ((block_num - local->first_block - 1) * local->block_size); ++ } + +- priv = this->private; +- local = frame->local; ++ for (i = 0; i < count; i++) { ++ address = (char *)address + off; ++ memcpy(address, vector[i].iov_base, vector[i].iov_len); ++ off += vector[i].iov_len; ++ } + ++out: ++ if (anon_fd) ++ fd_unref(anon_fd); ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ 
local->op_errno); ++ } else { ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1, ++ &local->prebuf, local->iobref, local->xattr_rsp); ++ return 0; ++ } ++ } ++ ++ return 0; ++} ++ ++int shard_readv_do(call_frame_t *frame, xlator_t *this) { ++ int i = 0; ++ int call_count = 0; ++ int last_block = 0; ++ int cur_block = 0; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ size_t read_size = 0; ++ size_t remaining_size = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ cur_block = local->first_block; ++ last_block = local->last_block; ++ remaining_size = local->total_size; ++ local->call_count = call_count = local->num_blocks; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (fd->flags & O_DIRECT) ++ local->flags = O_DIRECT; ++ ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0, ++ NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_offset = orig_offset % local->block_size; ++ read_size = local->block_size - shard_offset; ++ if (read_size > remaining_size) ++ read_size = remaining_size; ++ ++ remaining_size -= read_size; ++ ++ if (cur_block == 0) { ++ anon_fd = fd_ref(fd); ++ } else { ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL, ++ 0, NULL, NULL, NULL); ++ goto next; ++ } + } + +- if (local->offset >= local->prebuf.ia_size) { +- /* If the read is being performed past the end of the file, +- * unwind the FOP with 0 bytes read as status. 
+- */ +- struct iovec vec = { +- 0, +- }; +- +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); +- if (!iobuf) +- goto err; +- +- vec.iov_base = iobuf->ptr; +- vec.iov_len = 0; +- local->iobref = iobref_new(); +- iobref_add(local->iobref, iobuf); +- iobuf_unref(iobuf); +- +- SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, +- local->iobref, NULL); +- return 0; +- } ++ STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, anon_fd, read_size, ++ shard_offset, local->flags, local->xattr_req); ++ ++ orig_offset += read_size; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; ++} + +- local->first_block = get_lowest_block(local->offset, local->block_size); ++int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int shard_block_num = (long)cookie; ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ if (op_errno == EEXIST) { ++ LOCK(&frame->lock); ++ { local->eexist_count++; } ++ UNLOCK(&frame->lock); ++ } else { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, 0, "mknod of shard %d " ++ "failed: %s", ++ shard_block_num, strerror(op_errno)); ++ goto done; ++ } + +- local->total_size = local->req_size; ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +- local->last_block = get_highest_block(local->offset, local->total_size, +- local->block_size); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local->create_count = 0; ++ local->post_mknod_handler(frame, this); ++ } ++ ++ return 0; ++} ++ ++int shard_common_resume_mknod( ++ call_frame_t *frame, xlator_t *this, ++ shard_post_mknod_fop_handler_t post_mknod_handler) { ++ 
int i = 0; ++ int shard_idx_iter = 0; ++ int last_block = 0; ++ int ret = 0; ++ int call_count = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ mode_t mode = 0; ++ char *bname = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t ctx_tmp = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ fd_t *fd = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ fd = local->fd; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ call_count = local->call_count = local->create_count; ++ local->post_mknod_handler = post_mknod_handler; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get inode " ++ "ctx for %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); + +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = local->loc.inode; ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ shard_idx_iter++; ++ i++; ++ continue; ++ } + +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ if (wind_failed) { ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; ++ } + +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); +- if (!iobuf) +- goto err; ++ shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, ++ sizeof(path)); + +- local->iobref = iobref_new(); +- if (!local->iobref) { +- iobuf_unref(iobuf); +- goto err; ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ 
local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; + } + +- if (iobref_add(local->iobref, iobuf) != 0) { +- iobuf_unref(iobuf); +- goto err; ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ "on %s, base file gfid = %s", ++ bname, uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; + } + +- memset(iobuf->ptr, 0, local->total_size); +- iobuf_unref(iobuf); +- local->iobuf = iobuf; ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_readv_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, &loc, mode, ++ ctx_tmp.stat.ia_rdev, 0, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ ++ next: ++ shard_idx_iter++; ++ i++; ++ if (!--call_count) ++ break; ++ } ++ ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); +- return 0; 
++ /* ++ * This block is for handling failure in shard_inode_ctx_get_all(). ++ * Failures in the while-loop are handled within the loop. ++ */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ post_mknod_handler(frame, this); ++ return 0; + } + +-int +-shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, uint32_t flags, dict_t *xdata) +-{ +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++int shard_post_lookup_shards_readv_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. +- */ +- STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, +- xdata); +- return 0; +- } ++ local = frame->local; + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } + +- frame->local = local; ++ return 0; ++} + +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->offset = offset; +- local->req_size = size; +- local->flags = flags; +- local->fop = GF_FOP_READ; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local = frame->local; + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_readv_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ if (!local->eexist_count) { ++ shard_readv_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_readv_handler); ++ } ++ return 0; + } + +-int +-shard_common_inode_write_post_update_size_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); ++ if (local->op_ret < 0) { ++ if (local->op_errno != ENOENT) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } else { +- shard_common_inode_write_success_unwind(local->fop, frame, +- local->written_size); ++ struct iovec vec = { ++ 0, ++ }; ++ ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, ++ &local->prebuf, local->iobref, NULL); ++ return 0; + } +- return 0; +-} ++ } + +-static gf_boolean_t +-shard_is_appending_write(shard_local_t *local) +-{ +- if (local->fop != GF_FOP_WRITE) +- return _gf_false; +- if (local->flags & O_APPEND) +- return 
_gf_true; +- if (local->fd->flags & O_APPEND) +- return _gf_true; +- return _gf_false; ++ if (local->call_count) { ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } ++ ++ return 0; + } + +-int +-__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ struct iobuf *iobuf = NULL; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ if (local->offset >= local->prebuf.ia_size) { ++ /* If the read is being performed past the end of the file, ++ * unwind the FOP with 0 bytes read as status. 
++ */ ++ struct iovec vec = { ++ 0, ++ }; + +- if (shard_is_appending_write(local)) { +- local->delta_size = local->total_size; +- } else if (local->offset + local->total_size > ctx->stat.ia_size) { +- local->delta_size = (local->offset + local->total_size) - +- ctx->stat.ia_size; +- } else { +- local->delta_size = 0; +- } +- ctx->stat.ia_size += (local->delta_size); +- local->postbuf = ctx->stat; ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); ++ if (!iobuf) ++ goto err; ++ ++ vec.iov_base = iobuf->ptr; ++ vec.iov_len = 0; ++ local->iobref = iobref_new(); ++ iobref_add(local->iobref, iobuf); ++ iobuf_unref(iobuf); + ++ SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, ++ local->iobref, NULL); + return 0; +-} ++ } + +-int +-shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; ++ local->first_block = get_lowest_block(local->offset, local->block_size); + +- LOCK(&inode->lock); +- { +- ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); +- } +- UNLOCK(&inode->lock); ++ local->total_size = local->req_size; + +- return ret; +-} ++ local->last_block = ++ get_highest_block(local->offset, local->total_size, local->block_size); + +-int +-shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, struct iatt *pre, +- struct iatt *post, dict_t *xdata) +-{ +- int call_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- glusterfs_fop_t fop = 0; ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->resolver_base_inode = local->loc.inode; + +- local = frame->local; +- fop = local->fop; ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->written_size += 
op_ret; +- GF_ATOMIC_ADD(local->delta_blocks, +- post->ia_blocks - pre->ia_blocks); +- local->delta_size += (post->ia_size - pre->ia_size); +- shard_inode_ctx_set(local->fd->inode, this, post, 0, +- SHARD_MASK_TIMES); +- if (local->fd->inode != anon_fd->inode) +- shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, +- anon_fd->inode); +- } +- } +- UNLOCK(&frame->lock); ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); ++ if (!iobuf) ++ goto err; + +- if (anon_fd) +- fd_unref(anon_fd); ++ local->iobref = iobref_new(); ++ if (!local->iobref) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(fop, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); +- local->hole_size = 0; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- shard_update_file_size( +- frame, this, local->fd, NULL, +- shard_common_inode_write_post_update_size_handler); +- } +- } ++ if (iobref_add(local->iobref, iobuf) != 0) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- return 0; ++ memset(iobuf->ptr, 0, local->total_size); ++ iobuf_unref(iobuf); ++ local->iobuf = iobuf; ++ ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = ++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_post_resolve_readv_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ 
off_t offset, uint32_t flags, dict_t *xdata) { ++ int ret = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ /* block_size = 0 means that the file was created before ++ * sharding was enabled on the volume. ++ */ ++ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->offset = offset; ++ local->req_size = size; ++ local->flags = flags; ++ local->fop = GF_FOP_READ; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_readv_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vec, int count, off_t shard_offset, +- size_t size) +-{ +- shard_local_t *local = NULL; ++int shard_common_inode_write_post_update_size_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- switch (local->fop) { +- case GF_FOP_WRITE: +- STACK_WIND_COOKIE( +- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset, +- local->flags, local->iobref, local->xattr_req); +- break; +- case GF_FOP_FALLOCATE: +- STACK_WIND_COOKIE( +- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fallocate, fd, local->flags, +- shard_offset, size, local->xattr_req); +- break; +- case GF_FOP_ZEROFILL: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->zerofill, fd, +- shard_offset, size, local->xattr_req); +- break; +- case GF_FOP_DISCARD: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->discard, fd, +- shard_offset, size, local->xattr_req); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", local->fop); +- break; +- } +- return 0; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_common_inode_write_success_unwind(local->fop, frame, ++ local->written_size); ++ } ++ 
return 0; + } + +-int +-shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) +-{ +- int i = 0; +- int count = 0; +- int call_count = 0; +- int last_block = 0; +- uint32_t cur_block = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- struct iovec *vec = NULL; +- gf_boolean_t wind_failed = _gf_false; +- gf_boolean_t odirect = _gf_false; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- off_t vec_offset = 0; +- size_t remaining_size = 0; +- size_t shard_write_size = 0; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = local->offset; +- remaining_size = local->total_size; +- cur_block = local->first_block; +- local->call_count = call_count = local->num_blocks; +- last_block = local->last_block; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC +- " into " +- "dict: %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- local->call_count = 1; +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, +- ENOMEM, NULL, NULL, NULL); +- return 0; +- } ++static gf_boolean_t shard_is_appending_write(shard_local_t *local) { ++ if (local->fop != GF_FOP_WRITE) ++ return _gf_false; ++ if (local->flags & O_APPEND) ++ return _gf_true; ++ if (local->fd->flags & O_APPEND) ++ return _gf_true; ++ return _gf_false; ++} + +- if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) +- odirect = _gf_true; ++int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } ++ ret = __inode_ctx_get(inode, 
this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- shard_offset = orig_offset % local->block_size; +- shard_write_size = local->block_size - shard_offset; +- if (shard_write_size > remaining_size) +- shard_write_size = remaining_size; +- +- remaining_size -= shard_write_size; +- +- if (local->fop == GF_FOP_WRITE) { +- count = iov_subset(local->vector, local->count, vec_offset, +- vec_offset + shard_write_size, NULL); +- +- vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); +- if (!vec) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- GF_FREE(vec); +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, +- -1, ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- count = iov_subset(local->vector, local->count, vec_offset, +- vec_offset + shard_write_size, vec); +- } ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- if (cur_block == 0) { +- anon_fd = fd_ref(fd); +- } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- GF_FREE(vec); +- shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, +- this, -1, ENOMEM, NULL, NULL, +- NULL); +- goto next; +- } +- +- if (local->fop == GF_FOP_WRITE) { +- if (odirect) +- local->flags = O_DIRECT; +- else +- local->flags = GF_ANON_FD_FLAGS; +- } +- } ++ if (shard_is_appending_write(local)) { ++ local->delta_size = local->total_size; ++ } else if (local->offset + local->total_size > ctx->stat.ia_size) { ++ local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size; ++ } else { ++ local->delta_size = 0; ++ } ++ ctx->stat.ia_size += (local->delta_size); ++ local->postbuf = ctx->stat; + +- shard_common_inode_write_wind(frame, this, anon_fd, vec, count, +- shard_offset, shard_write_size); +- if (vec) +- vec_offset += shard_write_size; +- orig_offset += shard_write_size; +- GF_FREE(vec); +- vec = NULL; +- next: +- cur_block++; +- i++; +- call_count--; 
+- } +- return 0; ++ return 0; + } + +-int +-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, +- xlator_t *this); ++int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ ++ LOCK(&inode->lock); ++ { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); } ++ UNLOCK(&inode->lock); + +-int +-shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ return ret; ++} + +- local = frame->local; ++int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *pre, ++ struct iatt *post, dict_t *xdata) { ++ int call_count = 0; ++ fd_t *anon_fd = cookie; ++ shard_local_t *local = NULL; ++ glusterfs_fop_t fop = 0; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ local = frame->local; ++ fop = local->fop; + +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, +- shard_common_inode_write_post_mknod_handler); ++ LOCK(&frame->lock); ++ { ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; + } else { +- shard_common_inode_write_do(frame, this); ++ local->written_size += op_ret; ++ GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks); ++ local->delta_size += (post->ia_size - pre->ia_size); ++ shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES); ++ if (local->fd->inode != anon_fd->inode) ++ shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, ++ anon_fd->inode); ++ } ++ } ++ UNLOCK(&frame->lock); ++ ++ if (anon_fd) ++ fd_unref(anon_fd); ++ ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno); ++ } else { ++ 
shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); ++ local->hole_size = 0; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ shard_update_file_size(frame, this, local->fd, NULL, ++ shard_common_inode_write_post_update_size_handler); + } ++ } + +- return 0; ++ return 0; + } + +-int +-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vec, int count, ++ off_t shard_offset, size_t size) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ switch (local->fop) { ++ case GF_FOP_WRITE: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, ++ vec, count, shard_offset, local->flags, local->iobref, ++ local->xattr_req); ++ break; ++ case GF_FOP_FALLOCATE: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, ++ local->flags, shard_offset, size, local->xattr_req); ++ break; ++ case GF_FOP_ZEROFILL: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, ++ shard_offset, size, local->xattr_req); ++ break; ++ case GF_FOP_DISCARD: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, ++ shard_offset, size, local->xattr_req); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", local->fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) { ++ int i = 0; ++ int count = 0; ++ int call_count = 0; ++ int last_block = 0; ++ uint32_t 
cur_block = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ struct iovec *vec = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ gf_boolean_t odirect = _gf_false; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ off_t vec_offset = 0; ++ size_t remaining_size = 0; ++ size_t shard_write_size = 0; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ remaining_size = local->total_size; ++ cur_block = local->first_block; ++ local->call_count = call_count = local->num_blocks; ++ last_block = local->last_block; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into " ++ "dict: %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ local->call_count = 1; ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ return 0; ++ } + +- if (!local->eexist_count) { +- shard_common_inode_write_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards( +- frame, this, local->loc.inode, +- shard_common_inode_write_post_lookup_shards_handler); ++ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) ++ odirect = _gf_true; ++ ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } + +- return 0; +-} ++ shard_offset = orig_offset % local->block_size; ++ shard_write_size = local->block_size - shard_offset; ++ if (shard_write_size > remaining_size) ++ shard_write_size = remaining_size; + +-int +-shard_common_inode_write_post_resolve_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ remaining_size -= shard_write_size; + +- local = frame->local; 
++ if (local->fop == GF_FOP_WRITE) { ++ count = iov_subset(local->vector, local->count, vec_offset, ++ vec_offset + shard_write_size, NULL); + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); ++ if (!vec) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ GF_FREE(vec); ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } ++ count = iov_subset(local->vector, local->count, vec_offset, ++ vec_offset + shard_write_size, vec); + } + +- if (local->call_count) { +- shard_common_lookup_shards( +- frame, this, local->resolver_base_inode, +- shard_common_inode_write_post_lookup_shards_handler); ++ if (cur_block == 0) { ++ anon_fd = fd_ref(fd); + } else { +- shard_common_inode_write_do(frame, this); +- } ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ GF_FREE(vec); ++ shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } + +- return 0; ++ if (local->fop == GF_FOP_WRITE) { ++ if (odirect) ++ local->flags = O_DIRECT; ++ else ++ local->flags = GF_ANON_FD_FLAGS; ++ } ++ } ++ ++ shard_common_inode_write_wind(frame, this, anon_fd, vec, count, ++ shard_offset, shard_write_size); ++ if (vec) ++ vec_offset += shard_write_size; ++ orig_offset += shard_write_size; ++ GF_FREE(vec); ++ vec = NULL; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; + } + +-int +-shard_common_inode_write_post_lookup_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- shard_priv_t *priv = this->private; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- 
local->postbuf = local->prebuf; +- +- /*Adjust offset to EOF so that correct shard is chosen for append*/ +- if (shard_is_appending_write(local)) +- local->offset = local->prebuf.ia_size; +- +- local->first_block = get_lowest_block(local->offset, local->block_size); +- local->last_block = get_highest_block(local->offset, local->total_size, +- local->block_size); +- local->num_blocks = local->last_block - local->first_block + 1; +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } ++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, ++ xlator_t *this); + +- gf_msg_trace(this->name, 0, +- "%s: gfid=%s first_block=%" PRIu32 +- " " +- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64 +- " total_size=%zu flags=%" PRId32 "", +- gf_fop_list[local->fop], +- uuid_utoa(local->resolver_base_inode->gfid), +- local->first_block, local->last_block, local->num_blocks, +- local->offset, local->total_size, local->flags); ++int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ local = frame->local; + +- if (!local->dot_shard_loc.inode) { +- /*change handler*/ +- shard_mkdir_internal_dir(frame, this, +- shard_common_inode_write_post_resolve_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- /*change handler*/ +- local->post_res_handler = shard_common_inode_write_post_resolve_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; +-} +- +-int +-shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- 
struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ } + +- local = frame->local; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, ++ shard_common_inode_write_post_mknod_handler); ++ } else { ++ shard_common_inode_write_do(frame, this); ++ } + +- SHARD_UNSET_ROOT_FS_ID(frame, local); ++ return 0; ++} + +- if (op_ret == -1) { +- if (op_errno != EEXIST) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } else { +- gf_msg_debug(this->name, 0, +- "mkdir on %s failed " +- "with EEXIST. Attempting lookup now", +- shard_internal_dir_string(type)); +- shard_lookup_internal_dir(frame, this, local->post_res_handler, +- type); +- return 0; +- } +- } ++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; +-unwind: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; +-} ++ local = frame->local; + +-int +-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xattr_req = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- +- local->post_res_handler = handler; +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- switch (type) { +- case 
SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- xattr_req = dict_new(); +- if (!xattr_req) +- goto err; ++ if (!local->eexist_count) { ++ shard_common_inode_write_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards( ++ frame, this, local->loc.inode, ++ shard_common_inode_write_post_lookup_shards_handler); ++ } + +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; ++ return 0; ++} + +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid-req for %s", +- shard_internal_dir_string(type)); +- goto err; +- } else { +- free_gfid = _gf_false; +- } ++int shard_common_inode_write_post_resolve_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- SHARD_SET_ROOT_FS_ID(frame, local); ++ local = frame->local; + +- STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, +- 0755, 0, xattr_req); +- dict_unref(xattr_req); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } + +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- if (free_gfid) +- GF_FREE(gfid); +- handler(frame, this); +- return 0; +-} ++ if (local->call_count) { ++ shard_common_lookup_shards( ++ frame, this, local->resolver_base_inode, ++ 
shard_common_inode_write_post_lookup_shards_handler); ++ } else { ++ shard_common_inode_write_do(frame, this); ++ } + +-int +-shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- /* To-Do: Wind flush on all shards of the file */ +- SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); +- return 0; ++ return 0; + } + +-int +-shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +-{ +- STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->flush, fd, xdata); ++int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = frame->local; ++ shard_priv_t *priv = this->private; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; +-} ++ } + +-int +-__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++ local->postbuf = local->prebuf; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ /*Adjust offset to EOF so that correct shard is chosen for append*/ ++ if (shard_is_appending_write(local)) ++ local->offset = local->prebuf.ia_size; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ local->first_block = get_lowest_block(local->offset, local->block_size); ++ local->last_block = ++ get_highest_block(local->offset, local->total_size, local->block_size); ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + +- local->postbuf.ia_ctime = ctx->stat.ia_ctime; +- local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; +- local->postbuf.ia_atime = 
ctx->stat.ia_atime; +- local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; +- local->postbuf.ia_mtime = ctx->stat.ia_mtime; +- local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; ++ gf_msg_trace( ++ this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " " ++ "last_block=%" PRIu32 " num_blocks=%" PRIu32 ++ " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", ++ gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), ++ local->first_block, local->last_block, local->num_blocks, local->offset, ++ local->total_size, local->flags); + +- return 0; +-} ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + +-int +-shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = 0; ++ if (!local->dot_shard_loc.inode) { ++ /*change handler*/ ++ shard_mkdir_internal_dir(frame, this, ++ shard_common_inode_write_post_resolve_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ /*change handler*/ ++ local->post_res_handler = shard_common_inode_write_post_resolve_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; ++} + +- LOCK(&inode->lock); +- { +- ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); +- } +- UNLOCK(&inode->lock); ++int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ ++ local = frame->local; ++ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ ++ if (op_ret == -1) { ++ if (op_errno != EEXIST) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } else { ++ gf_msg_debug(this->name, 0, "mkdir on %s failed " ++ "with EEXIST. 
Attempting lookup now", ++ shard_internal_dir_string(type)); ++ shard_lookup_internal_dir(frame, this, local->post_res_handler, type); ++ return 0; ++ } ++ } ++ ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); ++ } else { ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; ++unwind: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; ++} ++ ++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xattr_req = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ local->post_res_handler = handler; ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) ++ goto err; ++ ++ ret = shard_init_internal_dir_loc(this, local, type); ++ if (ret) ++ goto err; ++ ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid-req for %s", shard_internal_dir_string(type)); ++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, ++ 
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, ++ 0755, 0, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + +- return ret; ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ if (free_gfid) ++ GF_FREE(gfid); ++ handler(frame, this); ++ return 0; + } + +-int +-shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +- struct iatt *postbuf, dict_t *xdata) +-{ +- int call_count = 0; +- uint64_t fsync_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- inode_t *base_inode = NULL; +- gf_boolean_t unref_shard_inode = _gf_false; ++int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ /* To-Do: Wind flush on all shards of the file */ ++ SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); ++ return 0; ++} + +- local = frame->local; +- base_inode = local->fd->inode; ++int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ++ STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->flush, fd, xdata); ++ return 0; ++} + +- if (local->op_ret < 0) +- goto out; ++int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- UNLOCK(&frame->lock); +- goto out; +- } +- shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, +- SHARD_MASK_TIMES); +- } +- UNLOCK(&frame->lock); +- fd_ctx_get(anon_fd, this, &fsync_count); +-out: +- if (anon_fd && (base_inode != anon_fd->inode)) { +- LOCK(&base_inode->lock); +- LOCK(&anon_fd->inode->lock); +- { +- __shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- 
__shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (op_ret == 0) +- ctx->fsync_needed -= fsync_count; +- GF_ASSERT(ctx->fsync_needed >= 0); +- if (ctx->fsync_needed != 0) { +- list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); +- base_ictx->fsync_count++; +- } else { +- unref_shard_inode = _gf_true; +- } +- } +- UNLOCK(&anon_fd->inode->lock); +- UNLOCK(&base_inode->lock); +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- if (unref_shard_inode) +- inode_unref(anon_fd->inode); +- if (anon_fd) +- fd_unref(anon_fd); ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- call_count = shard_call_count_return(frame); +- if (call_count != 0) +- return 0; ++ local->postbuf.ia_ctime = ctx->stat.ia_ctime; ++ local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; ++ local->postbuf.ia_atime = ctx->stat.ia_atime; ++ local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; ++ local->postbuf.ia_mtime = ctx->stat.ia_mtime; ++ local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_get_timestamps_from_inode_ctx(local, base_inode, this); +- SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } +- return 0; ++ return 0; + } + +-int +-shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- int call_count = 0; +- int fsync_count = 0; +- fd_t *anon_fd = NULL; +- inode_t *base_inode = NULL; +- shard_local_t *local = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *iter = NULL; +- struct list_head copy = { +- 0, +- }; +- shard_inode_ctx_t *tmp = NULL; ++int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = 0; + +- local = frame->local; +- base_inode = local->fd->inode; +- local->postbuf = local->prebuf; +- 
INIT_LIST_HEAD(©); ++ LOCK(&inode->lock); ++ { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); } ++ UNLOCK(&inode->lock); + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ return ret; ++} + ++int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) { ++ int call_count = 0; ++ uint64_t fsync_count = 0; ++ fd_t *anon_fd = cookie; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ inode_t *base_inode = NULL; ++ gf_boolean_t unref_shard_inode = _gf_false; ++ ++ local = frame->local; ++ base_inode = local->fd->inode; ++ ++ if (local->op_ret < 0) ++ goto out; ++ ++ LOCK(&frame->lock); ++ { ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ UNLOCK(&frame->lock); ++ goto out; ++ } ++ shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES); ++ } ++ UNLOCK(&frame->lock); ++ fd_ctx_get(anon_fd, this, &fsync_count); ++out: ++ if (anon_fd && (base_inode != anon_fd->inode)) { + LOCK(&base_inode->lock); ++ LOCK(&anon_fd->inode->lock); + { +- __shard_inode_ctx_get(base_inode, this, &ctx); +- list_splice_init(&ctx->to_fsync_list, ©); +- call_count = ctx->fsync_count; +- ctx->fsync_count = 0; +- } ++ __shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (op_ret == 0) ++ ctx->fsync_needed -= fsync_count; ++ GF_ASSERT(ctx->fsync_needed >= 0); ++ if (ctx->fsync_needed != 0) { ++ list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); ++ base_ictx->fsync_count++; ++ } else { ++ unref_shard_inode = _gf_true; ++ } ++ } ++ UNLOCK(&anon_fd->inode->lock); + UNLOCK(&base_inode->lock); ++ } ++ ++ if (unref_shard_inode) ++ inode_unref(anon_fd->inode); ++ if (anon_fd) ++ fd_unref(anon_fd); ++ ++ 
call_count = shard_call_count_return(frame); ++ if (call_count != 0) ++ return 0; + +- local->call_count = ++call_count; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_get_timestamps_from_inode_ctx(local, base_inode, this); ++ SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } ++ return 0; ++} ++ ++int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ int call_count = 0; ++ int fsync_count = 0; ++ fd_t *anon_fd = NULL; ++ inode_t *base_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *iter = NULL; ++ struct list_head copy = { ++ 0, ++ }; ++ shard_inode_ctx_t *tmp = NULL; ++ ++ local = frame->local; ++ base_inode = local->fd->inode; ++ local->postbuf = local->prebuf; ++ INIT_LIST_HEAD(©); ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ LOCK(&base_inode->lock); ++ { ++ __shard_inode_ctx_get(base_inode, this, &ctx); ++ list_splice_init(&ctx->to_fsync_list, ©); ++ call_count = ctx->fsync_count; ++ ctx->fsync_count = 0; ++ } ++ UNLOCK(&base_inode->lock); ++ ++ local->call_count = ++call_count; ++ ++ /* Send fsync() on the base shard first */ ++ anon_fd = fd_ref(local->fd); ++ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, ++ local->xattr_req); ++ call_count--; ++ anon_fd = NULL; ++ ++ list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) { ++ list_del_init(&iter->to_fsync_list); ++ fsync_count = 0; ++ shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); ++ GF_ASSERT(fsync_count > 0); ++ anon_fd = fd_anonymous(iter->inode); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ gf_msg(this->name, 
GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ "anon fd to fsync shard"); ++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ continue; ++ } + +- /* Send fsync() on the base shard first */ +- anon_fd = fd_ref(local->fd); ++ ret = fd_ctx_set(anon_fd, this, fsync_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, ++ "Failed to set fd " ++ "ctx for shard inode gfid=%s", ++ uuid_utoa(iter->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ continue; ++ } + STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, + local->xattr_req); + call_count--; +- anon_fd = NULL; +- +- list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) +- { +- list_del_init(&iter->to_fsync_list); +- fsync_count = 0; +- shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); +- GF_ASSERT(fsync_count > 0); +- anon_fd = fd_anonymous(iter->inode); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, +- SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "anon fd to fsync shard"); +- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, NULL, NULL); +- continue; +- } +- +- ret = fd_ctx_set(anon_fd, this, fsync_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, +- "Failed to set fd " +- "ctx for shard inode gfid=%s", +- uuid_utoa(iter->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, NULL, NULL); +- continue; +- } +- STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, +- anon_fd, local->datasync, 
local->xattr_req); +- call_count--; +- } ++ } + +- return 0; ++ return 0; + } + +-int +-shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, +- dict_t *xdata) +-{ +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, ++ dict_t *xdata) { ++ int ret = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); +- return 0; +- } ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); ++ return 0; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->fd = fd_ref(fd); +- local->fop = GF_FOP_FSYNC; +- local->datasync = datasync; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local->fd = fd_ref(fd); ++ local->fop = GF_FOP_FSYNC; ++ local->datasync = datasync; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_fsync_handler); +- return 0; ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_fsync_handler); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, gf_dirent_t *orig_entries, +- dict_t *xdata) +-{ +- gf_dirent_t *entry = NULL; +- gf_dirent_t *tmp = NULL; +- shard_local_t *local = NULL; ++int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, ++ gf_dirent_t *orig_entries, dict_t *xdata) { ++ gf_dirent_t *entry = NULL; ++ gf_dirent_t *tmp = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret < 0) +- goto unwind; ++ if (op_ret < 0) ++ goto unwind; + +- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) +- { +- list_del_init(&entry->list); +- list_add_tail(&entry->list, &local->entries_head.list); ++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { ++ list_del_init(&entry->list); ++ list_add_tail(&entry->list, &local->entries_head.list); + +- if (!entry->dict) +- continue; ++ if (!entry->dict) ++ continue; + +- if (IA_ISDIR(entry->d_stat.ia_type)) +- continue; ++ if (IA_ISDIR(entry->d_stat.ia_type)) ++ continue; + +- 
if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) +- shard_modify_size_and_block_count(&entry->d_stat, entry->dict); +- if (!entry->inode) +- continue; ++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) ++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict); ++ if (!entry->inode) ++ continue; + +- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); +- } +- local->op_ret += op_ret; ++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); ++ } ++ local->op_ret += op_ret; + + unwind: +- if (local->fop == GF_FOP_READDIR) +- SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, +- &local->entries_head, xdata); +- else +- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, +- &local->entries_head, xdata); +- return 0; ++ if (local->fop == GF_FOP_READDIR) ++ SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, ++ &local->entries_head, xdata); ++ else ++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ return 0; + } + +-int32_t +-shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, +- dict_t *xdata) +-{ +- fd_t *fd = NULL; +- gf_dirent_t *entry = NULL; +- gf_dirent_t *tmp = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t last_entry = _gf_false; ++int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ gf_dirent_t *orig_entries, dict_t *xdata) { ++ fd_t *fd = NULL; ++ gf_dirent_t *entry = NULL; ++ gf_dirent_t *tmp = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t last_entry = _gf_false; + +- local = frame->local; +- fd = local->fd; ++ local = frame->local; ++ fd = local->fd; + +- if (op_ret < 0) +- goto unwind; ++ if (op_ret < 0) ++ goto unwind; + +- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) +- { +- if (last_entry) +- last_entry = _gf_false; +- +- if 
(__is_root_gfid(fd->inode->gfid) && +- !(strcmp(entry->d_name, GF_SHARD_DIR))) { +- local->offset = entry->d_off; +- op_ret--; +- last_entry = _gf_true; +- continue; +- } ++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { ++ if (last_entry) ++ last_entry = _gf_false; + +- list_del_init(&entry->list); +- list_add_tail(&entry->list, &local->entries_head.list); ++ if (__is_root_gfid(fd->inode->gfid) && ++ !(strcmp(entry->d_name, GF_SHARD_DIR))) { ++ local->offset = entry->d_off; ++ op_ret--; ++ last_entry = _gf_true; ++ continue; ++ } + +- if (!entry->dict) +- continue; ++ list_del_init(&entry->list); ++ list_add_tail(&entry->list, &local->entries_head.list); + +- if (IA_ISDIR(entry->d_stat.ia_type)) +- continue; ++ if (!entry->dict) ++ continue; + +- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(&entry->d_stat, entry->dict); ++ if (IA_ISDIR(entry->d_stat.ia_type)) ++ continue; + +- if (!entry->inode) +- continue; ++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + +- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); +- } ++ if (!entry->inode) ++ continue; + +- local->op_ret = op_ret; ++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); ++ } + +- if (last_entry) { +- if (local->fop == GF_FOP_READDIR) +- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, +- local->fd, local->readdir_size, local->offset, +- local->xattr_req); +- else +- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, +- local->fd, local->readdir_size, local->offset, +- local->xattr_req); +- return 0; +- } ++ local->op_ret = op_ret; + +-unwind: ++ if (last_entry) { + if (local->fop == GF_FOP_READDIR) +- 
SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, +- &local->entries_head, xdata); ++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdir, local->fd, ++ local->readdir_size, local->offset, local->xattr_req); + else +- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, +- &local->entries_head, xdata); ++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdirp, local->fd, ++ local->readdir_size, local->offset, local->xattr_req); + return 0; +-} ++ } + +-int +-shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, int whichop, dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- goto err; ++unwind: ++ if (local->fop == GF_FOP_READDIR) ++ SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ else ++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ return 0; ++} ++ ++int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, int whichop, dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ goto err; ++ } ++ ++ frame->local = local; ++ ++ local->fd = fd_ref(fd); ++ local->fop = whichop; ++ local->readdir_size = size; ++ INIT_LIST_HEAD(&local->entries_head.list); ++ local->list_inited = _gf_true; ++ ++ if (whichop == GF_FOP_READDIR) { ++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); ++ } else { ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_log(this->name, GF_LOG_WARNING, ++ "Failed to set " ++ "dict value: key:%s, directory gfid=%s", ++ GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); ++ goto err; + } + +- frame->local = local; +- +- local->fd = fd_ref(fd); +- local->fop = whichop; +- local->readdir_size = size; +- INIT_LIST_HEAD(&local->entries_head.list); +- local->list_inited = _gf_true; +- +- if (whichop == GF_FOP_READDIR) { +- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); +- } else { +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_log(this->name, GF_LOG_WARNING, +- "Failed to set " +- "dict value: key:%s, directory gfid=%s", +- GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, +- local->xattr_req); +- } ++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, ++ local->xattr_req); ++ } + +- return 0; ++ return 0; + + err: +- STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); +- return 0; ++ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); ++ return 0; + } + +-int32_t +-shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, dict_t *xdata) +-{ +- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); +- return 0; ++int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ size_t size, off_t offset, dict_t *xdata) { ++ 
shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); ++ return 0; + } + +-int32_t +-shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, dict_t *xdata) +-{ +- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); +- return 0; ++int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ size_t size, off_t offset, dict_t *xdata) { ++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); ++ return 0; + } + +-int32_t +-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); ++ } + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fremovexattr(call_frame_t 
*frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); ++ } + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- if (op_ret < 0) +- goto unwind; ++int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ if (op_ret < 0) ++ goto unwind; + +- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); ++ } + + unwind: +- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); +- return 0; ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); ++ return 0; + } + +-int32_t 
+-shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, +- dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && +- (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { +- op_errno = ENODATA; +- goto out; +- } ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && ++ (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { ++ op_errno = ENODATA; ++ goto out; ++ } + +- STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); +- return 0; ++ STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- if (op_ret < 0) +- goto unwind; ++int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ if (op_ret < 0) ++ goto unwind; + +- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); ++ } + + unwind: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); +- return 0; ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); ++ return 0; + } + +-int32_t +-shard_getxattr(call_frame_t *frame, xlator_t 
*this, loc_t *loc, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && +- (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { +- op_errno = ENODATA; +- goto out; +- } ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && ++ (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { ++ op_errno = ENODATA; ++ goto out; ++ } + +- STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); +- return 0; ++ STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, +- int32_t flags, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, ++ fd, dict, flags, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); ++ 
return 0; + } + +-int32_t +-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, +- int32_t flags, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, +- loc, dict, flags, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, ++ loc, dict, flags, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->fop == GF_FOP_SETATTR) { +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, +- SHARD_LOOKUP_MASK); +- SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } else if (local->fop == GF_FOP_FSETATTR) { +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, +- SHARD_LOOKUP_MASK); +- SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } +- +- return 0; +-} ++int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +-int +-shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +- struct 
iatt *postbuf, dict_t *xdata) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ local = frame->local; + +- local->prebuf = *prebuf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- local->postbuf = *postbuf; +- local->postbuf.ia_size = local->prebuf.ia_size; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ if (local->fop == GF_FOP_SETATTR) { ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, ++ SHARD_LOOKUP_MASK); ++ SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } else if (local->fop == GF_FOP_FSETATTR) { ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, ++ SHARD_LOOKUP_MASK); ++ SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } + +-unwind: +- local->handler(frame, this); +- return 0; ++ return 0; + } + +-int +-shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- struct iatt *stbuf, int32_t valid, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); +- return 0; +- } +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- 
STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) { ++ shard_local_t *local = NULL; + +- frame->local = local; ++ local = frame->local; + +- local->handler = shard_post_setattr_handler; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_SETATTR; +- loc_copy(&local->loc, loc); ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ local->prebuf = *prebuf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ local->postbuf = *postbuf; ++ local->postbuf.ia_size = local->prebuf.ia_size; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; + +- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, +- local->xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); +- return 0; ++unwind: ++ local->handler(frame, this); ++ return 0; + } + +-int +-shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iatt *stbuf, int32_t valid, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); +- return 0; +- } ++int 
shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { ++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); +- return 0; +- } ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->handler = shard_post_setattr_handler; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FSETATTR; +- local->fd = fd_ref(fd); ++ local->handler = shard_post_setattr_handler; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_SETATTR; ++ loc_copy(&local->loc, loc); + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); + +- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, +- local->xattr_req); +- return 0; ++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, ++ local->xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, +- glusterfs_fop_t fop, fd_t *fd, +- struct iovec *vector, int32_t count, +- off_t offset, uint32_t flags, size_t len, +- struct iobref *iobref, dict_t *xdata) +-{ +- int ret = 0; +- int i = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto out; +- } ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. 
+- */ +- switch (fop) { +- case GF_FOP_WRITE: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->writev, fd, vector, +- count, offset, flags, iobref, xdata); +- break; +- case GF_FOP_FALLOCATE: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fallocate, fd, flags, +- offset, len, xdata); +- break; +- case GF_FOP_ZEROFILL: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->zerofill, fd, offset, +- len, xdata); +- break; +- case GF_FOP_DISCARD: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->discard, fd, offset, +- len, xdata); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +- } ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto out; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- frame->local = local; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto out; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto out; +- +- if (vector) { +- local->vector = iov_dup(vector, count); +- if (!local->vector) +- goto out; +- for (i = 0; i < count; i++) +- local->total_size += vector[i].iov_len; +- local->count = count; +- } else { +- local->total_size = len; +- } ++ frame->local = local; + +- local->fop = fop; +- local->offset = offset; +- local->flags = flags; +- if (iobref) +- local->iobref = iobref_ref(iobref); +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->resolver_base_inode = local->fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->handler = shard_post_setattr_handler; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FSETATTR; ++ local->fd = fd_ref(fd); + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_common_inode_write_post_lookup_handler); +- return 0; ++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, ++ local->xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, ++ glusterfs_fop_t fop, fd_t *fd, ++ struct iovec *vector, int32_t count, ++ off_t offset, uint32_t flags, size_t len, ++ struct iobref *iobref, dict_t *xdata) { ++ int ret = 0; ++ int i = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto out; ++ } ++ ++ if 
(!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ /* block_size = 0 means that the file was created before ++ * sharding was enabled on the volume. ++ */ ++ switch (fop) { ++ case GF_FOP_WRITE: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, ++ fd, vector, count, offset, flags, iobref, xdata); ++ break; ++ case GF_FOP_FALLOCATE: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fallocate, fd, flags, offset, ++ len, xdata); ++ break; ++ case GF_FOP_ZEROFILL: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, ++ xdata); ++ break; ++ case GF_FOP_DISCARD: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto out; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto out; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto out; ++ ++ if (vector) { ++ local->vector = iov_dup(vector, count); ++ if (!local->vector) ++ goto out; ++ for (i = 0; i < count; i++) ++ local->total_size += vector[i].iov_len; ++ local->count = count; ++ } else { ++ local->total_size = len; ++ } ++ ++ local->fop = fop; ++ local->offset = offset; ++ local->flags = flags; ++ if (iobref) ++ local->iobref = iobref_ref(iobref); ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->resolver_base_inode = local->fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_common_inode_write_post_lookup_handler); ++ return 0; + out: +- shard_common_failure_unwind(fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vector, int32_t count, off_t offset, uint32_t flags, +- struct iobref *iobref, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, +- offset, flags, 0, iobref, xdata); +- return 0; ++int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vector, int32_t count, off_t offset, ++ uint32_t flags, struct iobref *iobref, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, ++ offset, flags, 0, iobref, xdata); ++ return 0; + } + +-int +-shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, +- int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +-{ +- if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && +- (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) +- goto out; ++int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ int32_t keep_size, off_t offset, size_t len, ++ dict_t 
*xdata) { ++ if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && ++ (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) ++ goto out; + +- shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, +- offset, keep_size, len, NULL, xdata); +- return 0; ++ shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, ++ offset, keep_size, len, NULL, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); ++ return 0; + } + +-int +-shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- off_t len, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ off_t len, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; + } + +-int +-shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- size_t len, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ size_t len, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; + } + +-int32_t +-shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata) +-{ +- /* TBD */ +- gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, +- "seek called on %s.", uuid_utoa(fd->inode->gfid)); +- shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); +- return 0; ++int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ 
gf_seek_what_t what, dict_t *xdata) { ++ /* TBD */ ++ gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, ++ "seek called on %s.", uuid_utoa(fd->inode->gfid)); ++ shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); ++ return 0; + } + +-int32_t +-mem_acct_init(xlator_t *this) +-{ +- int ret = -1; +- +- if (!this) +- return ret; ++int32_t mem_acct_init(xlator_t *this) { ++ int ret = -1; + +- ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); ++ if (!this) ++ return ret; + +- if (ret != 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, +- "Memory accounting init" +- "failed"); +- return ret; +- } ++ ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); + ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, ++ "Memory accounting init" ++ "failed"); + return ret; ++ } ++ ++ return ret; + } + +-int +-init(xlator_t *this) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; ++int init(xlator_t *this) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- if (!this) { +- gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, +- "this is NULL. init() failed"); +- return -1; +- } ++ if (!this) { ++ gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, ++ "this is NULL. init() failed"); ++ return -1; ++ } + +- if (!this->parents) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "Dangling volume. Check volfile"); +- goto out; +- } ++ if (!this->parents) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "Dangling volume. Check volfile"); ++ goto out; ++ } + +- if (!this->children || this->children->next) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "shard not configured with exactly one sub-volume. " +- "Check volfile"); +- goto out; +- } ++ if (!this->children || this->children->next) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "shard not configured with exactly one sub-volume. 
" ++ "Check volfile"); ++ goto out; ++ } + +- priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); +- if (!priv) +- goto out; ++ priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); ++ if (!priv) ++ goto out; + +- GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); ++ GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); + +- GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); ++ GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); + +- GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); ++ GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); + +- this->local_pool = mem_pool_new(shard_local_t, 128); +- if (!this->local_pool) { +- ret = -1; +- goto out; +- } +- gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); +- gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); ++ this->local_pool = mem_pool_new(shard_local_t, 128); ++ if (!this->local_pool) { ++ ret = -1; ++ goto out; ++ } ++ gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); ++ gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); + +- this->private = priv; +- LOCK_INIT(&priv->lock); +- INIT_LIST_HEAD(&priv->ilist_head); +- ret = 0; ++ this->private = priv; ++ LOCK_INIT(&priv->lock); ++ INIT_LIST_HEAD(&priv->ilist_head); ++ ret = 0; + out: +- if (ret) { +- GF_FREE(priv); +- mem_pool_destroy(this->local_pool); +- } ++ if (ret) { ++ GF_FREE(priv); ++ mem_pool_destroy(this->local_pool); ++ } + +- return ret; ++ return ret; + } + +-void +-fini(xlator_t *this) +-{ +- shard_priv_t *priv = NULL; ++void fini(xlator_t *this) { ++ shard_priv_t *priv = NULL; + +- GF_VALIDATE_OR_GOTO("shard", this, out); ++ GF_VALIDATE_OR_GOTO("shard", this, out); + +- mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + +- priv = this->private; +- if (!priv) +- goto out; ++ priv = this->private; ++ if 
(!priv) ++ goto out; + +- this->private = NULL; +- LOCK_DESTROY(&priv->lock); +- GF_FREE(priv); ++ this->private = NULL; ++ LOCK_DESTROY(&priv->lock); ++ GF_FREE(priv); + + out: +- return; ++ return; + } + +-int +-reconfigure(xlator_t *this, dict_t *options) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; ++int reconfigure(xlator_t *this, dict_t *options) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- priv = this->private; ++ priv = this->private; + +- GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); ++ GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); + +- GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, +- uint32, out); +- ret = 0; ++ GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32, ++ out); ++ ret = 0; + + out: +- return ret; ++ return ret; + } + +-int +-shard_forget(xlator_t *this, inode_t *inode) +-{ +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++int shard_forget(xlator_t *this, inode_t *inode) { ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; + +- priv = this->private; +- if (!priv) +- return 0; ++ priv = this->private; ++ if (!priv) ++ return 0; + +- inode_ctx_del(inode, this, &ctx_uint); +- if (!ctx_uint) +- return 0; ++ inode_ctx_del(inode, this, &ctx_uint); ++ if (!ctx_uint) ++ return 0; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- /* When LRU limit reaches inode will be forcefully removed from the +- * table, inode needs to be removed from LRU of shard as well. +- */ +- if (!list_empty(&ctx->ilist)) { +- LOCK(&priv->lock); +- { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- } +- UNLOCK(&priv->lock); ++ /* When LRU limit reaches inode will be forcefully removed from the ++ * table, inode needs to be removed from LRU of shard as well. 
++ */ ++ if (!list_empty(&ctx->ilist)) { ++ LOCK(&priv->lock); ++ { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; + } +- GF_FREE(ctx); ++ UNLOCK(&priv->lock); ++ } ++ GF_FREE(ctx); + +- return 0; ++ return 0; + } + +-int +-shard_release(xlator_t *this, fd_t *fd) +-{ +- /* TBD */ +- return 0; ++int shard_release(xlator_t *this, fd_t *fd) { ++ /* TBD */ ++ return 0; + } + +-int +-shard_priv_dump(xlator_t *this) +-{ +- shard_priv_t *priv = NULL; +- char key_prefix[GF_DUMP_MAX_BUF_LEN] = { +- 0, +- }; +- char *str = NULL; ++int shard_priv_dump(xlator_t *this) { ++ shard_priv_t *priv = NULL; ++ char key_prefix[GF_DUMP_MAX_BUF_LEN] = { ++ 0, ++ }; ++ char *str = NULL; + +- priv = this->private; ++ priv = this->private; + +- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section("%s", key_prefix); +- str = gf_uint64_2human_readable(priv->block_size); +- gf_proc_dump_write("shard-block-size", "%s", str); +- gf_proc_dump_write("inode-count", "%d", priv->inode_count); +- gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); +- gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); ++ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); ++ gf_proc_dump_add_section("%s", key_prefix); ++ str = gf_uint64_2human_readable(priv->block_size); ++ gf_proc_dump_write("shard-block-size", "%s", str); ++ gf_proc_dump_write("inode-count", "%d", priv->inode_count); ++ gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); ++ gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); + +- GF_FREE(str); ++ GF_FREE(str); + +- return 0; ++ return 0; + } + +-int +-shard_releasedir(xlator_t *this, fd_t *fd) +-{ +- return 0; +-} ++int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; } + + struct xlator_fops fops = { + .lookup = shard_lookup, +-- +1.8.3.1 + diff --git a/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch 
b/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch new file mode 100644 index 0000000..df971b8 --- /dev/null +++ b/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch @@ -0,0 +1,162 @@ +From 562283ad34021bbf4fc540127ee7072d5152d34d Mon Sep 17 00:00:00 2001 +From: Yuval Turgeman +Date: Wed, 24 Jul 2019 16:42:22 +0300 +Subject: [PATCH 336/336] spec: check and return exit code in rpm scripts + +lua's error() call expects a value as its second argument, and this is +taken from the `val` variable, while the `ok` is boolean. This causes +the rpm scripts to fail on: + +bad argument #2 to 'error' (number expected, got boolean) + +Label: DOWNSTREAM ONLY +BUG: 1768786 +Change-Id: I9c6b1f62ebf15dbc93196d018bc1fd628b36fc33 +>Signed-off-by: Yuval Turgeman +Reviewed-on: https://code.engineering.redhat.com/gerrit/186405 +Reviewed-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 55 +++++++++++++++++++++++++++++++++---------------------- + 1 file changed, 33 insertions(+), 22 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 91180db..1b975b2 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1572,8 +1572,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1606,8 +1607,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1640,8 +1642,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1674,8 +1677,9 @@ 
fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1707,8 +1711,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1740,8 +1745,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1775,8 +1781,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1810,8 +1817,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1845,8 +1853,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1881,8 +1890,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1916,8 +1926,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + 
%posttrans server +-- +1.8.3.1 + diff --git a/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch b/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch new file mode 100644 index 0000000..b18ef4f --- /dev/null +++ b/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch @@ -0,0 +1,455 @@ +From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Fri, 9 Aug 2019 14:34:22 +0530 +Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size + +If the glusterfs fuse client process is unable to +process the invalidate requests quickly enough, the +number of such requests quickly grows large enough +to use a significant amount of memory. +We are now introducing another option to set an upper +limit on these to prevent runaway memory usage. + +> Upstream https://review.gluster.org/23187 +> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788 +> Fixes: bz#1732717 +> Signed-off-by: N Balachandran + +BUG: 1763208 +Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/187529 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/mount.glusterfs.8 | 5 +++ + glusterfsd/src/glusterfsd.c | 21 ++++++++++ + glusterfsd/src/glusterfsd.h | 3 +- + libglusterfs/src/glusterfs/glusterfs.h | 1 + + libglusterfs/src/glusterfs/inode.h | 1 + + libglusterfs/src/inode.c | 31 +++++++++++---- + xlators/mount/fuse/src/fuse-bridge.c | 60 ++++++++++++++++++++++------- + xlators/mount/fuse/src/fuse-bridge.h | 3 +- + xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++++ + 9 files changed, 108 insertions(+), 24 deletions(-) + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index 286631b..b35b362 100644 +--- a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None] + Set fuse module's limit for number of inodes kept in LRU 
list to N [default: 131072] + .TP + .TP ++\fBinvalidate-limit=\fRN ++Suspend fuse invalidations implied by 'lru-limit' if number of outstanding ++invalidations reaches N ++.TP ++.TP + \fBbackground-qlen=\fRN + Set fuse module's background queue length to N [default: 64] + .TP +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 5b5e996..0856471 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = { + {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, + "Set fuse module's limit for number of inodes kept in LRU list to N " + "[default: 131072]"}, ++ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0, ++ "Suspend inode invalidations implied by 'lru-limit' if the number of " ++ "outstanding invalidations reaches N"}, + {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0, + "Set fuse module's background queue length to N " + "[default: 64]"}, +@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options) + } + } + ++ if (cmd_args->invalidate_limit >= 0) { ++ ret = dict_set_int32(options, "invalidate-limit", ++ cmd_args->invalidate_limit); ++ if (ret < 0) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, ++ "invalidate-limit"); ++ goto err; ++ } ++ } ++ + if (cmd_args->background_qlen) { + ret = dict_set_int32(options, "background-qlen", + cmd_args->background_qlen); +@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state) + argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); + break; + ++ case ARGP_FUSE_INVALIDATE_LIMIT_KEY: ++ if (!gf_string2int32(arg, &cmd_args->invalidate_limit)) ++ break; ++ ++ argp_failure(state, -1, 0, "unknown invalidate limit option %s", ++ arg); ++ break; ++ + case ARGP_FUSE_BACKGROUND_QLEN_KEY: + if (!gf_string2int(arg, &cmd_args->background_qlen)) + break; +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index fa55789..ee655f0 
100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -111,7 +111,8 @@ enum argp_option_keys { + ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, + ARGP_FUSE_LRU_LIMIT_KEY = 190, + ARGP_FUSE_AUTO_INVAL_KEY = 191, +- ARGP_BRICK_MUX_KEY = 192 ++ ARGP_BRICK_MUX_KEY = 192, ++ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195, + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 79c93ae..3b594c0 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -541,6 +541,7 @@ struct _cmd_args { + int client_pid_set; + unsigned uid_map_root; + int32_t lru_limit; ++ int32_t invalidate_limit; + int background_qlen; + int congestion_threshold; + char *fuse_mountopts; +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index 52efdd8..4421c47 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -107,6 +107,7 @@ struct _inode { + struct list_head list; /* active/lru/purge */ + + struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ ++ bool in_invalidate_list; /* Set if inode is in table invalidate list */ + bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ + }; + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 96ddea5..5331e93 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear) + + this = THIS; + +- if (clear && inode->invalidate_sent) { +- inode->invalidate_sent = false; ++ if (clear && inode->in_invalidate_list) { ++ inode->in_invalidate_list = false; + inode->table->invalidate_size--; + __inode_activate(inode); + } +@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear) + inode->_ctx[index].ref--; + } + +- if (!inode->ref && !inode->invalidate_sent) { ++ if (!inode->ref && !inode->in_invalidate_list) { + 
inode->table->active_size--; + + nlookup = GF_ATOMIC_GET(inode->nlookup); +@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate) + return inode; + + if (!inode->ref) { +- if (inode->invalidate_sent) { +- inode->invalidate_sent = false; ++ if (inode->in_invalidate_list) { ++ inode->in_invalidate_list = false; + inode->table->invalidate_size--; + } else { + inode->table->lru_size--; + } + if (is_invalidate) { +- inode->invalidate_sent = true; ++ inode->in_invalidate_list = true; + inode->table->invalidate_size++; + list_move_tail(&inode->list, &inode->table->invalidate); + } else { +@@ -1609,6 +1609,7 @@ static int + inode_table_prune(inode_table_t *table) + { + int ret = 0; ++ int ret1 = 0; + struct list_head purge = { + 0, + }; +@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table) + /* check for valid inode with 'nlookup' */ + nlookup = GF_ATOMIC_GET(entry->nlookup); + if (nlookup) { ++ if (entry->invalidate_sent) { ++ list_move_tail(&entry->list, &table->lru); ++ continue; ++ } + __inode_ref(entry, true); + tmp = entry; + break; +@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table) + if (tmp) { + xlator_t *old_THIS = THIS; + THIS = table->invalidator_xl; +- table->invalidator_fn(table->invalidator_xl, tmp); ++ ret1 = table->invalidator_fn(table->invalidator_xl, tmp); + THIS = old_THIS; +- inode_unref(tmp); ++ pthread_mutex_lock(&table->lock); ++ { ++ if (!ret1) { ++ tmp->invalidate_sent = true; ++ __inode_unref(tmp, false); ++ } else { ++ /* Move this back to the lru list*/ ++ __inode_unref(tmp, true); ++ } ++ } ++ pthread_mutex_unlock(&table->lock); + } + + /* Just so that if purge list is handled too, then clear it off */ +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 1c946a2..8b2e7f0 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log; + void + fini(xlator_t *this_xl); + +-static 
void ++static int32_t + fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); + + /* +@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) + #define send_fuse_obj(this, finh, obj) \ + send_fuse_data(this, finh, obj, sizeof(*(obj))) + +-static void ++static int32_t + fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + { + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + + priv = this->private; + if (!priv->reverse_fuse_thread_started) +- return; ++ return -1; ++ ++ if (priv->invalidate_limit && ++ (priv->invalidate_count >= priv->invalidate_limit)) { ++ return -1; ++ } + + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) +- return; ++ return -1; + + list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list) + { + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) +- break; ++ return -1; + + INIT_LIST_HEAD(&node->next); + +@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, &priv->invalidate_list); ++ priv->invalidate_count++; + pthread_cond_signal(&priv->invalidate_cond); + } + pthread_mutex_unlock(&priv->invalidate_mutex); + } + + #endif +- return; ++ return 0; + } + + /* + * Send an inval inode notification to fuse. This causes an invalidation of the + * entire page cache mapping on the inode. 
+ */ +-static void ++static int32_t + fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + { + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + priv = this->private; + + if (!priv->reverse_fuse_thread_started) +- return; ++ return -1; ++ ++ if (priv->invalidate_limit && ++ (priv->invalidate_count >= priv->invalidate_limit)) { ++ return -1; ++ } + + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) +- return; ++ return -1; + + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) +- return; ++ return -1; + + INIT_LIST_HEAD(&node->next); + +@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, &priv->invalidate_list); ++ priv->invalidate_count++; + pthread_cond_signal(&priv->invalidate_cond); + } + pthread_mutex_unlock(&priv->invalidate_mutex); +@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "fuse_invalidate_inode not implemented on this system"); + #endif +- return; ++ return 0; + } + + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + static int32_t + fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) + { +- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); +- return 0; ++ int32_t ret = 0; ++ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); ++ return ret; + } + #endif + +@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64, + finh->nodeid); + #if FUSE_KERNEL_MINOR_VERSION >= 11 +- fuse_invalidate_entry(this, finh->nodeid); ++ ret = fuse_invalidate_entry(this, finh->nodeid); ++ if (ret) ++ op_errno = EBUSY; + #endif + goto done; + } +@@ -4812,6 +4827,7 @@ notify_kernel_loop(void 
*data) + fuse_invalidate_node_t, next); + + list_del_init(&node->next); ++ priv->invalidate_count--; + } + pthread_mutex_unlock(&priv->invalidate_mutex); + +@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data) + list_del_init(&node->next); + GF_FREE(node); + } ++ priv->invalidate_count = 0; + } + pthread_mutex_unlock(&priv->invalidate_mutex); + +@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this) + (int)private->timed_response_fuse_thread_started); + gf_proc_dump_write("reverse_thread_started", "%d", + (int)private->reverse_fuse_thread_started); ++ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit); ++ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64, ++ private->invalidate_count); + gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); + + return 0; +@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl) + + GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); + ++ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32, ++ cleanup_exit); ++ + GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit); + + GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); +@@ -6955,6 +6978,15 @@ struct volume_options options[] = { + "reaching this limit (0 means 'unlimited')", + }, + { ++ .key = {"invalidate-limit"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "0", ++ .min = 0, ++ .description = "suspend invalidations as of 'lru-limit' if the number " ++ "of outstanding invalidations reaches this limit " ++ "(0 means 'unlimited')", ++ }, ++ { + .key = {"auto-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 697bd88..2311582 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -139,7 +139,7 @@ struct fuse_private { + pthread_cond_t invalidate_cond; + pthread_mutex_t invalidate_mutex; + gf_boolean_t 
reverse_fuse_thread_started; +- ++ uint64_t invalidate_count; + /* For communicating with separate mount thread. */ + int status_pipe[2]; + +@@ -191,6 +191,7 @@ struct fuse_private { + + /* LRU Limit, if not set, default is 128k for now */ + uint32_t lru_limit; ++ uint32_t invalidate_limit; + }; + typedef struct fuse_private fuse_private_t; + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index cbde42d..61d7422 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -257,6 +257,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); + fi + ++ if [ -n "$invalidate_limit" ]; then ++ cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit"); ++ fi ++ + if [ -n "$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi +@@ -505,6 +509,9 @@ with_options() + "lru-limit") + lru_limit=$value + ;; ++ "invalidate-limit") ++ invalidate_limit=$value ++ ;; + "background-qlen") + bg_qlen=$value + ;; +-- +1.8.3.1 + diff --git a/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch b/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch new file mode 100644 index 0000000..b108bd0 --- /dev/null +++ b/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch @@ -0,0 +1,83 @@ +From 6d2e12a53ef0bcbeea274c47537a0c707a3f7b1e Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Fri, 20 Sep 2019 13:30:42 +0530 +Subject: [PATCH 338/344] glusterfs/fuse: Reduce the default lru-limit value + +The current lru-limit value still uses memory for +upto 128K inodes. +Reduce the default value of lru-limit to 64K. 
+ +> Upstream https://review.gluster.org/23461 +> Change-Id: Ica2dd4f8f5fde45cb5180d8f02c3d86114ac52b3 +> Fixes: bz#1753880 +> Signed-off-by: N Balachandran +> Signed-off-by: Csaba Henk + +BUG: 1763208 +Change-Id: I04ab39b5278e702aacdceebfa5b63702b9f9703b +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/187535 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/mount.glusterfs.8 | 2 +- + glusterfsd/src/glusterfsd.c | 2 +- + xlators/mount/fuse/src/fuse-bridge.c | 2 +- + xlators/mount/fuse/src/fuse-bridge.h | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index b35b362..87a5669 100644 +--- a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -123,7 +123,7 @@ Provide list of backup volfile servers in the following format [default: None] + .TP + .TP + \fBlru-limit=\fRN +-Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] ++Set fuse module's limit for number of inodes kept in LRU list to N [default: 65536] + .TP + .TP + \fBinvalidate-limit=\fRN +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 0856471..974fb88 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -211,7 +211,7 @@ static struct argp_option gf_options[] = { + "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"}, + {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, + "Set fuse module's limit for number of inodes kept in LRU list to N " +- "[default: 131072]"}, ++ "[default: 65536]"}, + {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0, + "Suspend inode invalidations implied by 'lru-limit' if the number of " + "outstanding invalidations reaches N"}, +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 8b2e7f0..ebe5c28 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ 
b/xlators/mount/fuse/src/fuse-bridge.c +@@ -6972,7 +6972,7 @@ struct volume_options options[] = { + { + .key = {"lru-limit"}, + .type = GF_OPTION_TYPE_INT, +- .default_value = "131072", ++ .default_value = "65536", + .min = 0, + .description = "makes glusterfs invalidate kernel inodes after " + "reaching this limit (0 means 'unlimited')", +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 2311582..cf4479c 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -189,7 +189,7 @@ struct fuse_private { + gf_boolean_t flush_handle_interrupt; + gf_boolean_t fuse_auto_inval; + +- /* LRU Limit, if not set, default is 128k for now */ ++ /* LRU Limit, if not set, default is 64k for now */ + uint32_t lru_limit; + uint32_t invalidate_limit; + }; +-- +1.8.3.1 + diff --git a/SOURCES/0339-geo-rep-fix-integer-config-validation.patch b/SOURCES/0339-geo-rep-fix-integer-config-validation.patch new file mode 100644 index 0000000..45f3ede --- /dev/null +++ b/SOURCES/0339-geo-rep-fix-integer-config-validation.patch @@ -0,0 +1,93 @@ +From 8b5b3b247a00515d3188453c27b0ba749e93d325 Mon Sep 17 00:00:00 2001 +From: Aravinda VK +Date: Tue, 26 Mar 2019 13:20:13 +0530 +Subject: [PATCH 339/344] geo-rep: fix integer config validation + +ssh-port validation is mentioned as `validation=int` in template +`gsyncd.conf`, but not handled this during geo-rep config set. 
+ +upstream patch: + https://review.gluster.org/#/c/glusterfs/+/22418/ +Backport of: + + >Fixes: bz#1692666 + >Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6 + >Signed-off-by: Aravinda VK + >Signed-off-by: Sunny Kumar + +BUG: 1782162 +Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/187533 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/gsyncdconfig.py | 23 ++++++++++++++++++----- + tests/00-geo-rep/georep-basic-dr-rsync.t | 3 +++ + 2 files changed, 21 insertions(+), 5 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index f823311..8848071 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -329,6 +329,9 @@ class Gconf(object): + if item["validation"] == "unixtime": + return validate_unixtime(value) + ++ if item["validation"] == "int": ++ return validate_int(value) ++ + return False + + def _is_config_changed(self): +@@ -381,6 +384,14 @@ def config_upgrade(config_file, ret): + config.write(configfile) + + ++def validate_int(value): ++ try: ++ _ = int(value) ++ return True ++ except ValueError: ++ return False ++ ++ + def validate_unixtime(value): + try: + y = datetime.fromtimestamp(int(value)).strftime("%Y") +@@ -393,11 +404,13 @@ def validate_unixtime(value): + + + def validate_minmax(value, minval, maxval): +- value = int(value) +- minval = int(minval) +- maxval = int(maxval) +- +- return value >= minval and value <= maxval ++ try: ++ value = int(value) ++ minval = int(minval) ++ maxval = int(maxval) ++ return value >= minval and value <= maxval ++ except ValueError: ++ return False + + + def validate_choice(value, allowed_values): +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index b432635..b6fbf18 100644 +--- 
a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -71,6 +71,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created" + #Config gluster-command-dir + TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} + ++#Config Set ssh-port to validate int validation ++TEST $GEOREP_CLI $master $slave config ssh-port 22 ++ + #Config gluster-command-dir + TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR} + +-- +1.8.3.1 + diff --git a/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch b/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch new file mode 100644 index 0000000..54b2706 --- /dev/null +++ b/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch @@ -0,0 +1,46 @@ +From 0c996d6c40c625f8a0ee6be2c220c89aaf70c840 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 10 Dec 2019 08:35:23 +0530 +Subject: [PATCH 340/344] rpc: event_slot_alloc converted infinite loop after + reach slot_used to 1024 + +Problem: In the commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2 missed one + condition to come out from the loop so after reach the slot_used to + 1024 loop has become infinite loop + +Solution: Correct the code path to avoid the infinite loop + +> Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db +> Fixes: bz#1781440 +> Credits: Xavi Hernandez +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit 8030f9c0f092170ceb50cedf59b9c330022825b7) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23843/) + +Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db +BUG: 1781444 +Credits: Xavi Hernandez +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/187460 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez Juan +--- + libglusterfs/src/event-epoll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git 
a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 65f5efd..5afb2f2 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -92,7 +92,7 @@ retry: + while (i < EVENT_EPOLL_TABLES) { + switch (event_pool->slots_used[i]) { + case EVENT_EPOLL_SLOTS: +- continue; ++ break; + case 0: + if (!event_pool->ereg[i]) { + table = __event_newtable(event_pool, i); +-- +1.8.3.1 + diff --git a/SOURCES/0341-socket-fix-error-handling.patch b/SOURCES/0341-socket-fix-error-handling.patch new file mode 100644 index 0000000..0eb68d1 --- /dev/null +++ b/SOURCES/0341-socket-fix-error-handling.patch @@ -0,0 +1,742 @@ +From 2c99b7db00a6238fd43053dd672c8ce519d8fd27 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 11 Dec 2019 18:21:14 +0100 +Subject: [PATCH 341/344] socket: fix error handling + +When __socket_proto_state_machine() detected a problem in the size of +the request or it couldn't allocate an iobuf of the requested size, it +returned -ENOMEM (-12). However the caller was expecting only -1 in +case of error. For this reason the error passes undetected initially, +adding back the socket to the epoll object. On further processing, +however, the error is finally detected and the connection terminated. +Meanwhile, another thread could receive a poll_in event from the same +connection, which could cause races with the connection destruction. +When this happened, the process crashed. + +To fix this, all error detection conditions have been hardened to be +more strict on what is valid and what not. Also, we don't return +-ENOMEM anymore. We always return -1 in case of error. + +An additional change has been done to prevent destruction of the +transport object while it may still be needed. 
+ +Upstream patch: +> Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23861 +> Fixes: bz#1782495 +> Signed-off-by: Xavi Hernandez + +Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d +BUG: 1779696 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/187689 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +--- + rpc/rpc-transport/socket/src/socket.c | 173 ++++++++++++++++++---------------- + 1 file changed, 90 insertions(+), 83 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index bf2fa71..f54ca83 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -173,7 +173,7 @@ ssl_setup_connection_params(rpc_transport_t *this); + \ + ret = __socket_readv(this, in->pending_vector, 1, &in->pending_vector, \ + &in->pending_count, &bytes_read); \ +- if (ret == -1) \ ++ if (ret < 0) \ + break; \ + __socket_proto_update_priv_after_read(priv, ret, bytes_read); \ + } +@@ -739,7 +739,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + ret = sys_writev(sock, opvector, IOV_MIN(opcount)); + } + +- if (ret == 0 || (ret == -1 && errno == EAGAIN)) { ++ if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) { + /* done for now */ + break; + } else if (ret > 0) +@@ -754,7 +754,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + errno = ENODATA; + ret = -1; + } +- if (ret == -1 && errno == EAGAIN) { ++ if ((ret < 0) && (errno == EAGAIN)) { + /* done for now */ + break; + } else if (ret > 0) +@@ -770,7 +770,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + errno = ENOTCONN; + break; + } +- if (ret == -1) { ++ if (ret < 0) { + if (errno == EINTR) + continue; + +@@ -907,7 +907,7 @@ __socket_disconnect(rpc_transport_t *this) + gf_log(this->name, GF_LOG_TRACE, "disconnecting %p, sock=%d", this, + 
priv->sock); + +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_TRACE, + "tearing down socket connection"); + ret = __socket_teardown_connection(this); +@@ -942,7 +942,7 @@ __socket_server_bind(rpc_transport_t *this) + + ret = setsockopt(priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setsockopt() for SO_REUSEADDR failed (%s)", strerror(errno)); + } +@@ -955,7 +955,7 @@ __socket_server_bind(rpc_transport_t *this) + if (reuse_check_sock >= 0) { + ret = connect(reuse_check_sock, SA(&unix_addr), + this->myinfo.sockaddr_len); +- if ((ret == -1) && (ECONNREFUSED == errno)) { ++ if ((ret != 0) && (ECONNREFUSED == errno)) { + sys_unlink(((struct sockaddr_un *)&unix_addr)->sun_path); + } + gf_log(this->name, GF_LOG_INFO, +@@ -967,7 +967,7 @@ __socket_server_bind(rpc_transport_t *this) + ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr, + this->myinfo.sockaddr_len); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s", + this->myinfo.identifier, strerror(errno)); + if (errno == EADDRINUSE) { +@@ -976,7 +976,7 @@ __socket_server_bind(rpc_transport_t *this) + } + if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) { + if (getsockname(priv->sock, SA(&this->myinfo.sockaddr), +- &this->myinfo.sockaddr_len) == -1) { ++ &this->myinfo.sockaddr_len) != 0) { + gf_log(this->name, GF_LOG_WARNING, + "getsockname on (%d) failed (%s)", priv->sock, + strerror(errno)); +@@ -1004,7 +1004,7 @@ __socket_nonblock(int fd) + + flags = fcntl(fd, F_GETFL); + +- if (flags != -1) ++ if (flags >= 0) + ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK); + + return ret; +@@ -1034,7 +1034,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + #endif + + ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", 
GF_LOG_WARNING, + "failed to set keep alive option on socket %d", fd); + goto err; +@@ -1051,7 +1051,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepaliveintvl, + sizeof(keepaliveintvl)); + #endif +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep alive interval on socket %d", fd); + goto err; +@@ -1062,7 +1062,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepaliveidle, + sizeof(keepaliveidle)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep idle %d on socket %d, %s", keepaliveidle, fd, + strerror(errno)); +@@ -1070,7 +1070,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + } + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepaliveintvl, + sizeof(keepaliveintvl)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep interval %d on socket %d, %s", + keepaliveintvl, fd, strerror(errno)); +@@ -1082,7 +1082,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + goto done; + ret = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_ms, + sizeof(timeout_ms)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set " + "TCP_USER_TIMEOUT %d on socket %d, %s", +@@ -1093,7 +1093,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + #if defined(TCP_KEEPCNT) + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepalivecnt, + sizeof(keepalivecnt)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set " + "TCP_KEEPCNT %d on socket %d, %s", +@@ -1366,7 +1366,7 @@ socket_event_poll_err(rpc_transport_t *this, int gen, int idx) + + pthread_mutex_lock(&priv->out_lock); + { +- if ((priv->gen == gen) 
&& (priv->idx == idx) && (priv->sock != -1)) { ++ if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock >= 0)) { + __socket_ioq_flush(this); + __socket_reset(this); + socket_closed = _gf_true; +@@ -1405,7 +1405,7 @@ socket_event_poll_out(rpc_transport_t *this) + if (priv->connected == 1) { + ret = __socket_ioq_churn(this); + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_TRACE, + "__socket_ioq_churn returned -1; " + "disconnecting socket"); +@@ -1463,7 +1463,7 @@ __socket_read_simple_msg(rpc_transport_t *this) + &bytes_read); + } + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "reading from socket failed. Error (%s), " + "peer (%s)", +@@ -1661,8 +1661,8 @@ __socket_read_vectored_request(rpc_transport_t *this, + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) { + request->vector_state = SP_STATE_VECTORED_REQUEST_INIT; + in->payload_vector.iov_len = ((unsigned long)frag->fragcurrent - + (unsigned long) +@@ -1739,8 +1739,8 @@ __socket_read_request(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + request->header_state = SP_STATE_REQUEST_HEADER_INIT; + } + +@@ -1870,8 +1870,8 @@ __socket_read_accepted_successful_reply(rpc_transport_t *this) + /* now read the entire remaining msg into new iobuf */ + ret = __socket_read_simple_msg(this); + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) 
{ + frag->call_body.reply.accepted_success_state = + SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT; + } +@@ -2003,8 +2003,8 @@ __socket_read_accepted_successful_reply_v2(rpc_transport_t *this) + /* now read the entire remaining msg into new iobuf */ + ret = __socket_read_simple_msg(this); + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) { + frag->call_body.reply.accepted_success_state = + SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT; + } +@@ -2103,8 +2103,8 @@ __socket_read_accepted_reply(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + frag->call_body.reply + .accepted_state = SP_STATE_ACCEPTED_REPLY_INIT; + } +@@ -2169,8 +2169,8 @@ __socket_read_vectored_reply(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + frag->call_body.reply + .status_state = SP_STATE_VECTORED_REPLY_STATUS_INIT; + in->payload_vector.iov_len = (unsigned long)frag->fragcurrent - +@@ -2237,7 +2237,7 @@ __socket_read_reply(rpc_transport_t *this) + /* Transition back to externally visible state. 
*/ + frag->state = SP_STATE_READ_MSGTYPE; + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "notify for event MAP_XID failed for %s", + this->peerinfo.identifier); +@@ -2315,8 +2315,8 @@ __socket_read_frag(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + /* frag->state = SP_STATE_NADA; */ + frag->state = SP_STATE_RPCFRAG_INIT; + } +@@ -2400,7 +2400,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + ret = __socket_readv(this, in->pending_vector, 1, + &in->pending_vector, &in->pending_count, + NULL); +- if (ret == -1) ++ if (ret < 0) + goto out; + + if (ret > 0) { +@@ -2422,7 +2422,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr); + + if (in->total_bytes_read >= GF_UNIT_GB) { +- ret = -ENOMEM; ++ ret = -1; + goto out; + } + +@@ -2430,7 +2430,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + this->ctx->iobuf_pool, + (in->total_bytes_read + sizeof(in->fraghdr))); + if (!iobuf) { +- ret = -ENOMEM; ++ ret = -1; + goto out; + } + +@@ -2457,7 +2457,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + case SP_STATE_READING_FRAG: + ret = __socket_read_frag(this); + +- if ((ret == -1) || ++ if ((ret < 0) || + (frag->bytes_read != RPC_FRAGSIZE(in->fraghdr))) { + goto out; + } +@@ -2575,7 +2575,7 @@ socket_event_poll_in(rpc_transport_t *this, gf_boolean_t notify_handled) + pthread_mutex_unlock(&priv->notify.lock); + } + +- if (notify_handled && (ret != -1)) ++ if (notify_handled && (ret >= 0)) + event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen); + + if (pollin) { +@@ -2618,10 +2618,10 @@ socket_connect_finish(rpc_transport_t *this) + + ret = __socket_connect_finish(priv->sock); + +- if (ret == -1 && errno == EINPROGRESS) 
++ if ((ret < 0) && (errno == EINPROGRESS)) + ret = 1; + +- if (ret == -1 && errno != EINPROGRESS) { ++ if ((ret < 0) && (errno != EINPROGRESS)) { + if (!priv->connect_finish_log) { + gf_log(this->name, GF_LOG_ERROR, + "connection to %s failed (%s); " +@@ -2640,7 +2640,7 @@ socket_connect_finish(rpc_transport_t *this) + + ret = getsockname(priv->sock, SA(&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "getsockname on (%d) failed (%s) - " + "disconnecting socket", +@@ -2924,6 +2924,13 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + return; + } + ++ /* At this point we are sure no other thread is using the transport because ++ * we cannot receive more events until we call gf_event_handled(). However ++ * this function may call gf_event_handled() in some cases. When this is ++ * done, the transport may be destroyed at any moment if another thread ++ * handled an error event. To prevent that we take a reference here. */ ++ rpc_transport_ref(this); ++ + GF_VALIDATE_OR_GOTO("socket", this, out); + GF_VALIDATE_OR_GOTO("socket", this->private, out); + GF_VALIDATE_OR_GOTO("socket", this->xl, out); +@@ -2960,7 +2967,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + if (ret > 0) { + gf_log(this->name, GF_LOG_TRACE, + "(sock:%d) returning to wait on socket", priv->sock); +- return; ++ goto out; + } + } else { + char *sock_type = (priv->is_server ? 
"Server" : "Client"); +@@ -3015,7 +3022,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + } + + out: +- return; ++ rpc_transport_unref(this); + } + + static void +@@ -3074,7 +3081,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + event_handled(ctx->event_pool, fd, idx, gen); + +- if (new_sock == -1) { ++ if (new_sock < 0) { + gf_log(this->name, GF_LOG_WARNING, "accept on %d failed (%s)", + priv->sock, strerror(errno)); + goto out; +@@ -3082,7 +3089,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) { + ret = __socket_nodelay(new_sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "setsockopt() failed for " + "NODELAY (%s)", +@@ -3094,7 +3101,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + ret = __socket_keepalive(new_sock, new_sockaddr.ss_family, + priv->keepaliveintvl, priv->keepaliveidle, + priv->keepalivecnt, priv->timeout); +- if (ret == -1) ++ if (ret != 0) + gf_log(this->name, GF_LOG_WARNING, + "Failed to set keep-alive: %s", strerror(errno)); + } +@@ -3110,7 +3117,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + } + + ret = pthread_mutex_init(&new_trans->lock, NULL); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "pthread_mutex_init() failed: %s; closing newly accepted " + "socket %d", +@@ -3130,7 +3137,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + ret = getsockname(new_sock, SA(&new_trans->myinfo.sockaddr), + &new_trans->myinfo.sockaddr_len); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "getsockname on socket %d " + "failed (errno:%s); closing newly accepted socket", +@@ -3237,7 +3244,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + */ + ret = 
rpc_transport_notify(this, RPC_TRANSPORT_ACCEPT, new_trans); + +- if (ret != -1) { ++ if (ret >= 0) { + new_priv->idx = event_register( + ctx->event_pool, new_sock, socket_event_handler, new_trans, + 1, 0, new_trans->notify_poller_death); +@@ -3275,7 +3282,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + rpc_transport_unref(new_trans); + } + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "closing newly accepted socket"); + sys_close(new_sock); + /* this unref is to actually cause the destruction of +@@ -3406,7 +3413,7 @@ socket_connect(rpc_transport_t *this, int port) + + pthread_mutex_lock(&priv->out_lock); + { +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_TRACE, + "connect () called on transport " + "already connected"); +@@ -3420,7 +3427,7 @@ socket_connect(rpc_transport_t *this, int port) + + ret = socket_client_get_remote_sockaddr(this, &sock_union.sa, + &sockaddr_len, &sa_family); +- if (ret == -1) { ++ if (ret < 0) { + /* logged inside client_get_remote_sockaddr */ + goto unlock; + } +@@ -3439,7 +3446,7 @@ socket_connect(rpc_transport_t *this, int port) + this->peerinfo.sockaddr_len = sockaddr_len; + + priv->sock = sys_socket(sa_family, SOCK_STREAM, 0); +- if (priv->sock == -1) { ++ if (priv->sock < 0) { + gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)", + strerror(errno)); + ret = -1; +@@ -3451,7 +3458,7 @@ socket_connect(rpc_transport_t *this, int port) + */ + if (priv->windowsize != 0) { + if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setting receive window " + "size failed: %d: %d: %s", +@@ -3459,7 +3466,7 @@ socket_connect(rpc_transport_t *this, int port) + } + + if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 
0) { + gf_log(this->name, GF_LOG_ERROR, + "setting send window size " + "failed: %d: %d: %s", +@@ -3484,7 +3491,7 @@ socket_connect(rpc_transport_t *this, int port) + if (priv->nodelay && (sa_family != AF_UNIX)) { + ret = __socket_nodelay(priv->sock); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "NODELAY on %d failed (%s)", + priv->sock, strerror(errno)); + } +@@ -3494,7 +3501,7 @@ socket_connect(rpc_transport_t *this, int port) + ret = __socket_keepalive(priv->sock, sa_family, + priv->keepaliveintvl, priv->keepaliveidle, + priv->keepalivecnt, priv->timeout); +- if (ret == -1) ++ if (ret != 0) + gf_log(this->name, GF_LOG_ERROR, "Failed to set keep-alive: %s", + strerror(errno)); + } +@@ -3516,7 +3523,7 @@ socket_connect(rpc_transport_t *this, int port) + + ret = client_bind(this, SA(&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len, priv->sock); +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "client bind failed: %s", + strerror(errno)); + goto handler; +@@ -3525,7 +3532,7 @@ socket_connect(rpc_transport_t *this, int port) + /* make socket non-blocking for all types of sockets */ + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "NBIO on %d failed (%s)", + priv->sock, strerror(errno)); + goto handler; +@@ -3552,7 +3559,7 @@ socket_connect(rpc_transport_t *this, int port) + + connect_attempted = _gf_true; + +- if (ret == -1 && errno == ENOENT && ign_enoent) { ++ if ((ret != 0) && (errno == ENOENT) && ign_enoent) { + gf_log(this->name, GF_LOG_WARNING, + "Ignore failed connection attempt on %s, (%s) ", + this->peerinfo.identifier, strerror(errno)); +@@ -3570,7 +3577,7 @@ socket_connect(rpc_transport_t *this, int port) + goto handler; + } + +- if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) { ++ if ((ret != 0) && (errno != EINPROGRESS) && (errno != ENOENT)) { + /* For unix path based sockets, the socket path is 
+ * cryptic (md5sum of path) and may not be useful for + * the user in debugging so log it in DEBUG +@@ -3634,8 +3641,8 @@ socket_connect(rpc_transport_t *this, int port) + pthread_mutex_unlock(&priv->out_lock); + + err: +- /* if sock != -1, then cleanup is done from the event handler */ +- if (ret == -1 && sock == -1) { ++ /* if sock >= 0, then cleanup is done from the event handler */ ++ if ((ret < 0) && (sock < 0)) { + /* Cleaup requires to send notification to upper layer which + intern holds the big_lock. There can be dead-lock situation + if big_lock is already held by the current thread. +@@ -3689,20 +3696,20 @@ socket_listen(rpc_transport_t *this) + } + pthread_mutex_unlock(&priv->out_lock); + +- if (sock != -1) { ++ if (sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_DEBUG, "already listening"); + return ret; + } + + ret = socket_server_get_local_sockaddr(this, SA(&sockaddr), &sockaddr_len, + &sa_family); +- if (ret == -1) { ++ if (ret < 0) { + return ret; + } + + pthread_mutex_lock(&priv->out_lock); + { +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log(this->name, GF_LOG_DEBUG, "already listening"); + goto unlock; + } +@@ -3712,7 +3719,7 @@ socket_listen(rpc_transport_t *this) + + priv->sock = sys_socket(sa_family, SOCK_STREAM, 0); + +- if (priv->sock == -1) { ++ if (priv->sock < 0) { + gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)", + strerror(errno)); + goto unlock; +@@ -3723,7 +3730,7 @@ socket_listen(rpc_transport_t *this) + */ + if (priv->windowsize != 0) { + if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setting receive window size " + "failed: %d: %d: %s", +@@ -3731,7 +3738,7 @@ socket_listen(rpc_transport_t *this) + } + + if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, 
GF_LOG_ERROR, + "setting send window size failed:" + " %d: %d: %s", +@@ -3741,7 +3748,7 @@ socket_listen(rpc_transport_t *this) + + if (priv->nodelay && (sa_family != AF_UNIX)) { + ret = __socket_nodelay(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setsockopt() failed for NODELAY (%s)", strerror(errno)); + } +@@ -3750,7 +3757,7 @@ socket_listen(rpc_transport_t *this) + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "NBIO on socket %d failed " + "(errno:%s); closing socket", +@@ -3763,7 +3770,7 @@ socket_listen(rpc_transport_t *this) + + ret = __socket_server_bind(this); + +- if ((ret == -EADDRINUSE) || (ret == -1)) { ++ if (ret < 0) { + /* logged inside __socket_server_bind() */ + gf_log(this->name, GF_LOG_ERROR, + "__socket_server_bind failed;" +@@ -3779,7 +3786,7 @@ socket_listen(rpc_transport_t *this) + + ret = listen(priv->sock, priv->backlog); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "could not set socket %d to listen mode (errno:%s); " + "closing socket", +@@ -4025,7 +4032,7 @@ reconfigure(rpc_transport_t *this, dict_t *options) + priv = this->private; + + if (dict_get_str(options, "transport.socket.keepalive", &optstr) == 0) { +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.keepalive' takes only " + "boolean options, not taking any action"); +@@ -4094,7 +4101,7 @@ reconfigure(rpc_transport_t *this, dict_t *options) + if (dict_get(options, "non-blocking-io")) { + optstr = data_to_str(dict_get(options, "non-blocking-io")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'non-blocking-io' takes only boolean options," + " not taking any action"); +@@ -4109,7 +4116,7 @@ 
reconfigure(rpc_transport_t *this, dict_t *options) + + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, "NBIO on %d failed (%s)", + priv->sock, strerror(errno)); + goto out; +@@ -4508,7 +4515,7 @@ socket_init(rpc_transport_t *this) + if (dict_get(this->options, "non-blocking-io")) { + optstr = data_to_str(dict_get(this->options, "non-blocking-io")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'non-blocking-io' takes only boolean options," + " not taking any action"); +@@ -4528,7 +4535,7 @@ socket_init(rpc_transport_t *this) + optstr = data_to_str( + dict_get(this->options, "transport.socket.nodelay")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.nodelay' takes only " + "boolean options, not taking any action"); +@@ -4559,7 +4566,7 @@ socket_init(rpc_transport_t *this) + priv->keepalivecnt = GF_KEEPALIVE_COUNT; + if (dict_get_str(this->options, "transport.socket.keepalive", &optstr) == + 0) { +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.keepalive' takes only " + "boolean options, not taking any action"); +@@ -4609,7 +4616,7 @@ socket_init(rpc_transport_t *this) + if (dict_get(this->options, "transport.socket.read-fail-log")) { + optstr = data_to_str( + dict_get(this->options, "transport.socket.read-fail-log")); +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_WARNING, + "'transport.socket.read-fail-log' takes only " + "boolean options; logging socket read fails"); +@@ -4646,7 +4653,7 @@ fini(rpc_transport_t *this) + + priv = this->private; + if (priv) { +- if (priv->sock 
!= -1) { ++ if (priv->sock >= 0) { + pthread_mutex_lock(&priv->out_lock); + { + __socket_ioq_flush(this); +@@ -4683,7 +4690,7 @@ init(rpc_transport_t *this) + + ret = socket_init(this); + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_DEBUG, "socket_init() failed"); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch b/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch new file mode 100644 index 0000000..028a227 --- /dev/null +++ b/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch @@ -0,0 +1,120 @@ +From eb37a3b57415d2d4206ecdd2db10530366a0d1b1 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Fri, 13 Dec 2019 15:20:27 +0530 +Subject: [PATCH 342/344] Revert "hooks: remove selinux hooks" + +This reverts commit 421743b7cfa6a249544f6abb4cca5a612bd20ea1. + +Note:- We are not bringing back features.selinux but just the hooks for + setting SELinux context on bricks + +Label: DOWNSTREAM ONLY + +Change-Id: Iccc10428361cac59b294e1d7aa1ba8187c20029e +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/187691 +Tested-by: RHGS Build Bot +Reviewed-by: Niels de Vos +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 4 ++++ + extras/hook-scripts/Makefile.am | 2 +- + extras/hook-scripts/create/Makefile.am | 1 + + extras/hook-scripts/create/post/Makefile.am | 6 ++++++ + extras/hook-scripts/delete/Makefile.am | 1 + + extras/hook-scripts/delete/pre/Makefile.am | 6 ++++++ + glusterfs.spec.in | 2 ++ + 7 files changed, 21 insertions(+), 1 deletion(-) + create mode 100644 extras/hook-scripts/create/Makefile.am + create mode 100644 extras/hook-scripts/create/post/Makefile.am + create mode 100644 extras/hook-scripts/delete/Makefile.am + create mode 100644 extras/hook-scripts/delete/pre/Makefile.am + +diff --git a/configure.ac b/configure.ac +index 327733e..98ee311 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -221,6 +221,10 @@ AC_CONFIG_FILES([Makefile + 
extras/hook-scripts/add-brick/Makefile + extras/hook-scripts/add-brick/pre/Makefile + extras/hook-scripts/add-brick/post/Makefile ++ extras/hook-scripts/create/Makefile ++ extras/hook-scripts/create/post/Makefile ++ extras/hook-scripts/delete/Makefile ++ extras/hook-scripts/delete/pre/Makefile + extras/hook-scripts/start/Makefile + extras/hook-scripts/start/post/Makefile + extras/hook-scripts/set/Makefile +diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am +index 771b37e..26059d7 100644 +--- a/extras/hook-scripts/Makefile.am ++++ b/extras/hook-scripts/Makefile.am +@@ -1,5 +1,5 @@ + EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh +-SUBDIRS = add-brick set start stop reset ++SUBDIRS = add-brick create delete set start stop reset + + scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/ + if USE_GEOREP +diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am +new file mode 100644 +index 0000000..b083a91 +--- /dev/null ++++ b/extras/hook-scripts/create/Makefile.am +@@ -0,0 +1 @@ ++SUBDIRS = post +diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am +new file mode 100644 +index 0000000..919801a +--- /dev/null ++++ b/extras/hook-scripts/create/post/Makefile.am +@@ -0,0 +1,6 @@ ++EXTRA_DIST = S10selinux-label-brick.sh ++ ++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/ ++if WITH_SERVER ++scripts_SCRIPTS = S10selinux-label-brick.sh ++endif +diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am +new file mode 100644 +index 0000000..c98a05d +--- /dev/null ++++ b/extras/hook-scripts/delete/Makefile.am +@@ -0,0 +1 @@ ++SUBDIRS = pre +diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am +new file mode 100644 +index 0000000..93a6b85 +--- /dev/null ++++ b/extras/hook-scripts/delete/pre/Makefile.am +@@ -0,0 +1,6 @@ ++EXTRA_DIST = 
S10selinux-del-fcontext.sh ++ ++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/ ++if WITH_SERVER ++scripts_SCRIPTS = S10selinux-del-fcontext.sh ++endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 1b975b2..012989a 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1453,6 +1453,7 @@ exit 0 + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +@@ -1461,6 +1462,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +-- +1.8.3.1 + diff --git a/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch b/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch new file mode 100644 index 0000000..77d2f64 --- /dev/null +++ b/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch @@ -0,0 +1,155 @@ +From 8a8c508b529f7609fc5caa10bc79ba817f5d274a Mon Sep 17 00:00:00 2001 +From: Milan Zink +Date: Mon, 5 Feb 2018 15:04:37 +0100 +Subject: [PATCH 343/344] extras/hooks: syntactical errors in SELinux 
hooks, + scipt logic improved + +Backport of https://review.gluster.org/c/glusterfs/+/19502 + +Change-Id: Ia5fa1df81bbaec3a84653d136a331c76b457f42c +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/187692 +Tested-by: RHGS Build Bot +Reviewed-by: Niels de Vos +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../create/post/S10selinux-label-brick.sh | 13 +++-- + .../delete/pre/S10selinux-del-fcontext.sh | 60 +++++++++++++--------- + tests/bugs/glusterfs-server/bug-877992.t | 4 +- + 3 files changed, 46 insertions(+), 31 deletions(-) + +diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +index de242d2..f9b4b1a 100755 +--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh ++++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +@@ -34,18 +34,21 @@ parse_args () { + + set_brick_labels() + { +- volname=${1} ++ volname="${1}" + + # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/*" +- brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u) ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) + + for b in ${brickdirs}; do + # Add a file context for each brick path and associate with the + # glusterd_brick_t SELinux type. +- pattern="${b}\(/.*\)?" ++ pattern="${b}(/.*)?" + semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}" +- + # Set the labels on the new brick path. 
+ restorecon -R "${b}" + done +diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +index 6eba66f..e7f4e8f 100755 +--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh ++++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +@@ -15,45 +15,55 @@ OPTSPEC="volname:" + VOL= + + function parse_args () { +- ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@") +- eval set -- "$ARGS" +- +- while true; do +- case $1 in +- --volname) +- shift +- VOL=$1 +- ;; +- *) +- shift +- break +- ;; +- esac ++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") ++ eval set -- "${ARGS}" ++ ++ while true; do ++ case ${1} in ++ --volname) ++ shift ++ VOL=${1} ++ ;; ++ *) + shift +- done ++ break ++ ;; ++ esac ++ shift ++ done + } + + function delete_brick_fcontext() + { +- volname=$1 ++ volname="${1}" ++ ++ # grab the path for each local brick ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) ++ ++ for b in ${brickdirs} ++ do ++ # remove the file context associated with the brick path ++ pattern="${b}(/.*)?" ++ semanage fcontext --delete "${pattern}" + +- # grab the path for each local brick +- brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2) ++ # remove the labels on brick path. ++ restorecon -R "${b}" ++ done + +- for b in $brickdirs +- do +- # remove the file context associated with the brick path +- semanage fcontext --delete $b\(/.*\)? 
+- done + } + + SELINUX_STATE=$(which getenforce && getenforce) + [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 + + parse_args "$@" +-[ -z "$VOL" ] && exit 1 ++[ -z "${VOL}" ] && exit 1 + +-delete_brick_fcontext $VOL ++delete_brick_fcontext "${VOL}" + + # failure to delete the fcontext is not fatal + exit 0 +diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t +index aeb73ed..300000b 100755 +--- a/tests/bugs/glusterfs-server/bug-877992.t ++++ b/tests/bugs/glusterfs-server/bug-877992.t +@@ -46,7 +46,9 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1; + EXPECT "$V0" volinfo_field $V0 'Volume Name'; + EXPECT 'Created' volinfo_field $V0 'Status'; + EXPECT 'createPre' cat /tmp/pre.out; +-EXPECT 'createPost' cat /tmp/post.out; ++# Spost.sh comes after S10selinux-label-brick.sh under create post hook script ++# list. So consider the delay in setting SELinux context on bricks ++EXPECT_WITHIN 5 'createPost' cat /tmp/post.out; + hooks_cleanup 'create' + + +-- +1.8.3.1 + diff --git a/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch b/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch new file mode 100644 index 0000000..341aeae --- /dev/null +++ b/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch @@ -0,0 +1,412 @@ +From 02a93265fe4e78e7fc3fa8c6caa773cbe02f50b6 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Fri, 20 Dec 2019 16:01:59 +0530 +Subject: [PATCH 344/344] Revert all fixes to include SELinux hook scripts + +Following are the reverts included with this change: + +Revert "extras/hooks: syntactical errors in SELinux hooks, scipt logic improved" +Revert "Revert "hooks: remove selinux hooks"" +Revert "tests: subdir-mount.t is failing for brick_mux regrssion" +Revert "extras/hooks: Install and package newly added post add-brick hook script" +Revert "extras/hooks: Add SELinux label on new bricks during add-brick" + +Label: DOWNSTREAM ONLY + +See bug for more details. 
+ +Change-Id: I5c9b9e0e6446568ce16af17257fa39338198a827 +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/188169 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + configure.ac | 4 - + extras/hook-scripts/Makefile.am | 2 +- + extras/hook-scripts/add-brick/post/Makefile.am | 4 +- + .../add-brick/post/S10selinux-label-brick.sh | 100 --------------------- + extras/hook-scripts/create/Makefile.am | 1 - + extras/hook-scripts/create/post/Makefile.am | 6 -- + .../create/post/S10selinux-label-brick.sh | 13 ++- + extras/hook-scripts/delete/Makefile.am | 1 - + extras/hook-scripts/delete/pre/Makefile.am | 6 -- + .../delete/pre/S10selinux-del-fcontext.sh | 60 ++++++------- + glusterfs.spec.in | 3 - + tests/bugs/glusterfs-server/bug-877992.t | 4 +- + tests/features/subdir-mount.t | 11 +-- + 13 files changed, 37 insertions(+), 178 deletions(-) + delete mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh + delete mode 100644 extras/hook-scripts/create/Makefile.am + delete mode 100644 extras/hook-scripts/create/post/Makefile.am + delete mode 100644 extras/hook-scripts/delete/Makefile.am + delete mode 100644 extras/hook-scripts/delete/pre/Makefile.am + +diff --git a/configure.ac b/configure.ac +index 98ee311..327733e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -221,10 +221,6 @@ AC_CONFIG_FILES([Makefile + extras/hook-scripts/add-brick/Makefile + extras/hook-scripts/add-brick/pre/Makefile + extras/hook-scripts/add-brick/post/Makefile +- extras/hook-scripts/create/Makefile +- extras/hook-scripts/create/post/Makefile +- extras/hook-scripts/delete/Makefile +- extras/hook-scripts/delete/pre/Makefile + extras/hook-scripts/start/Makefile + extras/hook-scripts/start/post/Makefile + extras/hook-scripts/set/Makefile +diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am +index 26059d7..771b37e 100644 +--- a/extras/hook-scripts/Makefile.am ++++ 
b/extras/hook-scripts/Makefile.am +@@ -1,5 +1,5 @@ + EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh +-SUBDIRS = add-brick create delete set start stop reset ++SUBDIRS = add-brick set start stop reset + + scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/ + if USE_GEOREP +diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am +index 9b236df..bfc0c1c 100644 +--- a/extras/hook-scripts/add-brick/post/Makefile.am ++++ b/extras/hook-scripts/add-brick/post/Makefile.am +@@ -1,6 +1,6 @@ +-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh ++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/ + if WITH_SERVER +-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh ++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh + endif +diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +deleted file mode 100755 +index 4a17c99..0000000 +--- a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh ++++ /dev/null +@@ -1,100 +0,0 @@ +-#!/bin/bash +-# +-# Install to hooks//add-brick/post +-# +-# Add an SELinux file context for each brick using the glusterd_brick_t type. +-# This ensures that the brick is relabeled correctly on an SELinux restart or +-# restore. Subsequently, run a restore on the brick path to set the selinux +-# labels. 
+-# +-### +- +-PROGNAME="Sselinux" +-OPTSPEC="volname:,version:,gd-workdir:,volume-op:" +-VOL= +- +-parse_args () { +- ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") +- eval set -- "${ARGS}" +- +- while true; do +- case ${1} in +- --volname) +- shift +- VOL=${1} +- ;; +- --gd-workdir) +- shift +- GLUSTERD_WORKDIR=$1 +- ;; +- --version) +- shift +- ;; +- --volume-op) +- shift +- ;; +- *) +- shift +- break +- ;; +- esac +- shift +- done +-} +- +-set_brick_labels() +-{ +- local volname="${1}" +- local fctx +- local list=() +- +- fctx="$(semanage fcontext --list -C)" +- +- # wait for new brick path to be updated under +- # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/ +- sleep 5 +- +- # grab the path for each local brick +- brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) +- +- # create a list of bricks for which custom SELinux +- # label doesn't exist +- for b in ${brickdirs}; do +- pattern="${b}(/.*)?" +- echo "${fctx}" | grep "^${pattern}\s" >/dev/null +- if [[ $? -ne 0 ]]; then +- list+=("${pattern}") +- fi +- done +- +- # Add a file context for each brick path in the list and associate with the +- # glusterd_brick_t SELinux type. +- for p in ${list[@]} +- do +- semanage fcontext --add -t glusterd_brick_t -r s0 "${p}" +- done +- +- # Set the labels for which SELinux label was added above +- for b in ${brickdirs} +- do +- echo "${list[@]}" | grep "${b}" >/dev/null +- if [[ $? 
-eq 0 ]]; then +- restorecon -R "${b}" +- fi +- done +-} +- +-SELINUX_STATE=$(which getenforce && getenforce) +-[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 +- +-parse_args "$@" +-[ -z "${VOL}" ] && exit 1 +- +-set_brick_labels "${VOL}" +- +-exit 0 +diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am +deleted file mode 100644 +index b083a91..0000000 +--- a/extras/hook-scripts/create/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = post +diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am +deleted file mode 100644 +index 919801a..0000000 +--- a/extras/hook-scripts/create/post/Makefile.am ++++ /dev/null +@@ -1,6 +0,0 @@ +-EXTRA_DIST = S10selinux-label-brick.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/ +-if WITH_SERVER +-scripts_SCRIPTS = S10selinux-label-brick.sh +-endif +diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +index f9b4b1a..de242d2 100755 +--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh ++++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +@@ -34,21 +34,18 @@ parse_args () { + + set_brick_labels() + { +- volname="${1}" ++ volname=${1} + + # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/*" ++ brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u) + + for b in ${brickdirs}; do + # Add a file context for each brick path and associate with the + # glusterd_brick_t SELinux type. +- pattern="${b}(/.*)?" ++ pattern="${b}\(/.*\)?" + semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}" ++ + # Set the labels on the new brick path. 
+ restorecon -R "${b}" + done +diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am +deleted file mode 100644 +index c98a05d..0000000 +--- a/extras/hook-scripts/delete/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = pre +diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am +deleted file mode 100644 +index 93a6b85..0000000 +--- a/extras/hook-scripts/delete/pre/Makefile.am ++++ /dev/null +@@ -1,6 +0,0 @@ +-EXTRA_DIST = S10selinux-del-fcontext.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/ +-if WITH_SERVER +-scripts_SCRIPTS = S10selinux-del-fcontext.sh +-endif +diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +index e7f4e8f..6eba66f 100755 +--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh ++++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +@@ -15,55 +15,45 @@ OPTSPEC="volname:" + VOL= + + function parse_args () { +- ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") +- eval set -- "${ARGS}" +- +- while true; do +- case ${1} in +- --volname) +- shift +- VOL=${1} +- ;; +- *) ++ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@") ++ eval set -- "$ARGS" ++ ++ while true; do ++ case $1 in ++ --volname) ++ shift ++ VOL=$1 ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac + shift +- break +- ;; +- esac +- shift +- done ++ done + } + + function delete_brick_fcontext() + { +- volname="${1}" +- +- # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) +- +- for b in ${brickdirs} +- do +- # remove the file context associated with the brick path +- pattern="${b}(/.*)?" +- semanage fcontext --delete "${pattern}" ++ volname=$1 + +- # remove the labels on brick path. 
+- restorecon -R "${b}" +- done ++ # grab the path for each local brick ++ brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2) + ++ for b in $brickdirs ++ do ++ # remove the file context associated with the brick path ++ semanage fcontext --delete $b\(/.*\)? ++ done + } + + SELINUX_STATE=$(which getenforce && getenforce) + [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 + + parse_args "$@" +-[ -z "${VOL}" ] && exit 1 ++[ -z "$VOL" ] && exit 1 + +-delete_brick_fcontext "${VOL}" ++delete_brick_fcontext $VOL + + # failure to delete the fcontext is not fatal + exit 0 +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 012989a..671ee27 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1447,13 +1447,11 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +@@ -1462,7 +1460,6 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + 
%{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t +index 300000b..aeb73ed 100755 +--- a/tests/bugs/glusterfs-server/bug-877992.t ++++ b/tests/bugs/glusterfs-server/bug-877992.t +@@ -46,9 +46,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1; + EXPECT "$V0" volinfo_field $V0 'Volume Name'; + EXPECT 'Created' volinfo_field $V0 'Status'; + EXPECT 'createPre' cat /tmp/pre.out; +-# Spost.sh comes after S10selinux-label-brick.sh under create post hook script +-# list. So consider the delay in setting SELinux context on bricks +-EXPECT_WITHIN 5 'createPost' cat /tmp/post.out; ++EXPECT 'createPost' cat /tmp/post.out; + hooks_cleanup 'create' + + +diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t +index a02bd6b..8401946 100644 +--- a/tests/features/subdir-mount.t ++++ b/tests/features/subdir-mount.t +@@ -85,17 +85,12 @@ TEST $CLI volume start $V0 + TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2 + TEST stat $M2 + +-initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` + # mount shouldn't fail even after add-brick + TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6}; + +-# Wait to execute create-subdir-mounts.sh script by glusterd +-newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` +-while [ $newcnt -eq $initcnt ] +-do +- newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` +- sleep 1 +-done ++# Give time for 
client process to get notified and use the new ++# volfile after add-brick ++sleep 1 + + # Existing mount should still be active + mount_inode=$(stat --format "%i" "$M2") +-- +1.8.3.1 + diff --git a/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch b/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch new file mode 100644 index 0000000..48b0cc8 --- /dev/null +++ b/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch @@ -0,0 +1,82 @@ +From d45c64e17e1eb8003ac1086cbd3abea32414c7f9 Mon Sep 17 00:00:00 2001 +From: Raghavendra Gowdappa +Date: Tue, 12 Feb 2019 18:33:44 +0530 +Subject: [PATCH 345/346] read-ahead/io-cache: turn off by default + +We've found perf xlators io-cache and read-ahead not adding any +performance improvement. At best read-ahead is redundant due to kernel +read-ahead and at worst io-cache is degrading the performance for +workloads that doesn't involve re-read. Given that VFS already have +both these functionalities, this patch makes these two +translators turned off by default for native fuse mounts. + +For non-native fuse mounts like gfapi (NFS-ganesha/samba) we can have +these xlators on by having custom profiles. 
+ +>Change-Id: Ie7535788909d4c741844473696f001274dc0bb60 +>Signed-off-by: Raghavendra Gowdappa +>fixes: bz#1676479 +Upstream fix link: https://review.gluster.org/#/c/glusterfs/+/22203/ + +BUG: 1788656 +Change-Id: Ie7535788909d4c741844473696f001274dc0bb60 +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/188967 +Tested-by: RHGS Build Bot +--- + tests/basic/ec/self-heal.t | 2 ++ + tests/basic/glusterd/volfile_server_switch.t | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 ++-- + 3 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t +index d217559..6329bb6 100644 +--- a/tests/basic/ec/self-heal.t ++++ b/tests/basic/ec/self-heal.t +@@ -131,6 +131,8 @@ TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5} + TEST $CLI volume set $V0 client-log-level DEBUG + #Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp' + TEST $CLI volume set $V0 performance.write-behind off ++#md-cache can return stale stat due to default timeout being 1 sec ++TEST $CLI volume set $V0 performance.stat-prefetch off + EXPECT "Created" volinfo_field $V0 'Status' + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status' +diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t +index 3090609..e11cfed 100644 +--- a/tests/basic/glusterd/volfile_server_switch.t ++++ b/tests/basic/glusterd/volfile_server_switch.t +@@ -34,7 +34,7 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --vol + + TEST kill_glusterd 1 + +-TEST $CLI_2 volume set $V0 performance.io-cache off ++TEST $CLI_2 volume set $V0 performance.write-behind off + + # make sure by this time directory will be created + # TODO: suggest ideal time to wait +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c 
b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 16601a2..9001b88 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2235,7 +2235,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.read-ahead", + .voltype = "performance/read-ahead", + .option = "!perf", +- .value = "on", ++ .value = "off", + .op_version = 1, + .description = "enable/disable read-ahead translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, +@@ -2249,7 +2249,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.io-cache", + .voltype = "performance/io-cache", + .option = "!perf", +- .value = "on", ++ .value = "off", + .op_version = 1, + .description = "enable/disable io-cache translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT}, +-- +1.8.3.1 + diff --git a/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch b/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch new file mode 100644 index 0000000..9fca79e --- /dev/null +++ b/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch @@ -0,0 +1,223 @@ +From e2af9793014ad67859aa73088765a52307cbe466 Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Tue, 7 Jan 2020 19:43:05 +0100 +Subject: [PATCH 346/346] fuse: degrade logging of write failure to fuse device + +Problem: + +FUSE uses failures of communicating with /dev/fuse with various +errnos to indicate in-kernel conditions to userspace. Some of these +shouldn't be handled as an application error. Also the standard +POSIX errno description should not be shown as they are misleading +in this context. + +Solution: + +When writing to the fuse device, the caller of the respective +convenience routine can mask those errnos which don't qualify to +be an error for the application in that context, so then those +shall be reported at DEBUG level. 
+ +The possible non-standard errnos are reported with their +POSIX name instead of their description to avoid confusion. +(Eg. for ENOENT we don't log "no such file or directory", +we log indeed literal "ENOENT".) + +Upstream on https://review.gluster.org/23974 +> Change-Id: I510158843e4b1d482bdc496c2e97b1860dc1ba93 +> updates: bz#1193929 +> Signed-off-by: Csaba Henk + +BUG: 1763208 +Change-Id: Ib1676bb334ed153ce74ae1c0413fc0e58fb388c7 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/189056 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mount/fuse/src/fuse-bridge.c | 78 +++++++++++++++++++++++++++++++++--- + xlators/mount/fuse/src/fuse-bridge.h | 9 ++++- + 2 files changed, 80 insertions(+), 7 deletions(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index ebe5c28..6e99053 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -198,7 +198,7 @@ fusedump_setup_meta(struct iovec *iovs, char *dir, + + static int + check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, +- ssize_t res) ++ ssize_t res, errnomask_t errnomask) + { + char w = 'W'; + struct iovec diov[4] = { +@@ -216,8 +216,59 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + struct fuse_out_header *fouh = NULL; + + if (res == -1) { +- gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR, +- "writing to fuse device failed: %s", strerror(errno)); ++ const char *errdesc = NULL; ++ gf_loglevel_t loglevel = GF_LOG_ERROR; ++ ++ /* If caller masked the errno, then it ++ * does not indicate an error at the application ++ * level, so we degrade the log severity to DEBUG. 
++ */ ++ if (errnomask && errno < ERRNOMASK_MAX && ++ GET_ERRNO_MASK(errnomask, errno)) ++ loglevel = GF_LOG_DEBUG; ++ ++ switch (errno) { ++ /* The listed errnos are FUSE status indicators, ++ * not legit values according to POSIX (see write(3p)), ++ * so resolving them according to the standard ++ * POSIX interpretation would be misleading. ++ */ ++ case ENOENT: ++ errdesc = "ENOENT"; ++ break; ++ case ENOTDIR: ++ errdesc = "ENOTDIR"; ++ break; ++ case ENODEV: ++ errdesc = "ENODEV"; ++ break; ++ case EPERM: ++ errdesc = "EPERM"; ++ break; ++ case ENOMEM: ++ errdesc = "ENOMEM"; ++ break; ++ case ENOTCONN: ++ errdesc = "ENOTCONN"; ++ break; ++ case ECONNREFUSED: ++ errdesc = "ECONNREFUSED"; ++ break; ++ case EOVERFLOW: ++ errdesc = "EOVERFLOW"; ++ break; ++ case EBUSY: ++ errdesc = "EBUSY"; ++ break; ++ case ENOTEMPTY: ++ errdesc = "ENOTEMPTY"; ++ break; ++ default: ++ errdesc = strerror(errno); ++ } ++ ++ gf_log_callingfn("glusterfs-fuse", loglevel, ++ "writing to fuse device failed: %s", errdesc); + return errno; + } + +@@ -282,7 +333,7 @@ send_fuse_iov(xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out, + gf_log("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s", res, + fouh->len, res == -1 ? 
strerror(errno) : ""); + +- return check_and_dump_fuse_W(priv, iov_out, count, res); ++ return check_and_dump_fuse_W(priv, iov_out, count, res, NULL); + } + + static int +@@ -353,6 +404,15 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_ENTRY; + ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOENT); ++ if (ENOTDIR < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOTDIR); ++ if (EBUSY < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, EBUSY); ++ if (ENOTEMPTY < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOTEMPTY); ++ + if (dentry->name) { + nlen = strlen(dentry->name); + fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1; +@@ -437,6 +497,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + fniio->off = 0; + fniio->len = -1; + ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOENT); ++ + fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, + uuid_utoa(inode->gfid)); + gf_log("glusterfs-fuse", GF_LOG_TRACE, +@@ -482,6 +545,7 @@ fuse_timed_message_new(void) + /* should be NULL if not set */ + dmsg->fuse_message_body = NULL; + INIT_LIST_HEAD(&dmsg->next); ++ memset(dmsg->errnomask, 0, sizeof(dmsg->errnomask)); + + return dmsg; + } +@@ -680,6 +744,8 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg, + dmsg->fuse_out_header.unique = finh->unique; + dmsg->fuse_out_header.len = sizeof(dmsg->fuse_out_header); + dmsg->fuse_out_header.error = -EAGAIN; ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(dmsg->errnomask, ENOENT); + timespec_now(&dmsg->scheduled_ts); + timespec_adjust_delta(&dmsg->scheduled_ts, + (struct timespec){0, 10000000}); +@@ -4848,7 +4914,7 @@ notify_kernel_loop(void *data) + iov_out.iov_base = node->inval_buf; + iov_out.iov_len = len; + rv = sys_writev(priv->fd, &iov_out, 1); +- check_and_dump_fuse_W(priv, &iov_out, 1, rv); ++ check_and_dump_fuse_W(priv, &iov_out, 1, rv, node->errnomask); + + GF_FREE(node); + +@@ 
-4940,7 +5006,7 @@ timed_response_loop(void *data) + iovs[1] = (struct iovec){dmsg->fuse_message_body, + len - sizeof(struct fuse_out_header)}; + rv = sys_writev(priv->fd, iovs, 2); +- check_and_dump_fuse_W(priv, iovs, 2, rv); ++ check_and_dump_fuse_W(priv, iovs, 2, rv, dmsg->errnomask); + + fuse_timed_message_free(dmsg); + +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index cf4479c..d2d462c 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -195,14 +195,20 @@ struct fuse_private { + }; + typedef struct fuse_private fuse_private_t; + ++typedef uint64_t errnomask_t[2]; ++#define MASK_ERRNO(mask, n) ((mask)[(n) >> 6] |= ((uint64_t)1 << ((n)&63))) ++#define GET_ERRNO_MASK(mask, n) ((mask)[(n) >> 6] & ((uint64_t)1 << ((n)&63))) ++#define ERRNOMASK_MAX (64 * (sizeof(errnomask_t) / sizeof(uint64_t))) ++ + #define INVAL_BUF_SIZE \ + (sizeof(struct fuse_out_header) + \ + max(sizeof(struct fuse_notify_inval_inode_out), \ + sizeof(struct fuse_notify_inval_entry_out) + NAME_MAX + 1)) + + struct fuse_invalidate_node { +- char inval_buf[INVAL_BUF_SIZE]; ++ errnomask_t errnomask; + struct list_head next; ++ char inval_buf[INVAL_BUF_SIZE]; + }; + typedef struct fuse_invalidate_node fuse_invalidate_node_t; + +@@ -210,6 +216,7 @@ struct fuse_timed_message { + struct fuse_out_header fuse_out_header; + void *fuse_message_body; + struct timespec scheduled_ts; ++ errnomask_t errnomask; + struct list_head next; + }; + typedef struct fuse_timed_message fuse_timed_message_t; +-- +1.8.3.1 + diff --git a/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch b/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch new file mode 100644 index 0000000..ff5251d --- /dev/null +++ b/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch @@ -0,0 +1,236 @@ +From 87e6ea2cd63898c5d243b0f0c719f4f6347fb829 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Thu, 5 Jan 2017 19:53:19 +0530 
+Subject: [PATCH 347/349] tools/glusterfind: handle offline bricks + +Problem: +glusterfind is unable to copy remote output file to local node when a +remove-brick is in progress on the remote node. After copying remote +files, in the --full output listing path, a "sort -u" command is run on +the collected files. However, "sort" exits with an error code if it +finds any file missing. + +Solution: +Maintain a map of (pid, output file) when the node commands are started +and remove the mapping for the pid for which the command returns an +error. Use the list of files present in the map for the "sort" command. + +Backport of: +> Patch: https://review.gluster.org/16332 +> Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b +> fixes: bz#1410439 +> Signed-off-by: Milind Changire +> Signed-off-by: Shwetha K Acharya + +BUG: 1789447 +Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/189214 +Tested-by: RHGS Build Bot +Reviewed-by: Sunny Kumar +--- + tools/glusterfind/src/gfind_py2py3.py | 25 ++++++++++++++ + tools/glusterfind/src/main.py | 61 +++++++++++++++++++++-------------- + 2 files changed, 61 insertions(+), 25 deletions(-) + +diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py +index 1d41ec5..87324fb 100644 +--- a/tools/glusterfind/src/gfind_py2py3.py ++++ b/tools/glusterfind/src/gfind_py2py3.py +@@ -40,6 +40,19 @@ if sys.version_info >= (3,): + def gfind_history_changelog_done(libgfc, clfile): + return libgfc.gf_history_changelog_done(clfile.encode()) + ++ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep): ++ f.write(u"{0}{1}{2}{3}{4}\n".format(row, ++ field_separator, ++ p_rep, ++ field_separator, ++ row_2_rep)) ++ ++ def gfind_write(f, row, field_separator, p_rep): ++ f.write(u"{0}{1}{2}\n".format(row, ++ field_separator, ++ p_rep)) ++ ++ + else: + + # Raw conversion of bytearray to string +@@ -61,3 +74,15 @@ else: + + def 
gfind_history_changelog_done(libgfc, clfile): + return libgfc.gf_history_changelog_done(clfile) ++ ++ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep): ++ f.write(u"{0}{1}{2}{3}{4}\n".format(row, ++ field_separator, ++ p_rep, ++ field_separator, ++ row_2_rep).encode()) ++ ++ def gfind_write(f, row, field_separator, p_rep): ++ f.write(u"{0}{1}{2}\n".format(row, ++ field_separator, ++ p_rep).encode()) +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index cc5a86f..fefe4a3 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -16,6 +16,7 @@ from multiprocessing import Process + import os + import xml.etree.cElementTree as etree + from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action ++from gfind_py2py3 import gfind_write_row, gfind_write + import logging + import shutil + import tempfile +@@ -35,9 +36,9 @@ GlusterFS Incremental API + ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError + + logger = logging.getLogger() +-node_outfiles = [] + vol_statusStr = "" + gtmpfilename = None ++g_pid_nodefile_map = {} + + + class StoreAbsPath(Action): +@@ -111,7 +112,7 @@ def node_cmd(host, host_uuid, task, cmd, args, opts): + + + def run_cmd_nodes(task, args, **kwargs): +- global node_outfiles ++ global g_pid_nodefile_map + nodes = get_nodes(args.volume) + pool = [] + for num, node in enumerate(nodes): +@@ -142,7 +143,6 @@ def run_cmd_nodes(task, args, **kwargs): + if tag == "": + tag = '""' if not is_host_local(host_uuid) else "" + +- node_outfiles.append(node_outfile) + # remote file will be copied into this directory + mkdirp(os.path.dirname(node_outfile), + exit_on_err=True, logger=logger) +@@ -180,7 +180,6 @@ def run_cmd_nodes(task, args, **kwargs): + if tag == "": + tag = '""' if not is_host_local(host_uuid) else "" + +- node_outfiles.append(node_outfile) + # remote file will be copied into this directory + mkdirp(os.path.dirname(node_outfile), + 
exit_on_err=True, logger=logger) +@@ -264,6 +263,7 @@ def run_cmd_nodes(task, args, **kwargs): + args=(host, host_uuid, task, cmd, args, opts)) + p.start() + pool.append(p) ++ g_pid_nodefile_map[p.pid] = node_outfile + + for num, p in enumerate(pool): + p.join() +@@ -271,8 +271,11 @@ def run_cmd_nodes(task, args, **kwargs): + logger.warn("Command %s failed in %s" % (task, nodes[num][1])) + if task in ["create", "delete"]: + fail("Command %s failed in %s" % (task, nodes[num][1])) +- elif task == "pre" and args.disable_partial: +- sys.exit(1) ++ elif task == "pre" or task == "query": ++ if args.disable_partial: ++ sys.exit(1) ++ else: ++ del g_pid_nodefile_map[p.pid] + + + @cache_output +@@ -512,16 +515,10 @@ def write_output(outfile, outfilemerger, field_separator): + continue + + if row_2_rep and row_2_rep != "": +- f.write(u"{0}{1}{2}{3}{4}\n".format(row[0], +- field_separator, +- p_rep, +- field_separator, +- row_2_rep).encode()) +- else: +- f.write(u"{0}{1}{2}\n".format(row[0], +- field_separator, +- p_rep).encode()) ++ gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep) + ++ else: ++ gfind_write(f, row[0], field_separator, p_rep) + + def mode_create(session_dir, args): + logger.debug("Init is called - Session: %s, Volume: %s" +@@ -571,6 +568,7 @@ def mode_create(session_dir, args): + + def mode_query(session_dir, args): + global gtmpfilename ++ global g_pid_nodefile_map + + # Verify volume status + cmd = ["gluster", 'volume', 'info', args.volume, "--xml"] +@@ -634,14 +632,20 @@ def mode_query(session_dir, args): + + # Merger + if args.full: +- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] +- execute(cmd, +- exit_msg="Failed to merge output files " +- "collected from nodes", logger=logger) ++ if len(g_pid_nodefile_map) > 0: ++ cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ ["-o", args.outfile] ++ execute(cmd, ++ exit_msg="Failed to merge output files " ++ "collected from nodes", logger=logger) ++ else: ++ 
fail("Failed to collect any output files from peers. " ++ "Looks like all bricks are offline.", logger=logger) + else: + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) +- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) ++ outfilemerger = OutputMerger(args.outfile + ".db", ++ g_pid_nodefile_map.values()) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +@@ -656,6 +660,7 @@ def mode_query(session_dir, args): + + def mode_pre(session_dir, args): + global gtmpfilename ++ global g_pid_nodefile_map + + """ + Read from Session file and write to session.pre file +@@ -696,14 +701,20 @@ def mode_pre(session_dir, args): + + # Merger + if args.full: +- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] +- execute(cmd, +- exit_msg="Failed to merge output files " +- "collected from nodes", logger=logger) ++ if len(g_pid_nodefile_map) > 0: ++ cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ ["-o", args.outfile] ++ execute(cmd, ++ exit_msg="Failed to merge output files " ++ "collected from nodes", logger=logger) ++ else: ++ fail("Failed to collect any output files from peers. 
" ++ "Looks like all bricks are offline.", logger=logger) + else: + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) +- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) ++ outfilemerger = OutputMerger(args.outfile + ".db", ++ g_pid_nodefile_map.values()) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +-- +1.8.3.1 + diff --git a/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch b/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch new file mode 100644 index 0000000..e1f89f9 --- /dev/null +++ b/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch @@ -0,0 +1,113 @@ +From 1ca8a545833e0a6e674984245338b8675ddc58bc Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Fri, 10 Jan 2020 16:48:14 +0530 +Subject: [PATCH 348/349] glusterfind: Fix py2/py3 issues + +1. In dictionary values(), returns list in py2 and not in py3. + So explicitly convert it into list. +2. xattr module returns values in bytes. So explicitly convert + them to str to work both with py2 and py3 + +Backport of: + > Patch: https://review.gluster.org/23993 + > fixes: bz#1789439 + > Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94 + > Signed-off-by: Kotresh HR + +BUG: 1789447 +Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/189215 +Reviewed-by: Shwetha Acharya +Tested-by: RHGS Build Bot +Reviewed-by: Hari Gowtham Gopal +Reviewed-by: Sunny Kumar +--- + tools/glusterfind/src/changelog.py | 14 +++++++++----- + tools/glusterfind/src/main.py | 8 ++++---- + 2 files changed, 13 insertions(+), 9 deletions(-) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index d8f97e0..d972fb5 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -14,6 +14,7 @@ import sys + import time + import xattr + import logging ++from gfind_py2py3 import bytearray_to_str + 
from argparse import ArgumentParser, RawDescriptionHelpFormatter + import hashlib + try: +@@ -105,9 +106,10 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): + changelog_data.inodegfid_add(os.stat(p).st_ino, gfid) + file_xattrs = xattr.list(p) + for x in file_xattrs: +- if x.startswith("trusted.pgfid."): ++ x_str = bytearray_to_str(x) ++ if x_str.startswith("trusted.pgfid."): + # PGFID in pgfid table +- changelog_data.pgfid_add(x.split(".")[-1]) ++ changelog_data.pgfid_add(x_str.split(".")[-1]) + except (IOError, OSError): + # All OS Errors ignored, since failures will be logged + # in End. All GFIDs present in gfidpath table +@@ -122,10 +124,12 @@ def enum_hard_links_using_gfid2path(brick, gfid, args): + try: + file_xattrs = xattr.list(p) + for x in file_xattrs: +- if x.startswith("trusted.gfid2path."): ++ x_str = bytearray_to_str(x) ++ if x_str.startswith("trusted.gfid2path."): + # get the value for the xattr i.e. / +- v = xattr.getxattr(p, x) +- pgfid, bn = v.split(os.sep) ++ v = xattr.getxattr(p, x_str) ++ v_str = bytearray_to_str(v) ++ pgfid, bn = v_str.split(os.sep) + try: + path = symlink_gfid_to_path(brick, pgfid) + fullpath = os.path.join(path, bn) +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index fefe4a3..dfc9d07 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -633,7 +633,7 @@ def mode_query(session_dir, args): + # Merger + if args.full: + if len(g_pid_nodefile_map) > 0: +- cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \ + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " +@@ -645,7 +645,7 @@ def mode_query(session_dir, args): + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) + outfilemerger = OutputMerger(args.outfile + ".db", +- g_pid_nodefile_map.values()) ++ list(g_pid_nodefile_map.values())) + 
write_output(args.outfile, outfilemerger, args.field_separator) + + try: +@@ -702,7 +702,7 @@ def mode_pre(session_dir, args): + # Merger + if args.full: + if len(g_pid_nodefile_map) > 0: +- cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \ + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " +@@ -714,7 +714,7 @@ def mode_pre(session_dir, args): + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) + outfilemerger = OutputMerger(args.outfile + ".db", +- g_pid_nodefile_map.values()) ++ list(g_pid_nodefile_map.values())) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +-- +1.8.3.1 + diff --git a/SOURCES/0349-glusterfind-python3-compatibility.patch b/SOURCES/0349-glusterfind-python3-compatibility.patch new file mode 100644 index 0000000..7f1c274 --- /dev/null +++ b/SOURCES/0349-glusterfind-python3-compatibility.patch @@ -0,0 +1,56 @@ +From 1354a492cbc758f9801568153380ca896fab7765 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Fri, 10 Jan 2020 14:28:35 +0000 +Subject: [PATCH 349/349] glusterfind: python3 compatibility + +Problem: +While we delete gluster volume the hook script 'S57glusterfind-delete-post.py' +is failed to execute and error message can be observed in glusterd log. 
+ +Traceback: + File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 69, in + main() + File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 39, in main + glusterfind_dir = os.path.join(get_glusterd_workdir(), "glusterfind") + File "/usr/lib64/python3.7/posixpath.py", line 94, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.7/genericpath.py", line 155, in _check_arg_types + raise TypeError("Can't mix strings and bytes in path components") from None +TypeError: Can't mix strings and bytes in path components + +Solution: + +Added the 'universal_newlines' flag to Popen to support backward compatibility. + +Backport of: + > Patch: https://review.gluster.org/23994 + > Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f + > Fixes: bz#1789478 + > Signed-off-by: Sunny Kumar + +Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f +BUG: 1789447 +Signed-off-by: Sunny Kumar +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/189216 +Tested-by: RHGS Build Bot +--- + tools/glusterfind/S57glusterfind-delete-post.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py +index 5b5142d..5beece2 100755 +--- a/tools/glusterfind/S57glusterfind-delete-post.py ++++ b/tools/glusterfind/S57glusterfind-delete-post.py +@@ -18,7 +18,7 @@ def handle_rm_error(func, path, exc_info): + + def get_glusterd_workdir(): + p = Popen(["gluster", "system::", "getwd"], +- stdout=PIPE, stderr=PIPE) ++ stdout=PIPE, stderr=PIPE, universal_newlines=True) + + out, _ = p.communicate() + +-- +1.8.3.1 + diff --git a/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch b/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch new file mode 100644 index 0000000..08f70a7 --- /dev/null +++ b/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch @@ -0,0 +1,37 @@ +From 
6c06ac0571fb6bf0734b173cc3a75badc7554601 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Tue, 14 Jan 2020 10:51:06 +0530 +Subject: [PATCH 350/350] tools/glusterfind: Remove an extra argument + +Backport of: +> Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24011/ +> fixes: bz#1790748 +> Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1 +> Signed-off-by: Shwetha K Acharya + +Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1 +BUG: 1789447 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/189363 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tools/glusterfind/src/main.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index dfc9d07..5ca1fec 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -515,7 +515,7 @@ def write_output(outfile, outfilemerger, field_separator): + continue + + if row_2_rep and row_2_rep != "": +- gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep) ++ gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep) + + else: + gfind_write(f, row[0], field_separator, p_rep) +-- +1.8.3.1 + diff --git a/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch b/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch new file mode 100644 index 0000000..51dc3bb --- /dev/null +++ b/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch @@ -0,0 +1,131 @@ +From f38f0988eb6c0d72677abceba5ebeb51ea8d44ad Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 21 Jan 2020 21:09:56 +0530 +Subject: [PATCH 351/351] server: Mount fails after reboot 1/3 gluster nodes + +Problem: At the time of coming up one server node(1x3) after reboot +client is unmounted.The client is unmounted because a client +is getting AUTH_FAILED event and client call fini for the graph.The +client is getting 
AUTH_FAILED because brick is not attached with a +graph at that moment + +Solution: To avoid the unmounting the client graph throw ENOENT error + from server in case if brick is not attached with server at + the time of authenticate clients. + +> Credits: Xavi Hernandez +> Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e +> Fixes: bz#1793852 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit e4f776308d5ee7ffeb07de0fd9e1edae6944030d) +> (Reviewd on upstream link https://review.gluster.org/#/c/glusterfs/+/24053/) + +Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e +BUG: 1793035 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/190042 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/protocol/bug-1433815-auth-allow.t | 1 + + xlators/protocol/client/src/client-handshake.c | 3 +- + xlators/protocol/server/src/server-handshake.c | 41 +++++++++++++++++--------- + 3 files changed, 29 insertions(+), 16 deletions(-) + +diff --git a/tests/bugs/protocol/bug-1433815-auth-allow.t b/tests/bugs/protocol/bug-1433815-auth-allow.t +index fa22ad8..a78c0eb 100644 +--- a/tests/bugs/protocol/bug-1433815-auth-allow.t ++++ b/tests/bugs/protocol/bug-1433815-auth-allow.t +@@ -17,6 +17,7 @@ TEST $CLI volume create $V0 $H0:$B0/$V0 + # Set auth.allow so it *doesn't* include ourselves. + TEST $CLI volume set $V0 auth.allow 1.2.3.4 + TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + + # "System getspec" will include the username and password if the request comes + # from a server (which we are). 
Unfortunately, this will cause authentication +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index c43756a..0002361 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -1031,8 +1031,7 @@ client_setvolume_cbk(struct rpc_req *req, struct iovec *iov, int count, + "SETVOLUME on remote-host failed: %s", remote_error); + + errno = op_errno; +- if (remote_error && +- (strcmp("Authentication failed", remote_error) == 0)) { ++ if (remote_error && (op_errno == EACCES)) { + auth_fail = _gf_true; + op_ret = 0; + } +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index 382f241..1d1177d 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ b/xlators/protocol/server/src/server-handshake.c +@@ -250,6 +250,7 @@ server_setvolume(rpcsvc_request_t *req) + char *subdir_mount = NULL; + char *client_name = NULL; + gf_boolean_t cleanup_starting = _gf_false; ++ gf_boolean_t xlator_in_graph = _gf_true; + + params = dict_new(); + reply = dict_new(); +@@ -311,8 +312,10 @@ server_setvolume(rpcsvc_request_t *req) + LOCK(&ctx->volfile_lock); + { + xl = get_xlator_by_name(this, name); +- if (!xl) ++ if (!xl) { ++ xlator_in_graph = _gf_false; + xl = this; ++ } + } + UNLOCK(&ctx->volfile_lock); + if (xl == NULL) { +@@ -568,20 +571,30 @@ server_setvolume(rpcsvc_request_t *req) + "failed to set error " + "msg"); + } else { +- gf_event(EVENT_CLIENT_AUTH_REJECT, +- "client_uid=%s;" +- "client_identifier=%s;server_identifier=%s;" +- "brick_path=%s", +- client->client_uid, req->trans->peerinfo.identifier, +- req->trans->myinfo.identifier, name); +- gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR, +- "Cannot authenticate client" +- " from %s %s", +- client->client_uid, (clnt_version) ? 
clnt_version : "old"); +- + op_ret = -1; +- op_errno = EACCES; +- ret = dict_set_str(reply, "ERROR", "Authentication failed"); ++ if (!xlator_in_graph) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOENT, PS_MSG_AUTHENTICATE_ERROR, ++ "Cannot authenticate client" ++ " from %s %s because brick is not attached in graph", ++ client->client_uid, (clnt_version) ? clnt_version : "old"); ++ ++ op_errno = ENOENT; ++ ret = dict_set_str(reply, "ERROR", "Brick not found"); ++ } else { ++ gf_event(EVENT_CLIENT_AUTH_REJECT, ++ "client_uid=%s;" ++ "client_identifier=%s;server_identifier=%s;" ++ "brick_path=%s", ++ client->client_uid, req->trans->peerinfo.identifier, ++ req->trans->myinfo.identifier, name); ++ gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR, ++ "Cannot authenticate client" ++ " from %s %s", ++ client->client_uid, (clnt_version) ? clnt_version : "old"); ++ ++ op_errno = EACCES; ++ ret = dict_set_str(reply, "ERROR", "Authentication failed"); ++ } + if (ret < 0) + gf_msg_debug(this->name, 0, + "failed to set error " +-- +1.8.3.1 + diff --git a/SOURCES/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch b/SOURCES/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch new file mode 100644 index 0000000..1d9a389 --- /dev/null +++ b/SOURCES/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch @@ -0,0 +1,38 @@ +From 8074906ace5fbd71b5d24cc3da5571ebdebed859 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 2 Jan 2020 11:27:47 +0000 +Subject: [PATCH 352/353] spec: fixed missing dependencies for + glusterfs-cloudsync-plugins + +RPMDiff raises a warning, subpackage glusterfs-cloudsync-plugins +on x86_64 consumes library libglusterfs.so.0()(64bit) from +subpackage glusterfs-libs but does not have explicit package +version requirement, which is fixed using this patch. 
+ +Label: DOWNSTREAM ONLY + +BUG: 1775564 + +Change-Id: I05ea46ac2c92090f01c07dfbd6e0d66498f1c586 +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/188619 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 671ee27..e95e539 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -374,6 +374,7 @@ This package provides the GlusterFS CLI application and its man page + %package cloudsync-plugins + Summary: Cloudsync Plugins + BuildRequires: libcurl-devel ++Requires: glusterfs-libs = %{version}-%{release} + + %description cloudsync-plugins + GlusterFS is a distributed file-system capable of scaling to several +-- +1.8.3.1 + diff --git a/SOURCES/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch b/SOURCES/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch new file mode 100644 index 0000000..e436373 --- /dev/null +++ b/SOURCES/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch @@ -0,0 +1,47 @@ +From 37e2d76579abf38031d1cd9769da798fa04b183a Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Wed, 22 Jan 2020 14:14:33 -0500 +Subject: [PATCH 353/353] build: glusterfs-ganesha pkg requires + python3-policycoreutils on rhel8 + +glusterfs-ganesha pkg requires policycoreutils-python-utils on rhel8, +not policycoreutils-python + +also requires nfs-ganesha-selinux on rhel-8 (optional on rhel-7) + +Label: DOWNSTREAM ONLY + +Change-Id: Ia97b4dabdc098fb76e3f60e8b48ea4191e677136 +Signed-off-by: Kaleb S. 
KEITHLEY +BUG: 1794153 +Reviewed-on: https://code.engineering.redhat.com/gerrit/190130 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index e95e539..7c8a751 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -462,6 +462,7 @@ Summary: NFS-Ganesha configuration + Group: Applications/File + + Requires: %{name}-server%{?_isa} = %{version}-%{release} ++Requires: nfs-ganesha-selinux >= 2.7.3 + Requires: nfs-ganesha-gluster >= 2.7.3 + Requires: pcs, dbus + %if ( 0%{?rhel} && 0%{?rhel} == 6 ) +@@ -475,7 +476,7 @@ Requires: net-tools + %endif + + %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +-%if ( 0%{?rhel} ) ++%if ( 0%{?rhel} && 0%{?rhel} < 8 ) + Requires: selinux-policy >= 3.13.1-160 + Requires(post): policycoreutils-python + Requires(postun): policycoreutils-python +-- +1.8.3.1 + diff --git a/SOURCES/0354-core-fix-memory-pool-management-races.patch b/SOURCES/0354-core-fix-memory-pool-management-races.patch new file mode 100644 index 0000000..a7cdfc0 --- /dev/null +++ b/SOURCES/0354-core-fix-memory-pool-management-races.patch @@ -0,0 +1,466 @@ +From 75a9d946d252ce70460144615ca17dbdf2e80fab Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 7 Feb 2020 10:19:57 +0100 +Subject: [PATCH 354/355] core: fix memory pool management races + +Objects allocated from a per-thread memory pool keep a reference to it +to be able to return the object to the pool when not used anymore. The +object holding this reference can have a long life cycle that could +survive a glfs_fini() call. + +This means that it's unsafe to destroy memory pools from glfs_fini(). + +Another side effect of destroying memory pools from glfs_fini() is that +the TLS variable that points to one of those pools cannot be reset for +all alive threads. 
This means that any attempt to allocate memory from +those threads will access already free'd memory, which is very +dangerous. + +To fix these issues, mem_pools_fini() doesn't destroy pool lists +anymore. Only at process termination the pools are destroyed. + +Upatream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24099 +> Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 +> Fixes: bz#1801684 +> Signed-off-by: Xavi Hernandez + +Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 +BUG: 1800703 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/192262 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/globals.c | 13 ++- + libglusterfs/src/glusterfs/globals.h | 3 + + libglusterfs/src/glusterfs/mem-pool.h | 28 ++--- + libglusterfs/src/mem-pool.c | 201 ++++++++++++++++++---------------- + libglusterfs/src/syncop.c | 7 ++ + 5 files changed, 146 insertions(+), 106 deletions(-) + +diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c +index 02098e6..e433ee8 100644 +--- a/libglusterfs/src/globals.c ++++ b/libglusterfs/src/globals.c +@@ -319,7 +319,18 @@ glusterfs_cleanup(void *ptr) + GF_FREE(thread_syncopctx.groups); + } + +- mem_pool_thread_destructor(); ++ mem_pool_thread_destructor(NULL); ++} ++ ++void ++gf_thread_needs_cleanup(void) ++{ ++ /* The value stored in free_key TLS is not really used for anything, but ++ * pthread implementation doesn't call the TLS destruction function unless ++ * it's != NULL. This function must be called whenever something is ++ * allocated for this thread so that glusterfs_cleanup() will be called ++ * and resources can be released. 
*/ ++ (void)pthread_setspecific(free_key, (void *)1); + } + + static void +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index e218285..31717ed 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -181,6 +181,9 @@ glusterfs_leaseid_exist(void); + int + glusterfs_globals_init(glusterfs_ctx_t *ctx); + ++void ++gf_thread_needs_cleanup(void); ++ + struct tvec_base * + glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx); + void +diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h +index be0a26d..97bf76c 100644 +--- a/libglusterfs/src/glusterfs/mem-pool.h ++++ b/libglusterfs/src/glusterfs/mem-pool.h +@@ -245,24 +245,26 @@ typedef struct per_thread_pool { + } per_thread_pool_t; + + typedef struct per_thread_pool_list { +- /* +- * These first two members are protected by the global pool lock. When +- * a thread first tries to use any pool, we create one of these. We +- * link it into the global list using thr_list so the pool-sweeper +- * thread can find it, and use pthread_setspecific so this thread can +- * find it. When the per-thread destructor runs, we "poison" the pool +- * list to prevent further allocations. This also signals to the +- * pool-sweeper thread that the list should be detached and freed after +- * the next time it's swept. +- */ ++ /* thr_list is used to place the TLS pool_list into the active global list ++ * (pool_threads) or the inactive global list (pool_free_threads). It's ++ * protected by the global pool_lock. */ + struct list_head thr_list; +- unsigned int poison; ++ ++ /* This lock is used to update poison and the hot/cold lists of members ++ * of 'pools' array. */ ++ pthread_spinlock_t lock; ++ ++ /* This field is used to mark a pool_list as not being owned by any thread. ++ * This means that the sweeper thread won't be cleaning objects stored in ++ * its pools. 
mem_put() uses it to decide if the object being released is ++ * placed into its original pool_list or directly destroyed. */ ++ bool poison; ++ + /* + * There's really more than one pool, but the actual number is hidden + * in the implementation code so we just make it a single-element array + * here. + */ +- pthread_spinlock_t lock; + per_thread_pool_t pools[1]; + } per_thread_pool_list_t; + +@@ -307,7 +309,7 @@ void + mem_pool_destroy(struct mem_pool *pool); + + void +-mem_pool_thread_destructor(void); ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); + + void + gf_mem_acct_enable_set(void *ctx); +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index d88041d..2b41c01 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL; + #define POOL_SWEEP_SECS 30 + + typedef struct { +- struct list_head death_row; + pooled_obj_hdr_t *cold_lists[N_COLD_LISTS]; + unsigned int n_cold_lists; + } sweep_state_t; +@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; + static unsigned int init_count = 0; + static pthread_t sweeper_tid; + +-gf_boolean_t ++static bool + collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list) + { + unsigned int i; + per_thread_pool_t *pt_pool; +- gf_boolean_t poisoned; + + (void)pthread_spin_lock(&pool_list->lock); + +- poisoned = pool_list->poison != 0; +- if (!poisoned) { +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- if (pt_pool->cold_list) { +- if (state->n_cold_lists >= N_COLD_LISTS) { +- break; +- } +- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; ++ for (i = 0; i < NPOOLS; ++i) { ++ pt_pool = &pool_list->pools[i]; ++ if (pt_pool->cold_list) { ++ if (state->n_cold_lists >= N_COLD_LISTS) { ++ (void)pthread_spin_unlock(&pool_list->lock); ++ return true; + } +- pt_pool->cold_list = pt_pool->hot_list; +- 
pt_pool->hot_list = NULL; ++ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; + } ++ pt_pool->cold_list = pt_pool->hot_list; ++ pt_pool->hot_list = NULL; + } + + (void)pthread_spin_unlock(&pool_list->lock); + +- return poisoned; ++ return false; + } + +-void ++static void + free_obj_list(pooled_obj_hdr_t *victim) + { + pooled_obj_hdr_t *next; +@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim) + } + } + +-void * ++static void * + pool_sweeper(void *arg) + { + sweep_state_t state; + per_thread_pool_list_t *pool_list; +- per_thread_pool_list_t *next_pl; +- per_thread_pool_t *pt_pool; +- unsigned int i; +- gf_boolean_t poisoned; ++ uint32_t i; ++ bool pending; + + /* + * This is all a bit inelegant, but the point is to avoid doing + * expensive things (like freeing thousands of objects) while holding a +- * global lock. Thus, we split each iteration into three passes, with ++ * global lock. Thus, we split each iteration into two passes, with + * only the first and fastest holding the lock. + */ + ++ pending = true; ++ + for (;;) { +- sleep(POOL_SWEEP_SECS); ++ /* If we know there's pending work to do (or it's the first run), we ++ * do collect garbage more often. */ ++ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS); ++ + (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); +- INIT_LIST_HEAD(&state.death_row); + state.n_cold_lists = 0; ++ pending = false; + + /* First pass: collect stuff that needs our attention. 
*/ + (void)pthread_mutex_lock(&pool_lock); +- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list) ++ list_for_each_entry(pool_list, &pool_threads, thr_list) + { +- (void)pthread_mutex_unlock(&pool_lock); +- poisoned = collect_garbage(&state, pool_list); +- (void)pthread_mutex_lock(&pool_lock); +- +- if (poisoned) { +- list_move(&pool_list->thr_list, &state.death_row); ++ if (collect_garbage(&state, pool_list)) { ++ pending = true; + } + } + (void)pthread_mutex_unlock(&pool_lock); + +- /* Second pass: free dead pools. */ +- (void)pthread_mutex_lock(&pool_free_lock); +- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list) +- { +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- free_obj_list(pt_pool->cold_list); +- free_obj_list(pt_pool->hot_list); +- pt_pool->hot_list = pt_pool->cold_list = NULL; +- } +- list_del(&pool_list->thr_list); +- list_add(&pool_list->thr_list, &pool_free_threads); +- } +- (void)pthread_mutex_unlock(&pool_free_lock); +- +- /* Third pass: free cold objects from live pools. */ ++ /* Second pass: free cold objects from live pools. */ + for (i = 0; i < state.n_cold_lists; ++i) { + free_obj_list(state.cold_lists[i]); + } + (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + } ++ ++ return NULL; + } + + void +-mem_pool_thread_destructor(void) ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) + { +- per_thread_pool_list_t *pool_list = thread_pool_list; +- +- /* The pool-sweeper thread will take it from here. +- * +- * We can change 'poison' here without taking locks because the change +- * itself doesn't interact with other parts of the code and a simple write +- * is already atomic from the point of view of the processor. +- * +- * This change can modify what mem_put() does, but both possibilities are +- * fine until the sweeper thread kicks in. The real synchronization must be +- * between mem_put() and the sweeper thread. 
*/ ++ per_thread_pool_t *pt_pool; ++ uint32_t i; ++ ++ if (pool_list == NULL) { ++ pool_list = thread_pool_list; ++ } ++ ++ /* The current thread is terminating. None of the allocated objects will ++ * be used again. We can directly destroy them here instead of delaying ++ * it until the next sweeper loop. */ + if (pool_list != NULL) { +- pool_list->poison = 1; ++ /* Remove pool_list from the global list to avoid that sweeper ++ * could touch it. */ ++ pthread_mutex_lock(&pool_lock); ++ list_del(&pool_list->thr_list); ++ pthread_mutex_unlock(&pool_lock); ++ ++ /* We need to protect hot/cold changes from potential mem_put() calls ++ * that reference this pool_list. Once poison is set to true, we are ++ * sure that no one else will touch hot/cold lists. The only possible ++ * race is when at the same moment a mem_put() is adding a new item ++ * to the hot list. We protect from that by taking pool_list->lock. ++ * After that we don't need the lock to destroy the hot/cold lists. */ ++ pthread_spin_lock(&pool_list->lock); ++ pool_list->poison = true; ++ pthread_spin_unlock(&pool_list->lock); ++ ++ for (i = 0; i < NPOOLS; i++) { ++ pt_pool = &pool_list->pools[i]; ++ ++ free_obj_list(pt_pool->hot_list); ++ pt_pool->hot_list = NULL; ++ ++ free_obj_list(pt_pool->cold_list); ++ pt_pool->cold_list = NULL; ++ } ++ ++ pthread_mutex_lock(&pool_free_lock); ++ list_add(&pool_list->thr_list, &pool_free_threads); ++ pthread_mutex_unlock(&pool_free_lock); ++ + thread_pool_list = NULL; + } + } +@@ -528,6 +538,30 @@ mem_pools_preinit(void) + init_done = GF_MEMPOOL_INIT_EARLY; + } + ++static __attribute__((destructor)) void ++mem_pools_postfini(void) ++{ ++ per_thread_pool_list_t *pool_list, *next; ++ ++ /* This is part of a process shutdown (or dlclose()) which means that ++ * most probably all threads should be stopped. However this is not the ++ * case for gluster and there are even legitimate situations in which we ++ * could have some threads alive. 
What is sure is that none of those ++ * threads should be using anything from this library, so destroying ++ * everything here should be fine and safe. */ ++ ++ list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list) ++ { ++ mem_pool_thread_destructor(pool_list); ++ } ++ ++ list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list) ++ { ++ list_del(&pool_list->thr_list); ++ FREE(pool_list); ++ } ++} ++ + /* Call mem_pools_init() once threading has been configured completely. This + * prevent the pool_sweeper thread from getting killed once the main() thread + * exits during deamonizing. */ +@@ -560,10 +594,6 @@ mem_pools_fini(void) + */ + break; + case 1: { +- per_thread_pool_list_t *pool_list; +- per_thread_pool_list_t *next_pl; +- unsigned int i; +- + /* if mem_pools_init() was not called, sweeper_tid will be invalid + * and the functions will error out. That is not critical. In all + * other cases, the sweeper_tid will be valid and the thread gets +@@ -571,32 +601,11 @@ mem_pools_fini(void) + (void)pthread_cancel(sweeper_tid); + (void)pthread_join(sweeper_tid, NULL); + +- /* At this point all threads should have already terminated, so +- * it should be safe to destroy all pending per_thread_pool_list_t +- * structures that are stored for each thread. */ +- mem_pool_thread_destructor(); +- +- /* free all objects from all pools */ +- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, +- thr_list) +- { +- for (i = 0; i < NPOOLS; ++i) { +- free_obj_list(pool_list->pools[i].hot_list); +- free_obj_list(pool_list->pools[i].cold_list); +- pool_list->pools[i].hot_list = NULL; +- pool_list->pools[i].cold_list = NULL; +- } +- +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } +- +- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads, +- thr_list) +- { +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } ++ /* There could be threads still running in some cases, so we can't ++ * destroy pool_lists in use. 
We can also not destroy unused ++ * pool_lists because some allocated objects may still be pointing ++ * to them. */ ++ mem_pool_thread_destructor(NULL); + + init_done = GF_MEMPOOL_INIT_DESTROY; + /* Fall through. */ +@@ -617,7 +626,7 @@ mem_pools_fini(void) + { + } + void +-mem_pool_thread_destructor(void) ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) + { + } + +@@ -738,13 +747,21 @@ mem_get_pool_list(void) + } + } + ++ /* There's no need to take pool_list->lock, because this is already an ++ * atomic operation and we don't need to synchronize it with any change ++ * in hot/cold lists. */ ++ pool_list->poison = false; ++ + (void)pthread_mutex_lock(&pool_lock); +- pool_list->poison = 0; + list_add(&pool_list->thr_list, &pool_threads); + (void)pthread_mutex_unlock(&pool_lock); + + thread_pool_list = pool_list; + ++ /* Ensure that all memory objects associated to the new pool_list are ++ * destroyed when the thread terminates. */ ++ gf_thread_needs_cleanup(); ++ + return pool_list; + } + +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 2eb7b49..0de53c6 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups) + + /* set/reset the ngrps, this is where reset of groups is handled */ + opctx->ngrps = count; ++ ++ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) { ++ /* This is the first time we are storing groups into the TLS structure ++ * so we mark the current thread so that it will be properly cleaned ++ * up when the thread terminates. 
*/ ++ gf_thread_needs_cleanup(); ++ } + opctx->valid |= SYNCOPCTX_GROUPS; + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0355-core-Prevent-crash-on-process-termination.patch b/SOURCES/0355-core-Prevent-crash-on-process-termination.patch new file mode 100644 index 0000000..fca3f2c --- /dev/null +++ b/SOURCES/0355-core-Prevent-crash-on-process-termination.patch @@ -0,0 +1,74 @@ +From 10f1730073b9fb02d2ed7f7de855afd6df0e5202 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 19 Feb 2020 12:24:15 +0100 +Subject: [PATCH 355/355] core: Prevent crash on process termination + +A previous patch (ce61da816a) has fixed a use-after-free issue, +but it doesn't work well when the final cleanup is done at process +termination because gluster doesn't stop other threads before +calling exit(). + +For this reason, the final cleanup is removed to avoid the crash, +at least until the termination sequence properly stops all gluster +threads before exiting the program. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24138 +> Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b +> Fixes: bz#1801684 +> Signed-off-by: Xavi Hernandez + +Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b +BUG: 1800703 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/192344 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/mem-pool.c | 30 +++++++++++------------------- + 1 file changed, 11 insertions(+), 19 deletions(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 2b41c01..73503e0 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -541,25 +541,17 @@ mem_pools_preinit(void) + static __attribute__((destructor)) void + mem_pools_postfini(void) + { +- per_thread_pool_list_t *pool_list, *next; +- +- /* This is part of a process shutdown (or dlclose()) which means that +- * most probably all threads should be stopped. 
However this is not the +- * case for gluster and there are even legitimate situations in which we +- * could have some threads alive. What is sure is that none of those +- * threads should be using anything from this library, so destroying +- * everything here should be fine and safe. */ +- +- list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list) +- { +- mem_pool_thread_destructor(pool_list); +- } +- +- list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list) +- { +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } ++ /* TODO: This function should destroy all per thread memory pools that ++ * are still alive, but this is not possible right now because glibc ++ * starts calling destructors as soon as exit() is called, and ++ * gluster doesn't ensure that all threads have been stopped before ++ * calling exit(). Existing threads would crash when they try to use ++ * memory or they terminate if we destroy things here. ++ * ++ * When we propertly terminate all threads, we can add the needed ++ * code here. Till then we need to leave the memory allocated. Most ++ * probably this function will be executed on process termination, ++ * so the memory will be released anyway by the system. */ + } + + /* Call mem_pools_init() once threading has been configured completely. 
This +-- +1.8.3.1 + diff --git a/SOURCES/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch b/SOURCES/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch new file mode 100644 index 0000000..f2b6835 --- /dev/null +++ b/SOURCES/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch @@ -0,0 +1,26 @@ +From 4099fb424482ede2fb6346c76c58523113f415df Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 12 Mar 2020 01:02:41 -0400 +Subject: [PATCH 356/357] Update rfc.sh to rhgs-3.5.1-rhel-8 + +Signed-off-by: Rinku Kothiya +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index 918fb11..a408e45 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.1"; ++branch="rhgs-3.5.1-rhel-8"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch b/SOURCES/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch new file mode 100644 index 0000000..a67b89c --- /dev/null +++ b/SOURCES/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch @@ -0,0 +1,268 @@ +From 2d5e678f8331d4d99ee4dff6e166cbf01c83ab36 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Wed, 12 Feb 2020 12:47:57 -0500 +Subject: [PATCH 357/357] ganesha-ha: updates for pcs-0.10.x (i.e. in Fedora-29 + and RHEL-8) + +pcs-0.10 has introduced changes options to pcs commands + +pcs-0.10.x is in Fedora-29 and later and RHEL-8. + +Also some minor cleanup. Namely use bash built-in [[...]] in a few +more places instead of test(1), i.e. [...], and use correct "==" for +comparison. + +master: https://review.gluster.org/24115 + +Change-Id: I3fb2fcd71406964c77fdc4f18580ca133f365fd6 +BUG: 1802727 +Signed-off-by: Kaleb S. 
KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/194467 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 84 ++++++++++++++++++++++++------------ + 1 file changed, 56 insertions(+), 28 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 32af1ca..0b0050a 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -28,7 +28,12 @@ HA_VOL_MNT="/var/run/gluster/shared_storage" + HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" + SERVICE_MAN="DISTRO_NOT_FOUND" + +-RHEL6_PCS_CNAME_OPTION="--name" ++# rhel, fedora id, version ++ID="" ++VERSION_ID="" ++ ++PCS9OR10_PCS_CNAME_OPTION="" ++PCS9OR10_PCS_CLONE_OPTION="clone" + SECRET_PEM="/var/lib/glusterd/nfs/secret.pem" + + # UNBLOCK RA uses shared_storage which may become unavailable +@@ -101,9 +106,9 @@ determine_service_manager () { + then + SERVICE_MAN="/sbin/service" + fi +- if [ "${SERVICE_MAN}" == "DISTRO_NOT_FOUND" ] ++ if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]] + then +- echo "Service manager not recognized, exiting" ++ logger "Service manager not recognized, exiting" + exit 1 + fi + } +@@ -114,7 +119,7 @@ manage_service () + local new_node=${2} + local option= + +- if [ "${action}" == "start" ]; then ++ if [[ "${action}" == "start" ]]; then + option="yes" + else + option="no" +@@ -122,7 +127,7 @@ manage_service () + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ + ${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option" + +- if [ "${SERVICE_MAN}" == "/bin/systemctl" ] ++ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]] + then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ + ${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha" +@@ -140,7 +145,7 @@ check_cluster_exists() + + if [ -e /var/run/corosync.pid ]; then + cluster_name=$(pcs status | grep 
"Cluster name:" | cut -d ' ' -f 3) +- if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then ++ if [[ "${cluster_name}X" == "${name}X" ]]; then + logger "$name already exists, exiting" + exit 0 + fi +@@ -155,7 +160,7 @@ determine_servers() + local tmp_ifs=${IFS} + local ha_servers="" + +- if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then ++ if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then + ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') + IFS=$' ' + for server in ${ha_servers} ; do +@@ -193,15 +198,21 @@ setup_cluster() + + logger "setting up cluster ${name} with the following ${servers}" + +- pcs cluster auth ${servers} +- # pcs cluster setup --name ${name} ${servers} +- pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} ++ # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers} ++ pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} + if [ $? -ne 0 ]; then +- logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} failed" ++ logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out" + #set up failed stop all ganesha process and clean up symlinks in cluster + stop_ganesha_all "${servers}" + exit 1; + fi ++ ++ # pcs cluster auth ${servers} ++ pcs cluster auth ++ if [ $? -ne 0 ]; then ++ logger "pcs cluster auth failed" ++ fi ++ + pcs cluster start --all + if [ $? 
-ne 0 ]; then + logger "pcs cluster start failed" +@@ -217,7 +228,7 @@ setup_cluster() + done + + unclean=$(pcs status | grep -u "UNCLEAN") +- while [[ "${unclean}X" = "UNCLEANX" ]]; do ++ while [[ "${unclean}X" == "UNCLEANX" ]]; do + sleep 1 + unclean=$(pcs status | grep -u "UNCLEAN") + done +@@ -244,7 +255,7 @@ setup_finalize_ha() + local stopped="" + + stopped=$(pcs status | grep -u "Stopped") +- while [[ "${stopped}X" = "StoppedX" ]]; do ++ while [[ "${stopped}X" == "StoppedX" ]]; do + sleep 1 + stopped=$(pcs status | grep -u "Stopped") + done +@@ -265,7 +276,7 @@ refresh_config () + if [ -e ${SECRET_PEM} ]; then + while [[ ${3} ]]; do + current_host=`echo ${3} | cut -d "." -f 1` +- if [ ${short_host} != ${current_host} ]; then ++ if [[ ${short_host} != ${current_host} ]]; then + output=$(ssh -oPasswordAuthentication=no \ + -oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ + "dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +@@ -398,7 +409,7 @@ wrap_create_virt_ip_constraints() + # the result is "node2 node3 node4"; for node2, "node3 node4 node1" + # and so on. + while [[ ${1} ]]; do +- if [ "${1}" = "${primary}" ]; then ++ if [[ ${1} == ${primary} ]]; then + shift + while [[ ${1} ]]; do + tail=${tail}" "${1} +@@ -429,15 +440,15 @@ setup_create_resources() + local cibfile=$(mktemp -u) + + # fixup /var/lib/nfs +- logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone" +- pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone ++ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}" ++ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} + if [ $? 
-ne 0 ]; then +- logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed" ++ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + +- pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone ++ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} + if [ $? -ne 0 ]; then +- logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed" ++ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace +@@ -445,9 +456,9 @@ setup_create_resources() + # ganesha-active crm_attribute + sleep 5 + +- pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true ++ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true + if [ $? -ne 0 ]; then +- logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed" ++ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 +@@ -616,7 +627,7 @@ addnode_recreate_resources() + --after ${add_node}-nfs_block + if [ $? 
-ne 0 ]; then + logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ +- ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" ++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" + fi + + pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 +@@ -780,7 +791,7 @@ setup_state_volume() + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + for server in ${HA_SERVERS} ; do +- if [ ${server} != ${dirname} ]; then ++ if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + fi +@@ -794,7 +805,7 @@ setup_state_volume() + enable_pacemaker() + { + while [[ ${1} ]]; do +- if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then ++ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ + ${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" + else +@@ -892,7 +903,7 @@ delnode_state_volume() + rm -rf ${mnt}/nfs-ganesha/${dirname} + + for server in ${HA_SERVERS} ; do +- if [[ "${server}" != "${dirname}" ]]; then ++ if [[ ${server} != ${dirname} ]]; then + rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi +@@ -963,7 +974,7 @@ status() + + create_ganesha_conf_file() + { +- if [ $1 == "yes" ]; ++ if [[ "$1" == "yes" ]]; + then + if [ -e $GANESHA_CONF ]; + then +@@ -1012,6 +1023,13 @@ main() + semanage boolean -m gluster_use_execmem --on + fi + ++ local osid="" ++ ++ osid=$(grep ^ID= /etc/os-release) ++ eval $(echo ${osid} | grep -F ID=) ++ osid=$(grep ^VERSION_ID= /etc/os-release) ++ eval $(echo ${osid} | grep -F VERSION_ID=) ++ + HA_CONFDIR=${1%/}; shift + local ha_conf=${HA_CONFDIR}/ganesha-ha.conf + local node="" +@@ -1032,7 +1050,17 @@ main() + + determine_servers "setup" + +- if [ 
"X${HA_NUM_SERVERS}X" != "X1X" ]; then ++ # Fedora 29+ and rhel/centos 8 has PCS-0.10.x ++ # default is pcs-0.10.x options but check for ++ # rhel/centos 7 (pcs-0.9.x) and adjust accordingly ++ if [[ ${ID} =~ {rhel,centos} ]]; then ++ if [[ ${VERSION_ID} == 7.* ]]; then ++ PCS9OR10_PCS_CNAME_OPTION="--name" ++ PCS9OR10_PCS_CLONE_OPTION="--clone" ++ fi ++ fi ++ ++ if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then + + determine_service_manager + +-- +1.8.3.1 + diff --git a/SOURCES/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch b/SOURCES/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch new file mode 100644 index 0000000..d7138a6 --- /dev/null +++ b/SOURCES/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch @@ -0,0 +1,51 @@ +From 0d8c6d78130d22c475010bcce8055073b19de82a Mon Sep 17 00:00:00 2001 +From: Xie Changlong +Date: Fri, 17 May 2019 18:33:11 +0800 +Subject: [PATCH 358/362] inode: fix wrong loop count in __inode_ctx_free + +Avoid serious memory leak + +Backport of : +>fixes: bz#1711240 +>Upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22738/ +>Change-Id: Ic61a8fdd0e941e136c98376a87b5a77fa8c22316 +>Signed-off-by: Xie Changlong + +BUG: 1781543 +Change-Id: I601ebb6cd6744a61c64edd3d21d3b9a0edf1e95b +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/195611 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/inode.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 5331e93..9dbb25b 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -402,14 +402,15 @@ __inode_ctx_free(inode_t *inode) + goto noctx; + } + +- for (index = 0; index < inode->table->xl->graph->xl_count; index++) { ++ for (index = 0; index < inode->table->ctxcount; index++) { + if (inode->_ctx[index].value1 || inode->_ctx[index].value2) { + xl = (xlator_t 
*)(long)inode->_ctx[index].xl_key; +- old_THIS = THIS; +- THIS = xl; +- if (!xl->call_cleanup && xl->cbks->forget) ++ if (xl && !xl->call_cleanup && xl->cbks->forget) { ++ old_THIS = THIS; ++ THIS = xl; + xl->cbks->forget(xl, inode); +- THIS = old_THIS; ++ THIS = old_THIS; ++ } + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch b/SOURCES/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch new file mode 100644 index 0000000..bd730bc --- /dev/null +++ b/SOURCES/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch @@ -0,0 +1,41 @@ +From c0efaa98d777e4520028bf55482846b3ef5fca3a Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Wed, 1 Apr 2020 12:14:31 +0530 +Subject: [PATCH 359/362] dht: gf_defrag_process_dir is called even if + gf_defrag_fix_layout has failed + +Currently even though gf_defrag_fix_layout fails with ENOENT or ESTALE, a +subsequent call is made to gf_defrag_process_dir leading to rebalance failure. 
+ +upstream patch: https://review.gluster.org/#/c/glusterfs/+/24225 + +> fixes: #1102 +> Change-Id: Ib0c309fd78e89a000fed3feb4bbe2c5b48e61478 +> Signed-off-by: Susant Palai + +BUG: 1812789 +Change-Id: Ib0c309fd78e89a000fed3feb4bbe2c5b48e61478 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/196249 +Reviewed-by: Mohit Agrawal +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-rebalance.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 559f046..f4c62b8 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -3939,6 +3939,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + defrag->total_failures++; + } + ret = 0; ++ goto out; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED, + "Setxattr failed for %s", loc->path); +-- +1.8.3.1 + diff --git a/SOURCES/0360-rpc-Make-ssl-log-more-useful.patch b/SOURCES/0360-rpc-Make-ssl-log-more-useful.patch new file mode 100644 index 0000000..05e903d --- /dev/null +++ b/SOURCES/0360-rpc-Make-ssl-log-more-useful.patch @@ -0,0 +1,117 @@ +From 2b859d1a5499a215c8c37472d4fc7d7e4d70dac6 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 31 Mar 2020 16:45:35 +0530 +Subject: [PATCH 360/362] rpc: Make ssl log more useful + +Currently, ssl_setup_connection_params throws 4 messages for every +rpc connection that irritates a user while reading the logs. The same +info we can print in a single log with peerinfo to make it more +useful. ssl_setup_connection_params tries to load dh_param even if the user +has not configured it, and if a dh_param file is not available it throws +a failure message. To avoid the message, load dh_param only when the user +has configured it.
+ +> Change-Id: I9ddb57f86a3fa3e519180cb5d88828e59fe0e487 +> Fixes: #1141 +> Signed-off-by: Mohit Agrawal +> Cherry pick from commit 80dd8cceab3b860bf1bc2945c8e2d8d0b3913e48 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24270/ + +BUG: 1812824 +Change-Id: I9ddb57f86a3fa3e519180cb5d88828e59fe0e487 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/196371 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 46 ++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 20 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index f54ca83..65845ea 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4240,6 +4240,7 @@ ssl_setup_connection_params(rpc_transport_t *this) + char *cipher_list = DEFAULT_CIPHER_LIST; + char *dh_param = DEFAULT_DH_PARAM; + char *ec_curve = DEFAULT_EC_CURVE; ++ gf_boolean_t dh_flag = _gf_false; + + priv = this->private; + +@@ -4248,6 +4249,10 @@ ssl_setup_connection_params(rpc_transport_t *this) + return 0; + } + ++ if (!priv->ssl_enabled && !priv->mgmt_ssl) { ++ return 0; ++ } ++ + priv->ssl_own_cert = DEFAULT_CERT_PATH; + if (dict_get_str(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) { + if (!priv->ssl_enabled) { +@@ -4294,27 +4299,25 @@ ssl_setup_connection_params(rpc_transport_t *this) + priv->crl_path = gf_strdup(optstr); + } + +- gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG, +- "SSL support on the I/O path is %s", +- priv->ssl_enabled ? "ENABLED" : "NOT enabled"); +- gf_log(this->name, priv->mgmt_ssl ? GF_LOG_INFO : GF_LOG_DEBUG, +- "SSL support for glusterd is %s", +- priv->mgmt_ssl ? 
"ENABLED" : "NOT enabled"); +- + if (!priv->mgmt_ssl) { +- if (!dict_get_int32(this->options, SSL_CERT_DEPTH_OPT, &cert_depth)) { +- gf_log(this->name, GF_LOG_INFO, "using certificate depth %d", +- cert_depth); ++ if (!dict_get_int32_sizen(this->options, SSL_CERT_DEPTH_OPT, ++ &cert_depth)) { + } + } else { + cert_depth = this->ctx->ssl_cert_depth; +- gf_log(this->name, GF_LOG_INFO, "using certificate depth %d", +- cert_depth); + } +- if (!dict_get_str(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) { ++ gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG, ++ "SSL support for MGMT is %s IO path is %s certificate depth is %d " ++ "for peer %s", ++ (priv->mgmt_ssl ? "ENABLED" : "NOT enabled"), ++ (priv->ssl_enabled ? "ENABLED" : "NOT enabled"), cert_depth, ++ this->peerinfo.identifier); ++ ++ if (!dict_get_str_sizen(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) { + gf_log(this->name, GF_LOG_INFO, "using cipher list %s", cipher_list); + } +- if (!dict_get_str(this->options, SSL_DH_PARAM_OPT, &dh_param)) { ++ if (!dict_get_str_sizen(this->options, SSL_DH_PARAM_OPT, &dh_param)) { ++ dh_flag = _gf_true; + gf_log(this->name, GF_LOG_INFO, "using DH parameters %s", dh_param); + } + if (!dict_get_str(this->options, SSL_EC_CURVE_OPT, &ec_curve)) { +@@ -4349,12 +4352,15 @@ ssl_setup_connection_params(rpc_transport_t *this) + #ifdef SSL_OP_NO_COMPRESSION + SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_COMPRESSION); + #endif +- +- if ((bio = BIO_new_file(dh_param, "r")) == NULL) { +- gf_log(this->name, GF_LOG_INFO, +- "failed to open %s, " +- "DH ciphers are disabled", +- dh_param); ++ /* Upload file to bio wrapper only if dh param is configured ++ */ ++ if (dh_flag) { ++ if ((bio = BIO_new_file(dh_param, "r")) == NULL) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to open %s, " ++ "DH ciphers are disabled", ++ dh_param); ++ } + } + + if (bio != NULL) { +-- +1.8.3.1 + diff --git a/SOURCES/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch 
b/SOURCES/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch new file mode 100644 index 0000000..62b2fe0 --- /dev/null +++ b/SOURCES/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch @@ -0,0 +1,122 @@ +From 04b824ebfcf80c648d5855f10bc30fde45fd62eb Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Thu, 26 Mar 2020 10:46:16 +0000 +Subject: [PATCH 361/362] snap_scheduler: python3 compatibility and new test + case +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Problem: +"snap_scheduler.py init" command failing with the below traceback: + +[root@dhcp43-104 ~]# snap_scheduler.py init +Traceback (most recent call last): + File "/usr/sbin/snap_scheduler.py", line 941, in + sys.exit(main(sys.argv[1:])) + File "/usr/sbin/snap_scheduler.py", line 851, in main + initLogger() + File "/usr/sbin/snap_scheduler.py", line 153, in initLogger + logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log") + File "/usr/lib64/python3.6/posixpath.py", line 94, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.6/genericpath.py", line 151, in _check_arg_types + raise TypeError("Can't mix strings and bytes in path components") from None +TypeError: Can't mix strings and bytes in path components + +Solution: + +Added the 'universal_newlines' flag to Popen to support backward compatibility. + +Added a basic test for snapshot scheduler. 
+ +Backport Of: +    +        >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24257/ +        >Change-Id: I78e8fabd866fd96638747ecd21d292f5ca074a4e +        >Fixes: #1134 +        >Signed-off-by: Sunny Kumar +    +BUG: 1817369 +Change-Id: I78e8fabd866fd96638747ecd21d292f5ca074a4e +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/196482 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/snap_scheduler/snap_scheduler.py | 2 +- + tests/basic/volume-snap-scheduler.t | 49 +++++++++++++++++++++++++++++++++ + 2 files changed, 50 insertions(+), 1 deletion(-) + create mode 100644 tests/basic/volume-snap-scheduler.t + +diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py +index a66c5e3..5a29d41 100755 +--- a/extras/snap_scheduler/snap_scheduler.py ++++ b/extras/snap_scheduler/snap_scheduler.py +@@ -149,7 +149,7 @@ def initLogger(): + sh.setFormatter(formatter) + + process = subprocess.Popen(["gluster", "--print-logdir"], +- stdout=subprocess.PIPE) ++ stdout=subprocess.PIPE, universal_newlines=True) + logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log") + + fh = logging.FileHandler(logfile) +diff --git a/tests/basic/volume-snap-scheduler.t b/tests/basic/volume-snap-scheduler.t +new file mode 100644 +index 0000000..a638c5c +--- /dev/null ++++ b/tests/basic/volume-snap-scheduler.t +@@ -0,0 +1,49 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $V0 ++ ++## Create, start and mount meta_volume as ++## snap_scheduler expects shared storage to be enabled. ++## This test is very basic in nature not creating any snapshot ++## and purpose is to validate snap scheduling commands. 
++ ++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; ++TEST $CLI volume start $META_VOL ++TEST mkdir -p $META_MNT ++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT ++ ++##function to check status ++function check_status_scheduler() ++{ ++ local key=$1 ++ snap_scheduler.py status | grep -F "$key" | wc -l ++} ++ ++##Basic snap_scheduler command test init/enable/disable/list ++ ++TEST snap_scheduler.py init ++ ++TEST snap_scheduler.py enable ++ ++EXPECT 1 check_status_scheduler "Enabled" ++ ++TEST snap_scheduler.py disable ++ ++EXPECT 1 check_status_scheduler "Disabled" ++ ++TEST snap_scheduler.py list ++ ++TEST $CLI volume stop $V0; ++ ++TEST $CLI volume delete $V0; ++ ++cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0362-write-behind-fix-data-corruption.patch b/SOURCES/0362-write-behind-fix-data-corruption.patch new file mode 100644 index 0000000..aeb7242 --- /dev/null +++ b/SOURCES/0362-write-behind-fix-data-corruption.patch @@ -0,0 +1,454 @@ +From 48f6929590157d9a1697e11c02441207afdc1bed Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 27 Mar 2020 23:56:15 +0100 +Subject: [PATCH 362/362] write-behind: fix data corruption + +There was a bug in write-behind that allowed a previous completed write +to overwrite the overlapping region of data from a future write. + +Suppose we want to send three writes (W1, W2 and W3). W1 and W2 are +sequential, and W3 writes at the same offset of W2: + + W2.offset = W3.offset = W1.offset + W1.size + +Both W1 and W2 are sent in parallel. W3 is only sent after W2 completes. +So W3 should *always* overwrite the overlapping part of W2. + +Suppose write-behind processes the requests from 2 concurrent threads: + + Thread 1 Thread 2 + + + + wb_enqueue_tempted(W1) + /* W1 is assigned gen X */ + wb_enqueue_tempted(W2) + /* W2 is assigned gen X */ + + wb_process_queue() + __wb_preprocess_winds() + /* W1 and W2 are sequential and all + * other requisites are met to merge + * both requests. 
*/ + __wb_collapse_small_writes(W1, W2) + __wb_fulfill_request(W2) + + __wb_pick_unwinds() -> W2 + /* In this case, since the request is + * already fulfilled, wb_inode->gen + * is not updated. */ + + wb_do_unwinds() + STACK_UNWIND(W2) + + /* The application has received the + * result of W2, so it can send W3. */ + + + wb_enqueue_tempted(W3) + /* W3 is assigned gen X */ + + wb_process_queue() + /* Here we have W1 (which contains + * the conflicting W2) and W3 with + * same gen, so they are interpreted + * as concurrent writes that do not + * conflict. */ + __wb_pick_winds() -> W3 + + wb_do_winds() + STACK_WIND(W3) + + wb_process_queue() + /* Eventually W1 will be + * ready to be sent */ + __wb_pick_winds() -> W1 + __wb_pick_unwinds() -> W1 + /* Here wb_inode->gen is + * incremented. */ + + wb_do_unwinds() + STACK_UNWIND(W1) + + wb_do_winds() + STACK_WIND(W1) + +So, as we can see, W3 is sent before W1, which shouldn't happen. + +The problem is that wb_inode->gen is only incremented for requests that +have not been fulfilled but, after a merge, the request is marked as +fulfilled even though it has not been sent to the brick. This allows +that future requests are assigned to the same generation, which could +be internally reordered. + +Solution: + +Increment wb_inode->gen before any unwind, even if it's for a fulfilled +request. + +Special thanks to Stefan Ring for writing a reproducer that has been +crucial to identify the issue. 
+ +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24263 +> Change-Id: Id4ab0f294a09aca9a863ecaeef8856474662ab45 +> Signed-off-by: Xavi Hernandez +> Fixes: #884 + +Change-Id: Id4ab0f294a09aca9a863ecaeef8856474662ab45 +Signed-off-by: Xavi Hernandez +BUG: 1819059 +Reviewed-on: https://code.engineering.redhat.com/gerrit/196250 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/write-behind/issue-884.c | 267 +++++++++++++++++++++ + tests/bugs/write-behind/issue-884.t | 40 +++ + .../performance/write-behind/src/write-behind.c | 4 +- + 3 files changed, 309 insertions(+), 2 deletions(-) + create mode 100644 tests/bugs/write-behind/issue-884.c + create mode 100755 tests/bugs/write-behind/issue-884.t + +diff --git a/tests/bugs/write-behind/issue-884.c b/tests/bugs/write-behind/issue-884.c +new file mode 100644 +index 0000000..e9c33b3 +--- /dev/null ++++ b/tests/bugs/write-behind/issue-884.c +@@ -0,0 +1,267 @@ ++ ++#define _GNU_SOURCE ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* Based on a reproducer by Stefan Ring. It seems to be quite sensible to any ++ * timing modification, so the code has been maintained as is, only with minor ++ * changes. 
*/ ++ ++struct glfs *glfs; ++ ++pthread_mutex_t the_mutex = PTHREAD_MUTEX_INITIALIZER; ++pthread_cond_t the_cond = PTHREAD_COND_INITIALIZER; ++ ++typedef struct _my_aiocb { ++ int64_t size; ++ volatile int64_t seq; ++ int which; ++} my_aiocb; ++ ++typedef struct _worker_data { ++ my_aiocb cb; ++ struct iovec iov; ++ int64_t offset; ++} worker_data; ++ ++typedef struct { ++ worker_data wdata[2]; ++ ++ volatile unsigned busy; ++} all_data_t; ++ ++all_data_t all_data; ++ ++static void ++completion_fnc(struct glfs_fd *fd, ssize_t ret, struct glfs_stat *pre, ++ struct glfs_stat *post, void *arg) ++{ ++ void *the_thread; ++ my_aiocb *cb = (my_aiocb *)arg; ++ long seq = cb->seq; ++ ++ assert(ret == cb->size); ++ ++ pthread_mutex_lock(&the_mutex); ++ pthread_cond_broadcast(&the_cond); ++ ++ all_data.busy &= ~(1 << cb->which); ++ cb->seq = -1; ++ ++ the_thread = (void *)pthread_self(); ++ printf("worker %d is done from thread %p, seq %ld!\n", cb->which, ++ the_thread, seq); ++ ++ pthread_mutex_unlock(&the_mutex); ++} ++ ++static void ++init_wdata(worker_data *data, int which) ++{ ++ data->cb.which = which; ++ data->cb.seq = -1; ++ ++ data->iov.iov_base = malloc(1024 * 1024); ++ memset(data->iov.iov_base, 6, ++ 1024 * 1024); /* tail part never overwritten */ ++} ++ ++static void ++init() ++{ ++ all_data.busy = 0; ++ ++ init_wdata(&all_data.wdata[0], 0); ++ init_wdata(&all_data.wdata[1], 1); ++} ++ ++static void ++do_write(struct glfs_fd *fd, int content, int size, int64_t seq, ++ worker_data *wdata, const char *name) ++{ ++ int ret; ++ ++ wdata->cb.size = size; ++ wdata->cb.seq = seq; ++ ++ if (content >= 0) ++ memset(wdata->iov.iov_base, content, size); ++ wdata->iov.iov_len = size; ++ ++ pthread_mutex_lock(&the_mutex); ++ printf("(%d) dispatching write \"%s\", offset %lx, len %x, seq %ld\n", ++ wdata->cb.which, name, (long)wdata->offset, size, (long)seq); ++ pthread_mutex_unlock(&the_mutex); ++ ret = glfs_pwritev_async(fd, &wdata->iov, 1, wdata->offset, 0, ++ 
completion_fnc, &wdata->cb); ++ assert(ret >= 0); ++} ++ ++#define IDLE 0 // both workers must be idle ++#define ANY 1 // use any worker, other one may be busy ++ ++int ++get_worker(int waitfor, int64_t excl_seq) ++{ ++ int which; ++ ++ pthread_mutex_lock(&the_mutex); ++ ++ while (waitfor == IDLE && (all_data.busy & 3) != 0 || ++ waitfor == ANY && ++ ((all_data.busy & 3) == 3 || ++ excl_seq >= 0 && (all_data.wdata[0].cb.seq == excl_seq || ++ all_data.wdata[1].cb.seq == excl_seq))) ++ pthread_cond_wait(&the_cond, &the_mutex); ++ ++ if (!(all_data.busy & 1)) ++ which = 0; ++ else ++ which = 1; ++ ++ all_data.busy |= (1 << which); ++ ++ pthread_mutex_unlock(&the_mutex); ++ ++ return which; ++} ++ ++static int ++doit(struct glfs_fd *fd) ++{ ++ int ret; ++ int64_t seq = 0; ++ int64_t offset = 0; // position in file, in blocks ++ int64_t base = 0x1000; // where to place the data, in blocks ++ ++ int async_mode = ANY; ++ ++ init(); ++ ++ for (;;) { ++ int which; ++ worker_data *wdata; ++ ++ // for growing to the first offset ++ for (;;) { ++ int gap = base + 0x42 - offset; ++ if (!gap) ++ break; ++ if (gap > 80) ++ gap = 80; ++ ++ which = get_worker(IDLE, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = offset << 9; ++ do_write(fd, 0, gap << 9, seq++, wdata, "gap-filling"); ++ ++ offset += gap; ++ } ++ ++ // 8700 ++ which = get_worker(IDLE, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = (base + 0x42) << 9; ++ do_write(fd, 1, 62 << 9, seq++, wdata, "!8700"); ++ ++ // 8701 ++ which = get_worker(IDLE, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = (base + 0x42) << 9; ++ do_write(fd, 2, 55 << 9, seq++, wdata, "!8701"); ++ ++ // 8702 ++ which = get_worker(async_mode, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = (base + 0x79) << 9; ++ do_write(fd, 3, 54 << 9, seq++, wdata, "!8702"); ++ ++ // 8703 ++ which = get_worker(async_mode, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = (base + 0xaf) << 9; ++ 
do_write(fd, 4, 81 << 9, seq++, wdata, "!8703"); ++ ++ // 8704 ++ // this writes both 5s and 6s ++ // the range of 5s is the one that overwrites 8703 ++ ++ which = get_worker(async_mode, seq - 1); ++ wdata = &all_data.wdata[which]; ++ ++ memset(wdata->iov.iov_base, 5, 81 << 9); ++ wdata->offset = (base + 0xaf) << 9; ++ do_write(fd, -1, 1623 << 9, seq++, wdata, "!8704"); ++ ++ offset = base + 0x706; ++ base += 0x1000; ++ if (base >= 0x100000) ++ break; ++ } ++ ++ printf("done!\n"); ++ fflush(stdout); ++ ++ pthread_mutex_lock(&the_mutex); ++ ++ while ((all_data.busy & 3) != 0) ++ pthread_cond_wait(&the_cond, &the_mutex); ++ ++ pthread_mutex_unlock(&the_mutex); ++ ++ ret = glfs_close(fd); ++ assert(ret >= 0); ++ /* ++ ret = glfs_fini(glfs); ++ assert(ret >= 0); ++ */ ++ return 0; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ int ret; ++ int open_flags = O_RDWR | O_DIRECT | O_TRUNC; ++ struct glfs_fd *fd; ++ ++ glfs = glfs_new(argv[1]); ++ if (!glfs) { ++ printf("glfs_new!\n"); ++ goto out; ++ } ++ ret = glfs_set_volfile_server(glfs, "tcp", "localhost", 24007); ++ if (ret < 0) { ++ printf("set_volfile!\n"); ++ goto out; ++ } ++ ret = glfs_init(glfs); ++ if (ret) { ++ printf("init!\n"); ++ goto out; ++ } ++ fd = glfs_open(glfs, argv[2], open_flags); ++ if (!fd) { ++ printf("open!\n"); ++ goto out; ++ } ++ srand(time(NULL)); ++ return doit(fd); ++out: ++ return 1; ++} +diff --git a/tests/bugs/write-behind/issue-884.t b/tests/bugs/write-behind/issue-884.t +new file mode 100755 +index 0000000..2bcf7d1 +--- /dev/null ++++ b/tests/bugs/write-behind/issue-884.t +@@ -0,0 +1,40 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++# This test tries to detect a race condition in write-behind. It's based on a ++# reproducer written by Stefan Ring that is able to hit it sometimes. On my ++# system, it happened around 10% of the runs. This means that if this bug ++# appears again, this test will fail once every 10 runs. 
Most probably this ++# failure will be hidden by the automatic test retry of the testing framework. ++# ++# Please, if this test fails, it needs to be analyzed in detail. ++ ++function run() { ++ "${@}" >/dev/null ++} ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/$V0 ++# This makes it easier to hit the issue ++TEST $CLI volume set $V0 client-log-level TRACE ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++build_tester $(dirname $0)/issue-884.c -lgfapi ++ ++TEST touch $M0/testfile ++ ++# This program generates a file of 535694336 bytes with a fixed pattern ++TEST run $(dirname $0)/issue-884 $V0 testfile ++ ++# This is the md5sum of the expected pattern without corruption ++EXPECT "ad105f9349345a70fc697632cbb5eec8" echo "$(md5sum $B0/$V0/testfile | awk '{ print $1; }')" ++ ++cleanup +diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c +index 70e281a..90a0bcf 100644 +--- a/xlators/performance/write-behind/src/write-behind.c ++++ b/xlators/performance/write-behind/src/write-behind.c +@@ -1284,14 +1284,14 @@ __wb_pick_unwinds(wb_inode_t *wb_inode, list_head_t *lies) + + wb_inode->window_current += req->orig_size; + ++ wb_inode->gen++; ++ + if (!req->ordering.fulfilled) { + /* burden increased */ + list_add_tail(&req->lie, &wb_inode->liability); + + req->ordering.lied = 1; + +- wb_inode->gen++; +- + uuid_utoa_r(req->gfid, gfid); + gf_msg_debug(wb_inode->this->name, 0, + "(unique=%" PRIu64 +-- +1.8.3.1 + diff --git a/SOURCES/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch b/SOURCES/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch new file mode 100644 index 0000000..e1ea6d0 --- /dev/null +++ b/SOURCES/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch @@ -0,0 +1,47 @@ +From d7c0dc7107a024d28196a4582bacf28ddcfbeb69 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. 
KEITHLEY" +Date: Tue, 14 Apr 2020 07:59:22 -0400 +Subject: [PATCH 363/367] common-ha: cluster status shows "FAILOVER" when + actually HEALTHY + +pacemaker devs change the format of the ouput of `pcs status` + +Expected to find a line in the format: + +Online: .... + +but now it's + + * Online: ... + +And the `grep -E "^Online:" no longer finds the list of nodes that +are online. + + https://review.gluster.org/#/c/glusterfs/+/24333/ + +Change-Id: If2aa1e7b53c766c625d7b4cc222a83ea2c0bd72d +BUG: 1823706 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/197367 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 0b0050a..df333a1 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -935,7 +935,7 @@ status() + done + + # print the nodes that are expected to be online +- grep -E "^Online:" ${scratch} ++ grep -E "Online:" ${scratch} + + echo + +-- +1.8.3.1 + diff --git a/SOURCES/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch b/SOURCES/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch new file mode 100644 index 0000000..2c6ba98 --- /dev/null +++ b/SOURCES/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch @@ -0,0 +1,97 @@ +From 5b1bfebacac649e6f5051316e4075309caf93901 Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman +Date: Tue, 21 Apr 2020 19:13:41 +0300 +Subject: [PATCH 364/367] dht - fixing rebalance failures for files with holes + +Rebalance process handling of files which contains holes casued +rebalance to fail with "No space left on device" errors. +This patch modifies the code-flow in such a way that files with holes +will be rebalanced correctly. 
+ +backport of https://review.gluster.org/#/c/glusterfs/+/24357/ +>fixes: #1187 +>Change-Id: I89bc3d4ea7f074db7213d759c49307f379543932 +>Signed-off-by: Barak Sason Rofman + +BUG: 1823703 +Change-Id: I89bc3d4ea7f074db7213d759c49307f379543932 +Signed-off-by: Barak Sason Rofman +Reviewed-on: https://code.engineering.redhat.com/gerrit/198579 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-rebalance.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index f4c62b8..7d9df02 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -650,7 +650,7 @@ out: + static int + __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, + loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, +- int *fop_errno) ++ int *fop_errno, int file_has_holes) + { + int ret = -1; + int ret2 = -1; +@@ -819,7 +819,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, + + /* No need to bother about 0 byte size files */ + if (stbuf->ia_size > 0) { +- if (conf->use_fallocate) { ++ if (conf->use_fallocate && !file_has_holes) { + ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL); + if (ret < 0) { + if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) { +@@ -846,9 +846,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, + goto out; + } + } +- } +- +- if (!conf->use_fallocate) { ++ } else { + ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL, + NULL); + if (ret < 0) { +@@ -1728,9 +1726,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + goto out; + } + ++ /* Try to preserve 'holes' while migrating data */ ++ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) ++ file_has_holes = 1; ++ + /* create the destination, with 
required modes/xattr */ + ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd, +- fop_errno); ++ fop_errno, file_has_holes); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Create dst failed" +@@ -1774,8 +1776,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + * destination. We need to do update this only post migration + * as in case of failure the linkto needs to point to the source + * subvol */ +- ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, +- &dst_fd, fop_errno); ++ ret = __dht_rebalance_create_dst_file( ++ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Create dst failed" +@@ -1862,9 +1864,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + ret = 0; + goto out; + } +- /* Try to preserve 'holes' while migrating data */ +- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) +- file_has_holes = 1; + + ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd, + stbuf.ia_size, file_has_holes, +-- +1.8.3.1 + diff --git a/SOURCES/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch b/SOURCES/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch new file mode 100644 index 0000000..daf8dc6 --- /dev/null +++ b/SOURCES/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch @@ -0,0 +1,70 @@ +From 36180d21dc4b16619b75d65d51eaf37df4e0e2d3 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Mon, 20 Apr 2020 12:15:42 +0100 +Subject: [PATCH 365/367] build: geo-rep requires relevant selinux permission + for rsync + +If selinux is set in enforcing mode geo-rep goes into faulty state. + +To avoid this from happening some relevant selinux booleans need to be set +in 'on' state to allow rsync operation. + +Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24348. 
+ >Change-Id: Ia8ce530d6548c2a545f4c99c600f5aac2bbb3363 + >Fixes: #1182 + >Signed-off-by: Sunny Kumar + +BUG: 1813917 +Change-Id: Ia8ce530d6548c2a545f4c99c600f5aac2bbb3363 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/198599 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 7c8a751..5ed07e7 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -130,6 +130,12 @@ + ## All %%global definitions should be placed here and keep them sorted + ## + ++# selinux booleans whose defalut value needs modification ++# these booleans will be consumed by "%%selinux_set_booleans" macro. ++%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) ++%global selinuxbooleans rsync_full_access=1 rsync_client=1 ++%endif ++ + %if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) + %global _with_systemd true + %endif +@@ -515,6 +521,12 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release} + Requires: rsync + Requires: util-linux + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++# required for setting selinux bools ++%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) ++Requires: selinux-policy-targeted ++Requires(post): selinux-policy-targeted ++BuildRequires: selinux-policy-devel ++%endif + + %description geo-replication + GlusterFS is a distributed file-system capable of scaling to several +@@ -941,6 +953,9 @@ exit 0 + + %if ( 0%{!?_without_georeplication:1} ) + %post geo-replication ++%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) ++%selinux_set_booleans %{selinuxbooleans} ++%endif + if [ $1 -ge 1 ]; then + %systemd_postun_with_restart glusterd + fi +-- +1.8.3.1 + diff --git a/SOURCES/0366-snapshot-fix-python3-issue-in-gcron.patch b/SOURCES/0366-snapshot-fix-python3-issue-in-gcron.patch new file mode 100644 index 0000000..c704a17 --- /dev/null +++ b/SOURCES/0366-snapshot-fix-python3-issue-in-gcron.patch @@ 
-0,0 +1,55 @@ +From d7b84014cbb19e65dfae6248af47cc23fabc64e5 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Wed, 22 Apr 2020 15:09:16 +0100 +Subject: [PATCH 366/367] snapshot: fix python3 issue in gcron + +`$gcron.py test_vol Job` +Traceback: + File "/usr/sbin/gcron.py", line 189, in + main() + File "/usr/sbin/gcron.py", line 121, in main + initLogger(script_name) + File "/usr/sbin/gcron.py", line 44, in initLogger + logfile = os.path.join(out.strip(), script_name[:-3]+".log") + File "/usr/lib64/python3.6/posixpath.py", line 94, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.6/genericpath.py", line 151, in _check_arg_types + raise TypeError("Can't mix strings and bytes in path components") from None +TypeError: Can't mix strings and bytes in path components + +Solution: Added the 'universal_newlines' flag to Popen. + +Backport of: + + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24364/ + >Change-Id: I4c7a0e5bce605e4c134f6786c9dd8162b89fc77f + >Fixes: #1193 + >Signed-off-by: Sunny Kumar + +BUG: 1825195 +Change-Id: I4c7a0e5bce605e4c134f6786c9dd8162b89fc77f +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/198641 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/snap_scheduler/gcron.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py +index 1127be0..cc16310 100755 +--- a/extras/snap_scheduler/gcron.py ++++ b/extras/snap_scheduler/gcron.py +@@ -38,7 +38,8 @@ def initLogger(script_name): + sh.setFormatter(formatter) + + process = subprocess.Popen(["gluster", "--print-logdir"], +- stdout=subprocess.PIPE) ++ stdout=subprocess.PIPE, ++ universal_newlines=True) + out, err = process.communicate() + if process.returncode == 0: + logfile = os.path.join(out.strip(), script_name[:-3]+".log") +-- +1.8.3.1 + diff --git 
a/SOURCES/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch b/SOURCES/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch new file mode 100644 index 0000000..b94f8fc --- /dev/null +++ b/SOURCES/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch @@ -0,0 +1,95 @@ +From aef8e51b9974603d397cc8f5301b24451d012e46 Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Fri, 24 Apr 2020 13:32:51 +0530 +Subject: [PATCH 367/367] dht: Handle setxattr and rm race for directory in + rebalance + +Problem: Selfheal as part of directory does not return an error if +the layout setxattr fails. This is because the actual lookup fop +must have been successful to proceed for layout heal. Hence, we could +not tell if fix-layout failed in rebalance. + +Solution: We can check this information in the layout structure that +whether all the xlators have returned error. + +> fixes: #1200 +> hange-Id: I3e5f2a36c0d934c21476a73a9a5473d8e490cde7 +> Signed-off-by: Susant Palai +(backport of https://review.gluster.org/#/c/glusterfs/+/24375/) + +BUG: 1812789 +Change-Id: I897826c4c2e883b3085c9314deff32d649b4588e +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/198726 +Reviewed-by: Mohit Agrawal +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 19 +++++++++++++++++++ + xlators/cluster/dht/src/dht-common.h | 3 +++ + xlators/cluster/dht/src/dht-rebalance.c | 11 +++++++++++ + 3 files changed, 33 insertions(+) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index d0b5287..7890e7a 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -11286,3 +11286,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); + return 0; + } ++ ++/* The job of this function is to check if all the xlators have updated ++ * error in the 
layout. */ ++int ++dht_dir_layout_error_check(xlator_t *this, inode_t *inode) ++{ ++ dht_layout_t *layout = NULL; ++ int i = 0; ++ ++ layout = dht_layout_get(this, inode); ++ for (i = 0; i < layout->cnt; i++) { ++ if (layout->list[i].err == 0) { ++ return 0; ++ } ++ } ++ ++ /* Returning the first xlator error as all xlators have errors */ ++ return layout->list[0].err; ++} +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index ce11f02..4d2aae6 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -1544,4 +1544,7 @@ dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + int32_t + dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno); + ++int ++dht_dir_layout_error_check(xlator_t *this, inode_t *inode); ++ + #endif /* _DHT_H */ +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 7d9df02..33cacfe 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -3928,6 +3928,17 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + } + + ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL); ++ ++ /* In case of a race where the directory is deleted just before ++ * layout setxattr, the errors are updated in the layout structure. ++ * We can use this information to make a decision whether the directory ++ * is deleted entirely. 
++ */ ++ if (ret == 0) { ++ ret = dht_dir_layout_error_check(this, loc->inode); ++ ret = -ret; ++ } ++ + if (ret) { + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED, +-- +1.8.3.1 + diff --git a/SOURCES/0368-Update-rfc.sh-to-rhgs-3.5.2.patch b/SOURCES/0368-Update-rfc.sh-to-rhgs-3.5.2.patch new file mode 100644 index 0000000..c103891 --- /dev/null +++ b/SOURCES/0368-Update-rfc.sh-to-rhgs-3.5.2.patch @@ -0,0 +1,26 @@ +From 00b79c4e2837980f36f7d8387d90cfb7dc8d0d58 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Tue, 5 May 2020 12:41:41 -0400 +Subject: [PATCH 368/375] Update rfc.sh to rhgs-3.5.2 + +Signed-off-by: Rinku Kothiya +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index a408e45..37d551f 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.1-rhel-8"; ++branch="rhgs-3.5.2"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0369-cluster-ec-Return-correct-error-code-and-log-message.patch b/SOURCES/0369-cluster-ec-Return-correct-error-code-and-log-message.patch new file mode 100644 index 0000000..c3c8925 --- /dev/null +++ b/SOURCES/0369-cluster-ec-Return-correct-error-code-and-log-message.patch @@ -0,0 +1,53 @@ +From f30fa3938f980f03d08479776037090e7fc11f42 Mon Sep 17 00:00:00 2001 +From: Ashish Pandey +Date: Tue, 5 May 2020 18:17:49 +0530 +Subject: [PATCH 369/375] cluster/ec: Return correct error code and log message + +In case of readdir was send with an FD on which opendir +was failed, this FD will be useless and we return it with error. +For now, we are returning it with EINVAL without logging any +message in log file. + +Return a correct error code and also log the message to improve thing to debug. 
+ +>fixes: #1220 +>Change-Id: Iaf035254b9c5aa52fa43ace72d328be622b06169 +>Signed-off-by: Ashish Pandey +(Backport of https://review.gluster.org/#/c/glusterfs/+/24407/) + +BUG: 1831403 +Change-Id: Ib5bf30c47b7491abd0ad5ca0ce52ec77945b2e53 +Signed-off-by: Ashish Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/200209 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-dir-read.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c +index 8310d4a..9924425 100644 +--- a/xlators/cluster/ec/src/ec-dir-read.c ++++ b/xlators/cluster/ec/src/ec-dir-read.c +@@ -388,9 +388,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state) + /* Return error if opendir has not been successfully called on + * any subvolume. */ + ctx = ec_fd_get(fop->fd, fop->xl); +- if ((ctx == NULL) || (ctx->open == 0)) { +- fop->error = EINVAL; ++ if (ctx == NULL) { ++ fop->error = ENOMEM; ++ } else if (ctx->open == 0) { ++ fop->error = EBADFD; ++ } + ++ if (fop->error) { ++ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error, ++ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s", ++ ec_msg_str(fop)); + return EC_STATE_REPORT; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch b/SOURCES/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch new file mode 100644 index 0000000..6648a4e --- /dev/null +++ b/SOURCES/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch @@ -0,0 +1,203 @@ +From 3d230880aed85737365deafe3c9a32c67da2a79e Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Mon, 4 May 2020 19:09:00 +0530 +Subject: [PATCH 370/375] dht: Do opendir selectively in gf_defrag_process_dir + +Currently opendir is done from the cluster view. Hence, even if +one opendir is successful, the opendir operation as a whole is considered +successful. 
+ +But since in gf_defrag_get_entry we fetch entries selectively from +local_subvols, we need to opendir individually on those local subvols +and keep track of fds separately. Otherwise it is possible that opendir +failed on one of the subvol and we wind readdirp call on the fd to the +corresponding subvol, which will ultimately result in EINVAL error. + +> fixes: #1218 +> Change-Id: I50dd88b9597852a15579f4ee325918979417f570 +> Signed-off-by: Susant Palai +(Backport of https://review.gluster.org/#/c/glusterfs/+/24404/) + +BUG: 1831403 +Change-Id: I96e19fdd630279c3ef44f361c1d1fc5c1c429821 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/200306 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.h | 2 + + xlators/cluster/dht/src/dht-rebalance.c | 74 +++++++++++++++++++++++---------- + 2 files changed, 54 insertions(+), 22 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 4d2aae6..8e65111 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -742,6 +742,8 @@ struct dir_dfmeta { + struct list_head **head; + struct list_head **iterator; + int *fetch_entries; ++ /* fds corresponding to local subvols only */ ++ fd_t **lfd; + }; + + typedef struct dht_migrate_info { +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 33cacfe..c692119 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -48,6 +48,8 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) + if (meta) { + for (i = 0; i < local_subvols_cnt; i++) { + gf_dirent_free(&meta->equeue[i]); ++ if (meta->lfd && meta->lfd[i]) ++ fd_unref(meta->lfd[i]); + } + + GF_FREE(meta->equeue); +@@ -55,6 +57,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) + 
GF_FREE(meta->iterator); + GF_FREE(meta->offset_var); + GF_FREE(meta->fetch_entries); ++ GF_FREE(meta->lfd); + GF_FREE(meta); + } + } +@@ -3095,7 +3098,7 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, + int *should_commit_hash, int *perrno) + { +- int ret = -1; ++ int ret = 0; + char is_linkfile = 0; + gf_dirent_t *df_entry = NULL; + struct dht_container *tmp_container = NULL; +@@ -3111,6 +3114,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + } + + if (dir_dfmeta->fetch_entries[i] == 1) { ++ if (!fd) { ++ dir_dfmeta->fetch_entries[i] = 0; ++ dir_dfmeta->offset_var[i].readdir_done = 1; ++ ret = 0; ++ goto out; ++ } ++ + ret = syncop_readdirp(conf->local_subvols[i], fd, 131072, + dir_dfmeta->offset_var[i].offset, + &(dir_dfmeta->equeue[i]), xattr_req, NULL); +@@ -3270,7 +3280,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dict_t *migrate_data, int *perrno) + { + int ret = -1; +- fd_t *fd = NULL; + dht_conf_t *conf = NULL; + gf_dirent_t entries; + dict_t *xattr_req = NULL; +@@ -3304,28 +3313,49 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + goto out; + } + +- fd = fd_create(loc->inode, defrag->pid); +- if (!fd) { +- gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); ++ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); ++ if (!dir_dfmeta) { ++ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); + ret = -1; + goto out; + } + +- ret = syncop_opendir(this, loc, fd, NULL, NULL); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED, +- "Migrate data failed: Failed to open dir %s", loc->path); +- *perrno = -ret; ++ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *), ++ gf_common_mt_pointer); ++ if (!dir_dfmeta->lfd) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, ++ "could not allocate memory for dir_dfmeta"); + ret = -1; ++ *perrno = ENOMEM; + goto out; + } 
+ +- fd_bind(fd); +- dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); +- if (!dir_dfmeta) { +- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); +- ret = -1; +- goto out; ++ for (i = 0; i < local_subvols_cnt; i++) { ++ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid); ++ if (!dir_dfmeta->lfd[i]) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "failed to create fd"); ++ *perrno = ENOMEM; ++ ret = -1; ++ goto out; ++ } ++ ++ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i], ++ NULL, NULL); ++ if (ret) { ++ fd_unref(dir_dfmeta->lfd[i]); ++ dir_dfmeta->lfd[i] = NULL; ++ gf_smsg(this->name, GF_LOG_WARNING, 0, 0, ++ "failed to open dir: %s subvol: %s", loc->path, ++ conf->local_subvols[i]->name); ++ ++ if (conf->decommission_in_progress) { ++ *perrno = -ret; ++ ret = -1; ++ goto out; ++ } ++ } else { ++ fd_bind(dir_dfmeta->lfd[i]); ++ } + } + + dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)), +@@ -3360,6 +3390,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + ret = -1; + goto out; + } ++ + ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, +@@ -3372,7 +3403,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int), + gf_common_mt_int); + if (!dir_dfmeta->fetch_entries) { +- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL"); ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, ++ "could not allocate memory for dir_dfmeta->fetch_entries"); + ret = -1; + goto out; + } +@@ -3442,8 +3474,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + ldfq_count <= MAX_MIGRATE_QUEUE_COUNT && + !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) { + ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf, +- defrag, fd, migrate_data, dir_dfmeta, +- 
xattr_req, &should_commit_hash, perrno); ++ defrag, dir_dfmeta->lfd[dfc_index], ++ migrate_data, dir_dfmeta, xattr_req, ++ &should_commit_hash, perrno); + + if (ret) { + gf_log(this->name, GF_LOG_WARNING, +@@ -3497,9 +3530,6 @@ out: + if (xattr_req) + dict_unref(xattr_req); + +- if (fd) +- fd_unref(fd); +- + if (ret == 0 && should_commit_hash == 0) { + ret = 2; + } +-- +1.8.3.1 + diff --git a/SOURCES/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch b/SOURCES/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch new file mode 100644 index 0000000..a395da3 --- /dev/null +++ b/SOURCES/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch @@ -0,0 +1,53 @@ +From 05bd0226716516d37ead173c7d6924225bd474db Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Wed, 6 May 2020 07:24:38 -0400 +Subject: [PATCH 371/375] common-ha: cluster status shows "FAILOVER" when + actually HEALTHY + +pacemaker devs changed the format of the output of `pcs status` + +Expected to find a line in the format: + + Online: .... + +but now it's + + * Online: ... + +And the `grep -E "^Online:"` no longer finds the list of nodes that +are online. + +Also other lines now have '*' in first few characters of the line +throwing off `grep -x ...` + +https://review.gluster.org/#/c/glusterfs/+/24403/ + +Change-Id: Ia04a89e76914f2a455a755f0a93fa415f60aefd0 +BUG: 1823706 +Signed-off-by: Kaleb S.
KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/199442 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index df333a1..4ecf91b 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -919,8 +919,9 @@ status() + local index=1 + local nodes + +- # change tabs to spaces, strip leading spaces +- pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch} ++ # change tabs to spaces, strip leading spaces, including any ++ # new '*' at the beginning of a line introduced in pcs-0.10.x ++ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch} + + nodes[0]=${1}; shift + +-- +1.8.3.1 + diff --git a/SOURCES/0372-posix-fix-seek-functionality.patch b/SOURCES/0372-posix-fix-seek-functionality.patch new file mode 100644 index 0000000..7c286c2 --- /dev/null +++ b/SOURCES/0372-posix-fix-seek-functionality.patch @@ -0,0 +1,49 @@ +From 955fea10809861aa9b3da85d386c2cc92b319cdb Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman +Date: Thu, 7 May 2020 18:57:37 +0300 +Subject: [PATCH 372/375] posix - fix seek functionality + +A wrong pointer check causes the offset returned by seek to be always +wrong + +backport of https://review.gluster.org/#/c/glusterfs/+/24412/ +>fixes: #1228 +>Change-Id: Iac4c6a163175617ac4f14544fc6b7c6fb4041cd6 +>Signed-off-by: Barak Sason Rofman + +BUG: 1833017 +Change-Id: Iac4c6a163175617ac4f14544fc6b7c6fb4041cd6 +Signed-off-by: Barak Sason Rofman +Reviewed-on: https://code.engineering.redhat.com/gerrit/199761 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/syncop.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 
0de53c6..693970f 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -2881,12 +2881,13 @@ syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what, + SYNCOP(subvol, (&args), syncop_seek_cbk, subvol->fops->seek, fd, offset, + what, xdata_in); + +- if (*off) +- *off = args.offset; +- +- if (args.op_ret == -1) ++ if (args.op_ret < 0) { + return -args.op_errno; +- return args.op_ret; ++ } else { ++ if (off) ++ *off = args.offset; ++ return args.op_ret; ++ } + } + + int +-- +1.8.3.1 + diff --git a/SOURCES/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch b/SOURCES/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch new file mode 100644 index 0000000..7abaf0e --- /dev/null +++ b/SOURCES/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch @@ -0,0 +1,51 @@ +From bbf43008e6d21d649536547f500662b940562c3e Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Mon, 11 May 2020 10:02:08 +0100 +Subject: [PATCH 373/375] build: geo-rep sub-pkg requires + policycoreutils-python-utils on rhel8 + +glusterfs-geo-replication sub-package requires policycoreutils-python-utils +on rhel8 to set relevant selinux boolean to allow rsync. 
+ +Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24433/ + >Change-Id: Ia0fdcfdd8c7d18cd194e011f6b365bf5cb70a20a + >Fixes: #1236 + >Signed-off-by: Sunny Kumar + +BUG: 1825177 +Change-Id: Ia0fdcfdd8c7d18cd194e011f6b365bf5cb70a20a +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/200242 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 5ed07e7..9def416 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -523,6 +523,8 @@ Requires: util-linux + Requires: %{name}-libs%{?_isa} = %{version}-%{release} + # required for setting selinux bools + %if ( 0%{?rhel} && 0%{?rhel} >= 8 ) ++Requires(post): policycoreutils-python-utils ++Requires(postun): policycoreutils-python-utils + Requires: selinux-policy-targeted + Requires(post): selinux-policy-targeted + BuildRequires: selinux-policy-devel +@@ -1978,6 +1980,10 @@ fi + %endif + + %changelog ++ ++* Mon May 11 2020 Sunny Kumar ++- added requires policycoreutils-python-utils on rhel8 for geo-replication ++ + * Tue Aug 27 2019 Hari Gowtham + - Added scripts to collect machine stats and component stats (#1719171) + +-- +1.8.3.1 + diff --git a/SOURCES/0374-open-behind-fix-missing-fd-reference.patch b/SOURCES/0374-open-behind-fix-missing-fd-reference.patch new file mode 100644 index 0000000..94a1fb9 --- /dev/null +++ b/SOURCES/0374-open-behind-fix-missing-fd-reference.patch @@ -0,0 +1,121 @@ +From 30cbdf8c06145a0c290da42ecc0a7eae928200b7 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Sun, 8 Mar 2020 18:36:45 +0100 +Subject: [PATCH 374/375] open-behind: fix missing fd reference + +Open behind was not keeping any reference on fd's pending to be +opened. This makes it possible that a concurrent close and an entry +fop (unlink, rename, ...) caused destruction of the fd while it +was still being used.
+ +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24204 +> Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9 +> Fixes: bz#1810934 +> Signed-off-by: Xavi Hernandez + +Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9 +BUG: 1830713 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/199714 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 27 ++++++++++++++--------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 268c717..14ebc12 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -206,8 +206,13 @@ ob_fd_free(ob_fd_t *ob_fd) + if (ob_fd->xdata) + dict_unref(ob_fd->xdata); + +- if (ob_fd->open_frame) ++ if (ob_fd->open_frame) { ++ /* If we sill have a frame it means that background open has never ++ * been triggered. We need to release the pending reference. */ ++ fd_unref(ob_fd->fd); ++ + STACK_DESTROY(ob_fd->open_frame->root); ++ } + + GF_FREE(ob_fd); + } +@@ -297,6 +302,7 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + call_resume(stub); + } + ++ /* The background open is completed. We can release the 'fd' reference. */ + fd_unref(fd); + + STACK_DESTROY(frame->root); +@@ -331,7 +337,9 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) + } + + if (frame) { +- frame->local = fd_ref(fd); ++ /* We don't need to take a reference here. We already have a reference ++ * while the open is pending. 
*/ ++ frame->local = fd; + + STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +@@ -345,15 +353,12 @@ void + ob_inode_wake(xlator_t *this, struct list_head *ob_fds) + { + ob_fd_t *ob_fd = NULL, *tmp = NULL; +- fd_t *fd = NULL; + + if (!list_empty(ob_fds)) { + list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) + { + ob_fd_wake(this, ob_fd->fd, ob_fd); +- fd = ob_fd->fd; + ob_fd_free(ob_fd); +- fd_unref(fd); + } + } + } +@@ -365,7 +370,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) + if (!src || !dst) + goto out; + +- dst->fd = __fd_ref(src->fd); ++ dst->fd = src->fd; + dst->loc.inode = inode_ref(src->loc.inode); + gf_uuid_copy(dst->loc.gfid, src->loc.gfid); + dst->flags = src->flags; +@@ -509,7 +514,6 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + + ob_fd->ob_inode = ob_inode; + +- /* don't do fd_ref, it'll cause leaks */ + ob_fd->fd = fd; + + ob_fd->open_frame = copy_frame(frame); +@@ -539,15 +543,16 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + } + UNLOCK(&fd->inode->lock); + +- if (!open_in_progress && !unlinked) { +- fd_ref(fd); ++ /* We take a reference while the background open is pending or being ++ * processed. If we finally wind the request in the foreground, then ++ * ob_fd_free() will take care of this additional reference. 
*/ ++ fd_ref(fd); + ++ if (!open_in_progress && !unlinked) { + STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake(this, fd, NULL); +- +- fd_unref(fd); + } else { + ob_fd_free(ob_fd); + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +-- +1.8.3.1 + diff --git a/SOURCES/0375-features-shard-Send-correct-size-when-reads-are-sent.patch b/SOURCES/0375-features-shard-Send-correct-size-when-reads-are-sent.patch new file mode 100644 index 0000000..32f9c19 --- /dev/null +++ b/SOURCES/0375-features-shard-Send-correct-size-when-reads-are-sent.patch @@ -0,0 +1,75 @@ +From ac5b1b38e705bd0e4c00cc50580a71dfaa4d3b5f Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Wed, 7 Aug 2019 12:12:43 +0530 +Subject: [PATCH 375/375] features/shard: Send correct size when reads are sent + beyond file size + +Backport of: +> https://review.gluster.org/c/glusterfs/+/23175 +> Change-Id: I0cebaaf55c09eb1fb77a274268ff564e871b743b +> fixes bz#1738419 +> Signed-off-by: Krutika Dhananjay + +Change-Id: I0cebaaf55c09eb1fb77a274268ff564e871b743b +BUG: 1802013 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/199570 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/shard/bug-1738419.t | 29 +++++++++++++++++++++++++++++ + xlators/features/shard/src/shard.c | 2 ++ + 2 files changed, 31 insertions(+) + create mode 100644 tests/bugs/shard/bug-1738419.t + +diff --git a/tests/bugs/shard/bug-1738419.t b/tests/bugs/shard/bug-1738419.t +new file mode 100644 +index 0000000..8d0a31d +--- /dev/null ++++ b/tests/bugs/shard/bug-1738419.t +@@ -0,0 +1,29 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 network.remote-dio off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.strict-o-direct on ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST dd if=/dev/zero of=$M0/metadata bs=501 count=1 ++ ++EXPECT "501" echo $("dd" if=$M0/metadata bs=4096 count=1 of=/dev/null iflag=direct 2>&1 | awk '/bytes/ {print $1}') ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index b224abd..9ed597b 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -4433,6 +4433,8 @@ out: + if (xdata) + local->xattr_rsp = dict_ref(xdata); + vec.iov_base = local->iobuf->ptr; ++ if (local->offset + local->req_size > local->prebuf.ia_size) ++ local->total_size = local->prebuf.ia_size - local->offset; + vec.iov_len = local->total_size; + local->op_ret = local->total_size; + SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1, +-- +1.8.3.1 + diff --git a/SOURCES/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch b/SOURCES/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch new file mode 100644 index 0000000..b295fc2 --- /dev/null +++ b/SOURCES/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch @@ -0,0 +1,70 @@ +From 341d75642ecc4e27bc6fecb56eb98a0ba03d8544 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Mon, 23 Mar 2020 11:47:10 +0530 +Subject: [PATCH 376/379] features/shard: Fix crash during shards cleanup in + error cases + 
+Backport of: +> https://review.gluster.org/c/glusterfs/+/24244 +> Change-Id: I0b49f2b58becd0d8874b3d4b14ff8d92a89d02d5 +> Fixes: #1127 +> Signed-off-by: Krutika Dhananjay + +A crash is seen during a reattempt to clean up shards in background +upon remount. And this happens even on remount (which means a remount +is no workaround for the crash). + +In such a situation, the in-memory base inode object will not be +existent (new process, non-existent base shard). +So local->resolver_base_inode will be NULL. + +In the event of an error (in this case, of space running out), the +process would crash at the time of logging the error in the following line - + + gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, + "failed to delete shards of %s", + uuid_utoa(local->resolver_base_inode->gfid)); + +Fixed that by using local->base_gfid as the source of gfid when +local->resolver_base_inode is NULL. + +Change-Id: I0b49f2b58becd0d8874b3d4b14ff8d92a89d02d5 +BUG: 1836233 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/200689 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/shard/src/shard.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 9ed597b..ee38ed2 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -2729,13 +2729,20 @@ int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); + int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; + + local = frame->local; + ++ if (local->resolver_base_inode) ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { + gf_msg(this->name, GF_LOG_ERROR, 
local->op_errno, SHARD_MSG_FOP_FAILED, +- "failed to delete shards of %s", +- uuid_utoa(local->resolver_base_inode->gfid)); ++ "failed to delete shards of %s", uuid_utoa(gfid)); + return 0; + } + local->op_ret = 0; +-- +1.8.3.1 + diff --git a/SOURCES/0377-syncop-improve-scaling-and-implement-more-tools.patch b/SOURCES/0377-syncop-improve-scaling-and-implement-more-tools.patch new file mode 100644 index 0000000..66cccc3 --- /dev/null +++ b/SOURCES/0377-syncop-improve-scaling-and-implement-more-tools.patch @@ -0,0 +1,862 @@ +From 66600fb55522d405a68d7340a5680a2633c4237e Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Thu, 30 Apr 2020 11:19:01 +0200 +Subject: [PATCH 377/379] syncop: improve scaling and implement more tools + +The current scaling of the syncop thread pool is not working properly +and can leave some tasks in the run queue more time than necessary +when the maximum number of threads is not reached. + +This patch provides a better scaling condition to react faster to +pending work. + +Condition variables and sleep in the context of a synctask have also +been implemented. Their purpose is to replace regular condition +variables and sleeps that block synctask threads and prevent other +tasks to be executed. + +The new features have been applied to several places in glusterd. 
+ +upstream patch: https://review.gluster.org/#/c/glusterfs/+/24396/ + +> Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf +> Fixes: #1116 +> Signed-off-by: Xavi Hernandez + +Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf +BUG: 1810516 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/200409 +Tested-by: Sanju Rakonde +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez Juan +--- + libglusterfs/src/glusterfs/syncop.h | 52 +++- + libglusterfs/src/libglusterfs.sym | 7 + + libglusterfs/src/syncop.c | 306 ++++++++++++++++----- + xlators/cluster/dht/src/dht-rebalance.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 9 +- + xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 +- + .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 5 +- + xlators/mgmt/glusterd/src/glusterd-syncop.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 29 +- + xlators/mgmt/glusterd/src/glusterd.c | 2 + + xlators/mgmt/glusterd/src/glusterd.h | 2 + + 11 files changed, 317 insertions(+), 101 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h +index e0f1017..3011b4c 100644 +--- a/libglusterfs/src/glusterfs/syncop.h ++++ b/libglusterfs/src/glusterfs/syncop.h +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include "glusterfs/timer.h" + + #define SYNCENV_PROC_MAX 16 + #define SYNCENV_PROC_MIN 2 +@@ -32,6 +33,7 @@ + struct synctask; + struct syncproc; + struct syncenv; ++struct synccond; + + typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque); + +@@ -55,9 +57,12 @@ struct synctask { + call_frame_t *opframe; + synctask_cbk_t synccbk; + synctask_fn_t syncfn; +- synctask_state_t state; ++ struct timespec *delta; ++ gf_timer_t *timer; ++ struct synccond *synccond; + void *opaque; + void *stack; ++ synctask_state_t state; + int woken; + int slept; + int ret; +@@ -85,19 +90,21 @@ struct syncproc { + /* hosts the scheduler thread and framework for executing synctasks */ + 
struct syncenv { + struct syncproc proc[SYNCENV_PROC_MAX]; +- int procs; ++ ++ pthread_mutex_t mutex; ++ pthread_cond_t cond; + + struct list_head runq; +- int runcount; + struct list_head waitq; +- int waitcount; ++ ++ int procs; ++ int procs_idle; ++ ++ int runcount; + + int procmin; + int procmax; + +- pthread_mutex_t mutex; +- pthread_cond_t cond; +- + size_t stacksize; + + int destroy; /* FLAG to mark syncenv is in destroy mode +@@ -123,6 +130,13 @@ struct synclock { + }; + typedef struct synclock synclock_t; + ++struct synccond { ++ pthread_mutex_t pmutex; ++ pthread_cond_t pcond; ++ struct list_head waitq; ++}; ++typedef struct synccond synccond_t; ++ + struct syncbarrier { + gf_boolean_t initialized; /*Set on successful initialization*/ + pthread_mutex_t guard; /* guard the remaining members, pair @cond */ +@@ -219,7 +233,7 @@ struct syncopctx { + #define __yield(args) \ + do { \ + if (args->task) { \ +- synctask_yield(args->task); \ ++ synctask_yield(args->task, NULL); \ + } else { \ + pthread_mutex_lock(&args->mutex); \ + { \ +@@ -307,7 +321,9 @@ synctask_join(struct synctask *task); + void + synctask_wake(struct synctask *task); + void +-synctask_yield(struct synctask *task); ++synctask_yield(struct synctask *task, struct timespec *delta); ++void ++synctask_sleep(int32_t secs); + void + synctask_waitfor(struct synctask *task, int count); + +@@ -405,6 +421,24 @@ synclock_trylock(synclock_t *lock); + int + synclock_unlock(synclock_t *lock); + ++int32_t ++synccond_init(synccond_t *cond); ++ ++void ++synccond_destroy(synccond_t *cond); ++ ++int ++synccond_wait(synccond_t *cond, synclock_t *lock); ++ ++int ++synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta); ++ ++void ++synccond_signal(synccond_t *cond); ++ ++void ++synccond_broadcast(synccond_t *cond); ++ + int + syncbarrier_init(syncbarrier_t *barrier); + int +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 467a1b7..5a721e0 100644 +--- 
a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -938,6 +938,12 @@ syncbarrier_destroy + syncbarrier_init + syncbarrier_wait + syncbarrier_wake ++synccond_init ++synccond_destroy ++synccond_wait ++synccond_timedwait ++synccond_signal ++synccond_broadcast + syncenv_destroy + syncenv_new + synclock_destroy +@@ -1015,6 +1021,7 @@ synctask_new + synctask_new1 + synctask_set + synctask_setid ++synctask_sleep + synctask_wake + synctask_yield + sys_access +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 693970f..71d37b7 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -154,10 +154,14 @@ out: + return ret; + } + ++void * ++syncenv_processor(void *thdata); ++ + static void + __run(struct synctask *task) + { + struct syncenv *env = NULL; ++ int32_t total, ret, i; + + env = task->env; + +@@ -173,7 +177,6 @@ __run(struct synctask *task) + env->runcount--; + break; + case SYNCTASK_WAIT: +- env->waitcount--; + break; + case SYNCTASK_DONE: + gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, +@@ -187,8 +190,27 @@ __run(struct synctask *task) + } + + list_add_tail(&task->all_tasks, &env->runq); +- env->runcount++; + task->state = SYNCTASK_RUN; ++ ++ env->runcount++; ++ ++ total = env->procs + env->runcount - env->procs_idle; ++ if (total > env->procmax) { ++ total = env->procmax; ++ } ++ if (total > env->procs) { ++ for (i = 0; i < env->procmax; i++) { ++ if (env->proc[i].env == NULL) { ++ env->proc[i].env = env; ++ ret = gf_thread_create(&env->proc[i].processor, NULL, ++ syncenv_processor, &env->proc[i], ++ "sproc%d", i); ++ if ((ret < 0) || (++env->procs >= total)) { ++ break; ++ } ++ } ++ } ++ } + } + + static void +@@ -210,7 +232,6 @@ __wait(struct synctask *task) + gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK, + "re-waiting already waiting " + "task"); +- env->waitcount--; + break; + case SYNCTASK_DONE: + gf_msg(task->xl->name, GF_LOG_WARNING, 0, 
LG_MSG_COMPLETED_TASK, +@@ -223,12 +244,11 @@ __wait(struct synctask *task) + } + + list_add_tail(&task->all_tasks, &env->waitq); +- env->waitcount++; + task->state = SYNCTASK_WAIT; + } + + void +-synctask_yield(struct synctask *task) ++synctask_yield(struct synctask *task, struct timespec *delta) + { + xlator_t *oldTHIS = THIS; + +@@ -237,6 +257,8 @@ synctask_yield(struct synctask *task) + task->proc->sched.uc_flags &= ~_UC_TLSBASE; + #endif + ++ task->delta = delta; ++ + if (task->state != SYNCTASK_DONE) { + task->state = SYNCTASK_SUSPEND; + } +@@ -249,6 +271,35 @@ synctask_yield(struct synctask *task) + } + + void ++synctask_sleep(int32_t secs) ++{ ++ struct timespec delta; ++ struct synctask *task; ++ ++ task = synctask_get(); ++ ++ if (task == NULL) { ++ sleep(secs); ++ } else { ++ delta.tv_sec = secs; ++ delta.tv_nsec = 0; ++ ++ synctask_yield(task, &delta); ++ } ++} ++ ++static void ++__synctask_wake(struct synctask *task) ++{ ++ task->woken = 1; ++ ++ if (task->slept) ++ __run(task); ++ ++ pthread_cond_broadcast(&task->env->cond); ++} ++ ++void + synctask_wake(struct synctask *task) + { + struct syncenv *env = NULL; +@@ -257,13 +308,18 @@ synctask_wake(struct synctask *task) + + pthread_mutex_lock(&env->mutex); + { +- task->woken = 1; ++ if (task->timer != NULL) { ++ if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) { ++ goto unlock; ++ } + +- if (task->slept) +- __run(task); ++ task->timer = NULL; ++ task->synccond = NULL; ++ } + +- pthread_cond_broadcast(&env->cond); ++ __synctask_wake(task); + } ++unlock: + pthread_mutex_unlock(&env->mutex); + } + +@@ -282,7 +338,7 @@ synctask_wrap(void) + + task->state = SYNCTASK_DONE; + +- synctask_yield(task); ++ synctask_yield(task, NULL); + } + + void +@@ -422,11 +478,6 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, + } + + synctask_wake(newtask); +- /* +- * Make sure someone's there to execute anything we just put on the +- * run queue. 
+- */ +- syncenv_scale(env); + + return newtask; + err: +@@ -520,8 +571,12 @@ syncenv_task(struct syncproc *proc) + goto unlock; + } + ++ env->procs_idle++; ++ + sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME; + ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till); ++ ++ env->procs_idle--; + } + + task = list_entry(env->runq.next, struct synctask, all_tasks); +@@ -540,6 +595,34 @@ unlock: + return task; + } + ++static void ++synctask_timer(void *data) ++{ ++ struct synctask *task = data; ++ struct synccond *cond; ++ ++ cond = task->synccond; ++ if (cond != NULL) { ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_del_init(&task->waitq); ++ task->synccond = NULL; ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ task->ret = -ETIMEDOUT; ++ } ++ ++ pthread_mutex_lock(&task->env->mutex); ++ ++ gf_timer_call_cancel(task->xl->ctx, task->timer); ++ task->timer = NULL; ++ ++ __synctask_wake(task); ++ ++ pthread_mutex_unlock(&task->env->mutex); ++} ++ + void + synctask_switchto(struct synctask *task) + { +@@ -572,7 +655,14 @@ synctask_switchto(struct synctask *task) + } else { + task->slept = 1; + __wait(task); ++ ++ if (task->delta != NULL) { ++ task->timer = gf_timer_call_after(task->xl->ctx, *task->delta, ++ synctask_timer, task); ++ } + } ++ ++ task->delta = NULL; + } + pthread_mutex_unlock(&env->mutex); + } +@@ -580,65 +670,18 @@ synctask_switchto(struct synctask *task) + void * + syncenv_processor(void *thdata) + { +- struct syncenv *env = NULL; + struct syncproc *proc = NULL; + struct synctask *task = NULL; + + proc = thdata; +- env = proc->env; +- +- for (;;) { +- task = syncenv_task(proc); +- if (!task) +- break; + ++ while ((task = syncenv_task(proc)) != NULL) { + synctask_switchto(task); +- +- syncenv_scale(env); + } + + return NULL; + } + +-void +-syncenv_scale(struct syncenv *env) +-{ +- int diff = 0; +- int scale = 0; +- int i = 0; +- int ret = 0; +- +- pthread_mutex_lock(&env->mutex); +- { +- if (env->procs > env->runcount) +- goto unlock; +- 
+- scale = env->runcount; +- if (scale > env->procmax) +- scale = env->procmax; +- if (scale > env->procs) +- diff = scale - env->procs; +- while (diff) { +- diff--; +- for (; (i < env->procmax); i++) { +- if (env->proc[i].processor == 0) +- break; +- } +- +- env->proc[i].env = env; +- ret = gf_thread_create(&env->proc[i].processor, NULL, +- syncenv_processor, &env->proc[i], +- "sproc%03hx", env->procs & 0x3ff); +- if (ret) +- break; +- env->procs++; +- i++; +- } +- } +-unlock: +- pthread_mutex_unlock(&env->mutex); +-} +- + /* The syncenv threads are cleaned up in this routine. + */ + void +@@ -715,12 +758,13 @@ syncenv_new(size_t stacksize, int procmin, int procmax) + newenv->stacksize = stacksize; + newenv->procmin = procmin; + newenv->procmax = procmax; ++ newenv->procs_idle = 0; + + for (i = 0; i < newenv->procmin; i++) { + newenv->proc[i].env = newenv; + ret = gf_thread_create(&newenv->proc[i].processor, NULL, + syncenv_processor, &newenv->proc[i], "sproc%d", +- newenv->procs); ++ i); + if (ret) + break; + newenv->procs++; +@@ -810,7 +854,7 @@ __synclock_lock(struct synclock *lock) + task->woken = 0; + list_add_tail(&task->waitq, &lock->waitq); + pthread_mutex_unlock(&lock->guard); +- synctask_yield(task); ++ synctask_yield(task, NULL); + /* task is removed from waitq in unlock, + * under lock->guard.*/ + pthread_mutex_lock(&lock->guard); +@@ -963,6 +1007,136 @@ synclock_unlock(synclock_t *lock) + return ret; + } + ++/* Condition variables */ ++ ++int32_t ++synccond_init(synccond_t *cond) ++{ ++ int32_t ret; ++ ++ INIT_LIST_HEAD(&cond->waitq); ++ ++ ret = pthread_mutex_init(&cond->pmutex, NULL); ++ if (ret != 0) { ++ return -ret; ++ } ++ ++ ret = pthread_cond_init(&cond->pcond, NULL); ++ if (ret != 0) { ++ pthread_mutex_destroy(&cond->pmutex); ++ } ++ ++ return -ret; ++} ++ ++void ++synccond_destroy(synccond_t *cond) ++{ ++ pthread_cond_destroy(&cond->pcond); ++ pthread_mutex_destroy(&cond->pmutex); ++} ++ ++int ++synccond_timedwait(synccond_t *cond, 
synclock_t *lock, struct timespec *delta) ++{ ++ struct timespec now; ++ struct synctask *task = NULL; ++ int ret; ++ ++ task = synctask_get(); ++ ++ if (task == NULL) { ++ if (delta != NULL) { ++ timespec_now_realtime(&now); ++ timespec_adjust_delta(&now, *delta); ++ } ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ if (delta == NULL) { ++ ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex); ++ } else { ++ ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now); ++ } ++ } else { ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_add_tail(&task->waitq, &cond->waitq); ++ task->synccond = cond; ++ ++ ret = synclock_unlock(lock); ++ if (ret == 0) { ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ synctask_yield(task, delta); ++ ++ ret = synclock_lock(lock); ++ if (ret == 0) { ++ ret = task->ret; ++ } ++ task->ret = 0; ++ ++ return ret; ++ } ++ ++ list_del_init(&task->waitq); ++ } ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ return ret; ++} ++ ++int ++synccond_wait(synccond_t *cond, synclock_t *lock) ++{ ++ return synccond_timedwait(cond, lock, NULL); ++} ++ ++void ++synccond_signal(synccond_t *cond) ++{ ++ struct synctask *task; ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ if (!list_empty(&cond->waitq)) { ++ task = list_first_entry(&cond->waitq, struct synctask, waitq); ++ list_del_init(&task->waitq); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ synctask_wake(task); ++ } else { ++ pthread_cond_signal(&cond->pcond); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ } ++} ++ ++void ++synccond_broadcast(synccond_t *cond) ++{ ++ struct list_head list; ++ struct synctask *task; ++ ++ INIT_LIST_HEAD(&list); ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_splice_init(&cond->waitq, &list); ++ pthread_cond_broadcast(&cond->pcond); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ while (!list_empty(&list)) { ++ task = list_first_entry(&list, struct synctask, waitq); ++ list_del_init(&task->waitq); ++ ++ synctask_wake(task); ++ } ++} ++ + /* Barriers */ + + int 
+@@ -1032,7 +1206,7 @@ __syncbarrier_wait(struct syncbarrier *barrier, int waitfor) + /* called within a synctask */ + list_add_tail(&task->waitq, &barrier->waitq); + pthread_mutex_unlock(&barrier->guard); +- synctask_yield(task); ++ synctask_yield(task, NULL); + pthread_mutex_lock(&barrier->guard); + } else { + /* called by a non-synctask */ +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index c692119..957deaa 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -5224,7 +5224,7 @@ gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag) + defrag->tier_conf.pause_timer = gf_timer_call_after( + this->ctx, delta, gf_defrag_pause_tier_timeout, this); + +- synctask_yield(defrag->tier_conf.pause_synctask); ++ synctask_yield(defrag->tier_conf.pause_synctask, NULL); + + if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 0d29de2..6475611 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -6076,13 +6076,8 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr, + static void + glusterd_wait_for_blockers(glusterd_conf_t *priv) + { +- uint64_t blockers = GF_ATOMIC_GET(priv->blockers); +- +- while (blockers) { +- synclock_unlock(&priv->big_lock); +- sleep(1); +- blockers = GF_ATOMIC_GET(priv->blockers); +- synclock_lock(&priv->big_lock); ++ while (GF_ATOMIC_GET(priv->blockers)) { ++ synccond_wait(&priv->cond_blockers, &priv->big_lock); + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +index 36018a0..f55a5fd 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +@@ -112,7 +112,7 @@ glusterd_proc_stop(glusterd_proc_t *proc, int 
sig, int flags) + goto out; + + synclock_unlock(&conf->big_lock); +- sleep(1); ++ synctask_sleep(1); + synclock_lock(&conf->big_lock); + if (gf_is_service_running(proc->pidfile, &pid)) { + ret = kill(pid, SIGKILL); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index d225854..386eed2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -1961,9 +1961,7 @@ glusterd_update_snaps_synctask(void *opaque) + synclock_lock(&conf->big_lock); + + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + +@@ -2070,6 +2068,7 @@ out: + if (dict) + dict_unref(dict); + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h +index ce4a940..a265f21 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.h ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h +@@ -32,7 +32,7 @@ + ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum, \ + cbk, (xdrproc_t)xdrproc); \ + if (!ret) \ +- synctask_yield(stb->task); \ ++ synctask_yield(stb->task, NULL); \ + else \ + gf_asprintf(&stb->errstr, \ + "%s failed. 
Check log file" \ +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 812c698..ce9931c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5068,22 +5068,22 @@ glusterd_import_friend_volumes_synctask(void *opaque) + * restarted (refer glusterd_restart_bricks ()) + */ + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + + while (i <= count) { + ret = glusterd_import_friend_volume(peer_data, i); + if (ret) { +- conf->restart_bricks = _gf_false; +- goto out; ++ break; + } + i++; + } +- glusterd_svcs_manager(NULL); ++ if (i > count) { ++ glusterd_svcs_manager(NULL); ++ } + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + out: + if (peer_data) + dict_unref(peer_data); +@@ -5769,7 +5769,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) + call_frame_t *frame = v_frame; + glusterd_conf_t *conf = frame->this->private; + +- GF_ATOMIC_DEC(conf->blockers); ++ if (GF_ATOMIC_DEC(conf->blockers) == 0) { ++ synccond_broadcast(&conf->cond_blockers); ++ } + + STACK_DESTROY(frame->root); + return 0; +@@ -5865,7 +5867,9 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count, + } + } + out: +- GF_ATOMIC_DEC(conf->blockers); ++ if (GF_ATOMIC_DEC(conf->blockers) == 0) { ++ synccond_broadcast(&conf->cond_blockers); ++ } + STACK_DESTROY(frame->root); + return 0; + } +@@ -6053,7 +6057,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, + * TBD: see if there's a better way + */ + synclock_unlock(&conf->big_lock); +- sleep(1); ++ synctask_sleep(1); + synclock_lock(&conf->big_lock); + } + +@@ -6193,7 +6197,7 @@ find_compat_brick_in_vol(glusterd_conf_t *conf, + "brick %s is still" + " starting, waiting for 2 
seconds ", + other_brick->path); +- sleep(2); ++ synctask_sleep(2); + synclock_lock(&conf->big_lock); + retries--; + } +@@ -6680,9 +6684,7 @@ glusterd_restart_bricks(void *opaque) + * glusterd_compare_friend_data ()) + */ + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + +@@ -6798,6 +6800,7 @@ out: + GF_ATOMIC_DEC(conf->blockers); + conf->restart_done = _gf_true; + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + + return_block: + return ret; +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index d360312..a01034a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1845,6 +1845,8 @@ init(xlator_t *this) + (void)strncpy(conf->rundir, rundir, sizeof(conf->rundir)); + + synclock_init(&conf->big_lock, SYNC_LOCK_RECURSIVE); ++ synccond_init(&conf->cond_restart_bricks); ++ synccond_init(&conf->cond_blockers); + pthread_mutex_init(&conf->xprt_lock, NULL); + INIT_LIST_HEAD(&conf->xprt_list); + pthread_mutex_init(&conf->import_volumes, NULL); +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 2be005c..1c6c3b1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -209,6 +209,8 @@ typedef struct { + dict_t *opts; + synclock_t big_lock; + gf_boolean_t restart_done; ++ synccond_t cond_restart_bricks; ++ synccond_t cond_blockers; + rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ + uint32_t base_port; + uint32_t max_port; +-- +1.8.3.1 + diff --git a/SOURCES/0378-Revert-open-behind-fix-missing-fd-reference.patch b/SOURCES/0378-Revert-open-behind-fix-missing-fd-reference.patch new file mode 100644 index 0000000..e228be2 --- /dev/null +++ 
b/SOURCES/0378-Revert-open-behind-fix-missing-fd-reference.patch @@ -0,0 +1,120 @@ +From d79660ccc65f163e0d9cf91cc13a199bec04d5f1 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez Juan +Date: Wed, 20 May 2020 12:55:43 +0000 +Subject: [PATCH 378/379] Revert "open-behind: fix missing fd reference" + +This reverts commit 30cbdf8c06145a0c290da42ecc0a7eae928200b7. + +The patch is not complete because there have been some crash reports +upstream recently after the patch was released. A new patch that should +cover all corner cases is under review (), but it's a big change and it +could be risky to backport it without enough testing. + +Since there exists a workaround to avoid the problem (disable +open-behind), for now we revert the patch. + +Change-Id: I9cfc55623c33758cf5530b18f03c0d795b0f650b +BUG: 1830713 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/200952 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/open-behind/src/open-behind.c | 27 +++++++++-------------- + 1 file changed, 11 insertions(+), 16 deletions(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 14ebc12..268c717 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -206,13 +206,8 @@ ob_fd_free(ob_fd_t *ob_fd) + if (ob_fd->xdata) + dict_unref(ob_fd->xdata); + +- if (ob_fd->open_frame) { +- /* If we sill have a frame it means that background open has never +- * been triggered. We need to release the pending reference. */ +- fd_unref(ob_fd->fd); +- ++ if (ob_fd->open_frame) + STACK_DESTROY(ob_fd->open_frame->root); +- } + + GF_FREE(ob_fd); + } +@@ -302,7 +297,6 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + call_resume(stub); + } + +- /* The background open is completed. We can release the 'fd' reference. 
*/ + fd_unref(fd); + + STACK_DESTROY(frame->root); +@@ -337,9 +331,7 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) + } + + if (frame) { +- /* We don't need to take a reference here. We already have a reference +- * while the open is pending. */ +- frame->local = fd; ++ frame->local = fd_ref(fd); + + STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +@@ -353,12 +345,15 @@ void + ob_inode_wake(xlator_t *this, struct list_head *ob_fds) + { + ob_fd_t *ob_fd = NULL, *tmp = NULL; ++ fd_t *fd = NULL; + + if (!list_empty(ob_fds)) { + list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) + { + ob_fd_wake(this, ob_fd->fd, ob_fd); ++ fd = ob_fd->fd; + ob_fd_free(ob_fd); ++ fd_unref(fd); + } + } + } +@@ -370,7 +365,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) + if (!src || !dst) + goto out; + +- dst->fd = src->fd; ++ dst->fd = __fd_ref(src->fd); + dst->loc.inode = inode_ref(src->loc.inode); + gf_uuid_copy(dst->loc.gfid, src->loc.gfid); + dst->flags = src->flags; +@@ -514,6 +509,7 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + + ob_fd->ob_inode = ob_inode; + ++ /* don't do fd_ref, it'll cause leaks */ + ob_fd->fd = fd; + + ob_fd->open_frame = copy_frame(frame); +@@ -543,16 +539,15 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + } + UNLOCK(&fd->inode->lock); + +- /* We take a reference while the background open is pending or being +- * processed. If we finally wind the request in the foreground, then +- * ob_fd_free() will take care of this additional reference. 
*/ +- fd_ref(fd); +- + if (!open_in_progress && !unlinked) { ++ fd_ref(fd); ++ + STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake(this, fd, NULL); ++ ++ fd_unref(fd); + } else { + ob_fd_free(ob_fd); + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +-- +1.8.3.1 + diff --git a/SOURCES/0379-glusterd-add-missing-synccond_broadcast.patch b/SOURCES/0379-glusterd-add-missing-synccond_broadcast.patch new file mode 100644 index 0000000..cd51c6d --- /dev/null +++ b/SOURCES/0379-glusterd-add-missing-synccond_broadcast.patch @@ -0,0 +1,45 @@ +From e06882a7fea9720a2899f7d52d5d3866ff098866 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Thu, 21 May 2020 08:26:11 +0200 +Subject: [PATCH 379/379] glusterd: add missing synccond_broadcast() + +After the changes in commit 3da22f8cb08b05562a4c6bd2694f2f19199cff7f, +there was a place where synccond_broadcast() was missing. It could +cause a hang if another synctask was waiting on the condition variable. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24476 +> Change-Id: I92bfe4e15c5c3591e4854a64aa9e1566d50dd204 +> Fixes: #1116 +> Signed-off-by: Xavi Hernandez + +Change-Id: I92bfe4e15c5c3591e4854a64aa9e1566d50dd204 +BUG: 1810516 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/201057 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ce9931c..c92cdf3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -6797,9 +6797,11 @@ glusterd_restart_bricks(void *opaque) + ret = 0; + + out: +- GF_ATOMIC_DEC(conf->blockers); + conf->restart_done = _gf_true; + conf->restart_bricks = _gf_false; ++ if 
(GF_ATOMIC_DEC(conf->blockers) == 0) { ++ synccond_broadcast(&conf->cond_blockers); ++ } + synccond_broadcast(&conf->cond_restart_bricks); + + return_block: +-- +1.8.3.1 + diff --git a/SOURCES/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch b/SOURCES/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch new file mode 100644 index 0000000..05915d9 --- /dev/null +++ b/SOURCES/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch @@ -0,0 +1,306 @@ +From 2cf22e54c8424949607c4a20df84887b838b2702 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 15 May 2020 11:29:36 +0530 +Subject: [PATCH 380/382] features/shard: Aggregate size, block-count in iatt + before unwinding setxattr + +Backport of: +> Upstream patch - https://review.gluster.org/c/glusterfs/+/24471 +> Fixes: #1243 +> Change-Id: I4da0eceb4235b91546df79270bcc0af8cd64e9ea +> Signed-off-by: Krutika Dhananjay + +Posix translator returns pre and postbufs in the dict in {F}SETXATTR fops. +These iatts are further cached at layers like md-cache. +Shard translator, in its current state, simply returns these values without +updating the aggregated file size and block-count. + +This patch fixes this problem. + +Change-Id: I4da0eceb4235b91546df79270bcc0af8cd64e9ea +BUG: 1823423 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/201135 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez Juan +--- + tests/bugs/shard/issue-1243.t | 31 ++++++ + xlators/features/shard/src/shard.c | 218 +++++++++++++++++++++++++++++++++---- + 2 files changed, 225 insertions(+), 24 deletions(-) + create mode 100644 tests/bugs/shard/issue-1243.t + +diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t +new file mode 100644 +index 0000000..b0c092c +--- /dev/null ++++ b/tests/bugs/shard/issue-1243.t +@@ -0,0 +1,31 @@ ++#!/bin/bash ++ ++. 
$(dirname $0)/../../include.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.strict-o-direct on ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST $CLI volume set $V0 md-cache-timeout 10 ++ ++# Write data into a file such that its size crosses shard-block-size ++TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct ++ ++# Execute a setxattr on the file. ++TEST setfattr -n trusted.libvirt -v some-value $M0/foo ++ ++# Size of the file should be the aggregated size, not the shard-block-size ++EXPECT '8388608' stat -c %s $M0/foo ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index ee38ed2..6ae4c41 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -5929,36 +5929,206 @@ out: + return 0; + } + +-int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- dict_t *dict, int32_t flags, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *xdata) { ++ int ret = -1; ++ struct iatt *prebuf = NULL; ++ struct iatt *postbuf = NULL; ++ struct iatt *stbuf = NULL; ++ data_t *data = NULL; ++ shard_local_t *local = NULL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ local = frame->local; + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, +- fd, dict, flags, xdata); +- return 0; +-out: +- 
shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); +- return 0; ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ ++ if (!xdata) ++ goto unwind; ++ ++ data = dict_get(xdata, GF_PRESTAT); ++ if (data) { ++ stbuf = data_to_iatt(data, GF_PRESTAT); ++ prebuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); ++ if (prebuf == NULL) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ *prebuf = *stbuf; ++ prebuf->ia_size = local->prebuf.ia_size; ++ prebuf->ia_blocks = local->prebuf.ia_blocks; ++ ret = dict_set_iatt(xdata, GF_PRESTAT, prebuf, false); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ prebuf = NULL; ++ } ++ ++ data = dict_get(xdata, GF_POSTSTAT); ++ if (data) { ++ stbuf = data_to_iatt(data, GF_POSTSTAT); ++ postbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); ++ if (postbuf == NULL) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ *postbuf = *stbuf; ++ postbuf->ia_size = local->prebuf.ia_size; ++ postbuf->ia_blocks = local->prebuf.ia_blocks; ++ ret = dict_set_iatt(xdata, GF_POSTSTAT, postbuf, false); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ postbuf = NULL; ++ } ++ ++unwind: ++ if (local->fd) ++ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, ++ xdata); ++ else ++ SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, ++ xdata); ++ return 0; ++ ++err: ++ GF_FREE(prebuf); ++ GF_FREE(postbuf); ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } + +-int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- dict_t *dict, int32_t flags, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- 
GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ local = frame->local; + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, +- loc, dict, flags, xdata); +- return 0; +-out: +- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +- return 0; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ if (local->fd) ++ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetxattr, local->fd, ++ local->xattr_req, local->flags, local->xattr_rsp); ++ else ++ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, &local->loc, ++ local->xattr_req, local->flags, local->xattr_rsp); ++ return 0; ++} ++ ++int32_t shard_common_set_xattr(call_frame_t *frame, xlator_t *this, ++ glusterfs_fop_t fop, loc_t *loc, fd_t *fd, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ int ret = -1; ++ int op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ inode_t *inode = loc ? loc->inode : fd->inode; ++ ++ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { ++ if (loc) ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, ++ xdata); ++ else ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, ++ xdata); ++ return 0; ++ } ++ ++ /* Sharded or not, if shard's special xattrs are attempted to be set, ++ * fail the fop with EPERM (except if the client is gsyncd. 
++ */ ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err); ++ } ++ ++ ret = shard_inode_ctx_get_block_size(inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ if (loc) ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, ++ xdata); ++ else ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, ++ xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ local->fop = fop; ++ if (loc) { ++ if (loc_copy(&local->loc, loc) != 0) ++ goto err; ++ } ++ ++ if (fd) { ++ local->fd = fd_ref(fd); ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ } ++ local->flags = flags; ++ /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict ++ * and the xdata dict ++ */ ++ if (dict) ++ local->xattr_req = dict_ref(dict); ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ ++ /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is ++ * on an fd. This comes under a generic class of bugs in shard tracked by ++ * bz #1782428. 
++ */ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_set_xattr_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(fop, frame, -1, op_errno); ++ return 0; ++} ++ ++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, ++ xdata); ++ return 0; ++} ++ ++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, ++ xdata); ++ return 0; + } + + int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) { +-- +1.8.3.1 + diff --git a/SOURCES/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch b/SOURCES/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch new file mode 100644 index 0000000..aa875a2 --- /dev/null +++ b/SOURCES/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch @@ -0,0 +1,48 @@ +From 63ea2aad2474a0ca169342c699cb1689e6c1d83f Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Fri, 22 May 2020 13:49:14 +0530 +Subject: [PATCH 381/382] dht: add null check in gf_defrag_free_dir_dfmeta + +Backport of https://review.gluster.org/#/c/glusterfs/+/24479/ + +BUG:1812789 +Change-Id: I502ed43051bd60d9e5d2b69d4e4d7b6eea997285 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/201150 +Tested-by: RHGS Build Bot +Reviewed-by: Ashish Pandey +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-rebalance.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 957deaa..8f31dca 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -47,7 +47,8 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int 
local_subvols_cnt) + + if (meta) { + for (i = 0; i < local_subvols_cnt; i++) { +- gf_dirent_free(&meta->equeue[i]); ++ if (meta->equeue) ++ gf_dirent_free(&meta->equeue[i]); + if (meta->lfd && meta->lfd[i]) + fd_unref(meta->lfd[i]); + } +@@ -3344,9 +3345,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + if (ret) { + fd_unref(dir_dfmeta->lfd[i]); + dir_dfmeta->lfd[i] = NULL; +- gf_smsg(this->name, GF_LOG_WARNING, 0, 0, +- "failed to open dir: %s subvol: %s", loc->path, +- conf->local_subvols[i]->name); ++ gf_msg(this->name, GF_LOG_WARNING, -ret, 0, ++ "failed to open dir: %s subvol: %s", loc->path, ++ conf->local_subvols[i]->name); + + if (conf->decommission_in_progress) { + *perrno = -ret; +-- +1.8.3.1 + diff --git a/SOURCES/0382-features-shard-Aggregate-file-size-block-count-befor.patch b/SOURCES/0382-features-shard-Aggregate-file-size-block-count-befor.patch new file mode 100644 index 0000000..a6528f5 --- /dev/null +++ b/SOURCES/0382-features-shard-Aggregate-file-size-block-count-befor.patch @@ -0,0 +1,422 @@ +From 4097a748cbb7616d78886b35e3360177d570b7a6 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 22 May 2020 13:25:26 +0530 +Subject: [PATCH 382/382] features/shard: Aggregate file size, block-count + before unwinding removexattr + +Posix translator returns pre and postbufs in the dict in {F}REMOVEXATTR fops. +These iatts are further cached at layers like md-cache. +Shard translator, in its current state, simply returns these values without +updating the aggregated file size and block-count. + +This patch fixes this problem. 
+ +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24480 +> Change-Id: I4b2dd41ede472c5829af80a67401ec5a6376d872 +> Fixes: #1243 +> Signed-off-by: Krutika Dhananjay + +Change-Id: I4b2dd41ede472c5829af80a67401ec5a6376d872 +BUG: 1823423 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/201456 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/shard/issue-1243.t | 12 ++ + xlators/features/shard/src/shard.c | 293 ++++++++++++++++++++++++++----------- + xlators/features/shard/src/shard.h | 1 + + 3 files changed, 224 insertions(+), 82 deletions(-) + +diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t +index b0c092c..ba22d2b 100644 +--- a/tests/bugs/shard/issue-1243.t ++++ b/tests/bugs/shard/issue-1243.t +@@ -1,6 +1,7 @@ + #!/bin/bash + + . $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc + + cleanup; + +@@ -22,10 +23,21 @@ TEST $CLI volume set $V0 md-cache-timeout 10 + # Write data into a file such that its size crosses shard-block-size + TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct + ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ + # Execute a setxattr on the file. + TEST setfattr -n trusted.libvirt -v some-value $M0/foo + + # Size of the file should be the aggregated size, not the shard-block-size + EXPECT '8388608' stat -c %s $M0/foo + ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++# Execute a removexattr on the file. 
++TEST setfattr -x trusted.libvirt $M0/foo ++ ++# Size of the file should be the aggregated size, not the shard-block-size ++EXPECT '8388608' stat -c %s $M0/foo + cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 6ae4c41..2e2ef5d 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -442,6 +442,9 @@ void shard_local_wipe(shard_local_t *local) { + loc_wipe(&local->int_entrylk.loc); + loc_wipe(&local->newloc); + ++ if (local->name) ++ GF_FREE(local->name); ++ + if (local->int_entrylk.basename) + GF_FREE(local->int_entrylk.basename); + if (local->fd) +@@ -5819,46 +5822,216 @@ int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, + return 0; + } + +-int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- const char *name, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t ++shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local, ++ char *key) ++{ ++ int ret = 0; ++ struct iatt *tmpbuf = NULL; ++ struct iatt *stbuf = NULL; ++ data_t *data = NULL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ if (!xdata) ++ return 0; + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ data = dict_get(xdata, key); ++ if (!data) ++ return 0; + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); +- return 0; +-out: +- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); +- return 0; ++ tmpbuf = data_to_iatt(data, key); ++ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); ++ if (stbuf == NULL) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ *stbuf = *tmpbuf; ++ stbuf->ia_size = local->prebuf.ia_size; ++ stbuf->ia_blocks = local->prebuf.ia_blocks; 
++ ret = dict_set_iatt(xdata, key, stbuf, false); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ return 0; ++ ++err: ++ GF_FREE(stbuf); ++ return -1; + } + +-int32_t shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- const char *name, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t ++shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ shard_local_t *local = NULL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ local = frame->local; + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); +- return 0; +-out: +- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +- return 0; ++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); ++ if (ret < 0) ++ goto err; ++ ++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); ++ if (ret < 0) ++ goto err; ++ ++ if (local->fd) ++ SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno, ++ xdata); ++ else ++ SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, ++ xdata); ++ return 0; ++ ++err: ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int32_t ++shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ if 
(local->fd) ++ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, local->fd, ++ local->name, local->xattr_req); ++ else ++ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, &local->loc, ++ local->name, local->xattr_req); ++ return 0; ++} ++ ++int32_t ++shard_common_remove_xattr(call_frame_t *frame, xlator_t *this, ++ glusterfs_fop_t fop, loc_t *loc, fd_t *fd, ++ const char *name, dict_t *xdata) ++{ ++ int ret = -1; ++ int op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ inode_t *inode = loc ? loc->inode : fd->inode; ++ ++ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { ++ if (loc) ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, ++ xdata); ++ else ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, ++ xdata); ++ return 0; ++ } ++ ++ /* If shard's special xattrs are attempted to be removed, ++ * fail the fop with EPERM (except if the client is gsyncd). 
++ */ ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err); ++ } ++ ++ /* Repeat the same check for bulk-removexattr */ ++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); ++ } ++ ++ ret = shard_inode_ctx_get_block_size(inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ if (loc) ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, ++ xdata); ++ else ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, ++ xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ local->fop = fop; ++ if (loc) { ++ if (loc_copy(&local->loc, loc) != 0) ++ goto err; ++ } ++ ++ if (fd) { ++ local->fd = fd_ref(fd); ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ } ++ ++ if (name) { ++ local->name = gf_strdup(name); ++ if (!local->name) ++ goto err; ++ } ++ ++ if (xdata) ++ local->xattr_req = dict_ref(xdata); ++ ++ /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is ++ * on an fd. This comes under a generic class of bugs in shard tracked by ++ * bz #1782428. 
++ */ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_remove_xattr_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(fop, frame, -1, op_errno); ++ return 0; ++} ++ ++int32_t ++shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) ++{ ++ shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name, ++ xdata); ++ return 0; ++} ++ ++int32_t ++shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) ++{ ++ shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name, ++ xdata); ++ return 0; + } + + int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +@@ -5933,10 +6106,6 @@ int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { + int ret = -1; +- struct iatt *prebuf = NULL; +- struct iatt *postbuf = NULL; +- struct iatt *stbuf = NULL; +- data_t *data = NULL; + shard_local_t *local = NULL; + + local = frame->local; +@@ -5947,52 +6116,14 @@ int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, + goto err; + } + +- if (!xdata) +- goto unwind; +- +- data = dict_get(xdata, GF_PRESTAT); +- if (data) { +- stbuf = data_to_iatt(data, GF_PRESTAT); +- prebuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); +- if (prebuf == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- *prebuf = *stbuf; +- prebuf->ia_size = local->prebuf.ia_size; +- prebuf->ia_blocks = local->prebuf.ia_blocks; +- ret = dict_set_iatt(xdata, GF_PRESTAT, prebuf, false); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- prebuf = NULL; +- } ++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); ++ if (ret < 0) ++ goto err; + +- data = dict_get(xdata, GF_POSTSTAT); +- if (data) { +- stbuf = data_to_iatt(data, GF_POSTSTAT); +- postbuf = 
GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); +- if (postbuf == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- *postbuf = *stbuf; +- postbuf->ia_size = local->prebuf.ia_size; +- postbuf->ia_blocks = local->prebuf.ia_blocks; +- ret = dict_set_iatt(xdata, GF_POSTSTAT, postbuf, false); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- postbuf = NULL; +- } ++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); ++ if (ret < 0) ++ goto err; + +-unwind: + if (local->fd) + SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); +@@ -6002,8 +6133,6 @@ unwind: + return 0; + + err: +- GF_FREE(prebuf); +- GF_FREE(postbuf); + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 04abd62..1721417 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -318,6 +318,7 @@ typedef struct shard_local { + uint32_t deletion_rate; + gf_boolean_t cleanup_required; + uuid_t base_gfid; ++ char *name; + } shard_local_t; + + typedef struct shard_inode_ctx { +-- +1.8.3.1 + diff --git a/SOURCES/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch b/SOURCES/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch new file mode 100644 index 0000000..3adaa65 --- /dev/null +++ b/SOURCES/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch @@ -0,0 +1,38 @@ +From f880df2ce4706dd748a09d3d6db57d49f62a234c Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Thu, 28 May 2020 08:26:47 -0400 +Subject: [PATCH 383/383] common-ha: ganesha-ha.sh bad test for {rhel,centos} + for pcs options + +bash [[ ... =~ ... ]] built-in returns _0_ when the regex matches, +not 1, thus the sense of the test is backwards and never correctly +detects rhel or centos. 
+ +https://review.gluster.org/#/c/glusterfs/+/24502/ + +Change-Id: Ic9e60aae4ea38aff8f13979080995e60621a68fe +BUG: 1840794 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/201686 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 4ecf91b..a6814b1 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -1054,7 +1054,7 @@ main() + # Fedora 29+ and rhel/centos 8 has PCS-0.10.x + # default is pcs-0.10.x options but check for + # rhel/centos 7 (pcs-0.9.x) and adjust accordingly +- if [[ ${ID} =~ {rhel,centos} ]]; then ++ if [[ ! ${ID} =~ {rhel,centos} ]]; then + if [[ ${VERSION_ID} == 7.* ]]; then + PCS9OR10_PCS_CNAME_OPTION="--name" + PCS9OR10_PCS_CLONE_OPTION="--clone" +-- +1.8.3.1 + diff --git a/SOURCES/0384-extras-Modify-group-virt-to-include-network-related-.patch b/SOURCES/0384-extras-Modify-group-virt-to-include-network-related-.patch new file mode 100644 index 0000000..45684e1 --- /dev/null +++ b/SOURCES/0384-extras-Modify-group-virt-to-include-network-related-.patch @@ -0,0 +1,44 @@ +From 2948ee521316d40384130138233178ba940b175f Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Mon, 4 May 2020 14:30:57 +0530 +Subject: [PATCH 384/392] extras: Modify group 'virt' to include + network-related options + +This is needed to work around an issue seen where vms running on +online hosts are getting killed when a different host is rebooted +in ovirt-gluster hyperconverged environments. Actual RCA is quite +lengthy and documented in the github issue. Please refer to it +for more details. 
+ +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24400 +> Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1 +> Fixes: #1217 +> Signed-off-by: Krutika Dhananjay + +Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1 +BUG: 1848899 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/203685 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-virt.example | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/extras/group-virt.example b/extras/group-virt.example +index c2ce89d..3a441eb 100644 +--- a/extras/group-virt.example ++++ b/extras/group-virt.example +@@ -16,3 +16,8 @@ cluster.choose-local=off + client.event-threads=4 + server.event-threads=4 + performance.client-io-threads=on ++network.ping-timeout=20 ++server.tcp-user-timeout=20 ++server.keepalive-time=10 ++server.keepalive-interval=2 ++server.keepalive-count=5 +-- +1.8.3.1 + diff --git a/SOURCES/0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch b/SOURCES/0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch new file mode 100644 index 0000000..5572e7f --- /dev/null +++ b/SOURCES/0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch @@ -0,0 +1,237 @@ +From cdd067dcc0cd70d4f57e173b4050d8e2eb79725a Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Fri, 5 Jun 2020 17:20:04 +0530 +Subject: [PATCH 385/392] cluster/afr: Prioritize ENOSPC over other errors + +Backport of: https://review.gluster.org/#/c/glusterfs/+/24477/ + +Problem: +In a replicate/arbiter volume if file creations or writes fails on +quorum number of bricks and on one brick it is due to ENOSPC and +on other brick it fails for a different reason, it may fail with +errors other than ENOSPC in some cases. + +Fix: +Prioritize ENOSPC over other lesser priority errors and do not set +op_errno in posix_gfid_set if op_ret is 0 to avoid receiving any +error_no which can be misinterpreted by __afr_dir_write_finalize(). 
+ +Also removing the function afr_has_arbiter_fop_cbk_quorum() which +might consider a successful reply from a single brick as quorum +success in some cases, whereas we always need fop to be successful +on quorum number of bricks in arbiter configuration. + +Change-Id: I4dd2bff17e6812bc7c8372130976e365e2407d88 +Signed-off-by: karthik-us +BUG: 1848895 +(cherry picked from commit 8b11ac1575ef167af2a47a96f7b7ed0f32bb5897) +Reviewed-on: https://code.engineering.redhat.com/gerrit/203691 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bugs/replicate/issue-1254-prioritize-enospc.t | 80 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 4 +- + xlators/cluster/afr/src/afr-transaction.c | 48 +------------ + xlators/storage/posix/src/posix-helpers.c | 2 +- + 4 files changed, 86 insertions(+), 48 deletions(-) + create mode 100644 tests/bugs/replicate/issue-1254-prioritize-enospc.t + +diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t +new file mode 100644 +index 0000000..fab94b7 +--- /dev/null ++++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t +@@ -0,0 +1,80 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup ++ ++function create_bricks { ++ TEST truncate -s 100M $B0/brick0 ++ TEST truncate -s 100M $B0/brick1 ++ TEST truncate -s 20M $B0/brick2 ++ LO1=`SETUP_LOOP $B0/brick0` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO1 ++ LO2=`SETUP_LOOP $B0/brick1` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO2 ++ LO3=`SETUP_LOOP $B0/brick2` ++ TEST [ $? 
-eq 0 ] ++ TEST MKFS_LOOP $LO3 ++ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2 ++ TEST MOUNT_LOOP $LO1 $B0/${V0}0 ++ TEST MOUNT_LOOP $LO2 $B0/${V0}1 ++ TEST MOUNT_LOOP $LO3 $B0/${V0}2 ++} ++ ++function create_files { ++ local i=1 ++ while (true) ++ do ++ touch $M0/file$i ++ if [ -e $B0/${V0}2/file$i ]; ++ then ++ ((i++)) ++ else ++ break ++ fi ++ done ++} ++ ++TESTS_EXPECTED_IN_LOOP=13 ++ ++#Arbiter volume: Check for ENOSPC when arbiter brick becomes full# ++TEST glusterd ++create_bricks ++TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++create_files ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++error1=$(touch $M0/file-1 2>&1) ++EXPECT "No space left on device" echo $error1 ++error2=$(mkdir $M0/dir-1 2>&1) ++EXPECT "No space left on device" echo $error2 ++error3=$((echo "Test" > $M0/file-3) 2>&1) ++EXPECT "No space left on device" echo $error3 ++ ++cleanup ++ ++#Replica-3 volume: Check for ENOSPC when one of the brick becomes full# ++#Keeping the third brick of lower size to simulate disk full scenario# ++TEST glusterd ++create_bricks ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++create_files ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++error1=$(touch $M0/file-1 2>&1) ++EXPECT "No space left on device" echo $error1 ++error2=$(mkdir $M0/dir-1 2>&1) ++EXPECT "No space left on device" echo $error2 ++error3=$((cat /dev/zero > $M0/file1) 2>&1) ++EXPECT "No space left on device" echo $error3 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 3690b84..d6b70e9 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2463,7 +2463,7 @@ error: + * others in that they 
must be given higher priority while + * returning to the user. + * +- * The hierarchy is ENODATA > ENOENT > ESTALE > others ++ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC > others + */ + + int +@@ -2475,6 +2475,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno) + return ENOENT; + if (old_errno == ESTALE || new_errno == ESTALE) + return ESTALE; ++ if (old_errno == ENOSPC || new_errno == ENOSPC) ++ return ENOSPC; + + return new_errno; + } +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 15f3a7e..8e65ae2 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -514,42 +514,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this) + local->transaction.pre_op_sources[j] = 0; + } + +-gf_boolean_t +-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame) +-{ +- afr_local_t *local = NULL; +- afr_private_t *priv = NULL; +- xlator_t *this = NULL; +- gf_boolean_t fop_failed = _gf_false; +- unsigned char *pre_op_sources = NULL; +- int i = 0; +- +- local = frame->local; +- this = frame->this; +- priv = this->private; +- pre_op_sources = local->transaction.pre_op_sources; +- +- /* If the fop failed on the brick, it is not a source. 
*/ +- for (i = 0; i < priv->child_count; i++) +- if (local->transaction.failed_subvols[i]) +- pre_op_sources[i] = 0; +- +- switch (AFR_COUNT(pre_op_sources, priv->child_count)) { +- case 1: +- if (pre_op_sources[ARBITER_BRICK_INDEX]) +- fop_failed = _gf_true; +- break; +- case 0: +- fop_failed = _gf_true; +- break; +- } +- +- if (fop_failed) +- return _gf_false; +- +- return _gf_true; +-} +- + void + afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this) + { +@@ -968,12 +932,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this) + priv->child_count) + return _gf_false; + +- if (priv->arbiter_count) { +- if (!afr_has_arbiter_fop_cbk_quorum(frame)) +- need_dirty = _gf_true; +- } else if (!afr_has_fop_cbk_quorum(frame)) { ++ if (!afr_has_fop_cbk_quorum(frame)) + need_dirty = _gf_true; +- } + + return need_dirty; + } +@@ -1023,12 +983,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this) + * no split-brain with the fix. The problem is eliminated completely. + */ + +- if (priv->arbiter_count) { +- if (afr_has_arbiter_fop_cbk_quorum(frame)) +- return; +- } else if (afr_has_fop_cbk_quorum(frame)) { ++ if (afr_has_fop_cbk_quorum(frame)) + return; +- } + + if (afr_need_dirty_marking(frame, this)) + goto set_response; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 35dd3b6..aca0df6 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1059,7 +1059,7 @@ verify_handle: + ret = posix_handle_soft(this, path, loc, uuid_curr, &stat); + + out: +- if (!(*op_errno)) ++ if (ret && !(*op_errno)) + *op_errno = errno; + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch b/SOURCES/0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch new file mode 100644 index 0000000..9ba5451 --- /dev/null +++ b/SOURCES/0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch @@ -0,0 +1,251 
@@ +From 7689fbb4be83f0e0657ec2729c4d66ed341b5751 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Tue, 11 Feb 2020 14:34:48 +0530 +Subject: [PATCH 386/392] afr: prevent spurious entry heals leading to gfid + split-brain + +Problem: +In a hyperconverged setup with granular-entry-heal enabled, if a file is +recreated while one of the bricks is down, and an index heal is triggered +(with the brick still down), entry-self heal was doing a spurious heal +with just the 2 good bricks. It was doing a post-op leading to removal +of the filename from .glusterfs/indices/entry-changes as well as +erroneous setting of afr xattrs on the parent. When the brick came up, +the xattrs were cleared, resulting in the renamed file not getting +healed and leading to gfid split-brain and EIO on the mount. + +Fix: +Proceed with entry heal only when shd can connect to all bricks of the replica, +just like in data and metadata heal. + +BUG: 1848893 + +> Upstream patch:https://review.gluster.org/#/c/glusterfs/+/24109/ +> fixes: bz#1801624 +> Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e +> Signed-off-by: Ravishankar N + +Change-Id: I23f57e543cff1e3f35eb8dbc60a2babfae6838c7 +Signed-off-by: Ravishankar N +(cherry picked from commit 2b2eb846c49caba13ab92ec66af20292e7780fc1) +Reviewed-on: https://code.engineering.redhat.com/gerrit/203692 +Tested-by: RHGS Build Bot +Tested-by: Karthik Subrahmanya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bug-1433571-undo-pending-only-on-up-bricks.t | 18 ++----- + tests/bugs/replicate/bug-1801624-entry-heal.t | 58 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 4 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 8 +-- + xlators/cluster/afr/src/afr-self-heal-entry.c | 6 +-- + xlators/cluster/afr/src/afr-self-heal-name.c | 2 +- + xlators/cluster/afr/src/afr-self-heal.h | 2 - + 7 files changed, 69 insertions(+), 29 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1801624-entry-heal.t + +diff --git 
a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t +index 0767f47..10ce013 100644 +--- a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t ++++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t +@@ -49,25 +49,15 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +-#Kill brick 0 and turn on the client side heal and do ls to trigger the heal. +-#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0. +-TEST kill_brick $V0 $H0 $B0/${V0}0 ++# We were killing one brick and checking that entry heal does not reset the ++# pending xattrs for the down brick. Now that we need all bricks to be up for ++# entry heal, I'm removing that test from the .t ++ + TEST $CLI volume set $V0 cluster.data-self-heal on + TEST $CLI volume set $V0 cluster.metadata-self-heal on + TEST $CLI volume set $V0 cluster.entry-self-heal on + + TEST ls $M0 +-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1 +-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2 +-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 +-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2 +- +-#Bring back all the bricks and trigger the heal again by doing ls. Now the +-#pending xattrs on all the bricks should be 0. 
+-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +-TEST ls $M0 +- + TEST cat $M0/f1 + TEST cat $M0/f2 + TEST cat $M0/f3 +diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t +new file mode 100644 +index 0000000..94b4651 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1801624-entry-heal.t +@@ -0,0 +1,58 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2} ++TEST $CLI volume set $V0 heal-timeout 5 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 granular-entry-heal enable ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++echo "Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++ ++# Re-create the file when a brick is down. ++TEST kill_brick $V0 $H0 $B0/brick1 ++TEST rm $M0/FILE ++echo "New Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0 ++ ++# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices. ++$CLI volume heal $V0 # CLI will fail but heal is launched anyway. ++TEST sleep 5 # give index heal a chance to do one run. 
++brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/) ++brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/) ++TEST [ $brick0_pending -eq "000000000000000000000002" ] ++TEST [ $brick2_pending -eq "000000000000000000000002" ] ++EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/ ++EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/ ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++$CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values. ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++TEST cat $M0/FILE ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index d6b70e9..939246e 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -6632,7 +6632,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) ++ if (ret < priv->child_count) + goto data_unlock; + ret = __afr_selfheal_data_prepare( + heal_frame, this, inode, locked_on, sources, sinks, +@@ -6649,7 +6649,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, + LLONG_MAX - 1, 0, locked_on); + { +- if (ret < 
AFR_SH_MIN_PARTICIPANTS) ++ if (ret < priv->child_count) + goto mdata_unlock; + ret = __afr_selfheal_metadata_prepare( + heal_frame, this, inode, locked_on, sources, sinks, +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 81ef38a..ce1ea50 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + char *accused = NULL; /* Accused others without any self-accusal */ + char *pending = NULL; /* Have pending operations on others */ + char *self_accused = NULL; /* Accused itself */ +- int min_participants = -1; + + priv = this->private; + +@@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + } + } + +- if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { +- min_participants = priv->child_count; +- } else { +- min_participants = AFR_SH_MIN_PARTICIPANTS; +- } +- if (afr_success_count(replies, priv->child_count) < min_participants) { ++ if (afr_success_count(replies, priv->child_count) < priv->child_count) { + /* Treat this just like locks not being acquired */ + return -ENOTCONN; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 3ce882e..40be898 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -597,7 +597,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes " +@@ -991,7 +991,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = afr_selfheal_entrylk(frame, this, fd->inode, 
this->name, NULL, + data_lock); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " +@@ -1115,7 +1115,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode) + ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain, + NULL, locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 36640b5..7d4f208 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -514,7 +514,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + ret = -ENOTCONN; + goto unlock; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 6555ec5..8234cec 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -11,8 +11,6 @@ + #ifndef _AFR_SELFHEAL_H + #define _AFR_SELFHEAL_H + +-#define AFR_SH_MIN_PARTICIPANTS 2 +- + /* Perform fop on all UP subvolumes and wait for all callbacks to return */ + + #define AFR_ONALL(frame, rfn, fop, args...) 
\ +-- +1.8.3.1 + diff --git a/SOURCES/0387-cluster-afr-Delay-post-op-for-fsync.patch b/SOURCES/0387-cluster-afr-Delay-post-op-for-fsync.patch new file mode 100644 index 0000000..3913c14 --- /dev/null +++ b/SOURCES/0387-cluster-afr-Delay-post-op-for-fsync.patch @@ -0,0 +1,440 @@ +From 399fad1ac0f9273483270e8af06a5b2d28927533 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Fri, 29 May 2020 14:24:53 +0530 +Subject: [PATCH 387/392] cluster/afr: Delay post-op for fsync + +Problem: +AFR doesn't delay post-op for fsync fop. For fsync heavy workloads +this leads to un-necessary fxattrop/finodelk for every fsync leading +to bad performance. + +Fix: +Have delayed post-op for fsync. Add special flag in xdata to indicate +that afr shouldn't delay post-op in cases where either the +process will terminate or graph-switch would happen. Otherwise it leads +to un-necessary heals when the graph-switch/process-termination +happens before delayed-post-op completes. + +> Upstream-patch: https://review.gluster.org/c/glusterfs/+/24473 +> Fixes: #1253 + +BUG: 1848896 +Change-Id: I531940d13269a111c49e0510d49514dc169f4577 +Signed-off-by: Pranith Kumar K +(cherry picked from commit 3ed98fc9dcb39223032e343fd5b0ad17fa3cae14) +Reviewed-on: https://code.engineering.redhat.com/gerrit/203694 +Tested-by: RHGS Build Bot +Tested-by: Karthik Subrahmanya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-resolve.c | 14 ++- + tests/basic/afr/durability-off.t | 2 + + tests/basic/gfapi/gfapi-graph-switch-open-fd.t | 44 +++++++++ + tests/basic/gfapi/gfapi-keep-writing.c | 129 +++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-inode-write.c | 11 ++- + xlators/cluster/afr/src/afr-transaction.c | 9 +- + xlators/cluster/afr/src/afr.h | 2 +- + xlators/cluster/dht/src/dht-rebalance.c | 15 ++- + xlators/mount/fuse/src/fuse-bridge.c | 23 ++++- + 9 files changed, 239 insertions(+), 10 deletions(-) + create mode 100644 tests/basic/gfapi/gfapi-graph-switch-open-fd.t + create mode 100644 
tests/basic/gfapi/gfapi-keep-writing.c + +diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c +index a79f490..062b7dc 100644 +--- a/api/src/glfs-resolve.c ++++ b/api/src/glfs-resolve.c +@@ -722,6 +722,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) + 0, + }; + char uuid1[64]; ++ dict_t *xdata = NULL; + + oldinode = oldfd->inode; + oldsubvol = oldinode->table->xl; +@@ -730,7 +731,15 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) + return fd_ref(oldfd); + + if (!oldsubvol->switched) { +- ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, NULL, NULL); ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_msg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED, ++ "last-fsync set failed on %s graph %s (%d)", ++ uuid_utoa_r(oldfd->inode->gfid, uuid1), ++ graphid_str(oldsubvol), oldsubvol->graph->id); ++ } ++ ++ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL); + DECODE_SYNCOP_ERR(ret); + if (ret) { + gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED, +@@ -809,6 +818,9 @@ out: + newfd = NULL; + } + ++ if (xdata) ++ dict_unref(xdata); ++ + return newfd; + } + +diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t +index 155ffa0..6e0f18b 100644 +--- a/tests/basic/afr/durability-off.t ++++ b/tests/basic/afr/durability-off.t +@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l) + ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + #Test that fsyncs happen when durability is on + TEST $CLI volume set $V0 cluster.ensure-durability on + TEST $CLI volume set $V0 performance.strict-write-ordering on +diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t +new file 
mode 100644 +index 0000000..2e666be +--- /dev/null ++++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t +@@ -0,0 +1,44 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2}; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST touch $M0/sync ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi ++ ++ ++#Launch a program to keep doing writes on an fd ++./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync & ++p=$! ++sleep 1 #Let some writes go through ++#Check if graph switch will lead to any pending markers for ever ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++ ++ ++TEST rm -f $M0/sync #Make sure the glfd is closed ++TEST wait #Wait for background process to die ++#Goal is to check if there is permanent FOOL changelog ++sleep 5 ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty ++ ++cleanup_tester $(dirname $0)/gfapi-async-calls-test ++ ++cleanup; +diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c +new file mode 100644 +index 0000000..91b59ce +--- /dev/null ++++ b/tests/basic/gfapi/gfapi-keep-writing.c +@@ -0,0 +1,129 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define 
LOG_ERR(msg) \ ++ do { \ ++ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \ ++ } while (0) ++ ++glfs_t * ++init_glfs(const char *hostname, const char *volname, const char *logfile) ++{ ++ int ret = -1; ++ glfs_t *fs = NULL; ++ ++ fs = glfs_new(volname); ++ if (!fs) { ++ LOG_ERR("glfs_new failed"); ++ return NULL; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); ++ if (ret < 0) { ++ LOG_ERR("glfs_set_volfile_server failed"); ++ goto out; ++ } ++ ++ ret = glfs_set_logging(fs, logfile, 7); ++ if (ret < 0) { ++ LOG_ERR("glfs_set_logging failed"); ++ goto out; ++ } ++ ++ ret = glfs_init(fs); ++ if (ret < 0) { ++ LOG_ERR("glfs_init failed"); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ if (ret) { ++ glfs_fini(fs); ++ fs = NULL; ++ } ++ ++ return fs; ++} ++ ++int ++glfs_test_function(const char *hostname, const char *volname, ++ const char *logfile, const char *syncfile) ++{ ++ int ret = -1; ++ int flags = O_CREAT | O_RDWR; ++ glfs_t *fs = NULL; ++ glfs_fd_t *glfd = NULL; ++ const char *buff = "This is from my prog\n"; ++ const char *filename = "glfs_test.txt"; ++ struct stat buf = {0}; ++ ++ fs = init_glfs(hostname, volname, logfile); ++ if (fs == NULL) { ++ LOG_ERR("init_glfs failed"); ++ return -1; ++ } ++ ++ glfd = glfs_creat(fs, filename, flags, 0644); ++ if (glfd == NULL) { ++ LOG_ERR("glfs_creat failed"); ++ goto out; ++ } ++ ++ while (glfs_stat(fs, syncfile, &buf) == 0) { ++ ret = glfs_write(glfd, buff, strlen(buff), flags); ++ if (ret < 0) { ++ LOG_ERR("glfs_write failed"); ++ goto out; ++ } ++ } ++ ++ ret = glfs_close(glfd); ++ if (ret < 0) { ++ LOG_ERR("glfs_write failed"); ++ goto out; ++ } ++ ++out: ++ ret = glfs_fini(fs); ++ if (ret) { ++ LOG_ERR("glfs_fini failed"); ++ } ++ ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ int ret = 0; ++ char *hostname = NULL; ++ char *volname = NULL; ++ char *logfile = NULL; ++ char *syncfile = NULL; ++ ++ if (argc != 5) { ++ fprintf(stderr, "Invalid argument\n"); ++ 
exit(1); ++ } ++ ++ hostname = argv[1]; ++ volname = argv[2]; ++ logfile = argv[3]; ++ syncfile = argv[4]; ++ ++ ret = glfs_test_function(hostname, volname, logfile, syncfile); ++ if (ret) { ++ LOG_ERR("glfs_test_function failed"); ++ } ++ ++ return ret; ++} +diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c +index 7fcc9d4..df82b6e 100644 +--- a/xlators/cluster/afr/src/afr-inode-write.c ++++ b/xlators/cluster/afr/src/afr-inode-write.c +@@ -2492,6 +2492,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + call_frame_t *transaction_frame = NULL; + int ret = -1; + int32_t op_errno = ENOMEM; ++ int8_t last_fsync = 0; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) +@@ -2501,10 +2502,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + if (!local) + goto out; + +- if (xdata) ++ if (xdata) { + local->xdata_req = dict_copy_with_ref(xdata, NULL); +- else ++ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) { ++ if (last_fsync) { ++ local->transaction.disable_delayed_post_op = _gf_true; ++ } ++ } ++ } else { + local->xdata_req = dict_new(); ++ } + + if (!local->xdata_req) + goto out; +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 8e65ae2..ffd0ab8 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -2385,8 +2385,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this, + goto out; + } + +- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) { +- /*Only allow writes but shard does [f]xattrops on writes, so ++ if (local->transaction.disable_delayed_post_op) { ++ goto out; ++ } ++ ++ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) && ++ (local->op != GF_FOP_FSYNC)) { ++ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so + * they are fine too*/ + goto out; 
+ } +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index e731cfa..6bc4721 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -854,7 +854,7 @@ typedef struct _afr_local { + + int (*unwind)(call_frame_t *frame, xlator_t *this); + +- /* post-op hook */ ++ gf_boolean_t disable_delayed_post_op; + } transaction; + + syncbarrier_t barrier; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 8f31dca..145e616 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1564,6 +1564,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + xlator_t *old_target = NULL; + xlator_t *hashed_subvol = NULL; + fd_t *linkto_fd = NULL; ++ dict_t *xdata = NULL; + + if (from == to) { + gf_msg_debug(this->name, 0, +@@ -1882,7 +1883,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + + /* TODO: Sync the locks */ + +- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL); ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "%s: failed to set last-fsync flag on " ++ "%s (%s)", ++ loc->path, to->name, strerror(ENOMEM)); ++ } ++ ++ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", + loc->path, to->name, strerror(-ret)); +@@ -2356,11 +2365,15 @@ out: + + if (dst_fd) + syncop_close(dst_fd); ++ + if (src_fd) + syncop_close(src_fd); + if (linkto_fd) + syncop_close(linkto_fd); + ++ if (xdata) ++ dict_unref(xdata); ++ + loc_wipe(&tmp_loc); + loc_wipe(&parent_loc); + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 6e99053..1592067 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -5551,6 +5551,7 @@ fuse_migrate_fd(xlator_t 
*this, fd_t *basefd, xlator_t *old_subvol, + char create_in_progress = 0; + fuse_fd_ctx_t *basefd_ctx = NULL; + fd_t *oldfd = NULL; ++ dict_t *xdata = NULL; + + basefd_ctx = fuse_fd_ctx_get(this, basefd); + GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out); +@@ -5587,10 +5588,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, + } + + if (oldfd->inode->table->xl == old_subvol) { +- if (IA_ISDIR(oldfd->inode->ia_type)) ++ if (IA_ISDIR(oldfd->inode->ia_type)) { + ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL); +- else +- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL); ++ } else { ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_log("glusterfs-fuse", GF_LOG_WARNING, ++ "last-fsync set failed (%s) on fd (%p)" ++ "(basefd:%p basefd-inode.gfid:%s) " ++ "(old-subvolume:%s-%d new-subvolume:%s-%d)", ++ strerror(ENOMEM), oldfd, basefd, ++ uuid_utoa(basefd->inode->gfid), old_subvol->name, ++ old_subvol->graph->id, new_subvol->name, ++ new_subvol->graph->id); ++ } ++ ++ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL); ++ } + + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, +@@ -5645,6 +5659,9 @@ out: + + fd_unref(oldfd); + ++ if (xdata) ++ dict_unref(xdata); ++ + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch b/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch new file mode 100644 index 0000000..094a484 --- /dev/null +++ b/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch @@ -0,0 +1,142 @@ +From be6fafebe1e391e9d9f14d9aed18adbfda8a262b Mon Sep 17 00:00:00 2001 +From: l17zhou +Date: Mon, 4 Nov 2019 08:45:52 +0200 +Subject: [PATCH 388/392] rpc: Cleanup SSL specific data at the time of freeing + rpc object + +Problem: At the time of cleanup rpc object ssl specific data + is not freeing so it has become a leak. 
+ +Solution: To avoid the leak cleanup ssl specific data at the + time of cleanup rpc object + +> Credits: l17zhou +> Fixes: bz#1768407 +> Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0 +> (Cherry pick from commit 54ed71dba174385ab0d8fa415e09262f6250430c) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23650/) + +Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0 +BUG: 1848891 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/203698 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 22 ++++++++++++++++++++-- + tests/features/ssl-authz.t | 23 ++++++++++++++++++++--- + 2 files changed, 40 insertions(+), 5 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 65845ea..226b2e2 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -446,6 +446,7 @@ ssl_setup_connection_postfix(rpc_transport_t *this) + gf_log(this->name, GF_LOG_DEBUG, + "SSL verification succeeded (client: %s) (server: %s)", + this->peerinfo.identifier, this->myinfo.identifier); ++ X509_free(peer); + return gf_strdup(peer_CN); + + /* Error paths. 
*/ +@@ -1157,7 +1158,15 @@ __socket_reset(rpc_transport_t *this) + memset(&priv->incoming, 0, sizeof(priv->incoming)); + + event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx); +- ++ if (priv->use_ssl && priv->ssl_ssl) { ++ SSL_clear(priv->ssl_ssl); ++ SSL_free(priv->ssl_ssl); ++ priv->ssl_ssl = NULL; ++ } ++ if (priv->use_ssl && priv->ssl_ctx) { ++ SSL_CTX_free(priv->ssl_ctx); ++ priv->ssl_ctx = NULL; ++ } + priv->sock = -1; + priv->idx = -1; + priv->connected = -1; +@@ -3217,7 +3226,6 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + new_priv->sock = new_sock; + + new_priv->ssl_enabled = priv->ssl_enabled; +- new_priv->ssl_ctx = priv->ssl_ctx; + new_priv->connected = 1; + new_priv->is_server = _gf_true; + +@@ -4672,6 +4680,16 @@ fini(rpc_transport_t *this) + pthread_mutex_destroy(&priv->out_lock); + pthread_mutex_destroy(&priv->cond_lock); + pthread_cond_destroy(&priv->cond); ++ if (priv->use_ssl && priv->ssl_ssl) { ++ SSL_clear(priv->ssl_ssl); ++ SSL_free(priv->ssl_ssl); ++ priv->ssl_ssl = NULL; ++ } ++ if (priv->use_ssl && priv->ssl_ctx) { ++ SSL_CTX_free(priv->ssl_ctx); ++ priv->ssl_ctx = NULL; ++ } ++ + if (priv->ssl_private_key) { + GF_FREE(priv->ssl_private_key); + } +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index cae010c..132b598 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -25,6 +25,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume info; + ++TEST $CLI v set all cluster.brick-multiplex on + # Construct a cipher list that excludes CBC because of POODLE. 
+ # http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566 + # +@@ -45,12 +46,12 @@ TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +-TEST $CLI volume create $V0 $H0:$B0/1 ++TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force + TEST $CLI volume set $V0 server.ssl on + TEST $CLI volume set $V0 client.ssl on + TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers) + TEST $CLI volume start $V0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count + + # This mount should SUCCEED because ssl-allow=* by default. This effectively + # disables SSL authorization, though authentication and encryption might still +@@ -59,11 +60,27 @@ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 + TEST ping_file $M0/before + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + ++glusterfsd_pid=`pgrep glusterfsd` ++TEST [ $glusterfsd_pid != 0 ] ++start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` ++echo "Memory consumption for glusterfsd process" ++for i in $(seq 1 100); do ++ gluster v heal $V0 info >/dev/null ++done ++ ++end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` ++diff=$((end-start)) ++ ++# If memory consumption is more than 5M some leak in SSL code path ++ ++TEST [ $diff -lt 5000 ] ++ ++ + # Set ssl-allow to a wildcard that includes our identity. + TEST $CLI volume stop $V0 + TEST $CLI volume set $V0 auth.ssl-allow Any* + TEST $CLI volume start $V0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count + + # This mount should SUCCEED because we match the wildcard. 
+ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 +-- +1.8.3.1 + diff --git a/SOURCES/0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch b/SOURCES/0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch new file mode 100644 index 0000000..a4f9bf2 --- /dev/null +++ b/SOURCES/0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch @@ -0,0 +1,54 @@ +From d3558cfbded7e973fae45ce2196767611336e351 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 8 Jun 2020 13:27:50 +0530 +Subject: [PATCH 389/392] socket: Resolve ssl_ctx leak for a brick while only + mgmt SSL is enabled + +Problem: While only mgmt SSL is enabled for a brick process use_ssl flag + is false for a brick process and socket api's cleanup ssl_ctx only + while use_ssl and ssl_ctx both are valid + +Solution: To avoid a leak check only ssl_ctx, if it is valid cleanup + ssl_ctx + +> Fixes: #1196 +> Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 9873baee34afdf0c20f5fc98a7dbf2a9f07447e2) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24366/) + +BUG: 1848894 +Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/203705 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 226b2e2..54cd5df 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -1163,7 +1163,7 @@ __socket_reset(rpc_transport_t *this) + SSL_free(priv->ssl_ssl); + priv->ssl_ssl = NULL; + } +- if (priv->use_ssl && priv->ssl_ctx) { ++ if (priv->ssl_ctx) { + SSL_CTX_free(priv->ssl_ctx); + priv->ssl_ctx = NULL; + } +@@ -4685,7 +4685,7 @@ fini(rpc_transport_t *this) + 
SSL_free(priv->ssl_ssl); + priv->ssl_ssl = NULL; + } +- if (priv->use_ssl && priv->ssl_ctx) { ++ if (priv->ssl_ctx) { + SSL_CTX_free(priv->ssl_ctx); + priv->ssl_ctx = NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/0390-tests-Avoid-ssl-authz.t-failure.patch b/SOURCES/0390-tests-Avoid-ssl-authz.t-failure.patch new file mode 100644 index 0000000..443cdb9 --- /dev/null +++ b/SOURCES/0390-tests-Avoid-ssl-authz.t-failure.patch @@ -0,0 +1,51 @@ +From b68fa363c5981441c20fbc78b6dc00437bd698a7 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 22 Jun 2020 11:35:29 +0530 +Subject: [PATCH 390/392] tests: Avoid ssl-authz.t failure + +Problem: ssl-authz.t is failing at the time of checking memory + consumption if brick is consuming more than 5M + +Solution: Update the check to avoid a failure. + +> Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e +> Fixes: bz#1811631 +> Signed-off-by: Mohit Agrawal +> Reviewd on upstream link https://review.gluster.org/#/c/glusterfs/+/24221/) +> (Cherry pick from commit fb20713b380e1df8d7f9e9df96563be2f9144fd6) + +BUG: 1848894 +Change-Id: I4fc5d2e2597abfafc1e26d908c8c4184ab82afd5 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/203844 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/ssl-authz.t | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index 132b598..ab05c49 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -67,13 +67,15 @@ echo "Memory consumption for glusterfsd process" + for i in $(seq 1 100); do + gluster v heal $V0 info >/dev/null + done ++#Wait to cleanup memory ++sleep 10 + + end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` + diff=$((end-start)) + +-# If memory consumption is more than 5M some leak in SSL code path ++# If memory consumption is more than 15M some leak in SSL code path + +-TEST [ $diff -lt 
5000 ] ++TEST [ $diff -lt 15000 ] + + + # Set ssl-allow to a wildcard that includes our identity. +-- +1.8.3.1 + diff --git a/SOURCES/0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch b/SOURCES/0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch new file mode 100644 index 0000000..414f259 --- /dev/null +++ b/SOURCES/0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch @@ -0,0 +1,54 @@ +From e9cb714d66a7926a746b8cd5f9288d59aefee918 Mon Sep 17 00:00:00 2001 +From: Kinglong Mee +Date: Mon, 18 Mar 2019 20:47:54 +0800 +Subject: [PATCH 391/392] cluster-syncop: avoid duplicate unlock of + inodelk/entrylk + +When using ec, there are many messages at brick log as, + +[inodelk.c:514:__inode_unlock_lock] 0-test-locks: Matching lock not found for unlock 0-9223372036854775807, lo=68e040a84b7f0000 on 0x7f208c006f78 +[MSGID: 115053] [server-rpc-fops_v2.c:280:server4_inodelk_cbk] 0-test-server: 2557439: INODELK (df4e41be-723f-4289-b7af-b4272b3e880c), client: CTX_ID:67d4a7f3-605a-4965-89a5-31309d62d1fa-GRAPH_ID:0-PID:1659-HOST:openfs-node2-PC_NAME:test-client-1-RECON_NO:-28, error-xlator: test-locks [Invalid argument] + +> Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a +> Updates: bz#1689920 +> Signed-off-by: Kinglong Mee +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22377/ + +BUG: 1848890 +Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a +Signed-off-by: Sheetal Pamecha +Reviewed-on: https://code.engineering.redhat.com/gerrit/203852 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/cluster-syncop.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c +index 5a08f26..6ee89dd 100644 +--- a/libglusterfs/src/cluster-syncop.c ++++ b/libglusterfs/src/cluster-syncop.c +@@ -1203,6 +1203,10 @@ cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on, + if (num_success) { + 
FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, + inodelk, dom, &loc, F_SETLKW, &flock, NULL); ++ } else { ++ loc_wipe(&loc); ++ memset(locked_on, 0, numsubvols); ++ return 0; + } + break; + } +@@ -1244,7 +1248,9 @@ cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on, + entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, + NULL); + } else { ++ loc_wipe(&loc); + memset(locked_on, 0, numsubvols); ++ return 0; + } + break; + } +-- +1.8.3.1 + diff --git a/SOURCES/0392-glusterd-unlink-the-file-after-killing-the-process.patch b/SOURCES/0392-glusterd-unlink-the-file-after-killing-the-process.patch new file mode 100644 index 0000000..03d67f8 --- /dev/null +++ b/SOURCES/0392-glusterd-unlink-the-file-after-killing-the-process.patch @@ -0,0 +1,39 @@ +From 6a2cef546457e6dc9a2268cc2f5cc11b850b7f5c Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 17 Dec 2019 15:52:30 +0530 +Subject: [PATCH 392/392] glusterd: unlink the file after killing the process + +In glusterd_proc_stop(), after killing the pid +we should remove the pidfile. 
+ +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23890/ +> fixes: bz#1784375 +> Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686 +> Signed-off-by: Sanju Rakonde + +BUG: 1849533 +Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/203871 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +index f55a5fd..a05c90d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +@@ -107,6 +107,8 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags) + "service, reason:%s", + proc->name, strerror(errno)); + } ++ } else { ++ (void)glusterd_unlink_file(proc->pidfile); + } + if (flags != PROC_STOP_FORCE) + goto out; +-- +1.8.3.1 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec index 4c930f5..ef7c804 100644 --- a/SPECS/glusterfs.spec +++ b/SPECS/glusterfs.spec @@ -130,6 +130,12 @@ ## All %%global definitions should be placed here and keep them sorted ## +# selinux booleans whose default value needs modification +# these booleans will be consumed by "%%selinux_set_booleans" macro. 
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) +%global selinuxbooleans rsync_full_access=1 rsync_client=1 +%endif + %if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) %global _with_systemd true %endif @@ -231,7 +237,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 20%{?dist} +Release: 37.2%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -622,6 +628,85 @@ Patch0310: 0310-tests-test-case-for-non-root-geo-rep-setup.patch Patch0311: 0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch Patch0312: 0312-Scripts-quota_fsck-script-KeyError-contri_size.patch Patch0313: 0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch +Patch0314: 0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch +Patch0315: 0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch +Patch0316: 0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch +Patch0317: 0317-Update-rfc.sh-to-rhgs-3.5.1.patch +Patch0318: 0318-Update-rfc.sh-to-rhgs-3.5.1.patch +Patch0319: 0319-features-snapview-server-obtain-the-list-of-snapshot.patch +Patch0320: 0320-gf-event-Handle-unix-volfile-servers.patch +Patch0321: 0321-Adding-white-spaces-to-description-of-set-group.patch +Patch0322: 0322-glusterd-display-correct-rebalance-data-size-after-g.patch +Patch0323: 0323-cli-display-detailed-rebalance-info.patch +Patch0324: 0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch +Patch0325: 0325-extras-hooks-Install-and-package-newly-added-post-ad.patch +Patch0326: 0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch +Patch0327: 0327-glusterfind-integrate-with-gfid2path.patch +Patch0328: 0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch +Patch0329: 0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch +Patch0330: 0330-mount.glusterfs-change-the-error-message.patch +Patch0331: 0331-features-locks-Do-special-handling-for-op-version-3..patch +Patch0332: 
0332-Removing-one-top-command-from-gluster-v-help.patch +Patch0333: 0333-rpc-Synchronize-slot-allocation-code.patch +Patch0334: 0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch +Patch0335: 0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch +Patch0336: 0336-spec-check-and-return-exit-code-in-rpm-scripts.patch +Patch0337: 0337-fuse-Set-limit-on-invalidate-queue-size.patch +Patch0338: 0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch +Patch0339: 0339-geo-rep-fix-integer-config-validation.patch +Patch0340: 0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch +Patch0341: 0341-socket-fix-error-handling.patch +Patch0342: 0342-Revert-hooks-remove-selinux-hooks.patch +Patch0343: 0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch +Patch0344: 0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch +Patch0345: 0345-read-ahead-io-cache-turn-off-by-default.patch +Patch0346: 0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch +Patch0347: 0347-tools-glusterfind-handle-offline-bricks.patch +Patch0348: 0348-glusterfind-Fix-py2-py3-issues.patch +Patch0349: 0349-glusterfind-python3-compatibility.patch +Patch0350: 0350-tools-glusterfind-Remove-an-extra-argument.patch +Patch0351: 0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch +Patch0352: 0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch +Patch0353: 0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch +Patch0354: 0354-core-fix-memory-pool-management-races.patch +Patch0355: 0355-core-Prevent-crash-on-process-termination.patch +Patch0356: 0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch +Patch0357: 0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch +Patch0358: 0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch +Patch0359: 0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch +Patch0360: 0360-rpc-Make-ssl-log-more-useful.patch +Patch0361: 0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch +Patch0362: 
0362-write-behind-fix-data-corruption.patch +Patch0363: 0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch +Patch0364: 0364-dht-fixing-rebalance-failures-for-files-with-holes.patch +Patch0365: 0365-build-geo-rep-requires-relevant-selinux-permission-f.patch +Patch0366: 0366-snapshot-fix-python3-issue-in-gcron.patch +Patch0367: 0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch +Patch0368: 0368-Update-rfc.sh-to-rhgs-3.5.2.patch +Patch0369: 0369-cluster-ec-Return-correct-error-code-and-log-message.patch +Patch0370: 0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch +Patch0371: 0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch +Patch0372: 0372-posix-fix-seek-functionality.patch +Patch0373: 0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch +Patch0374: 0374-open-behind-fix-missing-fd-reference.patch +Patch0375: 0375-features-shard-Send-correct-size-when-reads-are-sent.patch +Patch0376: 0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch +Patch0377: 0377-syncop-improve-scaling-and-implement-more-tools.patch +Patch0378: 0378-Revert-open-behind-fix-missing-fd-reference.patch +Patch0379: 0379-glusterd-add-missing-synccond_broadcast.patch +Patch0380: 0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch +Patch0381: 0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch +Patch0382: 0382-features-shard-Aggregate-file-size-block-count-befor.patch +Patch0383: 0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch +Patch0384: 0384-extras-Modify-group-virt-to-include-network-related-.patch +Patch0385: 0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch +Patch0386: 0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch +Patch0387: 0387-cluster-afr-Delay-post-op-for-fsync.patch +Patch0388: 0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch +Patch0389: 0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch +Patch0390: 
0390-tests-Avoid-ssl-authz.t-failure.patch +Patch0391: 0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch +Patch0392: 0392-glusterd-unlink-the-file-after-killing-the-process.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -689,6 +774,7 @@ This package provides the GlusterFS CLI application and its man page %package cloudsync-plugins Summary: Cloudsync Plugins BuildRequires: libcurl-devel +Requires: glusterfs-libs = %{version}-%{release} %description cloudsync-plugins GlusterFS is a distributed file-system capable of scaling to several @@ -776,6 +862,7 @@ Summary: NFS-Ganesha configuration Group: Applications/File Requires: %{name}-server%{?_isa} = %{version}-%{release} +Requires: nfs-ganesha-selinux >= 2.7.3 Requires: nfs-ganesha-gluster >= 2.7.3 Requires: pcs, dbus %if ( 0%{?rhel} && 0%{?rhel} == 6 ) @@ -789,7 +876,7 @@ Requires: net-tools %endif %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) -%if ( 0%{?rhel} ) +%if ( 0%{?rhel} && 0%{?rhel} < 8 ) Requires: selinux-policy >= 3.13.1-160 Requires(post): policycoreutils-python Requires(postun): policycoreutils-python @@ -828,6 +915,14 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release} Requires: rsync Requires: util-linux Requires: %{name}-libs%{?_isa} = %{version}-%{release} +# required for setting selinux bools +%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) +Requires(post): policycoreutils-python-utils +Requires(postun): policycoreutils-python-utils +Requires: selinux-policy-targeted +Requires(post): selinux-policy-targeted +BuildRequires: selinux-policy-devel +%endif %description geo-replication GlusterFS is a distributed file-system capable of scaling to several @@ -1021,7 +1116,7 @@ This package provides the translators needed on any GlusterFS client. 
%package events Summary: GlusterFS Events Requires: %{name}-server%{?_isa} = %{version}-%{release} -Requires: python%{_pythonver} python%{_pythonver}-prettytable +Requires: python%{_pythonver} Requires: python%{_pythonver}-gluster = %{version}-%{release} %if ( 0%{?rhel} && 0%{?rhel} < 8 ) Requires: python-requests @@ -1029,7 +1124,10 @@ Requires: python-requests Requires: python%{_pythonver}-requests %endif %if ( 0%{?rhel} && 0%{?rhel} < 7 ) +Requires: python-prettytable Requires: python-argparse +%else +Requires: python%{_pythonver}-prettytable %endif %if ( 0%{?_with_systemd:1} ) %{?systemd_requires} @@ -1065,7 +1163,7 @@ do DEST_FILES=( $(egrep '^\+\+\+ b/' $p | cut -f 2- -d '/') ) EXCLUDE_DOCS=() for idx in ${!SOURCE_FILES[@]}; do - # skip the doc + # skip the doc source_file=${SOURCE_FILES[$idx]} dest_file=${DEST_FILES[$idx]} if [[ "$dest_file" =~ ^doc/.+ ]]; then @@ -1324,6 +1422,9 @@ exit 0 %if ( 0%{!?_without_georeplication:1} ) %post geo-replication +%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) +%selinux_set_booleans %{selinuxbooleans} +%endif if [ $1 -ge 1 ]; then %systemd_postun_with_restart glusterd fi @@ -1956,8 +2057,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -1990,8 +2092,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2024,8 +2127,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2058,8 +2162,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + 
error("Detected running glusterfs processes", rc) end @@ -2091,8 +2196,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2124,8 +2230,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2159,8 +2266,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end %endif @@ -2194,8 +2302,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end @@ -2229,8 +2338,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end %endif @@ -2265,8 +2375,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end %endif @@ -2300,8 +2411,9 @@ fi ]] ok, how, val = os.execute(script) -if not (ok == 0) then - error("Detected running glusterfs processes", ok) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) end %posttrans server @@ -2335,8 +2447,68 @@ fi %endif %changelog -* Tue Jan 21 2020 CentOS Sources - 6.0-20.el8.centos -- remove vendor and/or packager lines +* Tue Sep 08 2020 Rinku Kothiya - 6.0-37.2 +- fixes bugs bz#1876857 + +* Wed Jun 24 2020 Deepshikha Khandelwal - 6.0-37.1 +- fixes bugs 
bz#1848890 bz#1848891 bz#1848893 bz#1848894 bz#1848895 + bz#1848896 bz#1848899 bz#1849533 + +* Fri May 29 2020 Rinku Kothiya - 6.0-37 +- fixes bugs bz#1840794 + +* Wed May 27 2020 Rinku Kothiya - 6.0-36 +- fixes bugs bz#1812789 bz#1823423 + +* Fri May 22 2020 Rinku Kothiya - 6.0-35 +- fixes bugs bz#1810516 bz#1830713 bz#1836233 + +* Sun May 17 2020 Rinku Kothiya - 6.0-34 +- fixes bugs bz#1802013 bz#1823706 bz#1825177 bz#1830713 bz#1831403 bz#1833017 + +* Wed Apr 29 2020 Rinku Kothiya - 6.0-33 +- fixes bugs bz#1812789 bz#1813917 bz#1823703 bz#1823706 bz#1825195 + +* Sat Apr 04 2020 Rinku Kothiya - 6.0-32 +- fixes bugs bz#1781543 bz#1812789 bz#1812824 bz#1817369 bz#1819059 + +* Tue Mar 17 2020 Rinku Kothiya - 6.0-31 +- fixes bugs bz#1802727 + +* Thu Feb 20 2020 Rinku Kothiya - 6.0-30.1 +- fixes bugs bz#1800703 + +* Sat Feb 01 2020 Rinku Kothiya - 6.0-30 +- fixes bugs bz#1775564 bz#1794153 + +* Thu Jan 23 2020 Rinku Kothiya - 6.0-29 +- fixes bugs bz#1793035 + +* Tue Jan 14 2020 Rinku Kothiya - 6.0-28 +- fixes bugs bz#1789447 + +* Mon Jan 13 2020 Rinku Kothiya - 6.0-27 +- fixes bugs bz#1789447 + +* Fri Jan 10 2020 Rinku Kothiya - 6.0-26 +- fixes bugs bz#1763208 bz#1788656 + +* Mon Dec 23 2019 Rinku Kothiya - 6.0-25 +- fixes bugs bz#1686800 bz#1763208 bz#1779696 bz#1781444 bz#1782162 + +* Thu Nov 28 2019 Rinku Kothiya - 6.0-24 +- fixes bugs bz#1768786 + +* Thu Nov 21 2019 Rinku Kothiya - 6.0-23 +- fixes bugs bz#1344758 bz#1599802 bz#1685406 bz#1686800 bz#1724021 + bz#1726058 bz#1727755 bz#1731513 bz#1741193 bz#1758923 bz#1761326 bz#1761486 + bz#1762180 bz#1764095 bz#1766640 + +* Thu Nov 14 2019 Rinku Kothiya - 6.0-22 +- fixes bugs bz#1771524 bz#1771614 + +* Fri Oct 25 2019 Rinku Kothiya - 6.0-21 +- fixes bugs bz#1765555 * Wed Oct 23 2019 Rinku Kothiya - 6.0-20 - fixes bugs bz#1719171 bz#1763412 bz#1764202