From 887953c93b83a76ffe2de04342f2e29477023f17 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 06 2019 11:12:02 +0000 Subject: import glusterfs-3.12.2-47.2.el7 --- diff --git a/README.debrand b/README.debrand deleted file mode 100644 index 01c46d2..0000000 --- a/README.debrand +++ /dev/null @@ -1,2 +0,0 @@ -Warning: This package was configured for automatic debranding, but the changes -failed to apply. diff --git a/SOURCES/0363-Update-rfc.sh-to-rhgs-3.4.1.patch b/SOURCES/0363-Update-rfc.sh-to-rhgs-3.4.1.patch new file mode 100644 index 0000000..f2be808 --- /dev/null +++ b/SOURCES/0363-Update-rfc.sh-to-rhgs-3.4.1.patch @@ -0,0 +1,27 @@ +From 84e7997bdf977f7d2dbce2f1f7c57c4ccb1190ba Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Wed, 5 Sep 2018 10:38:20 +0530 +Subject: [PATCH 363/385] Update rfc.sh to rhgs-3.4.1 + +Change-Id: I16da34310701c5db74664cdd2b2fa67534b662ab +Signed-off-by: Milind Changire +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index 356242e..8c4b5ac 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -17,7 +17,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.4.0"; ++branch="rhgs-3.4.1"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0364-fips-Replace-md5sum-usage-to-enable-fips-support.patch b/SOURCES/0364-fips-Replace-md5sum-usage-to-enable-fips-support.patch new file mode 100644 index 0000000..968e932 --- /dev/null +++ b/SOURCES/0364-fips-Replace-md5sum-usage-to-enable-fips-support.patch @@ -0,0 +1,126 @@ +From eba2217ac06dab658526991e93e018b91c92d7b5 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 19 Dec 2017 00:05:05 -0500 +Subject: [PATCH 364/385] fips: Replace md5sum usage to enable fips support + +md5sum is not fips compliant. Using xxhash64 instead of +md5sum for socket file generation in glusterd and +changelog to enable fips support. + +NOTE: md5sum is 128 bit hash. xxhash used is 64 bit. 
+ +Backport of: + > Patch: https://review.gluster.org/19048 + > Updates: #230 + > Change-Id: I1bf2ea05905b9151cd29fa951f903685ab0dc84c + > Signed-off-by: Kotresh HR + +BUG: 1459709 +Change-Id: I1bf2ea05905b9151cd29fa951f903685ab0dc84c +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/149770 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/common-utils.c | 11 ----------- + libglusterfs/src/common-utils.h | 1 - + xlators/features/changelog/src/changelog-misc.h | 20 ++++++++++---------- + xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +++++--- + 4 files changed, 15 insertions(+), 25 deletions(-) + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index fd2f004..f632e78 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -75,17 +75,6 @@ typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size); + typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size); + + void +-md5_wrapper(const unsigned char *data, size_t len, char *md5) +-{ +- unsigned short i = 0; +- unsigned short lim = MD5_DIGEST_LENGTH*2+1; +- unsigned char scratch[MD5_DIGEST_LENGTH] = {0,}; +- MD5(data, len, scratch); +- for (; i < MD5_DIGEST_LENGTH; i++) +- snprintf(md5 + i * 2, lim-i*2, "%02x", scratch[i]); +-} +- +-void + gf_xxh64_wrapper(const unsigned char *data, size_t len, unsigned long long seed, + char *xxh64) + { +diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h +index 0131070..da943f4 100644 +--- a/libglusterfs/src/common-utils.h ++++ b/libglusterfs/src/common-utils.h +@@ -835,7 +835,6 @@ gf_ports_reserved (char *blocked_port, unsigned char *ports, uint32_t ceiling); + int gf_get_hostname_from_ip (char *client_ip, char **hostname); + gf_boolean_t gf_is_local_addr (char *hostname); + gf_boolean_t gf_is_same_address (char *host1, char *host2); +-void md5_wrapper(const unsigned char *data, size_t len, char *md5); + void gf_xxh64_wrapper(const unsigned char *data, size_t len, + unsigned long long seed, char *xxh64); + int gf_set_timestamp (const char *src, const char* dest); +diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h +index 94d6c50..93af201 100644 +--- a/xlators/features/changelog/src/changelog-misc.h ++++ b/xlators/features/changelog/src/changelog-misc.h +@@ -36,24 +36,24 @@ + "GlusterFS Changelog | version: v%d.%d | encoding : %d\n" + + #define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) do { \ +- char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \ +- md5_wrapper((unsigned char *) brick_path, \ +- strlen(brick_path), \ +- md5_sum); \ ++ char xxh64[GF_XXH64_DIGEST_LENGTH*2+1] = {0,}; \ ++ gf_xxh64_wrapper ((unsigned char *)brick_path, \ ++ strlen(brick_path), \ ++ GF_XXHSUM64_DEFAULT_SEED, xxh64); \ + (void) snprintf (sockpath, len, \ +- CHANGELOG_UNIX_SOCK, md5_sum); \ ++ CHANGELOG_UNIX_SOCK, xxh64); \ + } while (0) + + #define CHANGELOG_MAKE_TMP_SOCKET_PATH(brick_path, sockpath, len) do { \ + unsigned long pid = 0; \ +- char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \ ++ char xxh64[GF_XXH64_DIGEST_LENGTH*2+1] = {0,}; \ + pid = (unsigned long) getpid (); \ +- md5_wrapper((unsigned char *) brick_path, \ +- strlen(brick_path), \ +- md5_sum); \ ++ gf_xxh64_wrapper ((unsigned char *)brick_path, \ ++ strlen(brick_path), \ ++ GF_XXHSUM64_DEFAULT_SEED, xxh64); \ + (void) snprintf (sockpath, \ + len, CHANGELOG_TMP_UNIX_SOCK, \ +- md5_sum, pid); \ ++ xxh64, 
pid); \ + } while (0) + + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 01345cd..4fd8575 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1852,10 +1852,12 @@ out: + void + glusterd_set_socket_filepath (char *sock_filepath, char *sockpath, size_t len) + { +- char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; ++ char xxh64[GF_XXH64_DIGEST_LENGTH*2+1] = {0,}; + +- md5_wrapper ((unsigned char *) sock_filepath, strlen(sock_filepath), md5_sum); +- snprintf (sockpath, len, "%s/%s.socket", GLUSTERD_SOCK_DIR, md5_sum); ++ gf_xxh64_wrapper ((unsigned char *)sock_filepath, ++ strlen(sock_filepath), ++ GF_XXHSUM64_DEFAULT_SEED, xxh64); ++ snprintf (sockpath, len, "%s/%s.socket", GLUSTERD_SOCK_DIR, xxh64); + } + + void +-- +1.8.3.1 + diff --git a/SOURCES/0365-glusterd-ignore-importing-volume-which-is-undergoing.patch b/SOURCES/0365-glusterd-ignore-importing-volume-which-is-undergoing.patch new file mode 100644 index 0000000..af5342f --- /dev/null +++ b/SOURCES/0365-glusterd-ignore-importing-volume-which-is-undergoing.patch @@ -0,0 +1,208 @@ +From d8e094d1bdd2dff5e8f81c0786ca62f6d6dc45ee Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 31 Jul 2018 12:33:49 +0530 +Subject: [PATCH 365/385] glusterd: ignore importing volume which is undergoing + a delete operation + +Problem explanation: + +Assuming in a 3 nodes cluster, if N1 originates a delete operation and +while N1's commit phase completes, either glusterd service of N2 or N3 +gets disconnected from N1 (before completing the commit phase), N1 will +attempt to end up importing the volume which is in-flight for a delete +in other nodes as a fresh resulting into an incorrect configuration +state. + +Fix: + +Mark a volume as stage deleted once a volume delete operation passes +it's staging phase and reset this flag during unlock phase. Now during +this intermediate phase if the same volume gets imported to other peers, +it shouldn't considered to be recreated. + +An automated .t is quite tough to implement with the current infra. + +Test Case: + +1. Keep creating and deleting volumes in a loop on a 3 node cluster +2. Simulate n/w failure between the peers (ifdown followed by ifup) +3. Check if output of 'gluster v list | wc -l' is same across all 3 +nodes during 1 & 2. 
+ +>upstream patch : https://review.gluster.org/#/c/glusterfs/+/20592 +>Change-Id: Ifdd5dc39699120258d7fdd42fe2deb9de25c6246 +>Fixes: bz#1605077 +>Signed-off-by: Atin Mukherjee + +Change-Id: Ifdd5dc39699120258d7fdd42fe2deb9de25c6246 +BUG: 1618221 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/149872 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-locks.c | 13 +++++++++-- + xlators/mgmt/glusterd/src/glusterd-utils.c | 30 ++++++++++++++++++++++--- + xlators/mgmt/glusterd/src/glusterd-utils.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +- + xlators/mgmt/glusterd/src/glusterd.h | 3 +++ + 5 files changed, 43 insertions(+), 7 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c +index 831be20..f4e0225 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-locks.c ++++ b/xlators/mgmt/glusterd/src/glusterd-locks.c +@@ -790,6 +790,7 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) + int32_t ret = -1; + gf_boolean_t is_valid = _gf_true; + glusterd_conf_t *priv = NULL; ++ glusterd_volinfo_t *volinfo = NULL; + glusterd_mgmt_v3_lock_timer *mgmt_lock_timer = NULL; + uuid_t owner = {0}; + xlator_t *this = NULL; +@@ -888,8 +889,7 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) + "Lock for %s %s successfully released", + type, name); + +- ret = 0; +- /* Release owner refernce which was held during lock */ ++ /* Release owner reference which was held during lock */ + if (mgmt_lock_timer->timer) { + ret = -1; + mgmt_lock_timer_xl = mgmt_lock_timer->xl; +@@ -906,6 +906,15 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) + dict_del (priv->mgmt_v3_lock_timer, key_dup); + mgmt_lock_timer->timer = NULL; + } ++ ret = glusterd_volinfo_find (name, &volinfo); ++ if (volinfo && volinfo->stage_deleted) { ++ /* this indicates a volume still exists and the volume delete ++ * operation has failed in some of the phases, need to ensure ++ * stage_deleted flag is set back to false ++ */ ++ volinfo->stage_deleted = _gf_false; ++ } ++ ret = 0; + out: + + gf_msg_trace (this->name, 0, "Returning %d", ret); +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 4fd8575..7f52602 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1699,7 +1699,7 @@ glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volin + } + + int32_t +-glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) ++glusterd_volinfo_find (const char *volname, glusterd_volinfo_t **volinfo) + { + glusterd_volinfo_t *tmp_volinfo = NULL; + int32_t ret = -1; +@@ -2952,6 +2952,11 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, + if (ret) + goto out; + ++ snprintf (key, sizeof (key), "%s%d.stage_deleted", prefix, count); ++ ret = dict_set_uint32 (dict, key, (uint32_t)volinfo->stage_deleted); ++ if (ret) ++ goto out; ++ + /* tiering related variables */ + + memset (key, 0, sizeof (key)); +@@ -3355,6 +3360,7 @@ glusterd_compare_friend_volume (dict_t *peer_data, int32_t count, + uint32_t cksum = 0; + uint32_t quota_cksum = 0; + uint32_t quota_version = 0; ++ uint32_t stage_deleted = 0; + int32_t version = 0; + xlator_t *this = NULL; + +@@ -3370,9 +3376,15 @@ glusterd_compare_friend_volume (dict_t *peer_data, int32_t count, + goto out; + + ret = glusterd_volinfo_find 
(volname, &volinfo); +- + if (ret) { +- *status = GLUSTERD_VOL_COMP_UPDATE_REQ; ++ snprintf (key, sizeof (key), "volume%d.stage_deleted", count); ++ ret = dict_get_uint32 (peer_data, key, &stage_deleted); ++ /* stage_deleted = 1 means the volume is still in the process of ++ * deleting a volume, so we shouldn't be trying to create a ++ * fresh volume here which would lead to a stale entry ++ */ ++ if (stage_deleted == 0) ++ *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + ret = 0; + goto out; + } +@@ -3929,6 +3941,7 @@ glusterd_import_volinfo (dict_t *peer_data, int count, + char *rebalance_id_str = NULL; + int op_version = 0; + int client_op_version = 0; ++ uint32_t stage_deleted = 0; + + GF_ASSERT (peer_data); + GF_ASSERT (volinfo); +@@ -3941,6 +3954,17 @@ glusterd_import_volinfo (dict_t *peer_data, int count, + goto out; + } + ++ snprintf (key, sizeof (key), "%s%d.stage_deleted", prefix, count); ++ ret = dict_get_uint32 (peer_data, key, &stage_deleted); ++ /* stage_deleted = 1 means the volume is still in the process of ++ * deleting a volume, so we shouldn't be trying to create a ++ * fresh volume here which would lead to a stale entry ++ */ ++ if (stage_deleted) { ++ ret = 0; ++ goto out; ++ } ++ + ret = glusterd_volinfo_new (&new_volinfo); + if (ret) + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 4835728..ffcc636 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -159,7 +159,7 @@ glusterd_brickinfo_new_from_brick (char *brick, + char **op_errstr); + + int32_t +-glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo); ++glusterd_volinfo_find (const char *volname, glusterd_volinfo_t **volinfo); + + int + glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 8bb0b6d..94e07cb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1828,7 +1828,7 @@ glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr) + snprintf (msg, sizeof(msg), "Some of the peers are down"); + goto out; + } +- ++ volinfo->stage_deleted = _gf_true; + ret = 0; + + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 4ec609f..d4f4f7e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -475,6 +475,9 @@ struct glusterd_volinfo_ { + glusterd_snapdsvc_t snapd; + glusterd_tierdsvc_t tierd; + int32_t quota_xattr_version; ++ gf_boolean_t stage_deleted; /* volume has passed staging ++ * for delete operation ++ */ + }; + + typedef enum gd_snap_status_ { +-- +1.8.3.1 + diff --git a/SOURCES/0366-glusterd-fail-volume-stop-operation-if-brick-detach-.patch b/SOURCES/0366-glusterd-fail-volume-stop-operation-if-brick-detach-.patch new file mode 100644 index 0000000..c141305 --- /dev/null +++ b/SOURCES/0366-glusterd-fail-volume-stop-operation-if-brick-detach-.patch @@ -0,0 +1,79 @@ +From d67fddc4e6439f6aadd76da1a2058ffb7a4940d4 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Fri, 31 Aug 2018 20:42:21 +0530 +Subject: [PATCH 366/385] glusterd: fail volume stop operation if brick detach + fails + +While sending a detach request for a brick in brick multiplexing mode, +in any situation if the brick isn't connected, glusterd will fail to +detach the brick but due to the missing error 
code handling, glusterd +will mark the volume as stopped. + +Fix is to handle the return code of send_attach_req in +glusterd_volume_stop_glusterfs () + +>upstream patch : https://review.gluster.org/#/c/glusterfs/+/21055/ + +>Change-Id: I886202969c96eec3620f74cd7027652d6287f4be +>Fixes: bz#1624440 +>Signed-off-by: Atin Mukherjee + +Change-Id: I886202969c96eec3620f74cd7027652d6287f4be +BUG: 1624444 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/149873 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 31 +++++++++++++++++++++--------- + 1 file changed, 22 insertions(+), 9 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 7f52602..3db3a15 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2496,19 +2496,32 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, + * an actual signal instead. + */ + if (is_brick_mx_enabled ()) { +- gf_msg_debug (this->name, 0, "About to send detach " +- "request for brick %s:%s", +- brickinfo->hostname, brickinfo->path); +- +- (void) send_attach_req (this, brickinfo->rpc, +- brickinfo->path, NULL, NULL, +- GLUSTERD_BRICK_TERMINATE); ++ ret = send_attach_req (this, brickinfo->rpc, ++ brickinfo->path, NULL, NULL, ++ GLUSTERD_BRICK_TERMINATE); ++ if (ret && brickinfo->status == GF_BRICK_STARTED) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_BRICK_STOP_FAIL, "Failed to send" ++ " detach request for brick %s", ++ brickinfo->path); ++ goto out; ++ } ++ gf_log (this->name, GF_LOG_INFO, "Detach request for " ++ "brick %s:%s is sent successfully", ++ brickinfo->hostname, brickinfo->path); + } else { + gf_msg_debug (this->name, 0, "About to stop glusterfsd" + " for brick %s:%s", brickinfo->hostname, + brickinfo->path); +- (void) glusterd_brick_terminate (volinfo, brickinfo, +- NULL, 0, &op_errstr); ++ ret = glusterd_brick_terminate (volinfo, brickinfo, ++ NULL, 0, &op_errstr); ++ if (ret && brickinfo->status == GF_BRICK_STARTED) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_BRICK_STOP_FAIL, "Failed to kill" ++ " the brick %s", brickinfo->path); ++ goto out; ++ } ++ + if (op_errstr) { + GF_FREE (op_errstr); + } +-- +1.8.3.1 + diff --git a/SOURCES/0367-cluster-ec-Improve-logging-for-some-critical-error-m.patch b/SOURCES/0367-cluster-ec-Improve-logging-for-some-critical-error-m.patch new file mode 100644 index 0000000..1077292 --- /dev/null +++ b/SOURCES/0367-cluster-ec-Improve-logging-for-some-critical-error-m.patch @@ -0,0 +1,202 @@ +From b2a0656b409cf867073c961fa4103bc59966a059 Mon Sep 17 00:00:00 2001 +From: Ashish Pandey +Date: Mon, 3 Sep 2018 14:01:23 +0530 +Subject: [PATCH 367/385] cluster/ec: Improve logging for some critical error + messages + +>Change-Id: I037e52a3467467b81a1ba5416317870864060d4d +>updates: bz#1615703 +>Signed-off-by: Ashish Pandey + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21061/ + +BUG: 1625622 +Change-Id: I037e52a3467467b81a1ba5416317870864060d4d +Signed-off-by: Ashish Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/149671 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-common.c | 66 ++++++++++++++++++++++++++++++-------- + xlators/cluster/ec/src/ec-data.c | 1 + + xlators/cluster/ec/src/ec-types.h | 2 ++ + 3 files changed, 55 insertions(+), 14 deletions(-) + +diff --git 
a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index b74bce0..6d0eb62 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -587,6 +587,42 @@ ec_internal_op (ec_fop_data_t *fop) + return _gf_false; + } + ++char * ++ec_msg_str (ec_fop_data_t *fop) ++{ ++ loc_t *loc1 = NULL; ++ loc_t *loc2 = NULL; ++ char gfid1[64] = {0}; ++ char gfid2[64] = {0}; ++ ++ if (fop->errstr) ++ return fop->errstr; ++ ++ if (!fop->use_fd) { ++ loc1 = &fop->loc[0]; ++ loc2 = &fop->loc[1]; ++ ++ if (fop->id == GF_FOP_RENAME) { ++ gf_asprintf(&fop->errstr, ++ "FOP : '%s' failed on '%s' and '%s' with gfids " ++ "%s and %s respectively", ec_fop_name (fop->id), ++ loc1->path, loc2->path, ++ uuid_utoa_r (loc1->gfid, gfid1), ++ uuid_utoa_r (loc2->gfid, gfid2)); ++ } else { ++ gf_asprintf(&fop->errstr, ++ "FOP : '%s' failed on '%s' with gfid %s", ++ ec_fop_name (fop->id), ++ loc1->path, uuid_utoa_r (loc1->gfid, gfid1)); ++ } ++ } else { ++ gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s", ++ ec_fop_name (fop->id), ++ uuid_utoa_r (fop->fd->inode->gfid, gfid1)); ++ } ++ return fop->errstr; ++} ++ + int32_t ec_child_select(ec_fop_data_t * fop) + { + ec_t * ec = fop->xl->private; +@@ -607,9 +643,8 @@ int32_t ec_child_select(ec_fop_data_t * fop) + gf_msg (fop->xl->name, GF_LOG_WARNING, 0, + EC_MSG_OP_EXEC_UNAVAIL, + "Executing operation with " +- "some subvolumes unavailable " +- "(%lX)", fop->mask & ~ec->xl_up); +- ++ "some subvolumes unavailable. (%lX). %s ", ++ fop->mask & ~ec->xl_up, ec_msg_str(fop)); + fop->mask &= ec->xl_up; + } + +@@ -650,8 +685,8 @@ int32_t ec_child_select(ec_fop_data_t * fop) + EC_MSG_CHILDS_INSUFFICIENT, + "Insufficient available children " + "for this request (have %d, need " +- "%d)", num, fop->minimum); +- ++ "%d). %s", ++ num, fop->minimum, ec_msg_str(fop)); + return 0; + } + +@@ -1122,7 +1157,6 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + gf_boolean_t release = _gf_false; + uint64_t provided_flags = 0; + uint64_t dirty[EC_VERSION_SIZE] = {0, 0}; +- + lock = parent_link->lock; + parent = parent_link->fop; + ctx = lock->ctx; +@@ -1139,11 +1173,11 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + list_add_tail(&link->fop->cbk_list, &list); + } + } +- + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, + EC_MSG_SIZE_VERS_GET_FAIL, +- "Failed to get size and version"); ++ "Failed to get size and version : %s", ++ ec_msg_str(fop)); + + goto unlock; + } +@@ -1155,7 +1189,8 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + if (op_errno != 0) { + gf_msg (this->name, GF_LOG_ERROR, op_errno, + EC_MSG_VER_XATTR_GET_FAIL, +- "Unable to get version xattr"); ++ "Unable to get version xattr. %s", ++ ec_msg_str(fop)); + goto unlock; + } + ctx->post_version[0] += ctx->pre_version[0]; +@@ -1171,7 +1206,8 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + if (lock->loc.inode->ia_type == IA_IFREG) { + gf_msg (this->name, GF_LOG_ERROR, op_errno, + EC_MSG_SIZE_XATTR_GET_FAIL, +- "Unable to get size xattr"); ++ "Unable to get size xattr. %s", ++ ec_msg_str(fop)); + goto unlock; + } + } else { +@@ -1187,7 +1223,8 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + (op_errno != ENODATA)) { + gf_msg (this->name, GF_LOG_ERROR, op_errno, + EC_MSG_CONFIG_XATTR_GET_FAIL, +- "Unable to get config xattr"); ++ "Unable to get config xattr. 
%s", ++ ec_msg_str(fop)); + + goto unlock; + } +@@ -2168,7 +2205,8 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie, + if (op_ret < 0) { + gf_msg(fop->xl->name, fop_log_level (fop->id, op_errno), op_errno, + EC_MSG_SIZE_VERS_UPDATE_FAIL, +- "Failed to update version and size"); ++ "Failed to update version and size. %s", ++ ec_msg_str(fop)); + } else { + fop->parent->good &= fop->good; + +@@ -2213,7 +2251,6 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version, + ec_inode_t *ctx; + dict_t *dict = NULL; + uintptr_t update_on = 0; +- + int32_t err = -ENOMEM; + + fop = link->fop; +@@ -2294,7 +2331,8 @@ out: + ec_fop_set_error(fop, -err); + + gf_msg (fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL, +- "Unable to update version and size"); ++ "Unable to update version and size. %s", ++ ec_msg_str(fop)); + + if (lock->unlock_now) { + ec_unlock_lock(fop->data); +diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c +index 54c708a..b3b72d5 100644 +--- a/xlators/cluster/ec/src/ec-data.c ++++ b/xlators/cluster/ec/src/ec-data.c +@@ -286,6 +286,7 @@ void ec_fop_data_release(ec_fop_data_t * fop) + GF_FREE(fop->str[1]); + loc_wipe(&fop->loc[0]); + loc_wipe(&fop->loc[1]); ++ GF_FREE(fop->errstr); + + ec_resume_parent(fop, fop->error); + +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index f6e2cd9..9176dde 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -347,6 +347,8 @@ struct _ec_fop_data { + struct iovec *vector; + struct iobref *buffers; + gf_seek_what_t seek; ++ char *errstr; /*String of fop name, path and gfid ++ to be used in gf_msg. */ + }; + + struct _ec_cbk_data { +-- +1.8.3.1 + diff --git a/SOURCES/0368-mount-fuse-convert-ENOENT-to-ESTALE-in-open-dir-_res.patch b/SOURCES/0368-mount-fuse-convert-ENOENT-to-ESTALE-in-open-dir-_res.patch new file mode 100644 index 0000000..106028a --- /dev/null +++ b/SOURCES/0368-mount-fuse-convert-ENOENT-to-ESTALE-in-open-dir-_res.patch @@ -0,0 +1,67 @@ +From 3a392704f61915217f4f8210e1dd94901d6938bb Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Tue, 11 Sep 2018 10:31:27 +0530 +Subject: [PATCH 368/385] mount/fuse: convert ENOENT to ESTALE in + open(dir)_resume + +This patch is continuation of commit +fb4b914ce84bc83a5f418719c5ba7c25689a9251. + + +mount/fuse: never fail open(dir) with ENOENT + + open(dir) being an operation on inode should never fail with + ENOENT. If gfid is not present, the appropriate error is + ESTALE. This will enable kernel to retry open after a revalidate + lookup. + + +Earlier commit failed to fix codepath where error response is sent +back on gfid resolution failures in fuse_open(dir)_resume. 
Current +patch completes that work + +>Change-Id: Ia07e3cece404811703c8cfbac9b402ca5fe98c1e +>Signed-off-by: Raghavendra G +>updates: bz#1627620 + +Change-Id: Ia07e3cece404811703c8cfbac9b402ca5fe98c1e +Signed-off-by: Raghavendra G +BUG: 1627617 +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21146/ +Reviewed-on: https://code.engineering.redhat.com/gerrit/150109 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mount/fuse/src/fuse-bridge.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index b767ea4..85cee73 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -2235,6 +2235,10 @@ fuse_open_resume (fuse_state_t *state) + "%"PRIu64": OPEN %s resolution failed", + state->finh->unique, uuid_utoa (state->resolve.gfid)); + ++ /* facilitate retry from VFS */ ++ if (state->resolve.op_errno == ENOENT) ++ state->resolve.op_errno = ESTALE; ++ + send_fuse_err (state->this, state->finh, + state->resolve.op_errno); + free_fuse_state (state); +@@ -2687,6 +2691,11 @@ fuse_opendir_resume (fuse_state_t *state) + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "%"PRIu64": OPENDIR (%s) resolution failed", + state->finh->unique, uuid_utoa (state->resolve.gfid)); ++ ++ /* facilitate retry from VFS */ ++ if (state->resolve.op_errno == ENOENT) ++ state->resolve.op_errno = ESTALE; ++ + send_fuse_err (state->this, state->finh, + state->resolve.op_errno); + free_fuse_state (state); +-- +1.8.3.1 + diff --git a/SOURCES/0369-geo-rep-Fix-deadlock-during-worker-start.patch b/SOURCES/0369-geo-rep-Fix-deadlock-during-worker-start.patch new file mode 100644 index 0000000..b33b0ea --- /dev/null +++ b/SOURCES/0369-geo-rep-Fix-deadlock-during-worker-start.patch @@ -0,0 +1,100 @@ +From 62fe36178cab588b658b44808cc954a57a1fc452 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Fri, 10 Aug 2018 08:14:14 -0400 +Subject: [PATCH 369/385] geo-rep: Fix deadlock during worker start + +Analysis: +Monitor process spawns monitor threads (one per brick). +Each monitor thread, forks worker and agent processes. +Each monitor thread, while intializing, updates the +monitor status file. It is synchronized using flock. +The race is that, some thread can fork worker while +other thread opened the status file resulting in +holding the reference of fd in worker process. + +Cause: +flock gets unlocked either by specifically unlocking it +or by closing all duplicate fds referring to the file. +The code was relying on fd close, hence a reference +in worker/agent process by fork could cause the deadlock. + +Fix: +1. flock is unlocked specifically. +2. Also made sure to update status file in approriate places so that +the reference is not leaked to worker/agent process. + +With this fix, both the deadlock and possible fd +leaks is solved. 
+ +Upstream Patch : https://review.gluster.org/#/c/glusterfs/+/20704/ +>fixes: bz#1614799 +>Signed-off-by: Kotresh HR + +Change-Id: I0d1ce93072dab07d0dbcc7e779287368cd9f093d +BUG: 1623749 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/149760 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/gsyncdstatus.py | 1 + + geo-replication/syncdaemon/monitor.py | 17 ++++++++++++++--- + 2 files changed, 15 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py +index 909c669..67493ca 100644 +--- a/geo-replication/syncdaemon/gsyncdstatus.py ++++ b/geo-replication/syncdaemon/gsyncdstatus.py +@@ -99,6 +99,7 @@ class LockedOpen(object): + return f + + def __exit__(self, _exc_type, _exc_value, _traceback): ++ fcntl.flock(self.fileobj, fcntl.LOCK_UN) + self.fileobj.close() + + +diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py +index 9245572..3451fe4 100644 +--- a/geo-replication/syncdaemon/monitor.py ++++ b/geo-replication/syncdaemon/monitor.py +@@ -144,9 +144,6 @@ class Monitor(object): + "%s::%s" % (slave_host, + slave_vol)) + +- set_monitor_status(gconf.state_file, self.ST_STARTED) +- self.status[w[0]['dir']].set_worker_status(self.ST_INIT) +- + ret = 0 + + def nwait(p, o=0): +@@ -196,6 +193,7 @@ class Monitor(object): + # Spawn the worker and agent in lock to avoid fd leak + self.lock.acquire() + ++ self.status[w[0]['dir']].set_worker_status(self.ST_INIT) + logging.info(lf('starting gsyncd worker', + brick=w[0]['dir'], + slave_node=remote_host)) +@@ -375,6 +373,19 @@ class Monitor(object): + t = Thread(target=wmon, args=[wx]) + t.start() + ta.append(t) ++ ++ # monitor status was being updated in each monitor thread. It ++ # should not be done as it can cause deadlock for a worker start. ++ # set_monitor_status uses flock to synchronize multple instances ++ # updating the file. Since each monitor thread forks worker and ++ # agent, these processes can hold the reference to fd of status ++ # file causing deadlock to workers which starts later as flock ++ # will not be release until all references to same fd is closed. ++ # It will also cause fd leaks. ++ ++ self.lock.acquire() ++ set_monitor_status(gconf.get("state-file"), self.ST_STARTED) ++ self.lock.release() + for t in ta: + t.join() + +-- +1.8.3.1 + diff --git a/SOURCES/0370-libgfchangelog-Fix-changelog-history-API.patch b/SOURCES/0370-libgfchangelog-Fix-changelog-history-API.patch new file mode 100644 index 0000000..1ddb2f4 --- /dev/null +++ b/SOURCES/0370-libgfchangelog-Fix-changelog-history-API.patch @@ -0,0 +1,460 @@ +From f005377a54f01edc046aa668c8ab924a3ddf52bb Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 21 Aug 2018 06:09:44 -0400 +Subject: [PATCH 370/385] libgfchangelog: Fix changelog history API + +Problem: +If requested start time and end time doesn't fall into +first HTIME file, then history API fails even though +continuous changelogs are avaiable for the requested range +in other HTIME files. This is induced by changelog disable +and enable which creates fresh HTIME index file. + +Cause and Analysis: +Each HTIME index file represents the availability of +continuous changelogs. If changelog is disabled and enabled, +a new HTIME index file is created represents non availability +of continuous changelogs. 
So as long as the requested start +and end falls into single HTIME index file and not across, +history API should succeed. + +But History API checks for the changelogs only in first +HTIME index file and errors out if not available. + +Fix: +Check in all HTIME index files for availability of continuous +changelogs for requested change. + +Upstream Patch : https://review.gluster.org/#/c/glusterfs/+/21016/ + +>fixes: bz#1622549 +>Signed-off-by: Kotresh HR + +Change-Id: I80eeceb5afbd1b89f86a9dc4c320e161907d3559 +BUG: 1627639 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/149768 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/changelog/changelog-history.t | 86 +++++++++++++++ + tests/utils/changelog/changelog.h | 120 +++++++++++++++++++++ + tests/utils/changelog/get-history.c | 73 +++++++++++++ + .../changelog/lib/src/gf-history-changelog.c | 59 ++++++++-- + 4 files changed, 331 insertions(+), 7 deletions(-) + create mode 100644 tests/basic/changelog/changelog-history.t + create mode 100644 tests/utils/changelog/changelog.h + create mode 100644 tests/utils/changelog/get-history.c + +diff --git a/tests/basic/changelog/changelog-history.t b/tests/basic/changelog/changelog-history.t +new file mode 100644 +index 0000000..3ce4098 +--- /dev/null ++++ b/tests/basic/changelog/changelog-history.t +@@ -0,0 +1,86 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../env.rc ++ ++cleanup; ++ ++HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog ++build_tester $HISTORY_BIN_PATH/get-history.c -lgfchangelog ++ ++time_before_enable1=$(date '+%s') ++CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs" ++ROLLOVER_TIME=2 ++ ++TEST glusterd ++TEST pidof glusterd ++ ++sleep 3 ++time_before_enable2=$(date '+%s') ++ ++sleep 3 ++TEST $CLI volume create $V0 $H0:$B0/${V0}0 ++TEST $CLI volume set $V0 changelog.changelog on ++TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME ++TEST $CLI volume start $V0 ++ ++sleep 3 ++time_after_enable1=$(date '+%s') ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++touch $M0/file{1..10} ++ ++sleep 3 ++time_after_enable2=$(date '+%s') ++ ++let time_future=time_after_enable2+600 ++ ++#Fails as start falls before changelog enable ++EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_before_enable1 $time_before_enable2 ++ ++#Fails as start falls before changelog enable ++EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_before_enable2 $time_after_enable1 ++ ++#Passes as start and end falls in same htime file ++EXPECT "0" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_after_enable2 ++ ++#Passes, gives the changelogs till continuous changelogs are available ++# but returns 1 ++EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable2 $time_future ++ ++#Disable and enable changelog ++TEST $CLI volume set $V0 changelog.changelog off ++sleep 6 ++time_between_htime=$(date '+%s') ++sleep 6 ++TEST $CLI volume set $V0 changelog.changelog on ++ ++sleep 6 ++touch $M0/test{1..10} ++time_in_sec_htime1=$(date '+%s') ++ ++sleep 6 ++touch $M0/test1{1..10} ++time_in_sec_htime2=$(date '+%s') ++ ++sleep 3 ++TEST $CLI volume set $V0 changelog.changelog off ++sleep 3 ++time_after_disable=$(date '+%s') ++ ++#Passes, gives the changelogs till continuous changelogs are available ++# but returns 1 ++EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2 ++ ++#Fails as start falls between htime files ++EXPECT "-3" 
$HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1 ++ ++#Passes as start and end falls in same htime file ++EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2 ++ ++#Passes, gives the changelogs till continuous changelogs are available ++EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable ++ ++TEST rm $HISTORY_BIN_PATH/get-history ++ ++cleanup; +diff --git a/tests/utils/changelog/changelog.h b/tests/utils/changelog/changelog.h +new file mode 100644 +index 0000000..14094cf +--- /dev/null ++++ b/tests/utils/changelog/changelog.h +@@ -0,0 +1,120 @@ ++/* ++ Copyright (c) 2013 Red Hat, Inc. ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef _GF_CHANGELOG_H ++#define _GF_CHANGELOG_H ++ ++struct gf_brick_spec; ++ ++/** ++ * Max bit shiter for event selection ++ */ ++#define CHANGELOG_EV_SELECTION_RANGE 5 ++ ++#define CHANGELOG_OP_TYPE_JOURNAL (1<<0) ++#define CHANGELOG_OP_TYPE_OPEN (1<<1) ++#define CHANGELOG_OP_TYPE_CREATE (1<<2) ++#define CHANGELOG_OP_TYPE_RELEASE (1<<3) ++#define CHANGELOG_OP_TYPE_BR_RELEASE (1<<4) /* logical release (last close()), ++ sent by bitrot stub */ ++#define CHANGELOG_OP_TYPE_MAX (1< ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++/** ++ * get set of new changes every 10 seconds (just print the file names) ++ * ++ * Compile it using: ++ * gcc -o gethistory `pkg-config --cflags libgfchangelog` get-history.c \ ++ * `pkg-config --libs libgfchangelog` ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "changelog.h" ++ ++int ++main (int argc, char **argv) ++{ ++ int ret = 0; ++ unsigned long end_ts = 0; ++ int start = 0; ++ int end = 0; ++ ++ ret = gf_changelog_init (NULL); ++ if (ret) { ++ printf ("-1"); ++ fflush(stdout); ++ return -1; ++ } ++ ++ ret = gf_changelog_register ("/d/backends/patchy0", ++ "/tmp/scratch_v1", ++ "/var/log/glusterfs/changes.log", ++ 9, 5); ++ if (ret) { ++ printf ("-2"); ++ fflush(stdout); ++ return -1; ++ } ++ ++ start = atoi(argv[1]); ++ end = atoi(argv[2]); ++ ++ ret = gf_history_changelog ("/d/backends/patchy0/.glusterfs/changelogs", ++ start, end, 3, &end_ts); ++ if (ret < 0) { ++ printf ("-3"); ++ fflush(stdout); ++ return -1; ++ } else if (ret == 1) { ++ printf ("1"); ++ fflush(stdout); ++ return 0; ++ } ++ ++out: ++ printf ("0"); ++ fflush(stdout); ++ return 0; ++} +diff --git a/xlators/features/changelog/lib/src/gf-history-changelog.c b/xlators/features/changelog/lib/src/gf-history-changelog.c +index 4355396..c1a7070 100644 +--- a/xlators/features/changelog/lib/src/gf-history-changelog.c ++++ b/xlators/features/changelog/lib/src/gf-history-changelog.c +@@ -772,6 +772,15 @@ gf_changelog_extract_min_max (const char *dname, const char *htime_dir, + return ret; + } + ++/* gf_history_changelog returns actual_end and spawns threads to ++ * parse historical changelogs. The return values are as follows. 
++ * 0 : On success ++ * 1 : Successful, but partial historical changelogs available, ++ * end time falls into different htime file or future time ++ * -2 : Error, requested historical changelog not available, not ++ * even partial ++ * -1 : On any error ++ */ + int + gf_history_changelog (char* changelog_dir, unsigned long start, + unsigned long end, int n_parallel, +@@ -799,6 +808,7 @@ gf_history_changelog (char* changelog_dir, unsigned long start, + pthread_t consume_th = 0; + char htime_dir[PATH_MAX] = {0,}; + char buffer[PATH_MAX] = {0,}; ++ gf_boolean_t partial_history = _gf_false; + + pthread_attr_t attr; + +@@ -828,6 +838,11 @@ gf_history_changelog (char* changelog_dir, unsigned long start, + goto out; + } + ++ gf_smsg (this->name, GF_LOG_INFO, 0, ++ CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, ++ "Requesting historical changelogs", ++ "start=%lu", start, "end=%lu", end, NULL); ++ + /* basic sanity check */ + if (start > end || n_parallel <= 0) { + gf_msg (this->name, GF_LOG_ERROR, errno, +@@ -860,8 +875,14 @@ gf_history_changelog (char* changelog_dir, unsigned long start, + + entry = sys_readdir (dirp, scratch); + +- if (!entry || errno != 0) ++ if (!entry || errno != 0) { ++ gf_smsg (this->name, GF_LOG_ERROR, errno, ++ CHANGELOG_LIB_MSG_HIST_FAILED, ++ "Requested changelog range is not availbale", ++ "start=%lu", start, "end=%lu", end, NULL); ++ ret = -2; + break; ++ } + + ret = gf_changelog_extract_min_max (entry->d_name, htime_dir, + &fd, &total_changelog, +@@ -906,6 +927,23 @@ gf_history_changelog (char* changelog_dir, unsigned long start, + + end2 = (end <= max_ts) ? end : max_ts; + ++ /* Check if end falls out of same HTIME file. The end ++ * falling to a different htime file or changelog ++ * disable-enable is detected only after 20 seconds. ++ * This is required because, applications generally ++ * asks historical changelogs till current time and ++ * it is possible changelog is not rolled over yet. ++ * So, buffer time of default rollover time plus 5 ++ * seconds is subtracted. If the application requests ++ * the end time with in half a minute of changelog ++ * disable, it's not detected as changelog disable and ++ * it's application's responsibility to retry after ++ * 20 seconds before confirming it as partial history. ++ */ ++ if ((end - 20) > max_ts) { ++ partial_history = _gf_true; ++ } ++ + /** + * search @end2 in htime file returning it's index (@to) + */ +@@ -972,12 +1010,15 @@ gf_history_changelog (char* changelog_dir, unsigned long start, + goto out; + + } else {/* end of range check */ +- gf_msg (this->name, GF_LOG_ERROR, errno, +- CHANGELOG_LIB_MSG_HIST_FAILED, "Requested changelog " +- "range is not available. START - %lu CHLOG_MIN - %lu " +- "CHLOG_MAX - %lu", start, min_ts, max_ts); +- ret = -2; +- goto out; ++ gf_smsg (this->name, GF_LOG_ERROR, errno, ++ CHANGELOG_LIB_MSG_HIST_FAILED, ++ "Requested changelog range is not " ++ "available. 
Retrying next HTIME", ++ "start=%lu", start, ++ "end=%lu", end, ++ "chlog_min=%lu", min_ts, ++ "chlog_max=%lu", max_ts, ++ NULL); + } + } /* end of readdir() */ + +@@ -1000,5 +1041,9 @@ out: + hist_jnl->hist_done = 1; + *actual_end = ts2; + ++ if (partial_history) { ++ ret = 1; ++ } ++ + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0371-performance-write-behind-remove-the-request-from-wip.patch b/SOURCES/0371-performance-write-behind-remove-the-request-from-wip.patch new file mode 100644 index 0000000..5fa2be9 --- /dev/null +++ b/SOURCES/0371-performance-write-behind-remove-the-request-from-wip.patch @@ -0,0 +1,88 @@ +From c1d83132c4b2d49eb922fb7fe42952856aeff83d Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Sat, 8 Sep 2018 19:53:07 +0530 +Subject: [PATCH 371/385] performance/write-behind: remove the request from wip + queue in wb_fulfill_request + +The bug is very similar to bz 1379655 and the fix too very similar to +commit a8b2a981881221925bb5edfe7bb65b25ad855c04. + +Before this patch, a request is removed from wip queue only when ref +count of request hits 0. Though, wb_fulfill_request does an unref, +it need not be the last unref and hence the request may survive in +wip queue till the last unref. Let, + +T1: the time at which wb_fulfill_request is invoked +T2: the time at which last unref is done on request + +Let's consider a case of T2 > T1. In the time window between T1 and +T2, any other request (waiter) conflicting with request in liability +queue (blocker - basically a write which has been lied) is blocked +from winding. If T2 happens to be when wb_do_unwinds is invoked, no +further processing of request list happens and "waiter" would get +blocked forever. An example imaginary sequence of events is given +below: + +1. A write request w1 is picked up for winding in __wb_pick_winds + and w1 is moved to wip queue. Let's call this + invocation of wb_process_queue by wb_writev as PQ1. Note w1 is not + unwound. + +2. A dependent write (w2) hits write-behind and is unwound followed by + a flush (f1) request. Since the liability queue + of inode is not empty, w2 and f1 are not picked for unwinding. Let's call + the invocation of wb_process_queue by wb_flush as PQ2. Note that + invocation of wb_process_queue by w2 doesn't wind w2 instead + unwinds it after which we hit PQ2 + +3. PQ2 continues and picks w1 for fulfilling and invokes + wb_fulfill. As part of successful wb_fulfill_cbk, + wb_fulfill_request (w1) is invoked. But, w1 is not freed (and hence + not removed from wip queue) as w1 is not unwound _yet_ and a + ref remains (PQ1 has not invoked wb_do_unwinds _yet_). + +4. wb_fulfill_cbk (triggered by PQ2) invokes a wb_process_queue (let's + say PQ3). w2 is not picked up for winding in PQ3 as w1 is still in wip + queue. At this time, PQ2 and PQ3 are complete. + +5. PQ1 continues, unwinds w1 and does last unref on w1 and w1 is freed + (and removed from wip queue). Since PQ1 didn't invoke + wb_fulfill on any other write requests, there won't be any future + codepaths that would invoke wb_process_queue and w2 is stuck + forever. This will prevent f2 too and hence close syscall is hung + +With this fix, w1 is removed from liability queue in step 3 above and +PQ3 winds w2 in step 4 (as there are no requests conflicting with w2 +in liability queue during execution of PQ3). Once w2 is complete, f1 +is resumed. 
+ +>Change-Id: Ia972fad0858dc4abccdc1227cb4d880f85b3b89b +>Signed-off-by: Raghavendra G +>Fixes: bz#1626787 + +Change-Id: Ia972fad0858dc4abccdc1227cb4d880f85b3b89b +Signed-off-by: Raghavendra G +BUG: 1626780 +upstream patch: https://review.gluster.org/21123 +Reviewed-on: https://code.engineering.redhat.com/gerrit/149775 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/write-behind/src/write-behind.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c +index ca1cb63..478985a 100644 +--- a/xlators/performance/write-behind/src/write-behind.c ++++ b/xlators/performance/write-behind/src/write-behind.c +@@ -725,6 +725,7 @@ __wb_fulfill_request (wb_request_t *req) + */ + } + ++ list_del_init (&req->wip); + __wb_request_unref (req); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0372-Revert-posix-disable-block-and-character-files.patch b/SOURCES/0372-Revert-posix-disable-block-and-character-files.patch new file mode 100644 index 0000000..7dff229 --- /dev/null +++ b/SOURCES/0372-Revert-posix-disable-block-and-character-files.patch @@ -0,0 +1,48 @@ +From 9e95bd4e3b2e64880a3522520b05d23cf29b3d91 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Mon, 10 Sep 2018 09:28:28 +0530 +Subject: [PATCH 372/385] Revert "posix: disable block and character files" + +This reverts commit d542543faf4eca04737ddfe215e8988a700caf42. + +BUG: 1622649 +Change-Id: Ib51b798df25ff606e6bdef72aba679a1d97f98d1 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/149666 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix.c | 17 ----------------- + 1 file changed, 17 deletions(-) + +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 5088469..e0165f8 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -1453,23 +1453,6 @@ posix_mknod (call_frame_t *frame, xlator_t *this, + } + + +- if (((mode & S_IFMT) == S_IFBLK) || ((mode & S_IFMT) == S_IFCHR)) { +- /* Man page 'man 2 mknod': +- EPERM mode requested creation of something other than +- a regular file, FIFO (named pipe), or UNIX domain socket, +- and the caller is not privileged (Linux: does not have the +- CAP_MKNOD capability); also returned if the filesystem +- containing pathname does not support the type of node +- requested. +- */ +- op_ret = -1; +- op_errno = EPERM; +- gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_MKNOD_FAILED, +- "%s: mknod failed as Block and Character devices " +- "are not supported on GlusterFS", real_path); +- goto out; +- } +- + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; +-- +1.8.3.1 + diff --git a/SOURCES/0373-posix-disable-open-read-write-on-special-files.patch b/SOURCES/0373-posix-disable-open-read-write-on-special-files.patch new file mode 100644 index 0000000..f425fc4 --- /dev/null +++ b/SOURCES/0373-posix-disable-open-read-write-on-special-files.patch @@ -0,0 +1,95 @@ +From 3d81f70f181793c6b1fd6b53523158fd663b8c74 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Wed, 5 Sep 2018 19:03:08 +0530 +Subject: [PATCH 373/385] posix: disable open/read/write on special files + +In the file system, the responsibility w.r.to the block and char device +files is related to only support for 'creating' them (using mknod(2)). 
+ +Once the device files are created, the read/write syscalls for the specific +devices are handled by the device driver registered for the specific major +number, and depending on the minor number, it knows where to read from. +Hence, we are at risk of reading contents from devices which are handled +by the host kernel on server nodes. + +By disabling open/read/write on the device file, we would be safe with +the bypass one can achieve from client side (using gfapi) + +Upstream Fix +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/21069/ +> Change-Id: I48c776b0af1cbd2a5240862826d3d8918601e47f +> BUG: 1625648 + +BUG: 1622649 + +Change-Id: I1135e89270fac05ccfb8a3faa9fdffb58eb51b15 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/149667 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix.c | 34 ++++++++++++++++++++++++++++++++++ + 1 file changed, 34 insertions(+) + +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index e0165f8..efbf804 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -3336,6 +3336,17 @@ posix_open (call_frame_t *frame, xlator_t *this, + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + ++ if (loc->inode && ++ ((loc->inode->ia_type == IA_IFBLK) || ++ (loc->inode->ia_type == IA_IFCHR))) { ++ gf_msg (this->name, GF_LOG_ERROR, EINVAL, ++ P_MSG_INVALID_ARGUMENT, ++ "open received on a block/char file (%s)", ++ uuid_utoa (loc->inode->gfid)); ++ op_errno = EINVAL; ++ goto out; ++ } ++ + if (flags & O_CREAT) + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + +@@ -3428,6 +3439,17 @@ posix_readv (call_frame_t *frame, xlator_t *this, + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + ++ if (fd->inode && ++ ((fd->inode->ia_type == IA_IFBLK) || ++ (fd->inode->ia_type == IA_IFCHR))) { ++ gf_msg (this->name, GF_LOG_ERROR, EINVAL, ++ P_MSG_INVALID_ARGUMENT, ++ "readv received on a block/char file (%s)", ++ uuid_utoa (fd->inode->gfid)); ++ op_errno = EINVAL; ++ goto out; ++ } ++ + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, +@@ -3674,6 +3696,18 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + priv = this->private; + + VALIDATE_OR_GOTO (priv, out); ++ ++ if (fd->inode && ++ ((fd->inode->ia_type == IA_IFBLK) || ++ (fd->inode->ia_type == IA_IFCHR))) { ++ gf_msg (this->name, GF_LOG_ERROR, EINVAL, ++ P_MSG_INVALID_ARGUMENT, ++ "writev received on a block/char file (%s)", ++ uuid_utoa (fd->inode->gfid)); ++ op_errno = EINVAL; ++ goto out; ++ } ++ + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); +-- +1.8.3.1 + diff --git a/SOURCES/0374-socket-set-42-as-default-tpc-user-timeout.patch b/SOURCES/0374-socket-set-42-as-default-tpc-user-timeout.patch new file mode 100644 index 0000000..a78a4df --- /dev/null +++ b/SOURCES/0374-socket-set-42-as-default-tpc-user-timeout.patch @@ -0,0 +1,64 @@ +From 74aaa257df25b70711bf962642ab2f8b3d063634 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Thu, 13 Sep 2018 15:44:15 +0200 +Subject: [PATCH 374/385] socket: set 42 as default tpc-user-timeout + +> Upstream: https://review.gluster.org/21170 +> BUG: 1628605 +> Change-Id: Ib8ad7c4ac6aac725b01a78f8c3d10cf4063d2ee6 + +The 'tcp-user-timeout' option is define in the 'socket' module, but it's +configured in 
'protocol/server' and 'protocol/client', which are the +parents of the 'socket' module. + +However, current options management logic only takes into consideration +default values specified in the 'socket' module itself, ignoring values +defined in the owner xlator. + +This patch simply sets the default value of tcp-user-timeout in the +'socket' module so that server and client use the expected value. + +Change-Id: Ib8ad7c4ac6aac725b01a78f8c3d10cf4063d2ee6 +BUG: 1623874 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/150699 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index b98efdc..243d49c 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -3854,7 +3854,7 @@ reconfigure (rpc_transport_t *this, dict_t *options) + int ret = 0; + uint32_t backlog = 0; + uint64_t windowsize = 0; +- uint32_t timeout = 0; ++ uint32_t timeout = 42; + int keepaliveidle = GF_KEEPALIVE_TIME; + int keepaliveintvl = GF_KEEPALIVE_INTERVAL; + int keepalivecnt = GF_KEEPALIVE_COUNT; +@@ -4083,7 +4083,7 @@ socket_init (rpc_transport_t *this) + gf_boolean_t tmp_bool = 0; + uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE; + char *optstr = NULL; +- uint32_t timeout = 0; ++ uint32_t timeout = 42; + int keepaliveidle = GF_KEEPALIVE_TIME; + int keepaliveintvl = GF_KEEPALIVE_INTERVAL; + int keepalivecnt = GF_KEEPALIVE_COUNT; +@@ -4623,7 +4623,7 @@ struct volume_options options[] = { + }, + { .key = {"transport.tcp-user-timeout"}, + .type = GF_OPTION_TYPE_INT, +- .default_value = "0" ++ .default_value = "42" + }, + { .key = {"transport.socket.nodelay"}, + .type = GF_OPTION_TYPE_BOOL, +-- +1.8.3.1 + diff --git a/SOURCES/0375-extras-Add-new-options-to-group-virt.patch b/SOURCES/0375-extras-Add-new-options-to-group-virt.patch new file mode 100644 index 0000000..2d4348c --- /dev/null +++ b/SOURCES/0375-extras-Add-new-options-to-group-virt.patch @@ -0,0 +1,40 @@ +From b83b8efcf0673233bd28cddac4d62a639784a5c0 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Wed, 19 Sep 2018 15:49:27 +0530 +Subject: [PATCH 375/385] extras: Add new options to group "virt" + +> Upstream: https://review.gluster.org/21222 +> BUG: 1630798 +> Change-Id: I231db309de0e37c79cd44f5666da4cd776fefa04 + +In some of the recent performance tests on gluster-as-vm-image-store +use-case, it has been observed that sometimes the lone fuse thread can +hit near-100% CPU utilization and become a performance bottleneck. +Enabling client-io-threads (in addition to bumping up epoll threads on +server and client side) has shown to be helpful in getting around this +bottleneck and pushing more IOPs. 
+ +Change-Id: I231db309de0e37c79cd44f5666da4cd776fefa04 +BUG: 1619627 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/150696 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-virt.example | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/extras/group-virt.example b/extras/group-virt.example +index 7e34b72..c2ce89d 100644 +--- a/extras/group-virt.example ++++ b/extras/group-virt.example +@@ -13,3 +13,6 @@ cluster.shd-wait-qlength=10000 + features.shard=on + user.cifs=off + cluster.choose-local=off ++client.event-threads=4 ++server.event-threads=4 ++performance.client-io-threads=on +-- +1.8.3.1 + diff --git a/SOURCES/0376-rchecksum-fips-Replace-MD5-usage-to-enable-fips-supp.patch b/SOURCES/0376-rchecksum-fips-Replace-MD5-usage-to-enable-fips-supp.patch new file mode 100644 index 0000000..7dfc26d --- /dev/null +++ b/SOURCES/0376-rchecksum-fips-Replace-MD5-usage-to-enable-fips-supp.patch @@ -0,0 +1,193 @@ +From f0e9776dd915c70bd9acb4e9624e8e2fd91ae7b7 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 19 Dec 2017 07:21:07 -0500 +Subject: [PATCH 376/385] rchecksum/fips: Replace MD5 usage to enable fips + support + +rchecksum uses MD5 which is not fips compliant. Hence +using sha256 for the same. + +Backport of: + > Patch: https://review.gluster.org/19052 + > Updates: #230 + > Change-Id: I7fad016fcc2a9900395d0da919cf5ba996ec5278 + > Signed-off-by: Kotresh HR + +BUG: 1459709 +Change-Id: I7fad016fcc2a9900395d0da919cf5ba996ec5278 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/149771 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/checksum.c | 8 +++++--- + libglusterfs/src/default-args.c | 2 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 2 +- + xlators/cluster/afr/src/afr-self-heal-data.c | 4 ++-- + xlators/cluster/afr/src/afr.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +- + xlators/protocol/server/src/server-common.c | 2 +- + xlators/storage/bd/src/bd.c | 4 +--- + xlators/storage/posix/src/posix.c | 3 +-- + 9 files changed, 14 insertions(+), 15 deletions(-) + +diff --git a/libglusterfs/src/checksum.c b/libglusterfs/src/checksum.c +index 5fac133..a7f9877 100644 +--- a/libglusterfs/src/checksum.c ++++ b/libglusterfs/src/checksum.c +@@ -8,9 +8,10 @@ + cases as published by the Free Software Foundation. + */ + +-#include ++#include + #include + #include ++#include + + /* + * The "weak" checksum required for the rsync algorithm. +@@ -30,7 +31,8 @@ gf_rsync_weak_checksum (unsigned char *buf, size_t len) + * The "strong" checksum required for the rsync algorithm. 
+ */ + void +-gf_rsync_strong_checksum (unsigned char *data, size_t len, unsigned char *md5) ++gf_rsync_strong_checksum (unsigned char *data, size_t len, ++ unsigned char *sha256_md) + { +- MD5 (data, len, md5); ++ SHA256((const unsigned char *)data, len, sha256_md); + } +diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c +index f40de2d..3ccf52a 100644 +--- a/libglusterfs/src/default-args.c ++++ b/libglusterfs/src/default-args.c +@@ -1140,7 +1140,7 @@ args_rchecksum_cbk_store (default_args_cbk_t *args, + args->weak_checksum = + weak_checksum; + args->strong_checksum = +- memdup (strong_checksum, MD5_DIGEST_LENGTH); ++ memdup (strong_checksum, SHA256_DIGEST_LENGTH); + } + if (xdata) + args->xdata = dict_ref (xdata); +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 50989d6..2989b9e 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -665,7 +665,7 @@ afr_reply_copy (struct afr_reply *dst, struct afr_reply *src) + if (dst->xdata) + dict_unref (dst->xdata); + dst->xdata = xdata; +- memcpy (dst->checksum, src->checksum, MD5_DIGEST_LENGTH); ++ memcpy (dst->checksum, src->checksum, SHA256_DIGEST_LENGTH); + } + + void +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index 3ef7376..dd44deb 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -42,7 +42,7 @@ __checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + replies[i].buf_has_zeroes = dict_get_str_boolean (xdata, + "buf-has-zeroes", _gf_false); + if (strong) +- memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH); ++ memcpy (local->replies[i].checksum, strong, SHA256_DIGEST_LENGTH); + + syncbarrier_wake (&local->barrier); + return 0; +@@ -92,7 +92,7 @@ __afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd, + if (local->replies[i].valid) { + if (memcmp (local->replies[source].checksum, + local->replies[i].checksum, +- MD5_DIGEST_LENGTH)) { ++ SHA256_DIGEST_LENGTH)) { + checksum_match = _gf_false; + break; + } +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 35928a9..7cb6f00 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -271,7 +271,7 @@ struct afr_reply { + struct iatt preparent2; + struct iatt postparent2; + /* For rchecksum */ +- uint8_t checksum[MD5_DIGEST_LENGTH]; ++ uint8_t checksum[SHA256_DIGEST_LENGTH]; + gf_boolean_t buf_has_zeroes; + /* For lookup */ + int8_t need_heal; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 3db3a15..2a176be 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1873,7 +1873,7 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, + char sock_filepath[PATH_MAX] = {0,}; + + expected_file_len = strlen (GLUSTERD_SOCK_DIR) + strlen ("/") + +- MD5_DIGEST_LENGTH*2 + strlen (".socket") + 1; ++ SHA256_DIGEST_LENGTH*2 + strlen (".socket") + 1; + GF_ASSERT (len >= expected_file_len); + this = THIS; + GF_ASSERT (this); +diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c +index ce33089..9c38706 100644 +--- a/xlators/protocol/server/src/server-common.c ++++ b/xlators/protocol/server/src/server-common.c +@@ -298,7 +298,7 @@ server_post_rchecksum 
(gfs3_rchecksum_rsp *rsp, uint32_t weak_checksum, + rsp->weak_checksum = weak_checksum; + + rsp->strong_checksum.strong_checksum_val = (char *)strong_checksum; +- rsp->strong_checksum.strong_checksum_len = MD5_DIGEST_LENGTH; ++ rsp->strong_checksum.strong_checksum_len = SHA256_DIGEST_LENGTH; + + } + +diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c +index af3ac84..64b34d9 100644 +--- a/xlators/storage/bd/src/bd.c ++++ b/xlators/storage/bd/src/bd.c +@@ -2148,7 +2148,7 @@ bd_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + char *buf = NULL; + int32_t weak_checksum = 0; + bd_fd_t *bd_fd = NULL; +- unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; ++ unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0}; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); +@@ -2162,8 +2162,6 @@ bd_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + return 0; + } + +- memset (strong_checksum, 0, MD5_DIGEST_LENGTH); +- + alloc_buf = page_aligned_alloc (len, &buf); + if (!alloc_buf) { + op_errno = ENOMEM; +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index efbf804..4e13465 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -7000,7 +7000,7 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this, + ssize_t bytes_read = 0; + int32_t weak_checksum = 0; + int32_t zerofillcheck = 0; +- unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; ++ unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0}; + struct posix_private *priv = NULL; + dict_t *rsp_xdata = NULL; + gf_boolean_t buf_has_zeroes = _gf_false; +@@ -7010,7 +7010,6 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this, + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; +- memset (strong_checksum, 0, MD5_DIGEST_LENGTH); + + alloc_buf = _page_aligned_alloc (len, &buf); + if (!alloc_buf) { +-- +1.8.3.1 + diff --git a/SOURCES/0377-fips-geo-rep-Replace-MD5-with-SHA256.patch b/SOURCES/0377-fips-geo-rep-Replace-MD5-with-SHA256.patch new file mode 100644 index 0000000..51c9750 --- /dev/null +++ b/SOURCES/0377-fips-geo-rep-Replace-MD5-with-SHA256.patch @@ -0,0 +1,122 @@ +From a75391899459f6123721631613c5d044fc4795af Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Wed, 20 Dec 2017 15:24:11 +0530 +Subject: [PATCH 377/385] fips/geo-rep: Replace MD5 with SHA256 + +MD5 is not fips compliant. Hence replacing +with SHA256. + +NOTE: +The hash is used to form the ctl_path for the ssh connection. +The length of ctl_path for ssh connection should not be > 108. +ssh fails with ctl_path too long if it is so. But when rsync +is piped to ssh, it is not taking > 90. rsync is failing with +error number 12. Hence using first 32 bytes of hash. Hash +collision doesn't matter as only one sock file is created +per directory. 
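
A rough shell equivalent of the hashing and truncation described in the note above (the host and resource URL values are placeholders):

    printf 'SLAVE_HOST=host1\nSLAVE_RESOURCE_URL=ssh://host1::slavevol' | sha256sum | cut -c1-32

A full sha256 hexdigest is 64 characters; keeping only the first 32 keeps the generated .mft/.sock control-path names short enough for the rsync-over-ssh limit mentioned in the note.
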
+ +Backport of: + > Patch: https://review.gluster.org/19061 + > Updates: #230 + > Change-Id: I58aeb32a80b5422f6ac0188cf33fbecccbf08ae7 + > Signed-off-by: Kotresh HR + +BUG: 1459709 +Change-Id: I58aeb32a80b5422f6ac0188cf33fbecccbf08ae7 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/149772 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/master.py | 4 ++-- + geo-replication/syncdaemon/syncdutils.py | 26 ++++++++++++++++---------- + 2 files changed, 18 insertions(+), 12 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 6de2c77..cd135df 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -23,7 +23,7 @@ from threading import Condition, Lock + from datetime import datetime + from gconf import gconf + from syncdutils import Thread, GsyncdError, boolify, escape_space_newline +-from syncdutils import unescape_space_newline, gauxpfx, md5hex, selfkill ++from syncdutils import unescape_space_newline, gauxpfx, escape1, selfkill + from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid + from syncdutils import NoStimeAvailable, PartialHistoryAvailable + +@@ -771,7 +771,7 @@ class GMasterChangelogMixin(GMasterCommon): + selfkill() + + def setup_working_dir(self): +- workdir = os.path.join(gconf.working_dir, md5hex(gconf.local_path)) ++ workdir = os.path.join(gconf.working_dir, escape1(gconf.local_path)) + logging.debug('changelog working dir %s' % workdir) + return workdir + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index d798356..3218192 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -60,11 +60,7 @@ try: + except ImportError: + import urllib + +-try: +- from hashlib import md5 as md5 +-except ImportError: +- # py 2.4 +- from md5 import new as md5 ++from hashlib import sha256 as sha256 + + # auxiliary gfid based access prefix + _CL_AUX_GFID_PFX = ".gfid/" +@@ -97,6 +93,8 @@ def escape(s): + to turn whatever data to creatable representation""" + return urllib.quote_plus(s) + ++def escape1(s): ++ return s.replace("/", "-").strip("-") + + def unescape(s): + """inverse of .escape""" +@@ -175,13 +173,21 @@ def setup_ssh_ctl(ctld, remote_addr, resource_url): + gconf.ssh_ctl_dir = ctld + content = "SLAVE_HOST=%s\nSLAVE_RESOURCE_URL=%s" % (remote_addr, + resource_url) +- content_md5 = md5hex(content) ++ content_sha256 = sha256hex(content) ++ """ ++ The length of ctl_path for ssh connection should not be > 108. ++ ssh fails with ctl_path too long if it is so. But when rsync ++ is piped to ssh, it is not taking > 90. Hence using first 32 ++ bytes of hash. Hash collision doesn't matter as only one sock ++ file is created per directory. 
++ """ ++ content_sha256 = content_sha256[:32] + fname = os.path.join(gconf.ssh_ctl_dir, +- "%s.mft" % content_md5) ++ "%s.mft" % content_sha256) + + create_manifest(fname, content) + ssh_ctl_path = os.path.join(gconf.ssh_ctl_dir, +- "%s.sock" % content_md5) ++ "%s.sock" % content_sha256) + gconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path] + + +@@ -536,8 +542,8 @@ def gauxpfx(): + return _CL_AUX_GFID_PFX + + +-def md5hex(s): +- return md5(s).hexdigest() ++def sha256hex(s): ++ return sha256(s).hexdigest() + + + def selfkill(sig=SIGTERM): +-- +1.8.3.1 + diff --git a/SOURCES/0378-posix-afr-handle-backward-compatibility-for-rchecksu.patch b/SOURCES/0378-posix-afr-handle-backward-compatibility-for-rchecksu.patch new file mode 100644 index 0000000..85229ce --- /dev/null +++ b/SOURCES/0378-posix-afr-handle-backward-compatibility-for-rchecksu.patch @@ -0,0 +1,302 @@ +From 8c3c6a779b9d1b20c7b413caa25381ab07a08a87 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Wed, 19 Sep 2018 15:20:27 +0530 +Subject: [PATCH 378/385] posix/afr: handle backward compatibility for + rchecksum fop + +Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/19538/ + +Added a volume option 'fips-mode-rchecksum' tied to op version 4. +If not set, rchecksum fop will use MD5 instead of SHA256. + +Change-Id: I720777c0ab36985774e6bb877689fe45b64eb777 +BUG: 1459709 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/150479 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/checksum.c | 7 +++++ + libglusterfs/src/checksum.h | 2 ++ + libglusterfs/src/globals.h | 2 ++ + xlators/cluster/afr/src/afr-self-heal-common.c | 8 ++++- + xlators/cluster/afr/src/afr-self-heal-data.c | 29 +++++++++++++----- + xlators/cluster/afr/src/afr.h | 1 + + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 ++++ + xlators/protocol/server/src/server-common.c | 2 +- + xlators/storage/posix/src/posix.c | 39 +++++++++++++++++++++++-- + xlators/storage/posix/src/posix.h | 2 ++ + 10 files changed, 85 insertions(+), 13 deletions(-) + +diff --git a/libglusterfs/src/checksum.c b/libglusterfs/src/checksum.c +index a7f9877..561ca04 100644 +--- a/libglusterfs/src/checksum.c ++++ b/libglusterfs/src/checksum.c +@@ -8,6 +8,7 @@ + cases as published by the Free Software Foundation. 
+ */ + ++#include + #include + #include + #include +@@ -36,3 +37,9 @@ gf_rsync_strong_checksum (unsigned char *data, size_t len, + { + SHA256((const unsigned char *)data, len, sha256_md); + } ++ ++void ++gf_rsync_md5_checksum (unsigned char *data, size_t len, unsigned char *md5) ++{ ++ MD5 (data, len, md5); ++} +diff --git a/libglusterfs/src/checksum.h b/libglusterfs/src/checksum.h +index bf7eeed..677a59a 100644 +--- a/libglusterfs/src/checksum.h ++++ b/libglusterfs/src/checksum.h +@@ -17,4 +17,6 @@ gf_rsync_weak_checksum (unsigned char *buf, size_t len); + void + gf_rsync_strong_checksum (unsigned char *buf, size_t len, unsigned char *sum); + ++void ++gf_rsync_md5_checksum (unsigned char *data, size_t len, unsigned char *md5); + #endif /* __CHECKSUM_H__ */ +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 39d9716..e810ea7 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -109,6 +109,8 @@ + + #define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */ + ++#define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ ++ + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 2989b9e..7e6a691 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -665,7 +665,13 @@ afr_reply_copy (struct afr_reply *dst, struct afr_reply *src) + if (dst->xdata) + dict_unref (dst->xdata); + dst->xdata = xdata; +- memcpy (dst->checksum, src->checksum, SHA256_DIGEST_LENGTH); ++ if (xdata && dict_get_str_boolean (xdata, "fips-mode-rchecksum", ++ _gf_false) == _gf_true) { ++ memcpy (dst->checksum, src->checksum, SHA256_DIGEST_LENGTH); ++ } else { ++ memcpy (dst->checksum, src->checksum, MD5_DIGEST_LENGTH); ++ } ++ dst->fips_mode_rchecksum = src->fips_mode_rchecksum; + } + + void +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index dd44deb..556a8f9 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -38,11 +38,21 @@ __checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + replies[i].valid = 1; + replies[i].op_ret = op_ret; + replies[i].op_errno = op_errno; +- if (xdata) ++ if (xdata) { + replies[i].buf_has_zeroes = dict_get_str_boolean (xdata, + "buf-has-zeroes", _gf_false); +- if (strong) +- memcpy (local->replies[i].checksum, strong, SHA256_DIGEST_LENGTH); ++ replies[i].fips_mode_rchecksum = dict_get_str_boolean (xdata, ++ "fips-mode-rchecksum", _gf_false); ++ } ++ if (strong) { ++ if (replies[i].fips_mode_rchecksum) { ++ memcpy (local->replies[i].checksum, strong, ++ SHA256_DIGEST_LENGTH); ++ } else { ++ memcpy (local->replies[i].checksum, strong, ++ MD5_DIGEST_LENGTH); ++ } ++ } + + syncbarrier_wake (&local->barrier); + return 0; +@@ -58,11 +68,13 @@ __afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd, + afr_local_t *local = NULL; + unsigned char *wind_subvols = NULL; + gf_boolean_t checksum_match = _gf_true; ++ struct afr_reply *replies = NULL; + dict_t *xdata = NULL; + int i = 0; + + priv = this->private; + local = frame->local; ++ replies = local->replies; + + xdata = dict_new(); + if (!xdata) +@@ -83,16 +95,17 @@ __afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd, + if (xdata) + dict_unref (xdata); + +- if 
(!local->replies[source].valid || local->replies[source].op_ret != 0) ++ if (!replies[source].valid || replies[source].op_ret != 0) + return _gf_false; + + for (i = 0; i < priv->child_count; i++) { + if (i == source) + continue; +- if (local->replies[i].valid) { +- if (memcmp (local->replies[source].checksum, +- local->replies[i].checksum, +- SHA256_DIGEST_LENGTH)) { ++ if (replies[i].valid) { ++ if (memcmp (replies[source].checksum, ++ replies[i].checksum, ++ replies[source].fips_mode_rchecksum ? ++ SHA256_DIGEST_LENGTH : MD5_DIGEST_LENGTH)) { + checksum_match = _gf_false; + break; + } +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 7cb6f00..76ad292 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -273,6 +273,7 @@ struct afr_reply { + /* For rchecksum */ + uint8_t checksum[SHA256_DIGEST_LENGTH]; + gf_boolean_t buf_has_zeroes; ++ gf_boolean_t fips_mode_rchecksum; + /* For lookup */ + int8_t need_heal; + }; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 474587a..0ff512d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2868,6 +2868,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_3_13_0, + }, ++ { .option = "fips-mode-rchecksum", ++ .key = "storage.fips-mode-rchecksum", ++ .type = NO_DOC, ++ .voltype = "storage/posix", ++ .op_version = GD_OP_VERSION_4_0_0, ++ }, + { .key = "storage.bd-aio", + .voltype = "storage/bd", + .op_version = GD_OP_VERSION_RHS_3_0 +diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c +index 9c38706..ce33089 100644 +--- a/xlators/protocol/server/src/server-common.c ++++ b/xlators/protocol/server/src/server-common.c +@@ -298,7 +298,7 @@ server_post_rchecksum (gfs3_rchecksum_rsp *rsp, uint32_t weak_checksum, + rsp->weak_checksum = weak_checksum; + + rsp->strong_checksum.strong_checksum_val = (char *)strong_checksum; +- rsp->strong_checksum.strong_checksum_len = SHA256_DIGEST_LENGTH; ++ rsp->strong_checksum.strong_checksum_len = MD5_DIGEST_LENGTH; + + } + +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 4e13465..1d3f1ee 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -7000,7 +7000,9 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this, + ssize_t bytes_read = 0; + int32_t weak_checksum = 0; + int32_t zerofillcheck = 0; ++ unsigned char md5_checksum[MD5_DIGEST_LENGTH] = {0}; + unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0}; ++ unsigned char *checksum = NULL; + struct posix_private *priv = NULL; + dict_t *rsp_xdata = NULL; + gf_boolean_t buf_has_zeroes = _gf_false; +@@ -7069,13 +7071,31 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this, + } + } + weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) ret); +- gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) bytes_read, +- (unsigned char *) strong_checksum); + ++ if (priv->fips_mode_rchecksum) { ++ ret = dict_set_int32 (rsp_xdata, "fips-mode-rchecksum", 1); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, -ret, ++ P_MSG_DICT_SET_FAILED, "%s: Failed to set " ++ "dictionary value for key: %s", ++ uuid_utoa (fd->inode->gfid), ++ "fips-mode-rchecksum"); ++ goto out; ++ } ++ checksum = strong_checksum; ++ gf_rsync_strong_checksum ((unsigned char *)buf, ++ (size_t) 
bytes_read, ++ (unsigned char *)checksum); ++ } else { ++ checksum = md5_checksum; ++ gf_rsync_md5_checksum ((unsigned char *)buf, ++ (size_t) bytes_read, ++ (unsigned char *)checksum); ++ } + op_ret = 0; + out: + STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, +- weak_checksum, strong_checksum, rsp_xdata); ++ weak_checksum, checksum, rsp_xdata); + if (rsp_xdata) + dict_unref (rsp_xdata); + GF_FREE (alloc_buf); +@@ -7295,6 +7315,9 @@ reconfigure (xlator_t *this, dict_t *options) + GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, + options, int32, out); + ++ GF_OPTION_RECONF ("fips-mode-rchecksum", priv->fips_mode_rchecksum, ++ options, bool, out); ++ + ret = 0; + out: + return ret; +@@ -7953,6 +7976,9 @@ init (xlator_t *this) + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); ++ ++ GF_OPTION_INIT ("fips-mode-rchecksum", _private->fips_mode_rchecksum, ++ bool, out); + out: + return ret; + } +@@ -8182,5 +8208,12 @@ struct volume_options options[] = { + " Useful for displaying the proper usable size through statvfs() " + "call (df command)", + }, ++ { ++ .key = {"fips-mode-rchecksum"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .default_value = "off", ++ .description = "If enabled, posix_rchecksum uses the FIPS compliant" ++ "SHA256 checksum. MD5 otherwise." ++ }, + { .key = {NULL} } + }; +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index eaf4d0d..bda4172 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -227,6 +227,8 @@ struct posix_private { + /* Option to handle the cases of multiple bricks exported from + same backend. Very much usable in brick-splitting feature. */ + int32_t shared_brick_count; ++ ++ gf_boolean_t fips_mode_rchecksum; + }; + + typedef struct { +-- +1.8.3.1 + diff --git a/SOURCES/0379-glusterd-change-op-version-of-fips-mode-rchecksum.patch b/SOURCES/0379-glusterd-change-op-version-of-fips-mode-rchecksum.patch new file mode 100644 index 0000000..de4efa7 --- /dev/null +++ b/SOURCES/0379-glusterd-change-op-version-of-fips-mode-rchecksum.patch @@ -0,0 +1,54 @@ +From 715d2215da855089245ae7b8c3af719e488a2908 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 20 Sep 2018 22:01:05 +0530 +Subject: [PATCH 379/385] glusterd: change op-version of fips-mode-rchecksum + +..to GD_OP_VERSION_3_13_3 since GD_OP_VERSION_4_0_0 is not present in +rhgs-3.4.1 + +Label: DOWNSTREAM ONLY + +Change-Id: I759272748177d174b15123faffc2305f7a5ec58f +BUG: 1459709 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/150714 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/globals.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index e810ea7..213f3ce 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -109,10 +109,10 @@ + + #define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */ + +-#define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ ++#define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/ + + #include "xlator.h" + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c 
b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 0ff512d..5a697cf 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2872,7 +2872,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = "storage.fips-mode-rchecksum", + .type = NO_DOC, + .voltype = "storage/posix", +- .op_version = GD_OP_VERSION_4_0_0, ++ .op_version = GD_OP_VERSION_3_13_3, + }, + { .key = "storage.bd-aio", + .voltype = "storage/bd", +-- +1.8.3.1 + diff --git a/SOURCES/0380-cluster-afr-Batch-writes-in-same-lock-even-when-mult.patch b/SOURCES/0380-cluster-afr-Batch-writes-in-same-lock-even-when-mult.patch new file mode 100644 index 0000000..41a9838 --- /dev/null +++ b/SOURCES/0380-cluster-afr-Batch-writes-in-same-lock-even-when-mult.patch @@ -0,0 +1,73 @@ +From 8f270fceed4c03ab4405ae65e7e9ce7fce9e008c Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Thu, 6 Sep 2018 15:09:42 +0530 +Subject: [PATCH 380/385] cluster/afr: Batch writes in same lock even when + multiple fds are open + +Problem: +When eager-lock is disabled because of multiple-fds opened and app +writes come on conflicting regions, the number of locks grows very +fast leading to all the CPU being spent just in locking and unlocking +by traversing huge queues in locks xlator for granting locks. + +Fix: +Reduce the number of locks in transit by bundling the writes in the +same lock and disable delayed piggy-pack when we learn that multiple +fds are open on the file. This will reduce the size of queues in the +locks xlator. This also reduces the number of network calls like +inodelk/fxattrop. + +Please note that this problem can still happen if eager-lock is +disabled as the writes will not be bundled in the same lock. + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/21107 +BUG: 1630688 +Change-Id: I8fd1cf229aed54ce5abd4e6226351a039924dd91 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/150700 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-transaction.c | 11 ++--------- + 1 file changed, 2 insertions(+), 9 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 3f55070..85b00a8 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -2241,9 +2241,6 @@ __need_previous_lock_unlocked (afr_local_t *local) + { + afr_lock_t *lock = NULL; + +- if (!local->transaction.eager_lock_on) +- return _gf_true; +- + lock = &local->inode_ctx->lock[local->transaction.type]; + if (!lock->acquired) + return _gf_false; +@@ -2260,10 +2257,8 @@ __afr_eager_lock_handle (afr_local_t *local, gf_boolean_t *take_lock, + afr_local_t *owner_local = NULL; + xlator_t *this = local->transaction.frame->this; + +- if (local->fd && !afr_are_multiple_fds_opened (local, this)) { +- local->transaction.eager_lock_on = _gf_true; +- afr_set_lk_owner (local->transaction.frame, this, local->inode); +- } ++ local->transaction.eager_lock_on = _gf_true; ++ afr_set_lk_owner (local->transaction.frame, this, local->inode); + + lock = &local->inode_ctx->lock[local->transaction.type]; + if (__need_previous_lock_unlocked (local)) { +@@ -2282,8 +2277,6 @@ __afr_eager_lock_handle (afr_local_t *local, gf_boolean_t *take_lock, + lock->delay_timer = NULL; + } + } +- if (!local->transaction.eager_lock_on) +- goto out; + } + + if (lock->release) { +-- +1.8.3.1 + 
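
The storage.fips-mode-rchecksum option introduced by patches 0378 and 0379 above stays off by default; a hedged usage sketch, assuming a volume named "myvol" on a cluster whose op-version is at least 31303:

    # gluster volume set myvol storage.fips-mode-rchecksum on

While the option is off, posix_rchecksum keeps answering with MD5-based strong checksums, which preserves compatibility with peers that have not yet been upgraded.
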
diff --git a/SOURCES/0381-cluster-afr-Make-data-eager-lock-decision-based-on-n.patch b/SOURCES/0381-cluster-afr-Make-data-eager-lock-decision-based-on-n.patch new file mode 100644 index 0000000..1145162 --- /dev/null +++ b/SOURCES/0381-cluster-afr-Make-data-eager-lock-decision-based-on-n.patch @@ -0,0 +1,204 @@ +From edc4297530eeb4107477a607042edeb2ce2ccca8 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Tue, 18 Sep 2018 12:15:57 +0530 +Subject: [PATCH 381/385] cluster/afr: Make data eager-lock decision based on + number of locks + +For both Virt and block workloads the file is opened multiple times +leading to dynamically setting eager-lock to off for the workload. +Instead of depending on the number-of-open-fds, if we change the +logic to depend on number of inodelks, then it will give better +performance than the earlier logic. When there is an eager-lock +and number of inodelks is more than 1 we know that there is a +conflicting lock, so depend on that information to decide whether +to keep the current transaction go through delayed-post-op or not. + +Locks xlator doesn't have implementation to query number of locks in +fxattrop in releases older than 3.10 so to keep things backward +compatible in 3.12, data transactions will use new logic where as +fxattrop transactions will use old logic. I am planning to send one +more patch which makes metadata domain locks also depend on +inodelk-count + +Profile info for a dd of 500MB to a file with another fd opened +on the file using exec 250>filename + +Without this patch: + 0.14 67.41 us 16.72 us 3870.82 us 892 FINODELK + 0.59 279.87 us 95.71 us 2085.89 us 898 FXATTROP + 3.46 366.43 us 81.75 us 6952.79 us 4000 WRITE +95.79 148733.99 us 50568.12 us 919127.86 us 273 FSYNC + +With this patch: + 0.00 51.01 us 38.07 us 80.16 us 4 FINODELK + 0.00 235.43 us 235.43 us 235.43 us 1 TRUNCATE + 0.00 125.07 us 56.80 us 193.33 us 2 GETXATTR + 0.00 135.86 us 62.13 us 209.59 us 2 INODELK + 0.00 197.88 us 155.39 us 253.90 us 4 FXATTROP + 0.00 450.59 us 394.28 us 506.89 us 2 XATTROP + 0.00 56.96 us 19.06 us 406.59 us 23 FLUSH +37.81 273648.93 us 48.43 us 6017657.05 us 44 LOOKUP +62.18 4951.86 us 93.80 us 1143154.75 us 3999 WRITE + +postgresql benchmark performance changed from ~1130 TPS to ~2300TPS +randio fio job inside Ovirt based VM went from ~600IOPs to ~2000IOPS + +Upstream-Patch: https://review.gluster.org/c/glusterfs/+/21210 +BUG: 1630688 +Change-Id: If7f7388d2f08cf7f17ca517a4ea222560661dc36 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/150701 +Tested-by: RHGS Build Bot +Reviewed-by: Karthik Subrahmanya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-inode-write.c | 26 ++++++++++++++++++++++++-- + xlators/cluster/afr/src/afr-transaction.c | 27 +++++++++++++++++++-------- + xlators/cluster/afr/src/afr.h | 8 ++++++++ + 3 files changed, 51 insertions(+), 10 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c +index 9e6ba35..8b1dcfd 100644 +--- a/xlators/cluster/afr/src/afr-inode-write.c ++++ b/xlators/cluster/afr/src/afr-inode-write.c +@@ -300,6 +300,7 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index, + afr_local_t *local = frame->local; + uint32_t open_fd_count = 0; + uint32_t write_is_append = 0; ++ int32_t num_inodelks = 0; + + LOCK (&frame->lock); + { +@@ -318,10 +319,19 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index, + &open_fd_count); + if (ret 
< 0) + goto unlock; +- if (open_fd_count > local->open_fd_count) { ++ if (open_fd_count > local->open_fd_count) { + local->open_fd_count = open_fd_count; + local->update_open_fd_count = _gf_true; +- } ++ } ++ ++ ret = dict_get_int32(xdata, GLUSTERFS_INODELK_COUNT, ++ &num_inodelks); ++ if (ret < 0) ++ goto unlock; ++ if (num_inodelks > local->num_inodelks) { ++ local->num_inodelks = num_inodelks; ++ local->update_num_inodelks = _gf_true; ++ } + } + unlock: + UNLOCK (&frame->lock); +@@ -331,6 +341,7 @@ void + afr_process_post_writev (call_frame_t *frame, xlator_t *this) + { + afr_local_t *local = NULL; ++ afr_lock_t *lock = NULL; + + local = frame->local; + +@@ -349,6 +360,11 @@ afr_process_post_writev (call_frame_t *frame, xlator_t *this) + + if (local->update_open_fd_count) + local->inode_ctx->open_fd_count = local->open_fd_count; ++ if (local->update_num_inodelks && ++ local->transaction.type == AFR_DATA_TRANSACTION) { ++ lock = &local->inode_ctx->lock[local->transaction.type]; ++ lock->num_inodelks = local->num_inodelks; ++ } + + } + +@@ -534,6 +550,12 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + goto out; + } + ++ if (dict_set_str(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT, ++ this->name)) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ + if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) { + op_errno = ENOMEM; + goto out; +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 85b00a8..0a67a83 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -1858,17 +1858,28 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this) + } + + gf_boolean_t +-afr_are_multiple_fds_opened (afr_local_t *local, xlator_t *this) ++afr_are_conflicting_ops_waiting(afr_local_t *local, xlator_t *this) + { ++ afr_lock_t *lock = NULL; ++ lock = &local->inode_ctx->lock[local->transaction.type]; ++ + /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock +- * is taken mount2 opened the same file, it won't be able to +- * perform any data operations until mount1 releases eager-lock. +- * To avoid such scenario do not enable eager-lock for this transaction +- * if open-fd-count is > 1 ++ * is taken mount2 opened the same file, it won't be able to perform ++ * any {meta,}data operations until mount1 releases eager-lock. 
To ++ * avoid such scenario do not enable eager-lock for this transaction if ++ * open-fd-count is > 1 for metadata transactions and if ++ * num-inodelks > 1 for data transactions + */ + +- if (local->inode_ctx->open_fd_count > 1) +- return _gf_true; ++ if (local->transaction.type == AFR_METADATA_TRANSACTION) { ++ if (local->inode_ctx->open_fd_count > 1) { ++ return _gf_true; ++ } ++ } else if (local->transaction.type == AFR_DATA_TRANSACTION) { ++ if (lock->num_inodelks > 1) { ++ return _gf_true; ++ } ++ } + + return _gf_false; + } +@@ -1890,7 +1901,7 @@ afr_is_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this, + goto out; + } + +- if (afr_are_multiple_fds_opened (local, this)) { ++ if (afr_are_conflicting_ops_waiting(local, this)) { + lock->release = _gf_true; + goto out; + } +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 76ad292..afe4a73 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -302,6 +302,12 @@ typedef enum { + } afr_fop_lock_state_t; + + typedef struct _afr_inode_lock_t { ++ /* @num_inodelks: ++ Number of inodelks queried from the server, as queried through ++ xdata in FOPs. Currently, used to decide if eager-locking must be ++ temporarily disabled. ++ */ ++ int32_t num_inodelks; + unsigned int event_generation; + gf_boolean_t release; + gf_boolean_t acquired; +@@ -354,6 +360,8 @@ typedef struct _afr_local { + + uint32_t open_fd_count; + gf_boolean_t update_open_fd_count; ++ int32_t num_inodelks; ++ gf_boolean_t update_num_inodelks; + + gf_lkowner_t saved_lk_owner; + +-- +1.8.3.1 + diff --git a/SOURCES/0382-mount-fuse-make-fuse-dumping-available-as-mount-opti.patch b/SOURCES/0382-mount-fuse-make-fuse-dumping-available-as-mount-opti.patch new file mode 100644 index 0000000..6c1a6ad --- /dev/null +++ b/SOURCES/0382-mount-fuse-make-fuse-dumping-available-as-mount-opti.patch @@ -0,0 +1,60 @@ +From 0ac638b415be75c51716e8439f2a6459fd5b999b Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Thu, 3 May 2018 13:35:04 +0200 +Subject: [PATCH 382/385] mount,fuse: make fuse dumping available as mount + option + +Upsteam-patch: https://review.gluster.org/#/c/glusterfs/+/19955/ +BUG: 1627098 +Change-Id: I4dd4d0e607f89650ebb74b893b911b554472826d +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/150711 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/mount.glusterfs.8 | 3 +++ + xlators/mount/fuse/utils/mount.glusterfs.in | 7 +++++++ + 2 files changed, 10 insertions(+) + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index e16bbec..95aad02 100644 +--- a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -128,6 +128,9 @@ disable root squashing for the trusted client [default: off] + \fBroot\-squash=\fRBOOL + enable root squashing for the trusted client [default: on] + .TP ++\fBdump\-fuse=\fRPATH ++Dump fuse traffic to PATH ++.TP + \fBuse\-readdirp=\fRBOOL + Use readdirp() mode in fuse kernel module [default: on] + .PP +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 36b60ff..b39bb98 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -261,6 +261,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option"); + fi + ++ if [ -n "$dump_fuse" ]; then ++ cmd_line=$(echo "$cmd_line --dump-fuse=$dump_fuse"); ++ fi ++ + # if trasnport type is specified, we have to append it to + # 
volume name, so that it fetches the right client vol file + +@@ -486,6 +490,9 @@ with_options() + "use-readdirp") + use_readdirp=$value + ;; ++ "dump-fuse") ++ dump_fuse=$value ++ ;; + "event-history") + event_history=$value + ;; +-- +1.8.3.1 + diff --git a/SOURCES/0383-glusterd-bump-up-GD_OP_VERSION_MAX.patch b/SOURCES/0383-glusterd-bump-up-GD_OP_VERSION_MAX.patch new file mode 100644 index 0000000..4130814 --- /dev/null +++ b/SOURCES/0383-glusterd-bump-up-GD_OP_VERSION_MAX.patch @@ -0,0 +1,33 @@ +From 18e3135fba208022430206f366f6705476c66304 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Fri, 21 Sep 2018 15:47:48 +0530 +Subject: [PATCH 383/385] glusterd: bump up GD_OP_VERSION_MAX + +...to GD_OP_VERSION_3_13_3 for rhgs-3.4 batch update 1. + +Change-Id: I2329cd931ff3a1daea008b03f20f93eb4bb374be +BUG: 1459709 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/150724 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/globals.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 213f3ce..8e218cb 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -43,7 +43,7 @@ + */ + #define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly + should not change */ +-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_2 /* MAX VERSION is the maximum ++#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_3 /* MAX VERSION is the maximum + count in VME table, should + keep changing with + introduction of newer +-- +1.8.3.1 + diff --git a/SOURCES/0384-features-uss-Use-xxh64-to-generate-gfid-instead-of-m.patch b/SOURCES/0384-features-uss-Use-xxh64-to-generate-gfid-instead-of-m.patch new file mode 100644 index 0000000..72fd008 --- /dev/null +++ b/SOURCES/0384-features-uss-Use-xxh64-to-generate-gfid-instead-of-m.patch @@ -0,0 +1,356 @@ +From adc074704603822bed91e2714d93501e926c05a3 Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat +Date: Fri, 21 Sep 2018 11:25:08 -0400 +Subject: [PATCH 384/385] features/uss: Use xxh64 to generate gfid instead of + md5sum + +* This is to ensure FIPS support +* Also changed the signature of svs_uuid_generate to + get xlator argument +* Added xxh64 wrapper functions in common-utils to + generate gfid using xxh64 + - Those wrapper functions can be used by other xlators + as well to generate gfids using xxh64. But as of now + snapview-server is going to be the only consumer. 
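
As a concrete sketch of the gfid layout this patch introduces (the hash values below are made up; the exact byte placement is spelled out in the comments added further down in common-utils.c): with hash_2 = 0x1122334455667788 and hash_1 = 0x99aabbccddeeff00, the 16-byte uuid_t is filled most significant byte first, giving

    11223344-5566-7788-99aa-bbccddeeff00

that is, hash_2 occupies gfid bytes 0-7 and hash_1 occupies bytes 8-15.
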
+ +backport of following 2 patches from upstream: +https://review.gluster.org/#/c/glusterfs/+/21122/ +https://review.gluster.org/#/c/glusterfs/+/20983/ + +Change-Id: I35047cd1911d7a6ebe036d699d57fbdcd364ec8f +BUG: 1459709 +Signed-off-by: Raghavendra Bhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/150759 +Reviewed-by: Rafi Kavungal Chundattu Parambil +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/common-utils.c | 172 +++++++++++++++++++++ + libglusterfs/src/common-utils.h | 1 + + libglusterfs/src/libglusterfs-messages.h | 11 +- + .../snapview-server/src/snapview-server-helpers.c | 38 +++-- + .../features/snapview-server/src/snapview-server.c | 15 +- + .../features/snapview-server/src/snapview-server.h | 5 +- + 6 files changed, 227 insertions(+), 15 deletions(-) + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index f632e78..54ef875 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -91,6 +91,178 @@ gf_xxh64_wrapper(const unsigned char *data, size_t len, unsigned long long seed, + snprintf(xxh64 + i * 2, lim-i*2, "%02x", p[i]); + } + ++/** ++ * This function takes following arguments ++ * @this: xlator ++ * @gfid: The gfid which has to be filled ++ * @hash: the 8 byte hash which has to be filled inside the gfid ++ * @index: the array element of the uuid_t structure (which is ++ * a array of unsigned char) from where the 8 bytes of ++ * the hash has to be filled. Since uuid_t contains 16 ++ * char elements in the array, each byte of the hash has ++ * to be filled in one array element. ++ * ++ * This function is called twice for 2 hashes (of 8 byte each) to ++ * be filled in the gfid. ++ * ++ * The for loop in this function actually is doing these 2 things ++ * for each hash ++ * ++ * 1) One of the hashes ++ * tmp[0] = (hash_2 >> 56) & 0xff; ++ * tmp[1] = (hash_2 >> 48) & 0xff; ++ * tmp[2] = (hash_2 >> 40) & 0xff; ++ * tmp[3] = (hash_2 >> 32) & 0xff; ++ * tmp[4] = (hash_2 >> 24) & 0xff; ++ * tmp[5] = (hash_2 >> 16) & 0xff; ++ * tmp[6] = (hash_2 >> 8) & 0xff; ++ * tmp[7] = (hash_2) & 0xff; ++ * ++ * 2) The other hash: ++ * tmp[8] = (hash_1 >> 56) & 0xff; ++ * tmp[9] = (hash_1 >> 48) & 0xff; ++ * tmp[10] = (hash_1 >> 40) & 0xff; ++ * tmp[11] = (hash_1 >> 32) & 0xff; ++ * tmp[12] = (hash_1 >> 24) & 0xff; ++ * tmp[13] = (hash_1 >> 16) & 0xff; ++ * tmp[14] = (hash_1 >> 8) & 0xff; ++ * tmp[15] = (hash_1) & 0xff; ++ **/ ++static int ++gf_gfid_from_xxh64 (xlator_t *this, uuid_t gfid, GF_XXH64_hash_t hash, ++ unsigned short index) ++{ ++ int ret = -1; ++ int i = -1; ++ ++ if ((index != 0) && (index != 8)) { ++ gf_msg_callingfn ("gfid-from-xxh64", GF_LOG_WARNING, 0, ++ LG_MSG_INDEX_NOT_FOUND, ++ "index can only be either 0 or 8, as this" ++ "function's purpose is to encode a 8 byte " ++ "hash inside the gfid (index: %d)", ++ index); ++ goto out; ++ } ++ ++ for (i = 0; i < sizeof(hash); i++) { ++ /* ++ * As of now the below statement is equivalent of this. ++ * gfid[index+i] = (hash >> (64 - (8 * (i+1)))) & 0xff; ++ */ ++ gfid[index + i] = (hash >> ((sizeof(hash) * 8) - (8 * (i + 1)))) & ++ (0xff); ++ } ++ ++ ret = 0; ++ ++out: ++ return ret; ++} ++ ++/** ++ * This function does the same thing as gf_xxh64_wrapper. But gf_xxh64_wrapper ++ * does not return anything and in this xlator there is a need for both the ++ * actual hash and the canonicalized form of the hash. 
++ * ++ * To summarize: ++ * - XXH64_hash_t is needed as return because, those bytes which contain the ++ * hash can be used for different purposes as needed. One example is ++ * to have those bytes copied into the uuid_t structure to be used as gfid ++ * - xxh64 string is needed because, it can be used as the key for generating ++ * the next hash (and any other purpose which might require canonical form ++ * of the hash). ++ **/ ++GF_XXH64_hash_t ++gf_xxh64_hash_wrapper (const unsigned char *data, size_t const len, ++ unsigned long long const seed, char *xxh64) ++{ ++ unsigned short i = 0; ++ const unsigned short lim = GF_XXH64_DIGEST_LENGTH * 2 + 1; ++ GF_XXH64_hash_t hash = 0; ++ GF_XXH64_canonical_t c_hash = {{0,},}; ++ const uint8_t *p = (const uint8_t *)&c_hash; ++ ++ hash = GF_XXH64 (data, len, seed); ++ GF_XXH64_canonicalFromHash (&c_hash, hash); ++ ++ for (i = 0; i < GF_XXH64_DIGEST_LENGTH; i++) ++ snprintf (xxh64 + i * 2, lim - i * 2, "%02x", p[i]); ++ ++ return hash; ++} ++ ++/** ++ * This is the algorithm followed for generating new gfid ++ * 1) generate xxh64 hash using snapname and original gfid of the object ++ * 2) Using the canonicalized form of above hash as the key, generate ++ * another hash ++ * 3) Combine both of the 8 byte hashes to generate a 16 byte uuid_t type ++ * 4) Use the above uuid as the gfid ++ * ++ * Each byte of the hash is stored separately in different elements of the ++ * character array represented by uuid_t ++ * Ex: tmp[0] = (hash_2 >> 56) & 0xFF ++ * This saves the most significant byte of hash_2 in tmp[0] ++ * tmp[1] = (hash_2 >> 48) & 0xFF ++ * This saves next most significant byte of hash_2 in tmp[1] ++ * . ++ * . ++ * So on. ++ * tmp[0] - tmp[7] holds the contents of hash_2 ++ * tmp[8] - tmp[15] hold the conents of hash_1 ++ * ++ * The hash generated (i.e. of type XXH64_hash_t) is 8 bytes long. And for ++ * gfid 16 byte uuid is needed. Hecne the 2 hashes are combined to form ++ * one 16 byte entity. ++ **/ ++int ++gf_gfid_generate_from_xxh64(uuid_t gfid, char *key) ++{ ++ char xxh64_1[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {0,}; ++ char xxh64_2[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {0}; ++ GF_XXH64_hash_t hash_1 = 0; ++ GF_XXH64_hash_t hash_2 = 0; ++ int ret = -1; ++ xlator_t *this = THIS; ++ ++ hash_1 = gf_xxh64_hash_wrapper ((unsigned char *)key, strlen (key), ++ GF_XXHSUM64_DEFAULT_SEED, xxh64_1); ++ ++ hash_2 = gf_xxh64_hash_wrapper ((unsigned char *)xxh64_1, strlen (xxh64_1), ++ GF_XXHSUM64_DEFAULT_SEED, xxh64_2); ++ ++ /* hash_2 is saved in 1st 8 elements of uuid_t char array */ ++ if (gf_gfid_from_xxh64 (this, gfid, hash_2, 0)) { ++ gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, ++ LG_MSG_XXH64_TO_GFID_FAILED, ++ "failed to encode the hash %llx into the 1st" ++ "half of gfid", ++ hash_2); ++ goto out; ++ } ++ ++ /* hash_1 is saved in the remaining 8 elements of uuid_t */ ++ if (gf_gfid_from_xxh64 (this, gfid, hash_1, 8)) { ++ gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, ++ LG_MSG_XXH64_TO_GFID_FAILED, ++ "failed to encode the hash %llx into the 2nd" ++ "half of gfid", ++ hash_1); ++ goto out; ++ } ++ ++ gf_msg_debug (this->name, 0, ++ "gfid generated is %s (hash1: %llx) " ++ "hash2: %llx, xxh64_1: %s xxh64_2: %s", ++ uuid_utoa (gfid), hash_1, hash_2, xxh64_1, xxh64_2); ++ ++ ret = 0; ++ ++out: ++ return ret; ++} ++ + /* works similar to mkdir(1) -p. 
+ */ + int +diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h +index da943f4..e64dea3 100644 +--- a/libglusterfs/src/common-utils.h ++++ b/libglusterfs/src/common-utils.h +@@ -837,6 +837,7 @@ gf_boolean_t gf_is_local_addr (char *hostname); + gf_boolean_t gf_is_same_address (char *host1, char *host2); + void gf_xxh64_wrapper(const unsigned char *data, size_t len, + unsigned long long seed, char *xxh64); ++int gf_gfid_generate_from_xxh64(uuid_t gfid, char *key); + int gf_set_timestamp (const char *src, const char* dest); + + int gf_thread_create (pthread_t *thread, const pthread_attr_t *attr, +diff --git a/libglusterfs/src/libglusterfs-messages.h b/libglusterfs/src/libglusterfs-messages.h +index dd65701..9e35897 100644 +--- a/libglusterfs/src/libglusterfs-messages.h ++++ b/libglusterfs/src/libglusterfs-messages.h +@@ -37,7 +37,7 @@ + + #define GLFS_LG_BASE GLFS_MSGID_COMP_LIBGLUSTERFS + +-#define GLFS_LG_NUM_MESSAGES 211 ++#define GLFS_LG_NUM_MESSAGES 212 + + #define GLFS_LG_MSGID_END (GLFS_LG_BASE + GLFS_LG_NUM_MESSAGES + 1) + /* Messaged with message IDs */ +@@ -1809,6 +1809,15 @@ + * @recommendedaction + * + */ ++ ++#define LG_MSG_XXH64_TO_GFID_FAILED (GLFS_LG_BASE + 212) ++ ++/*! ++ * @messageid ++ * @diagnosis ++ * @recommendedaction ++ * ++ */ + /*------------*/ + + #define glfs_msg_end_lg GLFS_LG_MSGID_END, "Invalid: End of messages" +diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c +index 6f305db..2ad74ef 100644 +--- a/xlators/features/snapview-server/src/snapview-server-helpers.c ++++ b/xlators/features/snapview-server/src/snapview-server-helpers.c +@@ -329,18 +329,38 @@ out: + return svs_fd; + } + +-void +-svs_uuid_generate (uuid_t gfid, char *snapname, uuid_t origin_gfid) ++int ++svs_uuid_generate (xlator_t *this, uuid_t gfid, char *snapname, ++ uuid_t origin_gfid) + { +- unsigned char md5_sum[MD5_DIGEST_LENGTH] = {0}; +- char ino_string[NAME_MAX + 32] = ""; ++ char ino_string[NAME_MAX + 32] = ""; ++ uuid_t tmp = {0,}; ++ int ret = -1; ++ ++ GF_VALIDATE_OR_GOTO ("snapview-server", this, out); ++ GF_VALIDATE_OR_GOTO (this->name, snapname, out); ++ ++ (void)snprintf (ino_string, sizeof (ino_string), "%s%s", snapname, ++ uuid_utoa (origin_gfid)); ++ ++ if (gf_gfid_generate_from_xxh64 (tmp, ino_string)) { ++ gf_log (this->name, GF_LOG_WARNING, ++ "failed to generate " ++ "gfid for object with actual gfid of %s " ++ "(snapname: %s, key: %s)", ++ uuid_utoa (origin_gfid), snapname, ino_string); ++ goto out; ++ } ++ ++ gf_uuid_copy (gfid, tmp); + +- GF_ASSERT (snapname); ++ ret = 0; ++ ++ gf_log (this->name, GF_LOG_DEBUG, "gfid generated is %s ", ++ uuid_utoa(gfid)); + +- (void) snprintf (ino_string, sizeof (ino_string), "%s%s", +- snapname, uuid_utoa(origin_gfid)); +- MD5((unsigned char *)ino_string, strlen(ino_string), md5_sum); +- gf_uuid_copy (gfid, md5_sum); ++out: ++ return ret; + } + + void +diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c +index 22b0e1c..7068c5a 100644 +--- a/xlators/features/snapview-server/src/snapview-server.c ++++ b/xlators/features/snapview-server/src/snapview-server.c +@@ -330,9 +330,18 @@ svs_lookup_entry (xlator_t *this, loc_t *loc, struct iatt *buf, + } + + if (gf_uuid_is_null (loc->gfid) && +- gf_uuid_is_null (loc->inode->gfid)) +- svs_uuid_generate (gfid, parent_ctx->snapname, object->gfid); +- else { ++ gf_uuid_is_null (loc->inode->gfid)) { ++ if 
(svs_uuid_generate (this, gfid, parent_ctx->snapname, ++ object->gfid)) { ++ /* ++ * should op_errno be something else such as ++ * EINVAL or ESTALE? ++ */ ++ op_ret = -1; ++ *op_errno = EIO; ++ goto out; ++ } ++ } else { + if (!gf_uuid_is_null (loc->inode->gfid)) + gf_uuid_copy (gfid, loc->inode->gfid); + else +diff --git a/xlators/features/snapview-server/src/snapview-server.h b/xlators/features/snapview-server/src/snapview-server.h +index a12319f..73524a5 100644 +--- a/xlators/features/snapview-server/src/snapview-server.h ++++ b/xlators/features/snapview-server/src/snapview-server.h +@@ -204,8 +204,9 @@ __svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd); + svs_fd_t * + svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd); + +-void +-svs_uuid_generate (uuid_t gfid, char *snapname, uuid_t origin_gfid); ++int ++svs_uuid_generate (xlator_t *this, uuid_t gfid, char *snapname, ++ uuid_t origin_gfid); + + void + svs_fill_ino_from_gfid (struct iatt *buf); +-- +1.8.3.1 + diff --git a/SOURCES/0385-afr-fix-incorrect-reporting-of-directory-split-brain.patch b/SOURCES/0385-afr-fix-incorrect-reporting-of-directory-split-brain.patch new file mode 100644 index 0000000..4cfdcc4 --- /dev/null +++ b/SOURCES/0385-afr-fix-incorrect-reporting-of-directory-split-brain.patch @@ -0,0 +1,312 @@ +From 43124cc2d3ddb1b3262e254326d8461cdf1f894d Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Fri, 21 Sep 2018 18:23:34 +0530 +Subject: [PATCH 385/385] afr: fix incorrect reporting of directory split-brain + +Patch on master: https://review.gluster.org/#/c/glusterfs/+/21135/ +Problem: +When a directory has dirty xattrs due to failed post-ops or when +replace/reset brick is performed, AFR does a conservative merge as +expected, but heal-info reports it as split-brain because there are no +clear sources. + +Fix: +Modify pending flag to contain information about pending heals and +split-brains. For directories, if spit-brain flag is not set,just show +them as needing heal and not being in split-brain. + +Change-Id: I21460bb5375297e421a14efb9ef0f9cea46f7e7c +BUG: 1610743 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/150742 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/afr.rc | 2 +- + .../bugs/replicate/bug-1626994-info-split-brain.t | 62 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 14 ++--- + xlators/cluster/afr/src/afr-self-heal-common.c | 6 ++- + xlators/cluster/afr/src/afr-self-heal-data.c | 2 +- + xlators/cluster/afr/src/afr-self-heal-entry.c | 2 +- + xlators/cluster/afr/src/afr-self-heal-metadata.c | 3 +- + xlators/cluster/afr/src/afr-self-heal.h | 8 +-- + xlators/cluster/afr/src/afr.h | 3 ++ + 9 files changed, 85 insertions(+), 17 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1626994-info-split-brain.t + +diff --git a/tests/afr.rc b/tests/afr.rc +index bdf4075..1fd0310 100644 +--- a/tests/afr.rc ++++ b/tests/afr.rc +@@ -2,7 +2,7 @@ + + function create_brick_xattrop_entry { + local xattrop_dir=$(afr_get_index_path $1) +- local base_entry=`ls $xattrop_dir` ++ local base_entry=`ls $xattrop_dir|grep xattrop` + local gfid_str + local params=`echo "$@" | cut -d' ' -f2-` + echo $params +diff --git a/tests/bugs/replicate/bug-1626994-info-split-brain.t b/tests/bugs/replicate/bug-1626994-info-split-brain.t +new file mode 100644 +index 0000000..86bfecb +--- /dev/null ++++ b/tests/bugs/replicate/bug-1626994-info-split-brain.t +@@ -0,0 +1,62 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++# Test to check dirs having dirty xattr do not show up in info split-brain. ++ ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++# Create base entry in indices/xattrop ++echo "Data" > $M0/FILE ++rm -f $M0/FILE ++EXPECT "1" count_index_entries $B0/${V0}0 ++EXPECT "1" count_index_entries $B0/${V0}1 ++EXPECT "1" count_index_entries $B0/${V0}2 ++ ++TEST mkdir $M0/dirty_dir ++TEST mkdir $M0/pending_dir ++ ++# Set dirty xattrs on all bricks to simulate the case where entry transaction ++# succeeded only the pre-op phase. ++TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir ++TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir ++TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir ++create_brick_xattrop_entry $B0/${V0}0 dirty_dir ++# Should not show up as split-brain. ++EXPECT "0" afr_get_split_brain_count $V0 ++ ++# replace/reset brick case where the new brick has dirty and the other 2 bricks ++# blame it should not be reported as split-brain. ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1 ++TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2 ++create_brick_xattrop_entry $B0/${V0}0 "/" ++# Should not show up as split-brain. ++EXPECT "0" afr_get_split_brain_count $V0 ++ ++# Set pending xattrs on all bricks blaming each other to simulate the case of ++# entry split-brain. ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir ++create_brick_xattrop_entry $B0/${V0}0 pending_dir ++# Should show up as split-brain. 
++EXPECT "1" afr_get_split_brain_count $V0 ++ ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index a85549b..bded6a2 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5472,7 +5472,7 @@ out: + int + afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, + inode_t *inode, gf_boolean_t *msh, +- gf_boolean_t *pending) ++ unsigned char *pending) + { + int ret = -1; + unsigned char *locked_on = NULL; +@@ -5521,7 +5521,7 @@ out: + int + afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_boolean_t *dsh, +- gf_boolean_t *pflag) ++ unsigned char *pflag) + { + int ret = -1; + unsigned char *data_lock = NULL; +@@ -5568,7 +5568,7 @@ out: + int + afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this, + inode_t *inode, +- gf_boolean_t *esh, gf_boolean_t *pflag) ++ gf_boolean_t *esh, unsigned char *pflag) + { + int ret = -1; + int source = -1; +@@ -5619,7 +5619,7 @@ afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this, + sinks, healed_sinks, + locked_replies, + &source, pflag); +- if ((ret == 0) && source < 0) ++ if ((ret == 0) && (*pflag & PFLAG_SBRAIN)) + ret = -EIO; + *esh = afr_decide_heal_info (priv, sources, ret); + } +@@ -5642,7 +5642,7 @@ afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid, + gf_boolean_t *entry_selfheal, + gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, +- gf_boolean_t *pending) ++ unsigned char *pending) + + { + int ret = -1; +@@ -5730,7 +5730,7 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc) + gf_boolean_t data_selfheal = _gf_false; + gf_boolean_t metadata_selfheal = _gf_false; + gf_boolean_t entry_selfheal = _gf_false; +- gf_boolean_t pending = _gf_false; ++ unsigned char pending = 0; + dict_t *dict = NULL; + int ret = -1; + int op_errno = 0; +@@ -5750,7 +5750,7 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc) + goto out; + } + +- if (pending) { ++ if (pending & PFLAG_PENDING) { + size = strlen ("-pending") + 1; + gf_asprintf (&substr, "-pending"); + if (!substr) +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 7e6a691..d04f11d 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1545,7 +1545,7 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, + afr_transaction_type type, + unsigned char *locked_on, unsigned char *sources, + unsigned char *sinks, uint64_t *witness, +- gf_boolean_t *pflag) ++ unsigned char *pflag) + { + afr_private_t *priv = NULL; + int i = 0; +@@ -1573,7 +1573,7 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, + for (i = 0; i < priv->child_count; i++) { + for (j = 0; j < priv->child_count; j++) + if (matrix[i][j]) +- *pflag = _gf_true; ++ *pflag |= PFLAG_PENDING; + if (*pflag) + break; + } +@@ -1655,6 +1655,8 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, + if (locked_on[i]) + sinks[i] = 1; + } ++ if (pflag) ++ *pflag |= PFLAG_SBRAIN; + } + + /* One more class of witness similar to dirty in v2 is where no pending +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index 556a8f9..d3deb8f 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -624,7 +624,7 @@ 
__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, + unsigned char *undid_pending, +- struct afr_reply *replies, gf_boolean_t *pflag) ++ struct afr_reply *replies, unsigned char *pflag) + { + int ret = -1; + int source = -1; +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index f6d3a8a..9f597af 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -496,7 +496,7 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, + struct afr_reply *replies, int *source_p, +- gf_boolean_t *pflag) ++ unsigned char *pflag) + { + int ret = -1; + int source = -1; +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index 199f896..50f8888 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -318,7 +318,8 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i + unsigned char *locked_on, unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, +- struct afr_reply *replies, gf_boolean_t *pflag) ++ struct afr_reply *replies, ++ unsigned char *pflag) + { + int ret = -1; + int source = -1; +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index cc99d9e..7e6fb42 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -172,7 +172,7 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, + afr_transaction_type type, + unsigned char *locked_on, unsigned char *sources, + unsigned char *sinks, uint64_t *witness, +- gf_boolean_t *flag); ++ unsigned char *flag); + int + afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol, int idx, + dict_t *xdata); +@@ -286,7 +286,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, + unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, +- struct afr_reply *replies, gf_boolean_t *flag); ++ struct afr_reply *replies, unsigned char *flag); + + int + __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, +@@ -296,7 +296,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, + unsigned char *healed_sinks, + unsigned char *undid_pending, + struct afr_reply *replies, +- gf_boolean_t *flag); ++ unsigned char *flag); + int + __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *locked_on, +@@ -304,7 +304,7 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, + unsigned char *sinks, + unsigned char *healed_sinks, + struct afr_reply *replies, int *source_p, +- gf_boolean_t *flag); ++ unsigned char *flag); + + int + afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index afe4a73..2e6d995 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -36,6 +36,9 @@ + + #define ARBITER_BRICK_INDEX 2 + ++#define PFLAG_PENDING (1 << 0) ++#define PFLAG_SBRAIN (1 << 1) ++ + typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); + + typedef int (*afr_read_txn_wind_t) 
(call_frame_t *frame, xlator_t *this, int subvol);
+--
+1.8.3.1
+
diff --git a/SOURCES/0386-glusterd-make-sure-that-brickinfo-uuid-is-not-null.patch b/SOURCES/0386-glusterd-make-sure-that-brickinfo-uuid-is-not-null.patch
new file mode 100644
index 0000000..9fdfad2
--- /dev/null
+++ b/SOURCES/0386-glusterd-make-sure-that-brickinfo-uuid-is-not-null.patch
@@ -0,0 +1,86 @@
+From cb4bf2bb3721e66c28a01be6ffff54a6f8610f0e Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde
+Date: Tue, 25 Sep 2018 23:36:48 +0530
+Subject: [PATCH 386/387] glusterd: make sure that brickinfo->uuid is not null
+
+Problem: After an upgrade from a version where the shared-brick-count
+option is not present to a version which introduced this option, an
+issue is seen at the mount point: the size of the volume shown at the
+mount point is reduced by a factor of the shared-brick-count value.
+
+Cause: shared-brick-count is equal to the number of bricks that
+are sharing the file system. gd_set_shared_brick_count() calculates
+the shared-brick-count value based on the uuid of the node and the
+fsid of the brick. https://review.gluster.org/#/c/glusterfs/+/19484
+handles setting of fsid properly during an upgrade path. That patch
+assumed that when the code path is reached, brickinfo->uuid is
+non-null. But brickinfo->uuid is null for all the bricks; since the
+uuid is null, https://review.gluster.org/#/c/glusterfs/+/19484
+couldn't reach the code path that sets the fsid for the bricks. So we
+had fsid as 0 for all bricks, which caused gd_set_shared_brick_count()
+to calculate shared-brick-count in a wrong way, i.e., the logic
+written in gd_set_shared_brick_count() didn't work as expected since
+fsid is 0.
+
+Solution: Before control reaches the code path added by
+https://review.gluster.org/#/c/glusterfs/+/19484,
+check whether brickinfo->uuid is null; if it is,
+calling glusterd_resolve_brick will set brickinfo->uuid to a
+proper value. Once we have a proper uuid, the fsid for the bricks
+will be set properly and the shared-brick-count value will be
+calculated correctly.
+
+Please take a look at the bug https://bugzilla.redhat.com/show_bug.cgi?id=1632889
+for the complete RCA.
+
+Steps followed to test the fix:
+1. Created a 2 node cluster; the cluster is running with a binary
+which doesn't have the shared-brick-count option
+2. Created a 2x(2+1) volume and started it
+3. Mounted the volume and checked the size of the volume using df
+4. Upgraded to a version where shared-brick-count is introduced
+(upgraded the nodes one by one, i.e., stop glusterd, upgrade the node
+and start glusterd).
+5. After upgrading both the nodes, bumped up the cluster.op-version
+6. At the mount point, df shows the correct size for the volume.
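+
+The change itself is the two-line guard visible in the diff below. As a
+rough standalone illustration of why an unset fsid breaks the counting,
+consider the following sketch; it is not glusterd code, and the helper
+and values are made up:
+
+    #include <stdio.h>
+
+    /* count how many of a node's bricks report the same filesystem id */
+    static int shared_brick_count (const unsigned long *fsids, int n,
+                                   unsigned long fsid)
+    {
+            int i, count = 0;
+            for (i = 0; i < n; i++)
+                    if (fsids[i] == fsid)
+                            count++;
+            return count;
+    }
+
+    int main (void)
+    {
+            unsigned long ok[]  = {101, 102, 103}; /* three separate filesystems */
+            unsigned long bad[] = {0, 0, 0};       /* fsid never initialised */
+
+            /* correct case: each brick shares its filesystem only with itself */
+            printf ("correct fsids : %d\n", shared_brick_count (ok, 3, ok[0]));
+            /* broken case: every brick looks shared, so the reported volume
+               size ends up divided by the brick count */
+            printf ("all-zero fsids: %d\n", shared_brick_count (bad, 3, 0));
+            return 0;
+    }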
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/21278/
+
+> fixes: bz#1632889
+> Change-Id: Ib9f078aafb15e899a01086eae113270657ea916b
+> Signed-off-by: Sanju Rakonde
+
+Change-Id: Ib9f078aafb15e899a01086eae113270657ea916b
+BUG: 1630997
+Signed-off-by: Sanju Rakonde
+Reviewed-on: https://code.engineering.redhat.com/gerrit/151321
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 387e7e5..015f6c2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -2609,6 +2609,8 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
+                  * snapshot or snapshot restored volume this would be done post
+                  * creating the brick mounts
+                  */
++                if (gf_uuid_is_null(brickinfo->uuid))
++                        (void)glusterd_resolve_brick(brickinfo);
+                 if (brickinfo->real_path[0] == '\0' && !volinfo->is_snap_volume
+                     && gf_uuid_is_null (volinfo->restored_from_snap)) {
+                         /* By now if the brick is a local brick then it will be
+@@ -2617,7 +2619,6 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
+                          * with MY_UUID for realpath check. Hence do not handle
+                          * error
+                          */
+-                        (void)glusterd_resolve_brick (brickinfo);
+                         if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+                                 if (!realpath (brickinfo->path, abspath)) {
+                                         gf_msg (this->name, GF_LOG_CRITICAL,
+--
+1.8.3.1
+
diff --git a/SOURCES/0387-georep-Fix-config-set-of-monitor-status.patch b/SOURCES/0387-georep-Fix-config-set-of-monitor-status.patch
new file mode 100644
index 0000000..7db1bd7
--- /dev/null
+++ b/SOURCES/0387-georep-Fix-config-set-of-monitor-status.patch
@@ -0,0 +1,41 @@
+From 8e7eb94645355024a12417aa247fa3eecc586274 Mon Sep 17 00:00:00 2001
+From: Kotresh HR
+Date: Mon, 1 Oct 2018 04:37:47 -0400
+Subject: [PATCH 387/387] georep: Fix config set of monitor status
+
+The backport of [1] introduced setting of the monitor status using the
+new config framework available upstream, which is not available
+downstream. Hence it introduced an issue downstream where the monitor
+failed to start workers. Fixed the same; this change is marked as
+downstream only.
+
+[1] https://code.engineering.redhat.com/149760
+
+
+BUG: 1623749
+Change-Id: Ia80b66137d0a3b193d178859a5d3c5fca01aa623
+Signed-off-by: Kotresh HR
+Reviewed-on: https://code.engineering.redhat.com/gerrit/151428
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ geo-replication/syncdaemon/monitor.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
+index 3451fe4..1b03d7d 100644
+--- a/geo-replication/syncdaemon/monitor.py
++++ b/geo-replication/syncdaemon/monitor.py
+@@ -384,7 +384,7 @@ class Monitor(object):
+         # It will also cause fd leaks.
+
+         self.lock.acquire()
+-        set_monitor_status(gconf.get("state-file"), self.ST_STARTED)
++        set_monitor_status(gconf.state_file, self.ST_STARTED)
+         self.lock.release()
+         for t in ta:
+             t.join()
+--
+1.8.3.1
+
diff --git a/SOURCES/0388-glusterd-handshake-prevent-a-buffer-overflow.patch b/SOURCES/0388-glusterd-handshake-prevent-a-buffer-overflow.patch
new file mode 100644
index 0000000..2531550
--- /dev/null
+++ b/SOURCES/0388-glusterd-handshake-prevent-a-buffer-overflow.patch
@@ -0,0 +1,39 @@
+From 7eeba48a6aa2bb04b40163849f211f068a8e6452 Mon Sep 17 00:00:00 2001
+From: Amar Tumballi
+Date: Tue, 2 Oct 2018 13:27:20 +0530
+Subject: [PATCH 388/399] glusterd-handshake: prevent a buffer overflow
+
+As the key size in xdr can be anything, it can be bigger than the
+'NAME_MAX' allowed in the structure, which opens the door to
+denial-of-service attacks.
+
+Fixes: CVE-2018-14653
+BUG: 1634668
+Change-Id: I207db66e0bd1959aad7ca40040cc66b9fc81e111
+Signed-off-by: Amar Tumballi
+Reviewed-on: https://code.engineering.redhat.com/gerrit/151515
+Reviewed-by: Atin Mukherjee
+---
+ xlators/mgmt/glusterd/src/glusterd-handshake.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+index 3d1dfb2..d5594d0 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
+@@ -847,6 +847,12 @@ __server_getspec (rpcsvc_request_t *req)
+
+         volume = args.key;
+
++        if (strlen (volume) >= (NAME_MAX)) {
++                op_errno = EINVAL;
++                gf_msg (this->name, GF_LOG_ERROR, EINVAL, GD_MSG_NAME_TOO_LONG,
++                        "volume name too long (%s)", volume);
++                goto fail;
++        }
+         /* Need to strip leading '/' from volnames. This was introduced to
+          * support nfs style mount parameters for native gluster mount
+          */
+--
+1.8.3.1
+
diff --git a/SOURCES/0389-server-don-t-allow-in-basename.patch b/SOURCES/0389-server-don-t-allow-in-basename.patch
new file mode 100644
index 0000000..7b5d4c7
--- /dev/null
+++ b/SOURCES/0389-server-don-t-allow-in-basename.patch
@@ -0,0 +1,96 @@
+From fe704e0f997444d74966aa7c5bfca484ce54f6a4 Mon Sep 17 00:00:00 2001
+From: Amar Tumballi
+Date: Thu, 27 Sep 2018 09:55:19 +0530
+Subject: [PATCH 389/399] server: don't allow '/' in basename
+
+The server stack needs to perform all sorts of validation, assuming
+clients can be compromised. It is possible for a compromised client
+to send basenames containing '/', and with that create files without
+permission on the server. By sanitizing the basename, and not
+allowing anything other than an actual directory as the parent for
+any entry creation, we ensure that such clients are not able to
+exploit the server.
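+
+The check added further down boils down to rejecting any basename that
+contains a '/', which also covers the older "../" case. A standalone
+sketch of that idea (illustrative only, not the patched server code):
+
+    #include <stdio.h>
+    #include <string.h>
+
+    /* a basename must be a single path component: no '/' anywhere */
+    static int basename_is_safe (const char *name)
+    {
+            return (name != NULL && *name != '\0' &&
+                    strchr (name, '/') == NULL);
+    }
+
+    int main (void)
+    {
+            printf ("%d\n", basename_is_safe ("file.txt"));        /* 1: allowed  */
+            printf ("%d\n", basename_is_safe ("../etc/passwd"));   /* 0: rejected */
+            printf ("%d\n", basename_is_safe ("subdir/file.txt")); /* 0: rejected */
+            return 0;
+    }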
+ +Fixes: CVE-2018-14651 + +BUG: 1633013 +Change-Id: I98d042a9f8e300161fbc3ee5b6e8de755c9765f9 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/151169 +Reviewed-by: Xavi Hernandez +Reviewed-by: Shyam Ranganathan +Reviewed-by: Atin Mukherjee +--- + xlators/protocol/server/src/server-resolve.c | 31 ++++++++++++++++++++-------- + xlators/storage/posix/src/posix-handle.h | 5 +++-- + 2 files changed, 25 insertions(+), 11 deletions(-) + +diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c +index b3eda0e..25db43f 100644 +--- a/xlators/protocol/server/src/server-resolve.c ++++ b/xlators/protocol/server/src/server-resolve.c +@@ -307,22 +307,35 @@ resolve_entry_simple (call_frame_t *frame) + ret = 1; + goto out; + } +- +- /* expected @parent was found from the inode cache */ +- gf_uuid_copy (state->loc_now->pargfid, resolve->pargfid); +- state->loc_now->parent = inode_ref (parent); +- +- if (strstr (resolve->bname, "../")) { +- /* Resolving outside the parent's tree is not allowed */ ++ if (parent->ia_type != IA_IFDIR) { ++ /* Parent type should be 'directory', and nothing else */ + gf_msg (this->name, GF_LOG_ERROR, EPERM, + PS_MSG_GFID_RESOLVE_FAILED, +- "%s: path sent by client not allowed", +- resolve->bname); ++ "%s: parent type not directory (%d)", ++ uuid_utoa (parent->gfid), parent->ia_type); + resolve->op_ret = -1; + resolve->op_errno = EPERM; + ret = 1; + goto out; + } ++ ++ /* expected @parent was found from the inode cache */ ++ gf_uuid_copy (state->loc_now->pargfid, resolve->pargfid); ++ state->loc_now->parent = inode_ref (parent); ++ ++ if (strchr (resolve->bname, '/')) { ++ /* basename should be a string (without '/') in a directory, ++ it can't span multiple levels. This can also lead to ++ resolving outside the parent's tree, which is not allowed */ ++ gf_msg (this->name, GF_LOG_ERROR, EPERM, ++ PS_MSG_GFID_RESOLVE_FAILED, ++ "%s: basename sent by client not allowed", ++ resolve->bname); ++ resolve->op_ret = -1; ++ resolve->op_errno = EPERM; ++ ret = 1; ++ goto out; ++ } + state->loc_now->name = resolve->bname; + + inode = inode_grep (state->itable, parent, resolve->bname); +diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h +index a0f82ec..45ca1d1 100644 +--- a/xlators/storage/posix/src/posix-handle.h ++++ b/xlators/storage/posix/src/posix-handle.h +@@ -223,9 +223,10 @@ + break; \ + } \ + \ +- if (strstr (loc->name, "../")) { \ ++ if (strchr (loc->name, '/')) { \ + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_ENTRY_HANDLE_CREATE, \ +- "'../' in name not allowed: (%s)", loc->name); \ ++ "'/' in name not allowed: (%s)", loc->name); \ ++ op_ret = -1; \ + break; \ + } \ + if (LOC_HAS_ABSPATH (loc)) { \ +-- +1.8.3.1 + diff --git a/SOURCES/0390-core-glusterfsd-keeping-fd-open-in-index-xlator.patch b/SOURCES/0390-core-glusterfsd-keeping-fd-open-in-index-xlator.patch new file mode 100644 index 0000000..b4b6467 --- /dev/null +++ b/SOURCES/0390-core-glusterfsd-keeping-fd-open-in-index-xlator.patch @@ -0,0 +1,888 @@ +From 9b26837434977601f1e48477904486ea032f742a Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 8 Oct 2018 22:32:28 +0530 +Subject: [PATCH 390/399] core: glusterfsd keeping fd open in index xlator + +Problem: Current resource cleanup sequence is not + perfect while brick mux is enabled + +Solution: 1. Destroying xprt after cleanup all fd associated + with a client + 2. 
Before call fini for brick xlators ensure no stub + should be running on a brick + +> Change-Id: I86195785e428f57d3ef0da3e4061021fafacd435 +> fixes: bz#1631357 +> (cherry picked from commit 5bc4594dabc08fd4de1940c044946e33037f2ac7) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21235/) + +Change-Id: I0676a2f8e42557c1107a877406e255b93a77a269 +BUG: 1631372 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/152170 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/defaults-tmpl.c | 8 +- + libglusterfs/src/xlator.c | 2 + + libglusterfs/src/xlator.h | 7 + + xlators/features/index/src/index.c | 50 ++++++- + xlators/features/index/src/index.h | 2 + + xlators/performance/io-threads/src/io-threads.c | 45 ++++++- + xlators/performance/io-threads/src/io-threads.h | 2 +- + xlators/protocol/server/src/server-handshake.c | 23 +++- + xlators/protocol/server/src/server-helpers.c | 79 +++++++++-- + xlators/protocol/server/src/server-helpers.h | 2 +- + xlators/protocol/server/src/server.c | 171 ++++++++++++++++-------- + xlators/protocol/server/src/server.h | 9 +- + xlators/storage/posix/src/posix.c | 11 ++ + 13 files changed, 333 insertions(+), 78 deletions(-) + +diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c +index 0ef14d5..3fdeabb 100644 +--- a/libglusterfs/src/defaults-tmpl.c ++++ b/libglusterfs/src/defaults-tmpl.c +@@ -119,6 +119,8 @@ default_release (xlator_t *this, fd_t *fd) + int + default_notify (xlator_t *this, int32_t event, void *data, ...) + { ++ xlator_t *victim = data; ++ + switch (event) { + case GF_EVENT_PARENT_UP: + case GF_EVENT_PARENT_DOWN: +@@ -126,7 +128,11 @@ default_notify (xlator_t *this, int32_t event, void *data, ...) 
+ xlator_list_t *list = this->children; + + while (list) { +- xlator_notify (list->xlator, event, this); ++ if (victim && victim->cleanup_starting) { ++ xlator_notify(list->xlator, event, victim); ++ } else { ++ xlator_notify(list->xlator, event, this); ++ } + list = list->next; + } + } +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 1cf4a63..8aa8aa1 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -489,6 +489,8 @@ xlator_init (xlator_t *xl) + xl->mem_acct_init (xl); + + xl->instance_name = NULL; ++ GF_ATOMIC_INIT(xl->xprtrefcnt, 0); ++ GF_ATOMIC_INIT(xl->fd_cnt, 0); + if (!xl->init) { + gf_msg (xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED, + "No init() found"); +diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h +index 7434da8..1879641 100644 +--- a/libglusterfs/src/xlator.h ++++ b/libglusterfs/src/xlator.h +@@ -965,7 +965,14 @@ struct _xlator { + /* flag to avoid recall of xlator_mem_cleanup for xame xlator */ + uint32_t call_cleanup; + ++ /* Variable to save fd_count for detach brick */ ++ gf_atomic_t fd_cnt; + ++ /* Variable to save xprt associated for detach brick */ ++ gf_atomic_t xprtrefcnt; ++ ++ /* Flag to notify got CHILD_DOWN event for detach brick */ ++ uint32_t notify_down; + }; + + typedef struct { +diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c +index f3b0270..bf3f4dd 100644 +--- a/xlators/features/index/src/index.c ++++ b/xlators/features/index/src/index.c +@@ -188,6 +188,7 @@ worker_enqueue (xlator_t *this, call_stub_t *stub) + pthread_mutex_lock (&priv->mutex); + { + __index_enqueue (&priv->callstubs, stub); ++ GF_ATOMIC_INC(priv->stub_cnt); + pthread_cond_signal (&priv->cond); + } + pthread_mutex_unlock (&priv->mutex); +@@ -223,11 +224,18 @@ index_worker (void *data) + } + if (!bye) + stub = __index_dequeue (&priv->callstubs); ++ if (bye) { ++ priv->curr_count--; ++ if (priv->curr_count == 0) ++ pthread_cond_broadcast(&priv->cond); ++ } + } + pthread_mutex_unlock (&priv->mutex); + +- if (stub) /* guard against spurious wakeups */ ++ if (stub) {/* guard against spurious wakeups */ + call_resume (stub); ++ GF_ATOMIC_DEC(priv->stub_cnt); ++ } + stub = NULL; + if (bye) + break; +@@ -2375,6 +2383,7 @@ init (xlator_t *this) + gf_uuid_generate (priv->internal_vgfid[i]); + + INIT_LIST_HEAD (&priv->callstubs); ++ GF_ATOMIC_INIT(priv->stub_cnt, 0); + + this->local_pool = mem_pool_new (index_local_t, 64); + if (!this->local_pool) { +@@ -2403,6 +2412,7 @@ init (xlator_t *this) + index_set_link_count (priv, count, XATTROP); + priv->down = _gf_false; + ++ priv->curr_count = 0; + ret = gf_thread_create (&priv->thread, &w_attr, index_worker, this, + "idxwrker"); + if (ret) { +@@ -2411,7 +2421,7 @@ init (xlator_t *this) + "Failed to create worker thread, aborting"); + goto out; + } +- ++ priv->curr_count++; + ret = 0; + out: + if (ret) { +@@ -2528,6 +2538,9 @@ notify (xlator_t *this, int event, void *data, ...) + { + int ret = 0; + index_priv_t *priv = NULL; ++ uint64_t stub_cnt = 0; ++ xlator_t *victim = data; ++ struct timespec sleep_till = {0,}; + + if (!this) + return 0; +@@ -2536,6 +2549,39 @@ notify (xlator_t *this, int event, void *data, ...) 
+ if (!priv) + return 0; + ++ if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) { ++ stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); ++ clock_gettime(CLOCK_REALTIME, &sleep_till); ++ sleep_till.tv_sec += 1; ++ ++ /* Wait for draining stub from queue before notify PARENT_DOWN */ ++ pthread_mutex_lock(&priv->mutex); ++ { ++ while (stub_cnt) { ++ (void)pthread_cond_timedwait(&priv->cond, &priv->mutex, ++ &sleep_till); ++ stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); ++ } ++ } ++ pthread_mutex_unlock(&priv->mutex); ++ gf_log(this->name, GF_LOG_INFO, ++ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); ++ } ++ ++ if ((event == GF_EVENT_CHILD_DOWN) && victim->cleanup_starting) { ++ pthread_mutex_lock(&priv->mutex); ++ { ++ priv->down = _gf_true; ++ pthread_cond_broadcast(&priv->cond); ++ while (priv->curr_count) ++ pthread_cond_wait(&priv->cond, &priv->mutex); ++ } ++ pthread_mutex_unlock(&priv->mutex); ++ ++ gf_log(this->name, GF_LOG_INFO, ++ "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name); ++ } ++ + ret = default_notify (this, event, data); + return ret; + } +diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h +index ae9091d..d935294 100644 +--- a/xlators/features/index/src/index.h ++++ b/xlators/features/index/src/index.h +@@ -62,6 +62,8 @@ typedef struct index_priv { + int64_t pending_count; + pthread_t thread; + gf_boolean_t down; ++ gf_atomic_t stub_cnt; ++ int32_t curr_count; + } index_priv_t; + + typedef struct index_local { +diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c +index 1e1816a..5c47072 100644 +--- a/xlators/performance/io-threads/src/io-threads.c ++++ b/xlators/performance/io-threads/src/io-threads.c +@@ -120,7 +120,7 @@ __iot_dequeue (iot_conf_t *conf, int *pri) + if (!stub) + return NULL; + +- conf->queue_size--; ++ GF_ATOMIC_DEC(conf->queue_size); + conf->queue_sizes[*pri]--; + + return stub; +@@ -153,7 +153,7 @@ __iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri) + } + list_add_tail (&stub->list, &ctx->reqs); + +- conf->queue_size++; ++ GF_ATOMIC_INC(conf->queue_size); + conf->queue_sizes[pri]++; + } + +@@ -182,7 +182,7 @@ iot_worker (void *data) + conf->ac_iot_count[pri]--; + pri = -1; + } +- while (conf->queue_size == 0) { ++ while (GF_ATOMIC_GET(conf->queue_size) == 0) { + if (conf->down) { + bye = _gf_true;/*Avoid sleep*/ + break; +@@ -816,7 +816,7 @@ __iot_workers_scale (iot_conf_t *conf) + gf_msg_debug (conf->this->name, 0, + "scaled threads to %d (queue_size=%d/%d)", + conf->curr_count, +- conf->queue_size, scale); ++ GF_ATOMIC_GET(conf->queue_size), scale); + } else { + break; + } +@@ -1030,6 +1030,7 @@ init (xlator_t *this) + bool, out); + + conf->this = this; ++ GF_ATOMIC_INIT(conf->queue_size, 0); + + for (i = 0; i < IOT_PRI_MAX; i++) { + INIT_LIST_HEAD (&conf->clients[i]); +@@ -1073,9 +1074,41 @@ int + notify (xlator_t *this, int32_t event, void *data, ...) 
+ { + iot_conf_t *conf = this->private; ++ xlator_t *victim = data; ++ uint64_t queue_size = 0; ++ struct timespec sleep_till = {0, }; ++ ++ if (GF_EVENT_PARENT_DOWN == event) { ++ if (victim->cleanup_starting) { ++ clock_gettime(CLOCK_REALTIME, &sleep_till); ++ sleep_till.tv_sec += 1; ++ /* Wait for draining stub from queue before notify PARENT_DOWN */ ++ queue_size = GF_ATOMIC_GET(conf->queue_size); ++ ++ pthread_mutex_lock(&conf->mutex); ++ { ++ while (queue_size) { ++ (void)pthread_cond_timedwait(&conf->cond, &conf->mutex, ++ &sleep_till); ++ queue_size = GF_ATOMIC_GET(conf->queue_size); ++ } ++ } ++ pthread_mutex_unlock(&conf->mutex); + +- if (GF_EVENT_PARENT_DOWN == event) +- iot_exit_threads (conf); ++ gf_log(this->name, GF_LOG_INFO, ++ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); ++ } else { ++ iot_exit_threads(conf); ++ } ++ } ++ ++ if (GF_EVENT_CHILD_DOWN == event) { ++ if (victim->cleanup_starting) { ++ iot_exit_threads(conf); ++ gf_log(this->name, GF_LOG_INFO, ++ "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name); ++ } ++ } + + default_notify (this, event, data); + +diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h +index 9648f74..7a6973c 100644 +--- a/xlators/performance/io-threads/src/io-threads.h ++++ b/xlators/performance/io-threads/src/io-threads.h +@@ -75,7 +75,7 @@ struct iot_conf { + int32_t ac_iot_limit[IOT_PRI_MAX]; + int32_t ac_iot_count[IOT_PRI_MAX]; + int queue_sizes[IOT_PRI_MAX]; +- int queue_size; ++ gf_atomic_t queue_size; + pthread_attr_t w_attr; + gf_boolean_t least_priority; /*Enable/Disable least-priority */ + +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index 12f620c..75577fa 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ b/xlators/protocol/server/src/server-handshake.c +@@ -576,6 +576,7 @@ server_setvolume (rpcsvc_request_t *req) + goto fail; + } + ++ pthread_mutex_lock(&conf->mutex); + list_for_each_entry (tmp, &conf->child_status->status_list, + status_list) { + if (strcmp (tmp->name, name) == 0) +@@ -583,7 +584,7 @@ server_setvolume (rpcsvc_request_t *req) + } + + if (!tmp->name) { +- gf_msg (this->name, GF_LOG_ERROR, 0, ++ gf_msg (this->name, GF_LOG_INFO, 0, + PS_MSG_CHILD_STATUS_FAILED, + "No xlator %s is found in " + "child status list", name); +@@ -594,7 +595,21 @@ server_setvolume (rpcsvc_request_t *req) + PS_MSG_DICT_GET_FAILED, + "Failed to set 'child_up' for xlator %s " + "in the reply dict", tmp->name); ++ if (!tmp->child_up) { ++ ret = dict_set_str(reply, "ERROR", ++ "Not received child_up for this xlator"); ++ if (ret < 0) ++ gf_msg_debug(this->name, 0, "failed to set error msg"); ++ ++ gf_msg(this->name, GF_LOG_ERROR, 0, PS_MSG_CHILD_STATUS_FAILED, ++ "Not received child_up for this xlator %s", name); ++ op_ret = -1; ++ op_errno = EAGAIN; ++ pthread_mutex_unlock(&conf->mutex); ++ goto fail; ++ } + } ++ pthread_mutex_unlock(&conf->mutex); + + ret = dict_get_str (params, "process-uuid", &client_uid); + if (ret < 0) { +@@ -666,7 +681,7 @@ server_setvolume (rpcsvc_request_t *req) + if (serv_ctx->lk_version != 0 && + serv_ctx->lk_version != lk_version) { + (void) server_connection_cleanup (this, client, +- INTERNAL_LOCKS | POSIX_LOCKS); ++ INTERNAL_LOCKS | POSIX_LOCKS, NULL); + } + + pthread_mutex_lock (&conf->mutex); +@@ -812,9 +827,9 @@ server_setvolume (rpcsvc_request_t *req) + req->trans->clnt_options = dict_ref(params); + + gf_msg (this->name, GF_LOG_INFO, 0, 
PS_MSG_CLIENT_ACCEPTED, +- "accepted client from %s (version: %s)", ++ "accepted client from %s (version: %s) with subvol %s", + client->client_uid, +- (clnt_version) ? clnt_version : "old"); ++ (clnt_version) ? clnt_version : "old", name); + + gf_event (EVENT_CLIENT_CONNECT, "client_uid=%s;" + "client_identifier=%s;server_identifier=%s;" +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index c492ab1..99256bf 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -242,16 +242,51 @@ server_connection_cleanup_flush_cbk (call_frame_t *frame, void *cookie, + int32_t ret = -1; + fd_t *fd = NULL; + client_t *client = NULL; ++ uint64_t fd_cnt = 0; ++ xlator_t *victim = NULL; ++ server_conf_t *conf = NULL; ++ xlator_t *serv_xl = NULL; ++ rpc_transport_t *xprt = NULL; ++ rpc_transport_t *xp_next = NULL; ++ int32_t detach = (long)cookie; ++ gf_boolean_t xprt_found = _gf_false; + + GF_VALIDATE_OR_GOTO ("server", this, out); + GF_VALIDATE_OR_GOTO ("server", frame, out); + + fd = frame->local; + client = frame->root->client; ++ serv_xl = frame->this; ++ conf = serv_xl->private; + + fd_unref (fd); + frame->local = NULL; + ++ if (client) ++ victim = client->bound_xl; ++ ++ if (victim) { ++ fd_cnt = GF_ATOMIC_DEC(victim->fd_cnt); ++ if (!fd_cnt && conf && detach) { ++ pthread_mutex_lock(&conf->mutex); ++ { ++ list_for_each_entry_safe(xprt, xp_next, &conf->xprt_list, list) ++ { ++ if (!xprt->xl_private) ++ continue; ++ if (xprt->xl_private == client) { ++ xprt_found = _gf_true; ++ break; ++ } ++ } ++ } ++ pthread_mutex_unlock(&conf->mutex); ++ if (xprt_found) { ++ rpc_transport_unref(xprt); ++ } ++ } ++ } ++ + gf_client_unref (client); + STACK_DESTROY (frame->root); + +@@ -262,7 +297,7 @@ out: + + + static int +-do_fd_cleanup (xlator_t *this, client_t* client, fdentry_t *fdentries, int fd_count) ++do_fd_cleanup (xlator_t *this, client_t *client, fdentry_t *fdentries, int fd_count, int32_t detach) + { + fd_t *fd = NULL; + int i = 0, ret = -1; +@@ -307,9 +342,10 @@ do_fd_cleanup (xlator_t *this, client_t* client, fdentry_t *fdentries, int fd_co + memset (&tmp_frame->root->lk_owner, 0, + sizeof (gf_lkowner_t)); + +- STACK_WIND (tmp_frame, +- server_connection_cleanup_flush_cbk, +- bound_xl, bound_xl->fops->flush, fd, NULL); ++ STACK_WIND_COOKIE (tmp_frame, ++ server_connection_cleanup_flush_cbk, ++ (void *)(long)detach, bound_xl, ++ bound_xl->fops->flush, fd, NULL); + } + } + +@@ -323,13 +359,18 @@ out: + + int + server_connection_cleanup (xlator_t *this, client_t *client, +- int32_t flags) ++ int32_t flags, gf_boolean_t *fd_exist) + { + server_ctx_t *serv_ctx = NULL; + fdentry_t *fdentries = NULL; + uint32_t fd_count = 0; + int cd_ret = 0; + int ret = 0; ++ xlator_t *bound_xl = NULL; ++ int i = 0; ++ fd_t *fd = NULL; ++ uint64_t fd_cnt = 0; ++ int32_t detach = 0; + + GF_VALIDATE_OR_GOTO (this->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, client, out); +@@ -360,13 +401,35 @@ server_connection_cleanup (xlator_t *this, client_t *client, + } + + if (fdentries != NULL) { ++ /* Loop to configure fd_count on victim brick */ ++ bound_xl = client->bound_xl; ++ if (bound_xl) { ++ for (i = 0; i < fd_count; i++) { ++ fd = fdentries[i].fd; ++ if (!fd) ++ continue; ++ fd_cnt++; ++ } ++ if (fd_cnt) { ++ if (fd_exist) ++ (*fd_exist) = _gf_true; ++ GF_ATOMIC_ADD(bound_xl->fd_cnt, fd_cnt); ++ } ++ } ++ ++ /* If fd_exist is not NULL it means function is invoke ++ by server_rpc_notify at the time of 
getting DISCONNECT ++ notification ++ */ ++ if (fd_exist) ++ detach = 1; + gf_msg_debug (this->name, 0, "Performing cleanup on %d " + "fdentries", fd_count); +- ret = do_fd_cleanup (this, client, fdentries, fd_count); +- } +- else ++ ret = do_fd_cleanup (this, client, fdentries, fd_count, detach); ++ } else { + gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_FDENTRY_NULL, + "no fdentries to clean"); ++ } + + if (cd_ret || ret) + ret = -1; +diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h +index 1f47bc9..84a0cf4 100644 +--- a/xlators/protocol/server/src/server-helpers.h ++++ b/xlators/protocol/server/src/server-helpers.h +@@ -42,7 +42,7 @@ get_frame_from_request (rpcsvc_request_t *req); + + int + server_connection_cleanup (xlator_t *this, struct _client *client, +- int32_t flags); ++ int32_t flags, gf_boolean_t *fd_exist); + + gf_boolean_t + server_cancel_grace_timer (xlator_t *this, struct _client *client); +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index 69ad184..8ac0bd1 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -79,7 +79,7 @@ grace_time_handler (void *data) + + if (detached) /* reconnection did not happen :-( */ + server_connection_cleanup (this, client, +- INTERNAL_LOCKS | POSIX_LOCKS); ++ INTERNAL_LOCKS | POSIX_LOCKS, NULL); + gf_client_unref (client); + } + out: +@@ -195,7 +195,7 @@ server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, + "Reply submission failed"); + if (frame && client && !lk_heal) { + server_connection_cleanup (frame->this, client, +- INTERNAL_LOCKS | POSIX_LOCKS); ++ INTERNAL_LOCKS | POSIX_LOCKS, NULL); + } else { + gf_msg_callingfn ("", GF_LOG_ERROR, 0, + PS_MSG_REPLY_SUBMIT_FAILED, +@@ -466,6 +466,33 @@ out: + return error; + } + ++void ++server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name) ++{ ++ pthread_t th_id = { 0, }; ++ int th_ret = -1; ++ server_cleanup_xprt_arg_t *arg = NULL; ++ ++ if (!victim_name) ++ return; ++ ++ gf_log(this->name, GF_LOG_INFO, "Create graph janitor thread for brick %s", ++ victim_name); ++ ++ arg = calloc(1, sizeof(*arg)); ++ arg->this = this; ++ arg->victim_name = gf_strdup(victim_name); ++ th_ret = gf_thread_create_detached(&th_id, server_graph_janitor_threads, ++ arg, "graphjanitor"); ++ if (th_ret) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "graph janitor Thread" ++ " creation is failed for brick %s", ++ victim_name); ++ GF_FREE(arg->victim_name); ++ free(arg); ++ } ++} + + int + server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, +@@ -480,14 +507,9 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + struct timespec grace_ts = {0, }; + char *auth_path = NULL; + int ret = -1; +- gf_boolean_t victim_found = _gf_false; + char *xlator_name = NULL; +- glusterfs_ctx_t *ctx = NULL; +- xlator_t *top = NULL; +- xlator_list_t **trav_p = NULL; +- xlator_t *travxl = NULL; + uint64_t xprtrefcount = 0; +- struct _child_status *tmp = NULL; ++ gf_boolean_t fd_exist = _gf_false; + + + if (!xl || !data) { +@@ -500,7 +522,6 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + this = xl; + trans = data; + conf = this->private; +- ctx = glusterfsd_ctx; + + switch (event) { + case RPCSVC_EVENT_ACCEPT: +@@ -538,7 +559,8 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + */ + pthread_mutex_lock (&conf->mutex); + client = trans->xl_private; +- list_del_init (&trans->list); ++ if (!client) ++ 
list_del_init (&trans->list); + pthread_mutex_unlock (&conf->mutex); + + if (!client) +@@ -563,7 +585,7 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + gf_client_put (client, &detached); + if (detached) { + server_connection_cleanup (this, client, +- INTERNAL_LOCKS | POSIX_LOCKS); ++ INTERNAL_LOCKS | POSIX_LOCKS, &fd_exist); + + gf_event (EVENT_CLIENT_DISCONNECT, + "client_uid=%s;" +@@ -638,56 +660,38 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + unref_transport: + /* rpc_transport_unref() causes a RPCSVC_EVENT_TRANSPORT_DESTROY + * to be called in blocking manner +- * So no code should ideally be after this unref ++ * So no code should ideally be after this unref, Call rpc_transport_unref ++ * only while no client exist or client is not detached or no fd associated ++ with client + */ +- rpc_transport_unref (trans); ++ if (!client || !detached || !fd_exist) ++ rpc_transport_unref (trans); + + break; + + case RPCSVC_EVENT_TRANSPORT_DESTROY: ++ pthread_mutex_lock(&conf->mutex); + client = trans->xl_private; ++ list_del_init(&trans->list); ++ pthread_mutex_unlock(&conf->mutex); + if (!client) + break; +- pthread_mutex_lock (&conf->mutex); +- list_for_each_entry (tmp, &conf->child_status->status_list, +- status_list) { +- if (tmp->name && client->bound_xl && +- client->bound_xl->cleanup_starting && +- !strcmp (tmp->name, client->bound_xl->name)) { +- xprtrefcount = GF_ATOMIC_GET (tmp->xprtrefcnt); +- if (xprtrefcount > 0) { +- xprtrefcount = GF_ATOMIC_DEC (tmp->xprtrefcnt); +- if (xprtrefcount == 0) +- xlator_name = gf_strdup(client->bound_xl->name); +- } +- break; ++ ++ if (client->bound_xl && client->bound_xl->cleanup_starting) { ++ xprtrefcount = GF_ATOMIC_GET (client->bound_xl->xprtrefcnt); ++ if (xprtrefcount > 0) { ++ xprtrefcount = GF_ATOMIC_DEC (client->bound_xl->xprtrefcnt); ++ if (xprtrefcount == 0) ++ xlator_name = gf_strdup(client->bound_xl->name); + } + } +- pthread_mutex_unlock (&conf->mutex); + + /* unref only for if (!client->lk_heal) */ + if (!conf->lk_heal) + gf_client_unref (client); + + if (xlator_name) { +- if (this->ctx->active) { +- top = this->ctx->active->first; +- LOCK (&ctx->volfile_lock); +- for (trav_p = &top->children; *trav_p; +- trav_p = &(*trav_p)->next) { +- travxl = (*trav_p)->xlator; +- if (!travxl->call_cleanup && +- strcmp (travxl->name, xlator_name) == 0) { +- victim_found = _gf_true; +- break; +- } +- } +- UNLOCK (&ctx->volfile_lock); +- if (victim_found) { +- xlator_mem_cleanup (travxl); +- glusterfs_autoscale_threads (ctx, -1); +- } +- } ++ server_call_xlator_mem_cleanup (this, xlator_name); + GF_FREE (xlator_name); + } + +@@ -701,6 +705,67 @@ out: + return 0; + } + ++void * ++server_graph_janitor_threads(void *data) ++{ ++ xlator_t *victim = NULL; ++ xlator_t *this = NULL; ++ server_conf_t *conf = NULL; ++ glusterfs_ctx_t *ctx = NULL; ++ char *victim_name = NULL; ++ server_cleanup_xprt_arg_t *arg = NULL; ++ gf_boolean_t victim_found = _gf_false; ++ xlator_list_t **trav_p = NULL; ++ xlator_t *top = NULL; ++ ++ GF_ASSERT(data); ++ ++ arg = data; ++ this = arg->this; ++ victim_name = arg->victim_name; ++ THIS = arg->this; ++ conf = this->private; ++ ++ ctx = THIS->ctx; ++ GF_VALIDATE_OR_GOTO(this->name, ctx, out); ++ ++ top = this->ctx->active->first; ++ LOCK(&ctx->volfile_lock); ++ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { ++ victim = (*trav_p)->xlator; ++ if (victim->cleanup_starting && ++ strcmp(victim->name, victim_name) == 0) { ++ victim_found = _gf_true; ++ break; ++ } ++ } 
++ if (victim_found) ++ glusterfs_delete_volfile_checksum(ctx, victim->volfile_id); ++ UNLOCK(&ctx->volfile_lock); ++ if (!victim_found) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "victim brick %s is not" ++ " found in graph", ++ victim_name); ++ goto out; ++ } ++ ++ default_notify(victim, GF_EVENT_PARENT_DOWN, victim); ++ if (victim->notify_down) { ++ gf_log(THIS->name, GF_LOG_INFO, ++ "Start call fini for brick" ++ " %s stack", ++ victim->name); ++ xlator_mem_cleanup(victim); ++ glusterfs_autoscale_threads(ctx, -1); ++ } ++ ++out: ++ GF_FREE(arg->victim_name); ++ free(arg); ++ return NULL; ++} ++ + int32_t + mem_acct_init (xlator_t *this) + { +@@ -1136,13 +1201,7 @@ init (xlator_t *this) + conf->child_status = GF_CALLOC (1, sizeof (struct _child_status), + gf_server_mt_child_status); + INIT_LIST_HEAD (&conf->child_status->status_list); +- GF_ATOMIC_INIT (conf->child_status->xprtrefcnt, 0); + +- /*ret = dict_get_str (this->options, "statedump-path", &statedump_path); +- if (!ret) { +- gf_path_strip_trailing_slashes (statedump_path); +- this->ctx->statedump_path = statedump_path; +- }*/ + GF_OPTION_INIT ("statedump-path", statedump_path, path, out); + if (statedump_path) { + gf_path_strip_trailing_slashes (statedump_path); +@@ -1589,6 +1648,11 @@ notify (xlator_t *this, int32_t event, void *data, ...) + + case GF_EVENT_CHILD_DOWN: + { ++ if (victim->cleanup_starting) { ++ victim->notify_down = 1; ++ gf_log(this->name, GF_LOG_INFO, ++ "Getting CHILD_DOWN event for brick %s", victim->name); ++ } + ret = server_process_child_event (this, event, data, + GF_CBK_CHILD_DOWN); + if (ret) { +@@ -1622,7 +1686,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) + status_list) { + if (strcmp (tmp->name, victim->name) == 0) { + tmp->child_up = _gf_false; +- GF_ATOMIC_INIT (tmp->xprtrefcnt, totxprt); ++ GF_ATOMIC_INIT (victim->xprtrefcnt, totxprt); + break; + } + } +@@ -1668,8 +1732,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) + glusterfs_mgmt_pmap_signout (ctx, + victim->name); + if (!xprt_found && victim_found) { +- xlator_mem_cleanup (victim); +- glusterfs_autoscale_threads (ctx, -1); ++ server_call_xlator_mem_cleanup (this, victim); + } + } + break; +diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h +index 691c75b..23775d4 100644 +--- a/xlators/protocol/server/src/server.h ++++ b/xlators/protocol/server/src/server.h +@@ -78,7 +78,6 @@ struct _child_status { + struct list_head status_list; + char *name; + gf_boolean_t child_up; +- gf_atomic_t xprtrefcnt; + }; + struct server_conf { + rpcsvc_t *rpc; +@@ -222,6 +221,10 @@ typedef struct _server_ctx { + uint32_t lk_version; + } server_ctx_t; + ++typedef struct server_cleanup_xprt_arg { ++ xlator_t *this; ++ char *victim_name; ++} server_cleanup_xprt_arg_t; + + int + server_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, void *arg, +@@ -246,4 +249,8 @@ serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp); + + server_ctx_t* + server_ctx_get (client_t *client, xlator_t *xlator); ++ ++void * ++server_graph_janitor_threads(void *); ++ + #endif /* !_SERVER_H */ +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 1d3f1ee..f79dbda 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -7113,6 +7113,8 @@ notify (xlator_t *this, + void *data, + ...) 
+ { ++ xlator_t *victim = data; ++ + switch (event) + { + case GF_EVENT_PARENT_UP: +@@ -7121,6 +7123,15 @@ notify (xlator_t *this, + default_notify (this, GF_EVENT_CHILD_UP, data); + } + break; ++ case GF_EVENT_PARENT_DOWN: ++ { ++ if (!victim->cleanup_starting) ++ break; ++ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", ++ victim->name); ++ default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); ++ } ++ break; + default: + /* */ + break; +-- +1.8.3.1 + diff --git a/SOURCES/0391-glusterd-Use-GF_ATOMIC-to-update-blockers-counter-at.patch b/SOURCES/0391-glusterd-Use-GF_ATOMIC-to-update-blockers-counter-at.patch new file mode 100644 index 0000000..31a63b6 --- /dev/null +++ b/SOURCES/0391-glusterd-Use-GF_ATOMIC-to-update-blockers-counter-at.patch @@ -0,0 +1,132 @@ +From cb565207cb7a3e7a9182bbed57a231f12bcaae0e Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 20 Sep 2018 18:11:36 +0530 +Subject: [PATCH 391/399] glusterd: Use GF_ATOMIC to update 'blockers' counter + at glusterd_conf + +Problem: Currently in glusterd code uses sync_lock/sync_unlock to + update blockers counter which could add delays to the overall + transaction phase escpecially when there's a batch of volume + stop operations processed by glusterd in brick multiplexing mode. + +Solution: Use GF_ATOMIC to update blocker counter to ensure unnecessary + context switching can be avoided. + +> Change-Id: Ie13177dfee2af66687ae7cf5c67405c152853990 +> Fixes: bz#1631128 +> (Cherry picked from commit 4f6ae853ffa9d06446407f389aaef61ac0b3b424) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21221/) + +Change-Id: I3023bce5ba50bc04e078c56ba6fa62a5b791d205 +BUG: 1631329 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/150641 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 +++- + xlators/mgmt/glusterd/src/glusterd-utils.c | 15 +++++---------- + xlators/mgmt/glusterd/src/glusterd.c | 2 +- + xlators/mgmt/glusterd/src/glusterd.h | 2 +- + 4 files changed, 10 insertions(+), 13 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 6dfd819..df5b5c2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -6259,9 +6259,11 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr, + static void + glusterd_wait_for_blockers (glusterd_conf_t *priv) + { +- while (priv->blockers) { ++ uint64_t blockers = GF_ATOMIC_GET(priv->blockers); ++ while (blockers) { + synclock_unlock (&priv->big_lock); + sleep (1); ++ blockers = GF_ATOMIC_GET(priv->blockers); + synclock_lock (&priv->big_lock); + } + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2a176be..04fae63 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5452,10 +5452,7 @@ my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame) + call_frame_t *frame = v_frame; + glusterd_conf_t *conf = frame->this->private; + +- synclock_lock (&conf->big_lock); +- --(conf->blockers); +- synclock_unlock (&conf->big_lock); +- ++ GF_ATOMIC_DEC(conf->blockers); + STACK_DESTROY (frame->root); + return 0; + } +@@ -5546,9 +5543,7 @@ attach_brick_callback (struct rpc_req *req, struct iovec *iov, int count, + } + } + out: +- synclock_lock (&conf->big_lock); +- 
--(conf->blockers); +- synclock_unlock (&conf->big_lock); ++ GF_ATOMIC_DEC(conf->blockers); + STACK_DESTROY (frame->root); + return 0; + } +@@ -5633,7 +5628,7 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, + cbkfn = attach_brick_callback; + } + /* Send the msg */ +- ++(conf->blockers); ++ GF_ATOMIC_INC(conf->blockers); + ret = rpc_clnt_submit (rpc, &gd_brick_prog, op, + cbkfn, &iov, 1, NULL, 0, iobref, + frame, NULL, 0, NULL, 0, NULL); +@@ -6380,7 +6375,7 @@ glusterd_restart_bricks (void *opaque) + } + conf->restart_bricks = _gf_true; + +- ++(conf->blockers); ++ GF_ATOMIC_INC(conf->blockers); + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) +@@ -6497,7 +6492,7 @@ glusterd_restart_bricks (void *opaque) + ret = 0; + + out: +- --(conf->blockers); ++ GF_ATOMIC_DEC(conf->blockers); + conf->restart_done = _gf_true; + conf->restart_bricks = _gf_false; + +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index 78a37eb..076019f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1979,7 +1979,7 @@ init (xlator_t *this) + } + } + +- conf->blockers = 0; ++ GF_ATOMIC_INIT(conf->blockers, 0); + /* If the peer count is less than 2 then this would be the best time to + * spawn process/bricks that may need (re)starting since last time + * (this) glusterd was up. */ +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index d4f4f7e..8c70d48 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -198,7 +198,7 @@ typedef struct { + int ping_timeout; + uint32_t generation; + int32_t workers; +- uint32_t blockers; ++ gf_atomic_t blockers; + uint32_t mgmt_v3_lock_timeout; + gf_boolean_t restart_bricks; + } glusterd_conf_t; +-- +1.8.3.1 + diff --git a/SOURCES/0392-glusterd-don-t-wait-for-blockers-flag-for-stop-volum.patch b/SOURCES/0392-glusterd-don-t-wait-for-blockers-flag-for-stop-volum.patch new file mode 100644 index 0000000..716affd --- /dev/null +++ b/SOURCES/0392-glusterd-don-t-wait-for-blockers-flag-for-stop-volum.patch @@ -0,0 +1,46 @@ +From c5bde98eb28ed3ae6707b7eca3c95f4e5e386c23 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 26 Sep 2018 15:23:28 +0530 +Subject: [PATCH 392/399] glusterd: don't wait for blockers flag for stop + volume + +With commit 4f6ae8 even though the overall transaction time for gluster +volume stop can be reduced, but based on testing it can't be guaranteed +that the transaction can finish in 3 minutes before the unlock timer +gets kicked in. The ground problem to this is the command serialization +which atomic field 'blockers' does for volume stop operation. + +This patch removes that dependency for volume stop not to wait for +blockers. 
+
+> Change-Id: Ifaf120115dc7ed21cf21e65e25c4ec7c61ab8258
+> Fixes: bz#1631128
+> Signed-off-by: Atin Mukherjee
+> (Cherry picked from commit f72f18b3bf7f9535827a059a5090476b3141723f)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21242/)
+
+Change-Id: Ia3cb4d812ea1c633b7a501a03e0bf25a20b45a8e
+BUG: 1631329
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/151065
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index df5b5c2..716d3f2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -6286,7 +6286,6 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
+                 break;
+
+         case GD_OP_STOP_VOLUME:
+-                glusterd_wait_for_blockers (this->private);
+                 ret = glusterd_op_stop_volume (dict);
+                 break;
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0393-core-Pass-xlator_name-in-server_call_xlator_mem_clea.patch b/SOURCES/0393-core-Pass-xlator_name-in-server_call_xlator_mem_clea.patch
new file mode 100644
index 0000000..87bf7b0
--- /dev/null
+++ b/SOURCES/0393-core-Pass-xlator_name-in-server_call_xlator_mem_clea.patch
@@ -0,0 +1,40 @@
+From 2b97774c20920745251665893f7dcf32dece8df7 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Tue, 9 Oct 2018 11:12:52 +0530
+Subject: [PATCH 393/399] core: Pass xlator_name in
+ server_call_xlator_mem_cleanup
+
+Problem: In commit 9b26837434977601f1e48477904486ea032f742a, the
+         xlator name was missed when calling the function
+         server_call_xlator_mem_cleanup (the xlator object was
+         passed instead of its name)
+
+Solution: Pass the correct argument (victim->name) when calling
+          the function
+
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21235/)
+BUG: 1631372
+Change-Id: I80e735fb2cea4c715f7d3210c5781862aea10a92
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/152213
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/protocol/server/src/server.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
+index 8ac0bd1..11ee7ba 100644
+--- a/xlators/protocol/server/src/server.c
++++ b/xlators/protocol/server/src/server.c
+@@ -1732,7 +1732,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
+                                 glusterfs_mgmt_pmap_signout (ctx,
+                                                              victim->name);
+                         if (!xprt_found && victim_found) {
+-                                server_call_xlator_mem_cleanup (this, victim);
++                                server_call_xlator_mem_cleanup (this, victim->name);
+                         }
+                 }
+                 break;
+--
+1.8.3.1
+
diff --git a/SOURCES/0394-io-stats-prevent-taking-file-dump-on-server-side.patch b/SOURCES/0394-io-stats-prevent-taking-file-dump-on-server-side.patch
new file mode 100644
index 0000000..30bc844
--- /dev/null
+++ b/SOURCES/0394-io-stats-prevent-taking-file-dump-on-server-side.patch
@@ -0,0 +1,43 @@
+From c93ade55ce9794952b372b91aff7bc380b49fd52 Mon Sep 17 00:00:00 2001
+From: Amar Tumballi
+Date: Sun, 7 Oct 2018 11:45:35 +0530
+Subject: [PATCH 394/399] io-stats: prevent taking file dump on server side
+
+By allowing clients to take a dump into a file on the brick process,
+we allow compromised clients to create io-stats dumps on the server,
+which can exhaust all the available inodes.
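+
+The guard added further down simply refuses the setxattr-triggered dump
+unless the translator is running inside a client process. A minimal
+standalone sketch of that gating idea (illustrative only; the enum and
+helper below are stand-ins, not the real glusterfs definitions):
+
+    #include <stdio.h>
+
+    typedef enum { PROC_CLIENT, PROC_BRICK } proc_mode_t;
+
+    /* allow a client-triggered io-stats dump only on the client side */
+    static int dump_allowed (proc_mode_t mode)
+    {
+            return mode == PROC_CLIENT;
+    }
+
+    int main (void)
+    {
+            printf ("client process: %d\n", dump_allowed (PROC_CLIENT)); /* 1 */
+            printf ("brick process : %d\n", dump_allowed (PROC_BRICK));  /* 0 */
+            return 0;
+    }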
+ +Fixes: CVE-2018-14659 + +BUG: 1636305 +Change-Id: I64fc530363b78f849011eb3c91355566ee0c485b +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/152024 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/debug/io-stats/src/io-stats.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index 16a11df..0f71334 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -3022,6 +3022,15 @@ conditional_dump (dict_t *dict, char *key, data_t *value, void *data) + stub = data; + this = stub->this; + ++ /* Don't do this on 'brick-side', only do this on client side */ ++ /* Addresses CVE-2018-14659 */ ++ if (this->ctx->process_mode != GF_CLIENT_PROCESS) { ++ gf_log(this->name, GF_LOG_DEBUG, ++ "taking io-stats dump using setxattr not permitted on brick." ++ " Use 'gluster profile' instead"); ++ return -1; ++ } ++ + /* Create a file name that is appended with the io-stats instance + name as well. This helps when there is more than a single io-stats + instance in the graph, or the client and server processes are running +-- +1.8.3.1 + diff --git a/SOURCES/0395-index-prevent-arbitrary-file-creation-outside-entry-.patch b/SOURCES/0395-index-prevent-arbitrary-file-creation-outside-entry-.patch new file mode 100644 index 0000000..91b359c --- /dev/null +++ b/SOURCES/0395-index-prevent-arbitrary-file-creation-outside-entry-.patch @@ -0,0 +1,68 @@ +From d5c5cbe82ef0f7bf8686e71cf08b92e7baf62f55 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Sat, 6 Oct 2018 00:50:53 +0530 +Subject: [PATCH 395/399] index: prevent arbitrary file creation outside + entry-changes folder + +Problem: +A compromised client can set arbitrary values for the GF_XATTROP_ENTRY_IN_KEY +and GF_XATTROP_ENTRY_OUT_KEY during xattrop fop. These values are +consumed by index as a filename to be created/deleted according to the key. +Thus it is possible to create/delete random files even outside the gluster +volume boundary. + +Fix: +Index expects the filename to be a basename, i.e. it must not contain any +pathname components like "/" or "../". Enforce this. 
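+
+As with the server-resolve fix above, the enforcement here is a '/' check
+on the name before it is joined to the entry-changes parent path. A small
+standalone sketch of how an unchecked key could otherwise escape the
+indices directory (the paths and helper are made up for illustration):
+
+    #include <stdio.h>
+    #include <string.h>
+
+    static void build_entry_path (char *out, size_t len,
+                                  const char *pgfid_path, const char *name)
+    {
+            /* reject anything that is not a single path component */
+            if (strchr (name, '/') != NULL) {
+                    snprintf (out, len, "(rejected)");
+                    return;
+            }
+            snprintf (out, len, "%s/%s", pgfid_path, name);
+    }
+
+    int main (void)
+    {
+            char path[256];
+            const char *base =
+                    "/brick/.glusterfs/indices/entry-changes/00000000-pgfid";
+
+            build_entry_path (path, sizeof (path), base, "goodfile");
+            printf ("%s\n", path);
+
+            /* without the check, this would resolve outside the brick */
+            build_entry_path (path, sizeof (path), base,
+                              "../../../../etc/cron.d/evil");
+            printf ("%s\n", path);
+            return 0;
+    }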
+ +Fixes: CVE-2018-14654 +BUG: 1634671 +Change-Id: Ib01c35414c36e3101af9e99a1ea17535ef8bd3b3 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/151985 +Reviewed-by: Amar Tumballi +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/index/src/index.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c +index bf3f4dd..89cdbda 100644 +--- a/xlators/features/index/src/index.c ++++ b/xlators/features/index/src/index.c +@@ -852,6 +852,15 @@ index_entry_create (xlator_t *this, inode_t *inode, char *filename) + ctx->state[ENTRY_CHANGES] = IN; + } + ++ if (strchr (filename, '/')) { ++ gf_msg (this->name, GF_LOG_ERROR, EINVAL, ++ INDEX_MSG_INDEX_ADD_FAILED, ++ "Got invalid entry (%s) for pargfid path (%s)", ++ filename, pgfid_path); ++ op_errno = EINVAL; ++ goto out; ++ } ++ + op_errno = 0; + + snprintf (entry_path, sizeof(entry_path), "%s/%s", pgfid_path, +@@ -886,6 +895,16 @@ index_entry_delete (xlator_t *this, uuid_t pgfid, char *filename) + + make_gfid_path (priv->index_basepath, ENTRY_CHANGES_SUBDIR, pgfid, + pgfid_path, sizeof (pgfid_path)); ++ ++ if (strchr (filename, '/')) { ++ gf_msg (this->name, GF_LOG_ERROR, EINVAL, ++ INDEX_MSG_INDEX_DEL_FAILED, ++ "Got invalid entry (%s) for pargfid path (%s)", ++ filename, pgfid_path); ++ op_errno = EINVAL; ++ goto out; ++ } ++ + snprintf (entry_path, sizeof(entry_path), "%s/%s", pgfid_path, + filename); + +-- +1.8.3.1 + diff --git a/SOURCES/0396-protocol-remove-the-option-verify-volfile-checksum.patch b/SOURCES/0396-protocol-remove-the-option-verify-volfile-checksum.patch new file mode 100644 index 0000000..e62a250 --- /dev/null +++ b/SOURCES/0396-protocol-remove-the-option-verify-volfile-checksum.patch @@ -0,0 +1,458 @@ +From 00c78b9eb52d8a631cdaef883cd507bd0889639a Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Fri, 28 Sep 2018 12:06:09 +0530 +Subject: [PATCH 396/399] protocol: remove the option 'verify-volfile-checksum' + +'getspec' operation is not used between 'client' and 'server' ever since +we have off-loaded volfile management to glusterd, ie, at least 7 years. + +No reason to keep the dead code! The removed option had no meaning, +as glusterd didn't provide a way to set (or unset) this option. So, +no regression should be observed from any of the existing glusterfs +deployment, supported or unsupported. 
+ +Fixes: CVE-2018-14653 + +BUG: 1634668 +Change-Id: I8b3a4d302b3c222e065b484cfe449b9c116393f8 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/151322 +Reviewed-by: Pranith Kumar Karampuri +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/protocol/client/src/client-handshake.c | 83 +------- + xlators/protocol/server/src/server-handshake.c | 276 +------------------------ + xlators/protocol/server/src/server.c | 3 - + 3 files changed, 5 insertions(+), 357 deletions(-) + +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index aee6b3a..7b36178 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -37,91 +37,10 @@ typedef struct client_fd_lk_local { + clnt_fd_ctx_t *fdctx; + }clnt_fd_lk_local_t; + +-int +-client3_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count, +- void *myframe) +-{ +- gf_getspec_rsp rsp = {0,}; +- call_frame_t *frame = NULL; +- int ret = 0; +- +- frame = myframe; +- +- if (!frame || !frame->this) { +- gf_msg (THIS->name, GF_LOG_ERROR, EINVAL, PC_MSG_INVALID_ENTRY, +- "frame not found with the request, returning EINVAL"); +- rsp.op_ret = -1; +- rsp.op_errno = EINVAL; +- goto out; +- } +- if (-1 == req->rpc_status) { +- gf_msg (frame->this->name, GF_LOG_WARNING, ENOTCONN, +- PC_MSG_RPC_STATUS_ERROR, "received RPC status error, " +- "returning ENOTCONN"); +- rsp.op_ret = -1; +- rsp.op_errno = ENOTCONN; +- goto out; +- } +- +- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); +- if (ret < 0) { +- gf_msg (frame->this->name, GF_LOG_ERROR, EINVAL, +- PC_MSG_XDR_DECODING_FAILED, +- "XDR decoding failed, returning EINVAL"); +- rsp.op_ret = -1; +- rsp.op_errno = EINVAL; +- goto out; +- } +- +- if (-1 == rsp.op_ret) { +- gf_msg (frame->this->name, GF_LOG_WARNING, 0, +- PC_MSG_VOL_FILE_NOT_FOUND, "failed to get the 'volume " +- "file' from server"); +- goto out; +- } +- +-out: +- CLIENT_STACK_UNWIND (getspec, frame, rsp.op_ret, rsp.op_errno, +- rsp.spec); +- +- /* Don't use 'GF_FREE', this is allocated by libc */ +- free (rsp.spec); +- free (rsp.xdata.xdata_val); +- +- return 0; +-} +- + int32_t client3_getspec (call_frame_t *frame, xlator_t *this, void *data) + { +- clnt_conf_t *conf = NULL; +- clnt_args_t *args = NULL; +- gf_getspec_req req = {0,}; +- int op_errno = ESTALE; +- int ret = 0; +- +- if (!frame || !this || !data) +- goto unwind; +- +- args = data; +- conf = this->private; +- req.flags = args->flags; +- req.key = (char *)args->name; +- +- ret = client_submit_request (this, &req, frame, conf->handshake, +- GF_HNDSK_GETSPEC, client3_getspec_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gf_getspec_req); +- +- if (ret) { +- gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_SEND_REQ_FAIL, +- "failed to send the request"); +- } +- +- return 0; +-unwind: +- CLIENT_STACK_UNWIND (getspec, frame, -1, op_errno, NULL); ++ CLIENT_STACK_UNWIND (getspec, frame, -1, ENOSYS, NULL); + return 0; +- + } + + int +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index 75577fa..217678a 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ b/xlators/protocol/server/src/server-handshake.c +@@ -38,204 +38,13 @@ gf_compare_client_version (rpcsvc_request_t *req, int fop_prognum, + } + + int +-_volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum) +-{ +- server_conf_t *conf = NULL; +- struct _volfile_ctx 
*temp_volfile = NULL; +- +- conf = this->private; +- temp_volfile = conf->volfile; +- +- while (temp_volfile) { +- if ((NULL == key) && (NULL == temp_volfile->key)) +- break; +- if ((NULL == key) || (NULL == temp_volfile->key)) { +- temp_volfile = temp_volfile->next; +- continue; +- } +- if (strcmp (temp_volfile->key, key) == 0) +- break; +- temp_volfile = temp_volfile->next; +- } +- +- if (!temp_volfile) { +- temp_volfile = GF_CALLOC (1, sizeof (struct _volfile_ctx), +- gf_server_mt_volfile_ctx_t); +- if (!temp_volfile) +- goto out; +- temp_volfile->next = conf->volfile; +- temp_volfile->key = (key)? gf_strdup (key): NULL; +- temp_volfile->checksum = checksum; +- +- conf->volfile = temp_volfile; +- goto out; +- } +- +- if (temp_volfile->checksum != checksum) { +- gf_msg (this->name, GF_LOG_INFO, 0, PS_MSG_REMOUNT_CLIENT_REQD, +- "the volume file was modified between a prior access " +- "and now. This may lead to inconsistency between " +- "clients, you are advised to remount client"); +- temp_volfile->checksum = checksum; +- } +- +-out: +- return 0; +-} +- +- +-static size_t +-getspec_build_volfile_path (xlator_t *this, const char *key, char *path, +- size_t path_len) +-{ +- char *filename = NULL; +- server_conf_t *conf = NULL; +- int ret = -1; +- int free_filename = 0; +- char data_key[256] = {0,}; +- +- conf = this->private; +- +- /* Inform users that this option is changed now */ +- ret = dict_get_str (this->options, "client-volume-filename", +- &filename); +- if (ret == 0) { +- gf_msg (this->name, GF_LOG_WARNING, 0, PS_MSG_DEFAULTING_FILE, +- "option 'client-volume-filename' is changed to " +- "'volume-filename.' which now takes 'key' as an " +- "option to choose/fetch different files from server. " +- "Refer documentation or contact developers for more " +- "info. 
Currently defaulting to given file '%s'", +- filename); +- } +- +- if (key && !filename) { +- sprintf (data_key, "volume-filename.%s", key); +- ret = dict_get_str (this->options, data_key, &filename); +- if (ret < 0) { +- /* Make sure that key doesn't contain "../" in path */ +- if ((gf_strstr (key, "/", "..")) == -1) { +- gf_msg (this->name, GF_LOG_ERROR, EINVAL, +- PS_MSG_INVALID_ENTRY, "%s: invalid " +- "key", key); +- goto out; +- } +- } +- } +- +- if (!filename) { +- ret = dict_get_str (this->options, +- "volume-filename.default", &filename); +- if (ret < 0) { +- gf_msg_debug (this->name, 0, "no default volume " +- "filename given, defaulting to %s", +- DEFAULT_VOLUME_FILE_PATH); +- } +- } +- +- if (!filename && key) { +- ret = gf_asprintf (&filename, "%s/%s.vol", conf->conf_dir, key); +- if (-1 == ret) +- goto out; +- +- free_filename = 1; +- } +- if (!filename) +- filename = DEFAULT_VOLUME_FILE_PATH; +- +- ret = -1; +- +- if ((filename) && (path_len > strlen (filename))) { +- strcpy (path, filename); +- ret = strlen (filename); +- } +- +-out: +- if (free_filename) +- GF_FREE (filename); +- +- return ret; +-} +- +-int +-_validate_volfile_checksum (xlator_t *this, char *key, +- uint32_t checksum) +-{ +- char filename[PATH_MAX] = {0,}; +- server_conf_t *conf = NULL; +- struct _volfile_ctx *temp_volfile = NULL; +- int ret = 0; +- int fd = 0; +- uint32_t local_checksum = 0; +- +- conf = this->private; +- temp_volfile = conf->volfile; +- +- if (!checksum) +- goto out; +- +- if (!temp_volfile) { +- ret = getspec_build_volfile_path (this, key, filename, +- sizeof (filename)); +- if (ret <= 0) +- goto out; +- fd = open (filename, O_RDONLY); +- if (-1 == fd) { +- ret = 0; +- gf_msg (this->name, GF_LOG_INFO, errno, +- PS_MSG_VOL_FILE_OPEN_FAILED, +- "failed to open volume file (%s) : %s", +- filename, strerror (errno)); +- goto out; +- } +- get_checksum_for_file (fd, &local_checksum); +- _volfile_update_checksum (this, key, local_checksum); +- sys_close (fd); +- } +- +- temp_volfile = conf->volfile; +- while (temp_volfile) { +- if ((NULL == key) && (NULL == temp_volfile->key)) +- break; +- if ((NULL == key) || (NULL == temp_volfile->key)) { +- temp_volfile = temp_volfile->next; +- continue; +- } +- if (strcmp (temp_volfile->key, key) == 0) +- break; +- temp_volfile = temp_volfile->next; +- } +- +- if (!temp_volfile) +- goto out; +- +- if ((temp_volfile->checksum) && +- (checksum != temp_volfile->checksum)) +- ret = -1; +- +-out: +- return ret; +-} +- +- +-int + server_getspec (rpcsvc_request_t *req) + { +- int32_t ret = -1; ++ int ret = 0; + int32_t op_errno = ENOENT; +- int32_t spec_fd = -1; +- size_t file_len = 0; +- char filename[PATH_MAX] = {0,}; +- struct stat stbuf = {0,}; +- uint32_t checksum = 0; +- char *key = NULL; +- server_conf_t *conf = NULL; +- xlator_t *this = NULL; + gf_getspec_req args = {0,}; + gf_getspec_rsp rsp = {0,}; + +- this = req->svc->xl; +- conf = this->private; + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_getspec_req); + if (ret < 0) { +@@ -245,58 +54,11 @@ server_getspec (rpcsvc_request_t *req) + goto fail; + } + +- ret = getspec_build_volfile_path (this, args.key, +- filename, sizeof (filename)); +- if (ret > 0) { +- /* to allocate the proper buffer to hold the file data */ +- ret = sys_stat (filename, &stbuf); +- if (ret < 0){ +- gf_msg (this->name, GF_LOG_ERROR, errno, +- PS_MSG_STAT_ERROR, "Unable to stat %s (%s)", +- filename, strerror (errno)); +- op_errno = errno; +- goto fail; +- } +- +- spec_fd = open (filename, O_RDONLY); +- if (spec_fd 
< 0) { +- gf_msg (this->name, GF_LOG_ERROR, errno, +- PS_MSG_FILE_OP_FAILED, "Unable to open %s " +- "(%s)", filename, strerror (errno)); +- op_errno = errno; +- goto fail; +- } +- ret = file_len = stbuf.st_size; +- +- if (conf->verify_volfile) { +- get_checksum_for_file (spec_fd, &checksum); +- _volfile_update_checksum (this, key, checksum); +- } +- } else { +- op_errno = ENOENT; +- } +- +- if (file_len) { +- rsp.spec = GF_CALLOC (file_len, sizeof (char), +- gf_server_mt_rsp_buf_t); +- if (!rsp.spec) { +- ret = -1; +- op_errno = ENOMEM; +- goto fail; +- } +- ret = sys_read (spec_fd, rsp.spec, file_len); +- } +- +- /* convert to XDR */ +- op_errno = errno; ++ op_errno = ENOSYS; + fail: +- if (!rsp.spec) +- rsp.spec = ""; ++ rsp.spec = ""; + rsp.op_errno = gf_errno_to_error (op_errno); +- rsp.op_ret = ret; +- +- if (spec_fd != -1) +- sys_close (spec_fd); ++ rsp.op_ret = -1; + + server_submit_reply (NULL, req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_getspec_rsp); +@@ -459,9 +221,7 @@ server_setvolume (rpcsvc_request_t *req) + char *clnt_version = NULL; + xlator_t *xl = NULL; + char *msg = NULL; +- char *volfile_key = NULL; + xlator_t *this = NULL; +- uint32_t checksum = 0; + int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; +@@ -756,34 +516,6 @@ server_setvolume (rpcsvc_request_t *req) + goto fail; + } + +- if (conf->verify_volfile) { +- ret = dict_get_uint32 (params, "volfile-checksum", &checksum); +- if (ret == 0) { +- ret = dict_get_str (params, "volfile-key", +- &volfile_key); +- if (ret) +- gf_msg_debug (this->name, 0, "failed to get " +- "'volfile-key'"); +- +- ret = _validate_volfile_checksum (this, volfile_key, +- checksum); +- if (-1 == ret) { +- ret = dict_set_str (reply, "ERROR", +- "volume-file checksum " +- "varies from earlier " +- "access"); +- if (ret < 0) +- gf_msg_debug (this->name, 0, "failed " +- "to set error msg"); +- +- op_ret = -1; +- op_errno = ESTALE; +- goto fail; +- } +- } +- } +- +- + peerinfo = &req->trans->peerinfo; + if (peerinfo) { + ret = dict_set_static_ptr (params, "peer-info", peerinfo); +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index 11ee7ba..d0e815e 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -1797,9 +1797,6 @@ struct volume_options options[] = { + .description = "Specifies the limit on the number of inodes " + "in the lru list of the inode cache." + }, +- { .key = {"verify-volfile-checksum"}, +- .type = GF_OPTION_TYPE_BOOL +- }, + { .key = {"trace"}, + .type = GF_OPTION_TYPE_BOOL + }, +-- +1.8.3.1 + diff --git a/SOURCES/0397-features-locks-add-buffer-overflow-checks-in-pl_getx.patch b/SOURCES/0397-features-locks-add-buffer-overflow-checks-in-pl_getx.patch new file mode 100644 index 0000000..6ce6947 --- /dev/null +++ b/SOURCES/0397-features-locks-add-buffer-overflow-checks-in-pl_getx.patch @@ -0,0 +1,59 @@ +From b29b4b4ec846861c975bfa580386d25d48eaa087 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Mon, 8 Oct 2018 11:04:14 +0530 +Subject: [PATCH 397/399] features/locks: add buffer overflow checks in + pl_getxattr + +Problem: +A compromised client can send a variable length buffer value for the +GF_XATTR_CLRLK_CMD virtual xattr. If the length is greater than the +size of the "key" used to send the response back, locks xlator can +segfault when it tries to do a dict_set because of the buffer overflow +in strncpy of pl_getxattr(). + +Fix: +Perform size checks while forming the 'key'. 
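
For illustration, a minimal standalone sketch of the unsafe copy and the bounded copy described above; this is not GlusterFS code, and build_reply_key(), reply_key and keysize are hypothetical stand-ins for the real pl_getxattr() buffer:

    #include <stdio.h>
    #include <string.h>

    /* Copy an untrusted 'name' into a fixed-size reply key. */
    static int
    build_reply_key (char *reply_key, size_t keysize, const char *name)
    {
            /* Unsafe: strncpy (reply_key, name, strlen (name)) bounds the
             * copy by the *source* length, so an oversized 'name' simply
             * overruns 'reply_key'. */

            /* Bounded: snprintf never writes more than keysize bytes, and
             * its return value reveals truncation, so an oversized input
             * can be rejected instead of corrupting memory. */
            if (snprintf (reply_key, keysize, "%s", name) >= (int) keysize)
                    return -1;
            return 0;
    }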
+ +Note: +This fix is already there in the master branch upstream. + +Also, it looks like the size PATH_MAX used for 'key' array is not really +needed since the maximum length seems to be +"strlen(glusterfs.clrlk.tentry.kblocked) + NAME_MAX" where NAME_MAX is +used for the basename value in the clear-locks CLI: + +'gluster volume clear-locks VOLNAME path kind {blocked | granted | all} {inode range | entry basename | posix range}' + +But that can be done some other day. + +Fixes: CVE-2018-14652 +BUG: 1634669 +Change-Id: I101693e91f9ea2bd26cef6c0b7d82527fefcb3e2 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/152038 +Reviewed-by: Amar Tumballi +Reviewed-by: Krutika Dhananjay +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/locks/src/posix.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 63bcf31..63f914c 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -1120,7 +1120,10 @@ pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + goto out; + } + +- strncpy (key, name, strlen (name)); ++ if (snprintf(key, sizeof(key), "%s", name) >= sizeof(key)) { ++ op_ret = -1; ++ goto out; ++ } + if (dict_set_dynstr (dict, key, lk_summary)) { + op_ret = -1; + op_errno = ENOMEM; +-- +1.8.3.1 + diff --git a/SOURCES/0398-lock-Do-not-allow-meta-lock-count-to-be-more-than-on.patch b/SOURCES/0398-lock-Do-not-allow-meta-lock-count-to-be-more-than-on.patch new file mode 100644 index 0000000..b7adbb7 --- /dev/null +++ b/SOURCES/0398-lock-Do-not-allow-meta-lock-count-to-be-more-than-on.patch @@ -0,0 +1,88 @@ +From 2334f5b162e81d81673b59555baaf0a26189e603 Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Mon, 8 Oct 2018 11:38:09 +0530 +Subject: [PATCH 398/399] lock: Do not allow meta-lock count to be more than + one + +In the current scheme of glusterfs where lock migration is +experimental, (ideally) the rebalance process which is migrating +the file should request for a metalock. Hence, the metalock count +should not be more than one for an inode. In future, if there is a +need for meta-lock from other clients, this patch can be reverted. + +Since pl_metalk is called as part of setxattr operation, any client +process(non-rebalance) residing outside trusted network can exhaust +memory of the server node by issuing setxattr repetitively on the +metalock key. The current patch makes sure that more than +one metalock cannot be granted on an inode. 
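
The idea reduces to a "grant at most once" check performed under the inode lock. A rough standalone sketch with hypothetical names (my_inode_ctx, grant_metalock_once) rather than the actual pl_inode_t fields:

    #include <pthread.h>

    struct my_inode_ctx {
            pthread_mutex_t mutex;
            int             metalock_active;    /* 0 = free, 1 = held */
    };

    static int
    grant_metalock_once (struct my_inode_ctx *ctx)
    {
            int ret = 0;

            pthread_mutex_lock (&ctx->mutex);
            {
                    if (ctx->metalock_active)
                            ret = -1;           /* already granted, refuse */
                    else
                            ctx->metalock_active = 1;
            }
            pthread_mutex_unlock (&ctx->mutex);

            return ret;
    }

Whatever the exact bookkeeping, the critical section has to end with a matching pthread_mutex_unlock() so that later lock requests on the same inode are not left waiting on a mutex that is never released.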
+ +Fixes: CVE-2018-14660 + +Change-Id: I5a1dde0b24b0aedcfb29cc89dffc04ccc5a88bcb +BUG: 1636308 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/152041 +Reviewed-by: Amar Tumballi +Tested-by: Amar Tumballi +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/locks/src/posix.c | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 63f914c..c58e6ba 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -2938,6 +2938,40 @@ pl_metalk (call_frame_t *frame, xlator_t *this, inode_t *inode) + goto out; + } + ++ /* Non rebalance process trying to do metalock */ ++ if (frame->root->pid != GF_CLIENT_PID_DEFRAG) { ++ ret = -1; ++ goto out; ++ } ++ ++ ++ /* Note: In the current scheme of glusterfs where lock migration is ++ * experimental, (ideally) the rebalance process which is migrating ++ * the file should request for a metalock. Hence, the metalock count ++ * should not be more than one for an inode. In future, if there is a ++ * need for meta-lock from other clients, the following block can be ++ * removed. ++ * ++ * Since pl_metalk is called as part of setxattr operation, any client ++ * process(non-rebalance) residing outside trusted network can exhaust ++ * memory of the server node by issuing setxattr repetitively on the ++ * metalock key. The following code makes sure that more than ++ * one metalock cannot be granted on an inode*/ ++ pthread_mutex_lock (&pl_inode->mutex); ++ { ++ if (pl_metalock_is_active(pl_inode)) { ++ gf_msg (this->name, GF_LOG_WARNING, EINVAL, 0, ++ "More than one meta-lock can not be granted on" ++ "the inode"); ++ ret = -1; ++ } ++ } ++ pthread_mutex_lock (&pl_inode->mutex); ++ ++ if (ret == -1) { ++ goto out; ++ } ++ + if (frame->root->client) { + ctx = pl_ctx_get (frame->root->client, this); + if (!ctx) { +@@ -3118,7 +3152,7 @@ pl_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) + { + int op_ret = 0; +- int op_errno = 0; ++ int op_errno = EINVAL; + dict_t *xdata_rsp = NULL; + + PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL); +-- +1.8.3.1 + diff --git a/SOURCES/0399-all-fix-the-format-string-exceptions.patch b/SOURCES/0399-all-fix-the-format-string-exceptions.patch new file mode 100644 index 0000000..96cc6ea --- /dev/null +++ b/SOURCES/0399-all-fix-the-format-string-exceptions.patch @@ -0,0 +1,643 @@ +From bff03720f92bfcde848f46dca6a2cfad7adaf42e Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Tue, 9 Oct 2018 12:32:41 +0530 +Subject: [PATCH 399/399] all: fix the format string exceptions + +Currently, there are possibilities in few places, where a user-controlled +(like filename, program parameter etc) string can be passed as 'fmt' for +printf(), which can lead to segfault, if the user's string contains '%s', +'%d' in it. + +While fixing it, makes sense to make the explicit check for such issues +across the codebase, by making the format call properly. 
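
The vulnerable pattern and its fix can be shown with a small standalone example; dump_write() here is a hypothetical logger, not one of the GlusterFS helpers:

    #include <stdio.h>
    #include <stdarg.h>

    /* The printf attribute lets the compiler verify every call site,
     * which is the kind of explicit check the fix adds to the real helpers. */
    static void dump_write (const char *fmt, ...)
            __attribute__ ((__format__ (__printf__, 1, 2)));

    static void
    dump_write (const char *fmt, ...)
    {
            va_list ap;

            va_start (ap, fmt);
            vprintf (fmt, ap);
            va_end (ap);
    }

    int
    main (void)
    {
            const char *user_key = "brick-%s-%n";   /* attacker-controlled text */

            /* dump_write (user_key);        unsafe: the text becomes the format */
            dump_write ("%s\n", user_key);  /* safe: the text is plain data      */
            return 0;
    }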
+ +Fixes: CVE-2018-14661 + +BUG: 1637084 +Change-Id: I63d6b65c61106f77c55f0922dc08a5b8fe421f23 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/152221 +Reviewed-by: Xavi Hernandez +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 2 +- + libglusterfs/src/client_t.c | 2 +- + libglusterfs/src/fd.c | 4 ++-- + libglusterfs/src/inode.c | 2 +- + libglusterfs/src/iobuf.c | 8 ++++---- + libglusterfs/src/latency.c | 2 +- + libglusterfs/src/logging.h | 3 ++- + libglusterfs/src/mem-pool.h | 3 ++- + libglusterfs/src/run.h | 3 ++- + libglusterfs/src/statedump.h | 6 ++++-- + xlators/cluster/afr/src/afr-common.c | 2 +- + xlators/cluster/ec/src/ec.c | 2 +- + xlators/debug/trace/src/trace.c | 2 +- + xlators/features/barrier/src/barrier.c | 4 ++-- + xlators/features/gfid-access/src/gfid-access.c | 2 +- + xlators/features/locks/src/posix.c | 10 +++++----- + xlators/features/shard/src/shard.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-statedump.c | 2 +- + xlators/mount/fuse/src/fuse-bridge.c | 2 +- + xlators/performance/io-cache/src/io-cache.c | 8 ++++---- + xlators/performance/io-threads/src/io-threads.c | 2 +- + xlators/performance/md-cache/src/md-cache.c | 2 +- + xlators/performance/nl-cache/src/nl-cache-helper.c | 2 +- + xlators/performance/nl-cache/src/nl-cache.c | 2 +- + xlators/performance/open-behind/src/open-behind.c | 4 ++-- + xlators/performance/quick-read/src/quick-read.c | 4 ++-- + xlators/performance/read-ahead/src/read-ahead.c | 6 +++--- + xlators/performance/write-behind/src/write-behind.c | 6 +++--- + xlators/protocol/client/src/client.c | 2 +- + xlators/protocol/server/src/server.c | 2 +- + xlators/storage/posix/src/posix.c | 2 +- + 32 files changed, 56 insertions(+), 51 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 2639afa..a1f0840 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2846,7 +2846,7 @@ cli_launch_glfs_heal (int heal_op, dict_t *options) + runner_add_args (&runner, "source-brick", NULL); + runner_argprintf (&runner, "%s:%s", hostname, path); + if (dict_get_str (options, "file", &filename) == 0) +- runner_argprintf (&runner, filename); ++ runner_argprintf (&runner, "%s", filename); + break; + case GF_SHD_OP_SPLIT_BRAIN_FILES: + runner_add_args (&runner, "split-brain-info", NULL); +diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c +index 17e3026..dce27c1 100644 +--- a/libglusterfs/src/client_t.c ++++ b/libglusterfs/src/client_t.c +@@ -650,7 +650,7 @@ clienttable_dump (clienttable_t *clienttable, char *prefix) + clienttable->cliententries[i].next_free) { + gf_proc_dump_build_key(key, prefix, + "cliententry[%d]", i); +- gf_proc_dump_add_section(key); ++ gf_proc_dump_add_section("%s", key); + cliententry_dump(&clienttable->cliententries[i], + key); + } +diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c +index 2dc52ba..27c8e13 100644 +--- a/libglusterfs/src/fd.c ++++ b/libglusterfs/src/fd.c +@@ -1056,7 +1056,7 @@ fd_dump (fd_t *fd, char *prefix) + + if (fd->inode) { + gf_proc_dump_build_key (key, "inode", NULL); +- gf_proc_dump_add_section(key); ++ gf_proc_dump_add_section("%s", key); + inode_dump (fd->inode, key); + } + +@@ -1104,7 +1104,7 @@ fdtable_dump (fdtable_t *fdtable, char *prefix) + if (GF_FDENTRY_ALLOCATED == + fdtable->fdentries[i].next_free) { + gf_proc_dump_build_key(key, prefix, "fdentry[%d]", i); +- gf_proc_dump_add_section(key); ++ 
gf_proc_dump_add_section("%s", key); + fdentry_dump(&fdtable->fdentries[i], key); + } + } +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index cf264d8..1bc05a4 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -31,7 +31,7 @@ + list_for_each_entry (inode, head, list) { \ + gf_proc_dump_build_key(key_buf, key_prefix, \ + "%s.%d",list_type, i++); \ +- gf_proc_dump_add_section(key_buf); \ ++ gf_proc_dump_add_section("%s", key_buf); \ + inode_dump(inode, key); \ + } \ + } +diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c +index 76584fc..f6b8558 100644 +--- a/libglusterfs/src/iobuf.c ++++ b/libglusterfs/src/iobuf.c +@@ -1174,7 +1174,7 @@ iobuf_arena_info_dump (struct iobuf_arena *iobuf_arena, const char *key_prefix) + gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->page_size); + list_for_each_entry (trav, &iobuf_arena->active.list, list) { + gf_proc_dump_build_key(key, key_prefix,"active_iobuf.%d", i++); +- gf_proc_dump_add_section(key); ++ gf_proc_dump_add_section("%s", key); + iobuf_info_dump(trav, key); + } + +@@ -1215,21 +1215,21 @@ iobuf_stats_dump (struct iobuf_pool *iobuf_pool) + list_for_each_entry (trav, &iobuf_pool->arenas[j], list) { + snprintf(msg, sizeof(msg), + "arena.%d", i); +- gf_proc_dump_add_section(msg); ++ gf_proc_dump_add_section("%s", msg); + iobuf_arena_info_dump(trav,msg); + i++; + } + list_for_each_entry (trav, &iobuf_pool->purge[j], list) { + snprintf(msg, sizeof(msg), + "purge.%d", i); +- gf_proc_dump_add_section(msg); ++ gf_proc_dump_add_section("%s", msg); + iobuf_arena_info_dump(trav,msg); + i++; + } + list_for_each_entry (trav, &iobuf_pool->filled[j], list) { + snprintf(msg, sizeof(msg), + "filled.%d", i); +- gf_proc_dump_add_section(msg); ++ gf_proc_dump_add_section("%s", msg); + iobuf_arena_info_dump(trav,msg); + i++; + } +diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c +index 1d75f5b..a890454 100644 +--- a/libglusterfs/src/latency.c ++++ b/libglusterfs/src/latency.c +@@ -169,7 +169,7 @@ gf_proc_dump_latency_info (xlator_t *xl) + int i; + + snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + for (i = 0; i < GF_FOP_MAXVALUE; i++) { + gf_proc_dump_build_key (key, key_prefix, "%s", +diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/logging.h +index fd9a36d..4ed2a82 100644 +--- a/libglusterfs/src/logging.h ++++ b/libglusterfs/src/logging.h +@@ -172,7 +172,8 @@ int _gf_log_callingfn (const char *domain, const char *file, + const char *fmt, ...) + __attribute__ ((__format__ (__printf__, 6, 7))); + +-int _gf_log_eh (const char *function, const char *fmt, ...); ++int _gf_log_eh (const char *function, const char *fmt, ...) ++ __attribute__ ((__format__ (__printf__, 2, 3))); + + + +diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/mem-pool.h +index 1272ad4..dfe1f9a 100644 +--- a/libglusterfs/src/mem-pool.h ++++ b/libglusterfs/src/mem-pool.h +@@ -86,7 +86,8 @@ int + gf_vasprintf (char **string_ptr, const char *format, va_list arg); + + int +-gf_asprintf (char **string_ptr, const char *format, ...); ++gf_asprintf (char **string_ptr, const char *format, ...) 
++ __attribute__ ((__format__ (__printf__, 2, 3))); + + void + __gf_free (void *ptr); +diff --git a/libglusterfs/src/run.h b/libglusterfs/src/run.h +index 1dc4bf9..e47ce11 100644 +--- a/libglusterfs/src/run.h ++++ b/libglusterfs/src/run.h +@@ -76,7 +76,8 @@ void runner_add_args (runner_t *runner, ...); + * @param runner pointer to runner_t instance + * @param format printf style format specifier + */ +-void runner_argprintf (runner_t *runner, const char *format, ...); ++void runner_argprintf (runner_t *runner, const char *format, ...) ++ __attribute__ ((__format__ (__printf__, 2, 3))); + + /** + * log a message about the command to be run. +diff --git a/libglusterfs/src/statedump.h b/libglusterfs/src/statedump.h +index 0a7a97e..ee97cdb 100644 +--- a/libglusterfs/src/statedump.h ++++ b/libglusterfs/src/statedump.h +@@ -73,9 +73,11 @@ void gf_proc_dump_cleanup(void); + + void gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx); + +-int gf_proc_dump_add_section(char *key,...); ++int gf_proc_dump_add_section(char *key, ...) ++ __attribute__ ((__format__ (__printf__, 1, 2))); + +-int gf_proc_dump_write(char *key, char *value,...); ++int gf_proc_dump_write(char *key, char *value, ...) ++ __attribute__ ((__format__ (__printf__, 2, 3))); + + void inode_table_dump(inode_table_t *itable, char *prefix); + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index bded6a2..e8107c9 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -4536,7 +4536,7 @@ afr_priv_dump (xlator_t *this) + + GF_ASSERT (priv); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section(key_prefix); ++ gf_proc_dump_add_section("%s", key_prefix); + gf_proc_dump_write("child_count", "%u", priv->child_count); + for (i = 0; i < priv->child_count; i++) { + sprintf (key, "child_up[%d]", i); +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 9a23a45..9cb9580 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -1316,7 +1316,7 @@ int32_t ec_dump_private(xlator_t *this) + GF_ASSERT(ec); + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section(key_prefix); ++ gf_proc_dump_add_section("%s", key_prefix); + gf_proc_dump_write("nodes", "%u", ec->nodes); + gf_proc_dump_write("redundancy", "%u", ec->redundancy); + gf_proc_dump_write("fragment_size", "%u", ec->fragment_size); +diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c +index 451ef9a..34ac4ca 100644 +--- a/xlators/debug/trace/src/trace.c ++++ b/xlators/debug/trace/src/trace.c +@@ -3059,7 +3059,7 @@ trace_dump_history (xlator_t *this) + if (conf && conf->log_history == _gf_true) { + gf_proc_dump_build_key (key_prefix, "xlator.debug.trace", + "history"); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + eh_dump (this->history, NULL, dump_history_trace); + } + ret = 0; +diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c +index ce3a255..8e964e8 100644 +--- a/xlators/features/barrier/src/barrier.c ++++ b/xlators/features/barrier/src/barrier.c +@@ -713,7 +713,7 @@ __barrier_dump_queue (barrier_priv_t *priv) + + list_for_each_entry (stub, &priv->queue, list) { + snprintf (key, sizeof (key), "stub.%d", i++); +- gf_proc_dump_add_section (key); ++ gf_proc_dump_add_section ("%s", key); + barrier_dump_stub(stub, key); + } + +@@ -735,7 +735,7 @@ 
barrier_dump_priv (xlator_t *this) + return 0; + + gf_proc_dump_build_key (key, "xlator.features.barrier", "priv"); +- gf_proc_dump_add_section (key); ++ gf_proc_dump_add_section ("%s", key); + + LOCK (&priv->lock); + { +diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c +index 7d75b09..aa8aac1 100644 +--- a/xlators/features/gfid-access/src/gfid-access.c ++++ b/xlators/features/gfid-access/src/gfid-access.c +@@ -1382,7 +1382,7 @@ ga_dump_inodectx (xlator_t *this, inode_t *inode) + if (ret == 0) { + tmp_inode = (void*) value; + gf_proc_dump_build_key (key_prefix, this->name, "inode"); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + gf_proc_dump_write ("real-gfid", "%s", + uuid_utoa (tmp_inode->gfid)); + } +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index c58e6ba..b434a08 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -3292,7 +3292,7 @@ __dump_entrylks (pl_inode_t *pl_inode) + blocked, granted); + } + +- gf_proc_dump_write(key, tmp); ++ gf_proc_dump_write(key, "%s", tmp); + + count++; + } +@@ -3313,7 +3313,7 @@ __dump_entrylks (pl_inode_t *pl_inode) + lkowner_utoa (&lock->owner), lock->client, + lock->connection_id, blocked); + +- gf_proc_dump_write(key, tmp); ++ gf_proc_dump_write(key, "%s", tmp); + + count++; + } +@@ -3364,7 +3364,7 @@ __dump_inodelks (pl_inode_t *pl_inode) + &lock->granted_time.tv_sec, + &lock->blkd_time.tv_sec, + _gf_true); +- gf_proc_dump_write(key, tmp); ++ gf_proc_dump_write(key, "%s", tmp); + + count++; + } +@@ -3380,7 +3380,7 @@ __dump_inodelks (pl_inode_t *pl_inode) + lock->client, lock->connection_id, + 0, &lock->blkd_time.tv_sec, + _gf_false); +- gf_proc_dump_write(key, tmp); ++ gf_proc_dump_write(key, "%s", tmp); + + count++; + } +@@ -3421,7 +3421,7 @@ __dump_posixlks (pl_inode_t *pl_inode) + &lock->owner, lock->client, NULL, + &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, + (lock->blocked)? 
_gf_false: _gf_true); +- gf_proc_dump_write(key, tmp); ++ gf_proc_dump_write(key, "%s", tmp); + + count++; + } +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index d67cdf4..f5fb181 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -5557,7 +5557,7 @@ shard_priv_dump (xlator_t *this) + + snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, + this->name); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + gf_proc_dump_write ("shard-block-size", "%s", + gf_uint64_2human_readable (priv->block_size)); + gf_proc_dump_write ("inode-count", "%d", priv->inode_count); +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index 848e689..5ab828c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -310,7 +310,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, + runner_add_arg (&runner, "--pid-file"); + runner_argprintf (&runner, "%s",pidfile); + runner_add_arg (&runner, "-l"); +- runner_argprintf (&runner, logfile); ++ runner_argprintf (&runner, "%s", logfile); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + if (dict_get_str (priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, +diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c +index d0a9705..8c2c4b5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-statedump.c ++++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c +@@ -197,7 +197,7 @@ glusterd_dump_priv (xlator_t *this) + return 0; + + gf_proc_dump_build_key (key, "xlator.glusterd", "priv"); +- gf_proc_dump_add_section (key); ++ gf_proc_dump_add_section ("%s", key); + + pthread_mutex_lock (&priv->mutex); + { +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 85cee73..1c4f4e4 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -5189,7 +5189,7 @@ fuse_history_dump (xlator_t *this) + + gf_proc_dump_build_key (key_prefix, "xlator.mount.fuse", + "history"); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + eh_dump (this->history, NULL, dump_history_fuse); + + ret = 0; +diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c +index 8963942..700d8c2 100644 +--- a/xlators/performance/io-cache/src/io-cache.c ++++ b/xlators/performance/io-cache/src/io-cache.c +@@ -2008,7 +2008,7 @@ ioc_inode_dump (xlator_t *this, inode_t *inode) + if (gf_uuid_is_null (ioc_inode->inode->gfid)) + goto unlock; + +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + section_added = _gf_true; + + __inode_path (ioc_inode->inode, NULL, &path); +@@ -2031,7 +2031,7 @@ unlock: + out: + if (ret && ioc_inode) { + if (section_added == _gf_false) +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + gf_proc_dump_write ("Unable to print the status of ioc_inode", + "(Lock acquisition failed) %s", + uuid_utoa (inode->gfid)); +@@ -2053,7 +2053,7 @@ ioc_priv_dump (xlator_t *this) + priv = this->private; + + gf_proc_dump_build_key (key_prefix, "io-cache", "priv"); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + add_section = _gf_true; + + ret = pthread_mutex_trylock 
(&priv->table_lock); +@@ -2074,7 +2074,7 @@ out: + if (!add_section) { + gf_proc_dump_build_key (key_prefix, "xlator." + "performance.io-cache", "priv"); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + } + gf_proc_dump_write ("Unable to dump the state of private " + "structure of io-cache xlator", "(Lock " +diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c +index 5c47072..41d48ab 100644 +--- a/xlators/performance/io-threads/src/io-threads.c ++++ b/xlators/performance/io-threads/src/io-threads.c +@@ -911,7 +911,7 @@ iot_priv_dump (xlator_t *this) + snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, + this->name); + +- gf_proc_dump_add_section(key_prefix); ++ gf_proc_dump_add_section("%s", key_prefix); + + gf_proc_dump_write("maximum_threads_count", "%d", conf->max_count); + gf_proc_dump_write("current_threads_count", "%d", conf->curr_count); +diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c +index 9d2eea6..e7452e9 100644 +--- a/xlators/performance/md-cache/src/md-cache.c ++++ b/xlators/performance/md-cache/src/md-cache.c +@@ -3087,7 +3087,7 @@ mdc_priv_dump (xlator_t *this) + conf = this->private; + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section(key_prefix); ++ gf_proc_dump_add_section("%s", key_prefix); + + gf_proc_dump_write("stat_hit_count", "%"PRId64, + conf->mdc_counter.stat_hit.cnt); +diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c +index 0b6c884..583d67b 100644 +--- a/xlators/performance/nl-cache/src/nl-cache-helper.c ++++ b/xlators/performance/nl-cache/src/nl-cache-helper.c +@@ -1177,7 +1177,7 @@ nlc_dump_inodectx (xlator_t *this, inode_t *inode) + gf_proc_dump_build_key (key_prefix, + "xlator.performance.nl-cache", + "nlc_inode"); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + __inode_path (inode, NULL, &path); + if (path != NULL) { +diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c +index 7dad8d9..6365d82 100644 +--- a/xlators/performance/nl-cache/src/nl-cache.c ++++ b/xlators/performance/nl-cache/src/nl-cache.c +@@ -615,7 +615,7 @@ nlc_priv_dump (xlator_t *this) + conf = this->private; + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section(key_prefix); ++ gf_proc_dump_add_section("%s", key_prefix); + + gf_proc_dump_write("negative_lookup_hit_count", "%"PRId64, + conf->nlc_counter.nlc_hit.cnt); +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 3be35bc..2d77f2d 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -897,7 +897,7 @@ ob_priv_dump (xlator_t *this) + gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind", + "priv"); + +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + gf_proc_dump_write ("use_anonymous_fd", "%d", conf->use_anonymous_fd); + +@@ -926,7 +926,7 @@ ob_fdctx_dump (xlator_t *this, fd_t *fd) + + gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind", + "file"); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + gf_proc_dump_write ("fd", "%p", fd); + +diff --git 
a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c +index 61232c1..ca228b8 100644 +--- a/xlators/performance/quick-read/src/quick-read.c ++++ b/xlators/performance/quick-read/src/quick-read.c +@@ -748,7 +748,7 @@ qr_inodectx_dump (xlator_t *this, inode_t *inode) + + gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read", + "inodectx"); +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + gf_proc_dump_write ("entire-file-cached", "%s", qr_inode->data ? "yes" : "no"); + +@@ -794,7 +794,7 @@ qr_priv_dump (xlator_t *this) + gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read", + "priv"); + +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + gf_proc_dump_write ("max_file_size", "%d", conf->max_file_size); + gf_proc_dump_write ("cache_timeout", "%d", conf->cache_timeout); +diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c +index 242b579..74ddf49 100644 +--- a/xlators/performance/read-ahead/src/read-ahead.c ++++ b/xlators/performance/read-ahead/src/read-ahead.c +@@ -823,7 +823,7 @@ ra_fdctx_dump (xlator_t *this, fd_t *fd) + "xlator.performance.read-ahead", + "file"); + +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + ret = __inode_path (fd->inode, NULL, &path); + if (path != NULL) { +@@ -1068,7 +1068,7 @@ ra_priv_dump (xlator_t *this) + gf_proc_dump_build_key (key_prefix, "xlator.performance.read-ahead", + "priv"); + +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + add_section = _gf_true; + + ret = pthread_mutex_trylock (&conf->conf_lock); +@@ -1086,7 +1086,7 @@ ra_priv_dump (xlator_t *this) + out: + if (ret && conf) { + if (add_section == _gf_false) +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + gf_proc_dump_write ("Unable to dump priv", + "(Lock acquisition failed) %s", this->name); +diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c +index 478985a..ef02e18 100644 +--- a/xlators/performance/write-behind/src/write-behind.c ++++ b/xlators/performance/write-behind/src/write-behind.c +@@ -2763,7 +2763,7 @@ wb_priv_dump (xlator_t *this) + gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind", + "priv"); + +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + gf_proc_dump_write ("aggregate_size", "%d", conf->aggregate_size); + gf_proc_dump_write ("window_size", "%d", conf->window_size); +@@ -2787,7 +2787,7 @@ __wb_dump_requests (struct list_head *head, char *prefix) + gf_proc_dump_build_key (key_prefix, key, "%s", + (char *)gf_fop_list[req->fop]); + +- gf_proc_dump_add_section(key_prefix); ++ gf_proc_dump_add_section("%s", key_prefix); + + gf_proc_dump_write ("unique", "%"PRIu64, req->unique); + +@@ -2859,7 +2859,7 @@ wb_inode_dump (xlator_t *this, inode_t *inode) + gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind", + "wb_inode"); + +- gf_proc_dump_add_section (key_prefix); ++ gf_proc_dump_add_section ("%s", key_prefix); + + __inode_path (inode, NULL, &path); + if (path != NULL) { +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 26b0907..674f1aa 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -2855,7 +2855,7 @@ 
client_priv_dump (xlator_t *this) + gf_proc_dump_build_key(key_prefix, "xlator.protocol.client", + "%s.priv", this->name); + +- gf_proc_dump_add_section(key_prefix); ++ gf_proc_dump_add_section("%s", key_prefix); + + list_for_each_entry(tmp, &conf->saved_fds, sfd_pos) { + sprintf (key, "fd.%d.remote_fd", i); +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index d0e815e..3a429bc 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -320,7 +320,7 @@ server_priv (xlator_t *this) + return 0; + + gf_proc_dump_build_key (key, "xlator.protocol.server", "priv"); +- gf_proc_dump_add_section (key); ++ gf_proc_dump_add_section ("%s", key); + + ret = pthread_mutex_trylock (&conf->mutex); + if (ret != 0) +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index f79dbda..7bfe780 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -6960,7 +6960,7 @@ posix_priv (xlator_t *this) + + (void) snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", + this->type, this->name); +- gf_proc_dump_add_section(key_prefix); ++ gf_proc_dump_add_section("%s", key_prefix); + + if (!this) + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0400-all-fix-the-format-warnings-due-to-strict-check.patch b/SOURCES/0400-all-fix-the-format-warnings-due-to-strict-check.patch new file mode 100644 index 0000000..6e60f33 --- /dev/null +++ b/SOURCES/0400-all-fix-the-format-warnings-due-to-strict-check.patch @@ -0,0 +1,427 @@ +From c86df7778fd1d93a74892f36fa6fec14d3d20707 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Wed, 10 Oct 2018 10:42:53 +0530 +Subject: [PATCH 400/404] all: fix the format warnings due to strict check + +In the fix for CVE listed below, we added a strict format check, +and that has revealed multiple issues with the formats. Fixed all +warnings introduced because of the fix. 
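
Most of these warnings come down to passing 64-bit or size_t values to a narrower conversion specifier. A small sketch of the portable forms the patch switches to, using plain example values rather than the real statedump fields:

    #include <stdio.h>
    #include <inttypes.h>

    int
    main (void)
    {
            uint64_t window_size = 1048576;     /* e.g. a byte counter  */
            size_t   page_size   = 4096;        /* e.g. a buffer length */

            /* printf ("%d", window_size);  wrong width, warns on LP64  */
            printf ("window_size=%" PRIu64 "\n", window_size);
            printf ("page_size=%zu\n", page_size);
            return 0;
    }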
+ +Updates: CVE-2018-14661 + +BUG: 1637084 +Change-Id: Ic1702b264fa4c8ad23d3836fcd1d6dc2ca8bc4b1 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/152352 +Reviewed-by: Xavi Hernandez +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/fd.c | 2 +- + libglusterfs/src/inode.c | 2 +- + libglusterfs/src/iobuf.c | 8 ++++---- + libglusterfs/src/stack.c | 6 +++--- + libglusterfs/src/statedump.c | 11 ++++++----- + rpc/rpc-lib/src/rpc-drc.c | 4 ++-- + xlators/cluster/dht/src/dht-shared.c | 2 +- + xlators/cluster/stripe/src/stripe.c | 4 ++-- + xlators/debug/trace/src/trace.c | 2 +- + xlators/features/quota/src/quota.c | 2 +- + xlators/mount/fuse/src/fuse-bridge.c | 6 +++--- + xlators/performance/io-cache/src/io-cache.c | 4 ++-- + xlators/performance/nl-cache/src/nl-cache-helper.c | 6 +++--- + xlators/performance/open-behind/src/open-behind.c | 2 +- + xlators/performance/quick-read/src/quick-read.c | 4 ++-- + xlators/performance/read-ahead/src/read-ahead.c | 6 ++---- + xlators/performance/write-behind/src/write-behind.c | 6 +++--- + xlators/protocol/client/src/client.c | 2 +- + xlators/storage/posix/src/posix.c | 10 +++++----- + 19 files changed, 44 insertions(+), 45 deletions(-) + +diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c +index 27c8e13..ed80bd3 100644 +--- a/libglusterfs/src/fd.c ++++ b/libglusterfs/src/fd.c +@@ -1050,7 +1050,7 @@ fd_dump (fd_t *fd, char *prefix) + return; + + memset(key, 0, sizeof(key)); +- gf_proc_dump_write("pid", "%llu", fd->pid); ++ gf_proc_dump_write("pid", "%" PRIu64, fd->pid); + gf_proc_dump_write("refcount", "%d", fd->refcount); + gf_proc_dump_write("flags", "%d", fd->flags); + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 1bc05a4..29d3c8f 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -2415,7 +2415,7 @@ inode_table_dump (inode_table_t *itable, char *prefix) + } + + gf_proc_dump_build_key(key, prefix, "hashsize"); +- gf_proc_dump_write(key, "%d", itable->hashsize); ++ gf_proc_dump_write(key, "%"GF_PRI_SIZET, itable->hashsize); + gf_proc_dump_build_key(key, prefix, "name"); + gf_proc_dump_write(key, "%s", itable->name); + +diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c +index f6b8558..a22dbd3 100644 +--- a/libglusterfs/src/iobuf.c ++++ b/libglusterfs/src/iobuf.c +@@ -1169,7 +1169,7 @@ iobuf_arena_info_dump (struct iobuf_arena *iobuf_arena, const char *key_prefix) + gf_proc_dump_build_key(key, key_prefix, "alloc_cnt"); + gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->alloc_cnt); + gf_proc_dump_build_key(key, key_prefix, "max_active"); +- gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->max_active); ++ gf_proc_dump_write(key, "%d", iobuf_arena->max_active); + gf_proc_dump_build_key(key, key_prefix, "page_size"); + gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->page_size); + list_for_each_entry (trav, &iobuf_arena->active.list, list) { +@@ -1202,9 +1202,9 @@ iobuf_stats_dump (struct iobuf_pool *iobuf_pool) + } + gf_proc_dump_add_section("iobuf.global"); + gf_proc_dump_write("iobuf_pool","%p", iobuf_pool); +- gf_proc_dump_write("iobuf_pool.default_page_size", "%d", +- iobuf_pool->default_page_size); +- gf_proc_dump_write("iobuf_pool.arena_size", "%d", ++ gf_proc_dump_write("iobuf_pool.default_page_size", "%" GF_PRI_SIZET, ++ iobuf_pool->default_page_size); ++ gf_proc_dump_write("iobuf_pool.arena_size", "%" GF_PRI_SIZET, + iobuf_pool->arena_size); + gf_proc_dump_write("iobuf_pool.arena_cnt", "%d", + iobuf_pool->arena_cnt); +diff 
--git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c +index d64ac8a..6fbd2bb 100644 +--- a/libglusterfs/src/stack.c ++++ b/libglusterfs/src/stack.c +@@ -144,7 +144,7 @@ gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...) + out: + if (ret) { + gf_proc_dump_write("Unable to dump the frame information", +- "(Lock acquisition failed) %p", my_frame); ++ "(Lock acquisition failed)"); + return; + } + } +@@ -183,7 +183,7 @@ gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...) + gf_proc_dump_write("uid", "%d", call_stack->uid); + gf_proc_dump_write("gid", "%d", call_stack->gid); + gf_proc_dump_write("pid", "%d", call_stack->pid); +- gf_proc_dump_write("unique", "%Ld", call_stack->unique); ++ gf_proc_dump_write("unique", "%" PRIu64, call_stack->unique); + gf_proc_dump_write("lk-owner", "%s", lkowner_utoa (&call_stack->lk_owner)); + + if (call_stack->type == GF_OP_TYPE_FOP) +@@ -222,7 +222,7 @@ gf_proc_dump_pending_frames (call_pool_t *call_pool) + gf_proc_dump_add_section("global.callpool"); + section_added = _gf_true; + gf_proc_dump_write("callpool_address","%p", call_pool); +- gf_proc_dump_write("callpool.cnt","%d", call_pool->cnt); ++ gf_proc_dump_write("callpool.cnt", "%" PRId64, call_pool->cnt); + + + list_for_each_entry (trav, &call_pool->all_frames, all_frames) { +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index e9ecef5..a123adb 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -240,10 +240,11 @@ gf_proc_dump_xlator_mem_info (xlator_t *xl) + gf_proc_dump_add_section ("%s.%s - usage-type %s memusage", + xl->type, xl->name, + xl->mem_acct->rec[i].typestr); +- gf_proc_dump_write ("size", "%u", xl->mem_acct->rec[i].size); ++ gf_proc_dump_write ("size", "%" GF_PRI_SIZET, ++ xl->mem_acct->rec[i].size); + gf_proc_dump_write ("num_allocs", "%u", + xl->mem_acct->rec[i].num_allocs); +- gf_proc_dump_write ("max_size", "%u", ++ gf_proc_dump_write ("max_size", "%" GF_PRI_SIZET, + xl->mem_acct->rec[i].max_size); + gf_proc_dump_write ("max_num_allocs", "%u", + xl->mem_acct->rec[i].max_num_allocs); +@@ -275,9 +276,9 @@ gf_proc_dump_xlator_mem_info_only_in_use (xlator_t *xl) + gf_proc_dump_add_section ("%s.%s - usage-type %d", xl->type, + xl->name,i); + +- gf_proc_dump_write ("size", "%u", ++ gf_proc_dump_write ("size", "%" GF_PRI_SIZET, + xl->mem_acct->rec[i].size); +- gf_proc_dump_write ("max_size", "%u", ++ gf_proc_dump_write ("max_size", "%" GF_PRI_SIZET, + xl->mem_acct->rec[i].max_size); + gf_proc_dump_write ("num_allocs", "%u", + xl->mem_acct->rec[i].num_allocs); +@@ -475,7 +476,7 @@ gf_proc_dump_dict_info (glusterfs_ctx_t *ctx) + total_dicts = GF_ATOMIC_GET (ctx->stats.total_dicts_used); + total_pairs = GF_ATOMIC_GET (ctx->stats.total_pairs_used); + +- gf_proc_dump_write ("max-pairs-per-dict", "%u", ++ gf_proc_dump_write ("max-pairs-per-dict", "%" PRIu64, + GF_ATOMIC_GET (ctx->stats.max_dict_pairs)); + gf_proc_dump_write ("total-pairs-used", "%lu", total_pairs); + gf_proc_dump_write ("total-dicts-used", "%lu", total_dicts); +diff --git a/rpc/rpc-lib/src/rpc-drc.c b/rpc/rpc-lib/src/rpc-drc.c +index fb7d2f1..f597432 100644 +--- a/rpc/rpc-lib/src/rpc-drc.c ++++ b/rpc/rpc-lib/src/rpc-drc.c +@@ -565,10 +565,10 @@ rpcsvc_drc_priv (rpcsvc_drc_globals_t *drc) + gf_proc_dump_write (key, "%d", drc->lru_factor); + + gf_proc_dump_build_key (key, "drc", "duplicate_request_count"); +- gf_proc_dump_write (key, "%d", drc->cache_hits); ++ gf_proc_dump_write (key, "%" PRIu64, drc->cache_hits); + + 
gf_proc_dump_build_key (key, "drc", "in_transit_duplicate_requests"); +- gf_proc_dump_write (key, "%d", drc->intransit_hits); ++ gf_proc_dump_write (key, "%" PRIu64, drc->intransit_hits); + + list_for_each_entry (client, &drc->clients_head, client_list) { + gf_proc_dump_build_key (key, "client", "%d.ip-address", i); +diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c +index 2f0d8ce..4aef8ff 100644 +--- a/xlators/cluster/dht/src/dht-shared.c ++++ b/xlators/cluster/dht/src/dht-shared.c +@@ -179,7 +179,7 @@ dht_priv_dump (xlator_t *this) + conf->du_stats[i].avail_inodes); + + snprintf (key, sizeof (key), "du_stats[%d].log", i); +- gf_proc_dump_write (key, "%lu", ++ gf_proc_dump_write (key, "%" PRIu32, + conf->du_stats[i].log); + } + } +diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c +index 67006ab..6b32f7f 100644 +--- a/xlators/cluster/stripe/src/stripe.c ++++ b/xlators/cluster/stripe/src/stripe.c +@@ -5688,12 +5688,12 @@ stripe_priv_dump (xlator_t *this) + options = priv->pattern; + while (options != NULL) { + gf_proc_dump_write ("path_pattern", "%s", priv->pattern->path_pattern); +- gf_proc_dump_write ("options_block_size", "%ul", options->block_size); ++ gf_proc_dump_write ("options_block_size", "%" PRIu64, options->block_size); + + options = options->next; + } + +- gf_proc_dump_write ("block_size", "%ul", priv->block_size); ++ gf_proc_dump_write ("block_size", "%" PRIu64, priv->block_size); + gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down); + gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down); + gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported); +diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c +index 34ac4ca..602e130 100644 +--- a/xlators/debug/trace/src/trace.c ++++ b/xlators/debug/trace/src/trace.c +@@ -68,7 +68,7 @@ dump_history_trace (circular_buffer_t *cb, void *data) + ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec); + gf_proc_dump_write ("TIME", "%s", timestr); + +- gf_proc_dump_write ("FOP", "%s\n", cb->data); ++ gf_proc_dump_write ("FOP", "%s\n", (char *)cb->data); + + return 0; + } +diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c +index 3d68ffa..71068d3 100644 +--- a/xlators/features/quota/src/quota.c ++++ b/xlators/features/quota/src/quota.c +@@ -5224,7 +5224,7 @@ quota_priv_dump (xlator_t *this) + if (!priv) + goto out; + +- gf_proc_dump_add_section ("xlators.features.quota.priv", this->name); ++ gf_proc_dump_add_section ("xlators.features.quota.priv"); + + ret = TRY_LOCK (&priv->lock); + if (ret) +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 1c4f4e4..fbb4c53 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -5148,11 +5148,11 @@ fuse_priv_dump (xlator_t *this) + private->proto_minor); + gf_proc_dump_write("volfile", "%s", + private->volfile?private->volfile:"None"); +- gf_proc_dump_write("volfile_size", "%d", ++ gf_proc_dump_write("volfile_size", "%" GF_PRI_SIZET, + private->volfile_size); + gf_proc_dump_write("mount_point", "%s", + private->mount_point); +- gf_proc_dump_write("iobuf", "%u", ++ gf_proc_dump_write("iobuf", "%p", + private->iobuf); + gf_proc_dump_write("fuse_thread_started", "%d", + (int)private->fuse_thread_started); +@@ -5208,7 +5208,7 @@ dump_history_fuse (circular_buffer_t *cb, void *data) + ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec); + gf_proc_dump_write ("TIME", "%s", timestr); + +- 
gf_proc_dump_write ("message", "%s\n", cb->data); ++ gf_proc_dump_write ("message", "%s\n", (char *)cb->data); + + return 0; + } +diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c +index 700d8c2..de44ad2 100644 +--- a/xlators/performance/io-cache/src/io-cache.c ++++ b/xlators/performance/io-cache/src/io-cache.c +@@ -2065,8 +2065,8 @@ ioc_priv_dump (xlator_t *this) + gf_proc_dump_write ("cache_used", "%ld", priv->cache_used); + gf_proc_dump_write ("inode_count", "%u", priv->inode_count); + gf_proc_dump_write ("cache_timeout", "%u", priv->cache_timeout); +- gf_proc_dump_write ("min-file-size", "%u", priv->min_file_size); +- gf_proc_dump_write ("max-file-size", "%u", priv->max_file_size); ++ gf_proc_dump_write ("min-file-size", "%" PRIu64, priv->min_file_size); ++ gf_proc_dump_write ("max-file-size", "%" PRIu64, priv->max_file_size); + } + pthread_mutex_unlock (&priv->table_lock); + out: +diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c +index 583d67b..b6f1a17 100644 +--- a/xlators/performance/nl-cache/src/nl-cache-helper.c ++++ b/xlators/performance/nl-cache/src/nl-cache-helper.c +@@ -1192,14 +1192,14 @@ nlc_dump_inodectx (xlator_t *this, inode_t *inode) + + gf_proc_dump_write ("state", "%"PRIu64, nlc_ctx->state); + gf_proc_dump_write ("timer", "%p", nlc_ctx->timer); +- gf_proc_dump_write ("cache-time", "%lld", nlc_ctx->cache_time); ++ gf_proc_dump_write ("cache-time", "%" GF_PRI_TIME, nlc_ctx->cache_time); + gf_proc_dump_write ("cache-size", "%zu", nlc_ctx->cache_size); + gf_proc_dump_write ("refd-inodes", "%"PRIu64, nlc_ctx->refd_inodes); + + if (IS_PE_VALID (nlc_ctx->state)) + list_for_each_entry_safe (pe, tmp, &nlc_ctx->pe, list) { +- gf_proc_dump_write ("pe", "%p, %s", pe, +- pe->inode, pe->name); ++ gf_proc_dump_write ("pe", "%p, %p, %s", (void *)pe, ++ (void *)pe->inode, pe->name); + } + + if (IS_NE_VALID (nlc_ctx->state)) +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 2d77f2d..3245c8f 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -933,7 +933,7 @@ ob_fdctx_dump (xlator_t *this, fd_t *fd) + gf_proc_dump_write ("open_frame", "%p", ob_fd->open_frame); + + if (ob_fd->open_frame) +- gf_proc_dump_write ("open_frame.root.unique", "%p", ++ gf_proc_dump_write ("open_frame.root.unique", "%" PRIu64, + ob_fd->open_frame->root->unique); + + gf_proc_dump_write ("loc.path", "%s", ob_fd->loc.path); +diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c +index ca228b8..e377ac3 100644 +--- a/xlators/performance/quick-read/src/quick-read.c ++++ b/xlators/performance/quick-read/src/quick-read.c +@@ -796,7 +796,7 @@ qr_priv_dump (xlator_t *this) + + gf_proc_dump_add_section ("%s", key_prefix); + +- gf_proc_dump_write ("max_file_size", "%d", conf->max_file_size); ++ gf_proc_dump_write ("max_file_size", "%" PRIu64, conf->max_file_size); + gf_proc_dump_write ("cache_timeout", "%d", conf->cache_timeout); + + if (!table) { +@@ -811,7 +811,7 @@ qr_priv_dump (xlator_t *this) + } + + gf_proc_dump_write ("total_files_cached", "%d", file_count); +- gf_proc_dump_write ("total_cache_used", "%d", total_size); ++ gf_proc_dump_write ("total_cache_used", "%" PRIu64, total_size); + + out: + return 0; +diff --git a/xlators/performance/read-ahead/src/read-ahead.c 
b/xlators/performance/read-ahead/src/read-ahead.c +index 74ddf49..e02ca9f 100644 +--- a/xlators/performance/read-ahead/src/read-ahead.c ++++ b/xlators/performance/read-ahead/src/read-ahead.c +@@ -808,7 +808,6 @@ ra_fdctx_dump (xlator_t *this, fd_t *fd) + int32_t ret = 0, i = 0; + uint64_t tmp_file = 0; + char *path = NULL; +- char key[GF_DUMP_MAX_BUF_LEN] = {0, }; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; + + fd_ctx_get (fd, this, &tmp_file); +@@ -849,8 +848,7 @@ ra_fdctx_dump (xlator_t *this, fd_t *fd) + + for (page = file->pages.next; page != &file->pages; + page = page->next) { +- sprintf (key, "page[%d]", i); +- gf_proc_dump_write (key, "%p", page[i++]); ++ gf_proc_dump_write ("page", "%d: %p", i++, (void *)page); + ra_page_dump (page); + } + +@@ -1075,7 +1073,7 @@ ra_priv_dump (xlator_t *this) + if (ret) + goto out; + { +- gf_proc_dump_write ("page_size", "%d", conf->page_size); ++ gf_proc_dump_write ("page_size", "%" PRIu64, conf->page_size); + gf_proc_dump_write ("page_count", "%d", conf->page_count); + gf_proc_dump_write ("force_atime_update", "%d", + conf->force_atime_update); +diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c +index ef02e18..d655843 100644 +--- a/xlators/performance/write-behind/src/write-behind.c ++++ b/xlators/performance/write-behind/src/write-behind.c +@@ -2765,8 +2765,8 @@ wb_priv_dump (xlator_t *this) + + gf_proc_dump_add_section ("%s", key_prefix); + +- gf_proc_dump_write ("aggregate_size", "%d", conf->aggregate_size); +- gf_proc_dump_write ("window_size", "%d", conf->window_size); ++ gf_proc_dump_write ("aggregate_size", "%" PRIu64, conf->aggregate_size); ++ gf_proc_dump_write ("window_size", "%" PRIu64, conf->window_size); + gf_proc_dump_write ("flush_behind", "%d", conf->flush_behind); + gf_proc_dump_write ("trickling_writes", "%d", conf->trickling_writes); + +@@ -2798,7 +2798,7 @@ __wb_dump_requests (struct list_head *head, char *prefix) + else + gf_proc_dump_write ("wound", "no"); + +- gf_proc_dump_write ("generation-number", "%d", req->gen); ++ gf_proc_dump_write ("generation-number", "%" PRIu64, req->gen); + + gf_proc_dump_write ("req->op_ret", "%d", req->op_ret); + gf_proc_dump_write ("req->op_errno", "%d", req->op_errno); +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 674f1aa..1e69977 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -2859,7 +2859,7 @@ client_priv_dump (xlator_t *this) + + list_for_each_entry(tmp, &conf->saved_fds, sfd_pos) { + sprintf (key, "fd.%d.remote_fd", i); +- gf_proc_dump_write(key, "%d", tmp->remote_fd); ++ gf_proc_dump_write(key, "%" PRId64, tmp->remote_fd); + client_fd_lk_ctx_dump (this, tmp->lk_ctx, i); + i++; + } +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 7bfe780..e46fe99 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -6970,11 +6970,11 @@ posix_priv (xlator_t *this) + if (!priv) + return 0; + +- gf_proc_dump_write("base_path","%s", priv->base_path); +- gf_proc_dump_write("base_path_length","%d", priv->base_path_length); +- gf_proc_dump_write("max_read","%d", priv->read_value); +- gf_proc_dump_write("max_write","%d", priv->write_value); +- gf_proc_dump_write("nr_files","%ld", priv->nr_files); ++ gf_proc_dump_write("base_path", "%s", priv->base_path); ++ gf_proc_dump_write("base_path_length", "%d", priv->base_path_length); ++ gf_proc_dump_write("max_read", 
"%" PRId64, priv->read_value); ++ gf_proc_dump_write("max_write", "%" PRId64, priv->write_value); ++ gf_proc_dump_write("nr_files", "%ld", priv->nr_files); + + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0401-client_t.c-fix-the-format-error.patch b/SOURCES/0401-client_t.c-fix-the-format-error.patch new file mode 100644 index 0000000..e7d407e --- /dev/null +++ b/SOURCES/0401-client_t.c-fix-the-format-error.patch @@ -0,0 +1,41 @@ +From 2b764dcf58da83cdc9138bbe2f9a503400ce66c6 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Fri, 12 Oct 2018 10:42:35 +0530 +Subject: [PATCH 401/404] client_t.c: fix the format error + +Updates: CVE-2018-14661 + +BUG: 1637084 +Change-Id: Ieee5b41d24993a00fbe237a613d5db9dd20eee95 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/152630 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/client_t.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c +index dce27c1..a9ae62c 100644 +--- a/libglusterfs/src/client_t.c ++++ b/libglusterfs/src/client_t.c +@@ -603,7 +603,7 @@ client_dump (client_t *client, char *prefix) + return; + + memset(key, 0, sizeof key); +- gf_proc_dump_write("refcount", GF_PRI_ATOMIC, ++ gf_proc_dump_write("refcount", "%"GF_PRI_ATOMIC, + GF_ATOMIC_GET (client->count)); + } + +@@ -807,7 +807,7 @@ gf_client_dump_fdtables (xlator_t *this) + } + gf_proc_dump_build_key (key, "conn", "%d.ref", + count); +- gf_proc_dump_write (key, GF_PRI_ATOMIC, ++ gf_proc_dump_write (key, "%"GF_PRI_ATOMIC, + GF_ATOMIC_GET (client->count)); + if (client->bound_xl) { + gf_proc_dump_build_key (key, "conn", +-- +1.8.3.1 + diff --git a/SOURCES/0402-core-glusterfsd-keeping-fd-open-in-index-xlator.patch b/SOURCES/0402-core-glusterfsd-keeping-fd-open-in-index-xlator.patch new file mode 100644 index 0000000..0ca31fd --- /dev/null +++ b/SOURCES/0402-core-glusterfsd-keeping-fd-open-in-index-xlator.patch @@ -0,0 +1,263 @@ +From fad234b5a62df48b7abc726549f2abb6b0af7c04 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 16 Oct 2018 07:50:47 +0530 +Subject: [PATCH 402/404] core: glusterfsd keeping fd open in index xlator + +Problem: At the time of processing GF_EVENT_PARENT_DOWN + at brick xlator, it forwards the event to next xlator + only while xlator ensures no stub is in progress. + At io-thread xlator it decreases stub_cnt before the process + a stub and notify EVENT to next xlator + +Solution: Introduce a new counter to save stub_cnt and decrease + the counter after process the stub completely at io-thread + xlator. 
+ To avoid brick crash at the time of call xlator_mem_cleanup + move only brick xlator if detach brick name has found in + the graph + +Note: Thanks to pranith for sharing a simple reproducer to + reproduce the same + +> fixes bz#1637934 +> Change-Id: I1a694a001f7a5417e8771e3adf92c518969b6baa +> (Cherry pick from commit 7bf95631b52bd05b06122180f8bd4aa62c70b1a9) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21379/) + +Change-Id: I54b8ebb19819f9bbcbdd1448474ab084c0fd2eb6 +BUG: 1631372 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/152908 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd-mgmt.c | 15 +---- + tests/bugs/glusterd/brick-mux-fd-cleanup.t | 78 +++++++++++++++++++++++++ + xlators/performance/io-threads/src/io-threads.c | 23 ++++---- + xlators/performance/io-threads/src/io-threads.h | 3 +- + 4 files changed, 94 insertions(+), 25 deletions(-) + create mode 100644 tests/bugs/glusterd/brick-mux-fd-cleanup.t + +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index cbd436a..e3fceeb 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -270,6 +270,7 @@ xlator_mem_cleanup (xlator_t *this) { + top = glusterfsd_ctx->active->first; + LOCK (&ctx->volfile_lock); + /* TODO here we have leak for xlator node in a graph */ ++ /* Need to move only top xlator from a graph */ + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + victim = (*trav_p)->xlator; + if (victim->call_cleanup && !strcmp (victim->name, this->name)) { +@@ -277,20 +278,6 @@ xlator_mem_cleanup (xlator_t *this) { + break; + } + } +- /* TODO Sometime brick xlator is not moved from graph so followed below +- approach to move brick xlator from a graph, will move specific brick +- xlator from graph only while inode table and mem_acct are cleaned up +- */ +- trav_p = &top->children; +- while (*trav_p) { +- victim = (*trav_p)->xlator; +- if (victim->call_cleanup && !victim->itable && !victim->mem_acct) { +- (*trav_p) = (*trav_p)->next; +- } else { +- trav_p = &(*trav_p)->next; +- } +- } +- UNLOCK (&ctx->volfile_lock); + } + } + +diff --git a/tests/bugs/glusterd/brick-mux-fd-cleanup.t b/tests/bugs/glusterd/brick-mux-fd-cleanup.t +new file mode 100644 +index 0000000..de11c17 +--- /dev/null ++++ b/tests/bugs/glusterd/brick-mux-fd-cleanup.t +@@ -0,0 +1,78 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++#This .t tests that the fds from client are closed on brick when gluster volume ++#stop is executed in brick-mux setup. ++ ++cleanup; ++TEST glusterd ++TEST pidof glusterd ++ ++function keep_fd_open { ++#This function has to be run as background job because opening the fd in ++#foreground and running commands is leading to flush calls on these fds ++#which is making it very difficult to create the race where fds will be left ++#open even after the brick dies. 
++ exec 5>$M1/a ++ exec 6>$M1/b ++ while [ -f $M0/a ]; do sleep 1; done ++} ++ ++function count_open_files { ++ local brick_pid="$1" ++ local pattern="$2" ++ ls -l /proc/$brick_pid/fd | grep -i "$pattern" | wc -l ++} ++ ++TEST $CLI volume set all cluster.brick-multiplex on ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{2,3} ++#Have same configuration on both bricks so that they are multiplexed ++#Delay flush fop for a second ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume heal $V1 disable ++TEST $CLI volume set $V0 delay-gen posix ++TEST $CLI volume set $V0 delay-gen.enable flush ++TEST $CLI volume set $V0 delay-gen.delay-percentage 100 ++TEST $CLI volume set $V0 delay-gen.delay-duration 1000000 ++TEST $CLI volume set $V1 delay-gen posix ++TEST $CLI volume set $V1 delay-gen.enable flush ++TEST $CLI volume set $V1 delay-gen.delay-percentage 100 ++TEST $CLI volume set $V1 delay-gen.delay-duration 1000000 ++ ++TEST $CLI volume start $V0 ++TEST $CLI volume start $V1 ++ ++TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0 ++TEST $GFS -s $H0 --volfile-id=$V1 --direct-io-mode=enable $M1 ++ ++TEST touch $M0/a ++keep_fd_open & ++TEST $CLI volume profile $V1 start ++brick_pid=$(get_brick_pid $V1 $H0 $B0/${V1}2) ++TEST count_open_files $brick_pid "$B0/${V1}2/a" ++TEST count_open_files $brick_pid "$B0/${V1}2/b" ++TEST count_open_files $brick_pid "$B0/${V1}3/a" ++TEST count_open_files $brick_pid "$B0/${V1}3/b" ++ ++#If any other flush fops are introduced into the system other than the one at ++#cleanup it interferes with the race, so test for it ++EXPECT "^0$" echo "$($CLI volume profile $V1 info incremental | grep -i flush | wc -l)" ++#Stop the volume ++TEST $CLI volume stop $V1 ++ ++#Wait for cleanup resources or volume V1 ++EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/a" ++EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/b" ++EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/a" ++EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/b" ++ ++TEST rm -f $M0/a #Exit keep_fd_open() ++wait ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 ++ ++cleanup +diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c +index 41d48ab..2944c7d 100644 +--- a/xlators/performance/io-threads/src/io-threads.c ++++ b/xlators/performance/io-threads/src/io-threads.c +@@ -120,7 +120,7 @@ __iot_dequeue (iot_conf_t *conf, int *pri) + if (!stub) + return NULL; + +- GF_ATOMIC_DEC(conf->queue_size); ++ conf->queue_size--; + conf->queue_sizes[*pri]--; + + return stub; +@@ -153,7 +153,8 @@ __iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri) + } + list_add_tail (&stub->list, &ctx->reqs); + +- GF_ATOMIC_INC(conf->queue_size); ++ conf->queue_size++; ++ GF_ATOMIC_INC(conf->stub_cnt); + conf->queue_sizes[pri]++; + } + +@@ -182,7 +183,7 @@ iot_worker (void *data) + conf->ac_iot_count[pri]--; + pri = -1; + } +- while (GF_ATOMIC_GET(conf->queue_size) == 0) { ++ while (conf->queue_size == 0) { + if (conf->down) { + bye = _gf_true;/*Avoid sleep*/ + break; +@@ -220,8 +221,10 @@ iot_worker (void *data) + } + pthread_mutex_unlock (&conf->mutex); + +- if (stub) /* guard against spurious wakeups */ ++ if (stub) { /* guard against spurious wakeups */ + call_resume (stub); ++ GF_ATOMIC_DEC(conf->stub_cnt); ++ } + stub 
= NULL; + + if (bye) +@@ -816,7 +819,7 @@ __iot_workers_scale (iot_conf_t *conf) + gf_msg_debug (conf->this->name, 0, + "scaled threads to %d (queue_size=%d/%d)", + conf->curr_count, +- GF_ATOMIC_GET(conf->queue_size), scale); ++ conf->queue_size, scale); + } else { + break; + } +@@ -1030,7 +1033,7 @@ init (xlator_t *this) + bool, out); + + conf->this = this; +- GF_ATOMIC_INIT(conf->queue_size, 0); ++ GF_ATOMIC_INIT(conf->stub_cnt, 0); + + for (i = 0; i < IOT_PRI_MAX; i++) { + INIT_LIST_HEAD (&conf->clients[i]); +@@ -1075,7 +1078,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) + { + iot_conf_t *conf = this->private; + xlator_t *victim = data; +- uint64_t queue_size = 0; ++ uint64_t stub_cnt = 0; + struct timespec sleep_till = {0, }; + + if (GF_EVENT_PARENT_DOWN == event) { +@@ -1083,14 +1086,14 @@ notify (xlator_t *this, int32_t event, void *data, ...) + clock_gettime(CLOCK_REALTIME, &sleep_till); + sleep_till.tv_sec += 1; + /* Wait for draining stub from queue before notify PARENT_DOWN */ +- queue_size = GF_ATOMIC_GET(conf->queue_size); ++ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt); + + pthread_mutex_lock(&conf->mutex); + { +- while (queue_size) { ++ while (stub_cnt) { + (void)pthread_cond_timedwait(&conf->cond, &conf->mutex, + &sleep_till); +- queue_size = GF_ATOMIC_GET(conf->queue_size); ++ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt); + } + } + pthread_mutex_unlock(&conf->mutex); +diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h +index 7a6973c..57a136e 100644 +--- a/xlators/performance/io-threads/src/io-threads.h ++++ b/xlators/performance/io-threads/src/io-threads.h +@@ -75,7 +75,8 @@ struct iot_conf { + int32_t ac_iot_limit[IOT_PRI_MAX]; + int32_t ac_iot_count[IOT_PRI_MAX]; + int queue_sizes[IOT_PRI_MAX]; +- gf_atomic_t queue_size; ++ int32_t queue_size; ++ gf_atomic_t stub_cnt; + pthread_attr_t w_attr; + gf_boolean_t least_priority; /*Enable/Disable least-priority */ + +-- +1.8.3.1 + diff --git a/SOURCES/0403-afr-prevent-winding-inodelks-twice-for-arbiter-volum.patch b/SOURCES/0403-afr-prevent-winding-inodelks-twice-for-arbiter-volum.patch new file mode 100644 index 0000000..b843a12 --- /dev/null +++ b/SOURCES/0403-afr-prevent-winding-inodelks-twice-for-arbiter-volum.patch @@ -0,0 +1,98 @@ +From 03e4bab925b20832492c9954d3ecb6c10fe56548 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Wed, 10 Oct 2018 17:57:33 +0530 +Subject: [PATCH 403/404] afr: prevent winding inodelks twice for arbiter + volumes + +Backport of https://review.gluster.org/#/c/glusterfs/+/21380/ + +Problem: +In an arbiter volume, if there is a pending data heal of a file only on +arbiter brick, self-heal takes inodelks twice due to a code-bug but unlocks +it only once, leaving behind a stale lock on the brick. This causes +the next write to the file to hang. + +Fix: +Fix the code-bug to take lock only once. This bug was introduced master +with commit eb472d82a083883335bc494b87ea175ac43471ff + +Thanks to Pranith Kumar K for finding the RCA. 
+ +Change-Id: I15ad969e10a6a3c4bd255e2948b6be6dcddc61e1 +BUG: 1636902 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/152552 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bug-1637802-arbiter-stale-data-heal-lock.t | 44 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-data.c | 2 +- + 2 files changed, 45 insertions(+), 1 deletion(-) + create mode 100644 tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t + +diff --git a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t +new file mode 100644 +index 0000000..91ed39b +--- /dev/null ++++ b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t +@@ -0,0 +1,44 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++# Test to check that data self-heal does not leave any stale lock. ++ ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++# Create base entry in indices/xattrop ++echo "Data" > $M0/FILE ++ ++# Kill arbiter brick and write to FILE. ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++echo "arbiter down" >> $M0/FILE ++EXPECT 2 get_pending_heal_count $V0 ++ ++# Bring it back up and let heal complete. ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# write to the FILE must succeed. ++echo "this must succeed" >> $M0/FILE ++TEST [ $? 
-eq 0 ] ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index d3deb8f..2ac6e47 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -765,7 +765,7 @@ restore_time: + afr_selfheal_restore_time (frame, this, fd->inode, source, + healed_sinks, locked_replies); + +- if (!is_arbiter_the_only_sink || !empty_file) { ++ if (!is_arbiter_the_only_sink && !empty_file) { + ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, + 0, 0, data_lock); + if (ret < priv->child_count) { +-- +1.8.3.1 + diff --git a/SOURCES/0404-core-Resolve-some-warnings-to-release-a-build.patch b/SOURCES/0404-core-Resolve-some-warnings-to-release-a-build.patch new file mode 100644 index 0000000..b2d77a3 --- /dev/null +++ b/SOURCES/0404-core-Resolve-some-warnings-to-release-a-build.patch @@ -0,0 +1,53 @@ +From 117b04bf6379a85d21f77a1d961241e95ad67a44 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 16 Oct 2018 19:55:10 +0530 +Subject: [PATCH 404/404] core: Resolve some warnings to release a build + +Change-Id: I365073fbda9f19ef919f8d869f84a7018eb66d72 +BUG: 1631372 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/152991 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-quota.c | 2 +- + xlators/protocol/server/src/server.c | 2 -- + 2 files changed, 1 insertion(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c +index 6d3918b..55bbac7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-quota.c ++++ b/xlators/mgmt/glusterd/src/glusterd-quota.c +@@ -2101,7 +2101,7 @@ glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict) + if (errno == ERANGE || hard_limit < 0) + gf_asprintf (op_errstr, "Hard-limit " + "value out of range (0 - %"PRId64 +- "): %s", hard_limit_str); ++ "): %s", hard_limit, hard_limit_str); + else + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_CONVERSION_FAILED, +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index 3a429bc..65d712f 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -710,7 +710,6 @@ server_graph_janitor_threads(void *data) + { + xlator_t *victim = NULL; + xlator_t *this = NULL; +- server_conf_t *conf = NULL; + glusterfs_ctx_t *ctx = NULL; + char *victim_name = NULL; + server_cleanup_xprt_arg_t *arg = NULL; +@@ -724,7 +723,6 @@ server_graph_janitor_threads(void *data) + this = arg->this; + victim_name = arg->victim_name; + THIS = arg->this; +- conf = this->private; + + ctx = THIS->ctx; + GF_VALIDATE_OR_GOTO(this->name, ctx, out); +-- +1.8.3.1 + diff --git a/SOURCES/0405-glusterfsd-add-missing-UNLOCK.patch b/SOURCES/0405-glusterfsd-add-missing-UNLOCK.patch new file mode 100644 index 0000000..100e9eb --- /dev/null +++ b/SOURCES/0405-glusterfsd-add-missing-UNLOCK.patch @@ -0,0 +1,54 @@ +From 331b648352fb0a2cce1ac671f24adf46b1b76b38 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Tue, 23 Oct 2018 12:47:34 +0530 +Subject: [PATCH 405/406] glusterfsd: add missing UNLOCK + +Reproducer steps: (by Sanju Rakonde) +1. enable brick mux +2. create 3 volumes +3. start all the 3 volumes +4. stop 1st volume, it will be success +5. 
stop second volume, it will time out + +Problem: +Deadlock in glusterfs_handle_terminate() during volume stop of 2nd +successive volume resulting in timeout at gluster CLI. + +Solution: +Add missing UNLOCK to xlator_mem_cleanup() + +NOTE: +Upstream code review by Kaushal Madappa has confirmed that the code is +good at upstream. This needs to be a downstream only patch, however, +this patch is not required at a rebase so will not be marking it as +such. + +Upstream patch: https://review.gluster.org/c/glusterfs/+/19734 +Downstream patch: https://code.engineering.redhat.com/gerrit/152908 + +BUG: 1641489 +Change-Id: I7281aa6f03edcb720f9eca3d274025166ff0b601 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/153643 +Tested-by: RHGS Build Bot +Reviewed-by: Kaushal Madappa +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd-mgmt.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index e3fceeb..b952526 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -278,6 +278,7 @@ xlator_mem_cleanup (xlator_t *this) { + break; + } + } ++ UNLOCK (&ctx->volfile_lock); + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/0406-glusterd-improve-logging-for-stage_deleted-flag.patch b/SOURCES/0406-glusterd-improve-logging-for-stage_deleted-flag.patch new file mode 100644 index 0000000..ee4aac5 --- /dev/null +++ b/SOURCES/0406-glusterd-improve-logging-for-stage_deleted-flag.patch @@ -0,0 +1,55 @@ +From 33c75991d92229dc65b24535b8f1d7194b23beb6 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 23 Oct 2018 11:50:37 +0530 +Subject: [PATCH 406/406] glusterd: improve logging for stage_deleted flag + +> Change-Id: I5f0667a47ddd24cb00949c875c19f3d1dbd8d603 +> BUG: bz#1605077 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21463/ + +Change-Id: I5f0667a47ddd24cb00949c875c19f3d1dbd8d603 +BUG: 1618221 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/153671 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-locks.c | 4 ++++ + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c +index f4e0225..d75452d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-locks.c ++++ b/xlators/mgmt/glusterd/src/glusterd-locks.c +@@ -913,6 +913,10 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) + * stage_deleted flag is set back to false + */ + volinfo->stage_deleted = _gf_false; ++ gf_log(this->name, GF_LOG_INFO, ++ "Volume %s still exist, setting " ++ "stage deleted flag to false for the volume %s", ++ volinfo->volname, volinfo->volname); + } + ret = 0; + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 94e07cb..36d9bff 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1829,6 +1829,10 @@ glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr) + goto out; + } + volinfo->stage_deleted = _gf_true; ++ gf_log(this->name, GF_LOG_INFO, ++ "Setting stage deleted flag to true for " ++ "volume %s", ++ volinfo->volname); + ret = 0; + + out: +-- +1.8.3.1 + diff --git 
a/SOURCES/0407-spec-update-RHGS-version-for-RHGSWA.patch b/SOURCES/0407-spec-update-RHGS-version-for-RHGSWA.patch new file mode 100644 index 0000000..8379dd8 --- /dev/null +++ b/SOURCES/0407-spec-update-RHGS-version-for-RHGSWA.patch @@ -0,0 +1,33 @@ +From 84f54efa4c72f5ecb0a89001a8cb08e94052f5b7 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Mon, 22 Oct 2018 15:51:38 +0530 +Subject: [PATCH 407/407] spec: update RHGS version for RHGSWA + +Update RHGS version to 3.4 + +Change-Id: I12f6f3bf9d760fa80fc389793b98eb4611e8de30 +BUG: 1641586 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/153526 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index c3f5748..b6b7630 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -866,7 +866,7 @@ install -p -m 0744 -D extras/command-completion/gluster.bash \ + %{buildroot}%{_sysconfdir}/bash_completion.d/gluster + + %if ( 0%{?_build_server} ) +-echo "RHGS 3.4.0" > %{buildroot}%{_datadir}/glusterfs/release ++echo "RHGS 3.4" > %{buildroot}%{_datadir}/glusterfs/release + %endif + + %clean +-- +1.8.3.1 + diff --git a/SOURCES/0409-Update-database-profile-group.patch b/SOURCES/0409-Update-database-profile-group.patch new file mode 100644 index 0000000..075a252 --- /dev/null +++ b/SOURCES/0409-Update-database-profile-group.patch @@ -0,0 +1,50 @@ +From f56ad2fc0ba3f3b78dc854b0c09c5c8f9bb9db77 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 21 Sep 2018 21:51:46 +0200 +Subject: [PATCH 409/444] Update database profile group + +Some performance testing has revealed that pgbench performs 3x better +when these options are set: + +performance.client-io-threads=on +performance.open-behind=on +performance.read-after-open=yes +server.event-threads=4 +client.event-threads=4 + +> Upstream patch: https://review.gluster.org/c/glusterfs/+/21247 +> Change-Id: I36ce389f893a8af13aac5f8285104d749b73d098 +> fixes: bz#1631886 +> Signed-off-by: Xavi Hernandez + +Change-Id: I36ce389f893a8af13aac5f8285104d749b73d098 +BUG: 1644120 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/154881 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-db-workload | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/extras/group-db-workload b/extras/group-db-workload +index c9caf21..9334d6f 100644 +--- a/extras/group-db-workload ++++ b/extras/group-db-workload +@@ -1,4 +1,4 @@ +-performance.open-behind=off ++performance.open-behind=on + performance.write-behind=off + performance.stat-prefetch=off + performance.quick-read=off +@@ -6,3 +6,7 @@ performance.strict-o-direct=on + performance.read-ahead=off + performance.io-cache=off + performance.readdir-ahead=off ++performance.client-io-threads=on ++server.event-threads=4 ++client.event-threads=4 ++performance.read-after-open=yes +-- +1.8.3.1 + diff --git a/SOURCES/0410-cli-fix-glusterd-memory-leak-cause-by-gluster-v-stat.patch b/SOURCES/0410-cli-fix-glusterd-memory-leak-cause-by-gluster-v-stat.patch new file mode 100644 index 0000000..a27f0f9 --- /dev/null +++ b/SOURCES/0410-cli-fix-glusterd-memory-leak-cause-by-gluster-v-stat.patch @@ -0,0 +1,42 @@ +From 4bfbc59a0cbfb28325c16e81480decab003fe6d1 Mon Sep 17 00:00:00 2001 +From: shujun10086 +Date: Tue, 2 Oct 2018 08:37:17 +0000 +Subject: [PATCH 410/444] cli: fix glusterd memory leak 
cause by "gluster v + status volume_name" + +If use this command every some seconds for example 15s to check gluster brick +status, the glusterd will use about 1G memory in a year. free the value of rsp +in gf_cli_status_cbk. glusterd allocate the value of rsp and send it to cli, but +cli do not free the value, that cause glusterd memory leak. + +> fixes: bz#1635480 +> Change-Id: I3f19cd0d4b791ae1b35f9664b3a668b1579f1178 +> Signed-off-by: shujun10086 + +upstream patch: https://review.gluster.org/#/c/21316/ + +Change-Id: I3f19cd0d4b791ae1b35f9664b3a668b1579f1178 +BUG: 1635100 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/154882 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-rpc-ops.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 54b61ee65..10f772c 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -8515,6 +8515,7 @@ cont: + ret = rsp.op_ret; + + out: ++ FREE(rsp.dict.dict_val); + if (dict) + dict_unref (dict); + GF_FREE (status.brick); +-- +1.8.3.1 + diff --git a/SOURCES/0411-glusterd-ensure-volinfo-caps-is-set-to-correct-value.patch b/SOURCES/0411-glusterd-ensure-volinfo-caps-is-set-to-correct-value.patch new file mode 100644 index 0000000..5563776 --- /dev/null +++ b/SOURCES/0411-glusterd-ensure-volinfo-caps-is-set-to-correct-value.patch @@ -0,0 +1,86 @@ +From da38c139d41c839244cd5acc0464ddf06fa51c78 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 3 Oct 2018 23:58:37 +0530 +Subject: [PATCH 411/444] glusterd: ensure volinfo->caps is set to correct + value + +With the commit febf5ed4848, during the volume create op, +we are setting volinfo->caps to 0, only if any of the bricks +belong to the same node and brickinfo->vg[0] is null. +Previously, we used to set volinfo->caps to 0, when +either brick doesn't belong to the same node or brickinfo->vg[0] +is null. + +With this patch, we set volinfo->caps to 0, when either brick +doesn't belong to the same node or brickinfo->vg[0] is null. +(as we do earlier without commit febf5ed4848). + +> fixes: bz#1635820 +> Change-Id: I00a97415786b775fb088ac45566ad52b402f1a49 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21336/ + +Change-Id: I00a97415786b775fb088ac45566ad52b402f1a49 +BUG: 1635136 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/154909 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bug-1636957-peer-reject-on-glusterd-reboot.t | 29 ++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 ++ + 2 files changed, 31 insertions(+) + create mode 100644 tests/bugs/glusterd/bug-1636957-peer-reject-on-glusterd-reboot.t + +diff --git a/tests/bugs/glusterd/bug-1636957-peer-reject-on-glusterd-reboot.t b/tests/bugs/glusterd/bug-1636957-peer-reject-on-glusterd-reboot.t +new file mode 100644 +index 0000000..b462b38 +--- /dev/null ++++ b/tests/bugs/glusterd/bug-1636957-peer-reject-on-glusterd-reboot.t +@@ -0,0 +1,29 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../cluster.rc ++. 
$(dirname $0)/../../volume.rc ++ ++function peer_count { ++eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ ++cleanup ++ ++TEST launch_cluster 2 ++ ++TEST $CLI_1 peer probe $H2; ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1 ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2 ++ ++TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 ++ ++# rebooting a node which doesn't host bricks for any one volume ++# peer should not go into rejected state ++TEST kill_glusterd 2 ++TEST start_glusterd 2 ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1 ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2 ++ ++cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 36d9bff..87b7acc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2485,6 +2485,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) + caps = 0; + } + #endif ++ } else { ++ caps = 0; + } + + cds_list_add_tail (&brickinfo->brick_list, &volinfo->bricks); +-- +1.8.3.1 + diff --git a/SOURCES/0412-glusterd-set-fsid-while-performing-replace-brick.patch b/SOURCES/0412-glusterd-set-fsid-while-performing-replace-brick.patch new file mode 100644 index 0000000..2535ab0 --- /dev/null +++ b/SOURCES/0412-glusterd-set-fsid-while-performing-replace-brick.patch @@ -0,0 +1,126 @@ +From 1e1495a8d5356e6a4f724c211cdd17c5e3f399b5 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 30 Oct 2018 16:36:50 +0530 +Subject: [PATCH 412/444] glusterd: set fsid while performing replace brick + +While performing the replace-brick operation, we should set +fsid value to the new brick. + +> fixes: bz#1637196 +> Change-Id: I9e9a4962fc0c2f5dff43e4ac11767814a0c0beaf +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21513/ + +Change-Id: I9e9a4962fc0c2f5dff43e4ac11767814a0c0beaf +BUG: 1644279 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/154907 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../df-results-post-replace-brick-operations.t | 58 ++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 15 ++++++ + 2 files changed, 73 insertions(+) + create mode 100644 tests/bugs/glusterd/df-results-post-replace-brick-operations.t + +diff --git a/tests/bugs/glusterd/df-results-post-replace-brick-operations.t b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t +new file mode 100644 +index 0000000..443911c +--- /dev/null ++++ b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t +@@ -0,0 +1,58 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup ++TEST glusterd ++ ++#Create brick partitions ++TEST truncate -s 100M $B0/brick1 ++TEST truncate -s 100M $B0/brick2 ++TEST truncate -s 100M $B0/brick3 ++TEST truncate -s 100M $B0/brick4 ++TEST truncate -s 100M $B0/brick5 ++ ++LO1=`SETUP_LOOP $B0/brick1` ++TEST [ $? -eq 0 ] ++TEST MKFS_LOOP $LO1 ++ ++LO2=`SETUP_LOOP $B0/brick2` ++TEST [ $? -eq 0 ] ++TEST MKFS_LOOP $LO2 ++ ++LO3=`SETUP_LOOP $B0/brick3` ++TEST [ $? -eq 0 ] ++TEST MKFS_LOOP $LO3 ++ ++LO4=`SETUP_LOOP $B0/brick4` ++TEST [ $? -eq 0 ] ++TEST MKFS_LOOP $LO4 ++ ++LO5=`SETUP_LOOP $B0/brick5` ++TEST [ $? 
-eq 0 ] ++TEST MKFS_LOOP $LO5 ++ ++TEST mkdir -p $B0/${V0}1 $B0/${V0}2 $B0/${V0}3 $B0/${V0}4 $B0/${V0}5 ++TEST MOUNT_LOOP $LO1 $B0/${V0}1 ++TEST MOUNT_LOOP $LO2 $B0/${V0}2 ++TEST MOUNT_LOOP $LO3 $B0/${V0}3 ++TEST MOUNT_LOOP $LO4 $B0/${V0}4 ++TEST MOUNT_LOOP $LO5 $B0/${V0}5 ++ ++# create a subdirectory in mount point and use it for volume creation ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}1/brick1 $H0:$B0/${V0}2/brick1 $H0:$B0/${V0}3/brick1 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count ++ ++# mount the volume and check the size at mount point ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') ++ ++# perform replace brick operations ++TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1/brick1 $H0:$B0/${V0}4/brick1 commit force ++TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}2/brick1 $H0:$B0/${V0}5/brick1 commit force ++ ++# check for the size at mount point, it should be same as previous ++total_space_new=$(df -P $M0 | tail -1 | awk '{ print $2}') ++TEST [ $total_space -eq $total_space_new ] +diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +index a037323..5fc3669 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c ++++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +@@ -362,6 +362,7 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo, + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; ++ struct statvfs brickstat = {0,}; + + this = THIS; + GF_ASSERT (this); +@@ -379,6 +380,20 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo, + ret = glusterd_resolve_brick (new_brickinfo); + if (ret) + goto out; ++ if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) { ++ ret = sys_statvfs(new_brickinfo->path, &brickstat); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STATVFS_FAILED, ++ "Failed to fetch disk utilization " ++ "from the brick (%s:%s). Please check the health of " ++ "the brick. Error code was %s", ++ new_brickinfo->hostname, new_brickinfo->path, ++ strerror(errno)); ++ ++ goto out; ++ } ++ new_brickinfo->statfs_fsid = brickstat.f_fsid; ++ } + + ret = glusterd_volume_brickinfo_get_by_brick (old_brick, + volinfo, &old_brickinfo, +-- +1.8.3.1 + diff --git a/SOURCES/0413-glusterfind-add-logs-to-identify-parsing-phases.patch b/SOURCES/0413-glusterfind-add-logs-to-identify-parsing-phases.patch new file mode 100644 index 0000000..9e34554 --- /dev/null +++ b/SOURCES/0413-glusterfind-add-logs-to-identify-parsing-phases.patch @@ -0,0 +1,66 @@ +From 23dda42f6fa9fe0e6def1b6b0cef8dfdd9a5dcb3 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Mon, 5 Nov 2018 15:02:36 +0530 +Subject: [PATCH 413/444] glusterfind: add logs to identify parsing phases + +Add logs to idenitfy start and finish of changelog parsing phases. 
+ +mainline: +> fixes: bz#1632236 +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/21262 +> Change-Id: Id250231f2af7829f887401d30ac98875ae1ae793 +> Signed-off-by: Milind Changire + +Change-Id: Id250231f2af7829f887401d30ac98875ae1ae793 +BUG: 1631166 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/154905 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tools/glusterfind/src/changelog.py | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index 2376af2..8354cc9 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -273,6 +273,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args): + fail("%s: %s Historical Changelogs not available: %s" % + (args.node, brick, e), logger=logger) + ++ logger.info("[1/4] Starting changelog parsing ...") + try: + # scan followed by getchanges till scan returns zero. + # history_scan() is blocking call, till it gets the number +@@ -301,18 +302,27 @@ def get_changes(brick, hash_dir, log_file, start, end, args): + fail("%s Error during Changelog Crawl: %s" % (brick, e), + logger=logger) + ++ logger.info("[1/4] Finished changelog parsing.") ++ + # Convert all pgfid available from Changelogs ++ logger.info("[2/4] Starting 'pgfid to path' conversions ...") + pgfid_to_path(brick, changelog_data) + changelog_data.commit() ++ logger.info("[2/4] Finished 'pgfid to path' conversions.") + + # Convert all GFIDs for which no other additional details available ++ logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...") + gfid_to_path_using_pgfid(brick, changelog_data, args) + changelog_data.commit() ++ logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.") + + # If some GFIDs fail to get converted from previous step, + # convert using find ++ logger.info("[4/4] Starting 'gfid to path using batchfind' " ++ "conversions ...") + gfid_to_path_using_batchfind(brick, changelog_data) + changelog_data.commit() ++ logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.") + + return actual_end + +-- +1.8.3.1 + diff --git a/SOURCES/0414-logrotate-utilize-the-new-maxsize-option.patch b/SOURCES/0414-logrotate-utilize-the-new-maxsize-option.patch new file mode 100644 index 0000000..7809452 --- /dev/null +++ b/SOURCES/0414-logrotate-utilize-the-new-maxsize-option.patch @@ -0,0 +1,121 @@ +From 405a367205c72318fc48d014a201eab3b7031010 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Mon, 5 Nov 2018 10:27:10 +0530 +Subject: [PATCH 414/444] logrotate: utilize the new 'maxsize' option + +Since logrotate 3.8.x version, a new option 'maxsize' is supported, +which helps in rotating the logs before the specified time if the +size exceeds maxsize limit. This should help in reducing the +overflow of gluster logs. 
+ +Upstream: +> URL: https://review.gluster.org/21187 + +BUG: 1599808 +Change-Id: Ic662ada8b73798146736ff81963053d8981745b8 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/154846 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +Tested-by: RHGS Build Bot +--- + .testignore | 2 ++ + extras/glusterfs-georep-logrotate | 24 +++++++++++++++++++++--- + extras/glusterfs-logrotate | 14 ++++++++++++-- + 3 files changed, 35 insertions(+), 5 deletions(-) + +diff --git a/.testignore b/.testignore +index 4a72bc4..6e5df3a 100644 +--- a/.testignore ++++ b/.testignore +@@ -32,6 +32,8 @@ extras/cliutils/README.md + extras/command-completion/README + extras/create_new_xlator/README.md + extras/glusterfs.vim ++extras/glusterfs-logrotate ++extras/glusterfs-georep-logrotate + extras/group-gluster-block + extras/group-db-workload + extras/group-metadata-cache +diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate +index 6fdb8c6..3e7ecf3 100644 +--- a/extras/glusterfs-georep-logrotate ++++ b/extras/glusterfs-georep-logrotate +@@ -1,6 +1,12 @@ + /var/log/glusterfs/geo-replication/*/*.log { + sharedscripts +- rotate 52 ++ weekly ++ maxsize 10M ++ minsize 100k ++ ++ # 6 months of logs are good enough ++ rotate 26 ++ + missingok + compress + delaycompress +@@ -15,7 +21,13 @@ + + /var/log/glusterfs/geo-replication-slaves/*.log { + sharedscripts +- rotate 52 ++ weekly ++ maxsize 10M ++ minsize 100k ++ ++ # 6 months of logs are good enough ++ rotate 26 ++ + missingok + compress + delaycompress +@@ -30,7 +42,13 @@ + + /var/log/glusterfs/geo-replication-slaves/*/*.log { + sharedscripts +- rotate 52 ++ weekly ++ maxsize 10M ++ minsize 100k ++ ++ # 6 months of logs are good enough ++ rotate 26 ++ + missingok + compress + delaycompress +diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate +index 575c0ee..75f700e 100644 +--- a/extras/glusterfs-logrotate ++++ b/extras/glusterfs-logrotate +@@ -2,7 +2,12 @@ + /var/log/glusterfs/*.log { + sharedscripts + weekly +- rotate 52 ++ maxsize 10M ++ minsize 100k ++ ++# 6 months of logs are good enough ++ rotate 26 ++ + missingok + compress + delaycompress +@@ -17,7 +22,12 @@ + /var/log/glusterfs/bricks/*.log { + sharedscripts + weekly +- rotate 52 ++ maxsize 10M ++ minsize 100k ++ ++# 6 months of logs are good enough ++ rotate 26 ++ + missingok + compress + delaycompress +-- +1.8.3.1 + diff --git a/SOURCES/0415-statedump-fix-clang-null-dereference-error.patch b/SOURCES/0415-statedump-fix-clang-null-dereference-error.patch new file mode 100644 index 0000000..08ed7bb --- /dev/null +++ b/SOURCES/0415-statedump-fix-clang-null-dereference-error.patch @@ -0,0 +1,39 @@ +From a469cad3a6b7f340c6ac6fad7c2186299d675d70 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Mon, 5 Nov 2018 10:22:44 +0530 +Subject: [PATCH 415/444] statedump: fix clang null dereference error + +ctx->active can be null, and is checked elsewhere in the +same function. In another case, where 'ctx->active' gets +dereferenced, it needs to be validated before the loop +is hit. 
+ +Upstream: +> URL: https://review.gluster.org/21493 + +BUG: 1643035 +Change-Id: I799d92c8089ddbfd9171da4e7e1d77ac91133aba +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/154845 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +Tested-by: RHGS Build Bot +--- + libglusterfs/src/statedump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index a123adb..a4635f3 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -812,7 +812,7 @@ gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx) + if (!ctx) + goto out; + +- if (ctx) { ++ if (ctx && ctx->active) { + top = ctx->active->first; + for (trav_p = &top->children; *trav_p; + trav_p = &(*trav_p)->next) { +-- +1.8.3.1 + diff --git a/SOURCES/0416-glusterd-ignore-RPC-events-when-glusterd-is-shutting.patch b/SOURCES/0416-glusterd-ignore-RPC-events-when-glusterd-is-shutting.patch new file mode 100644 index 0000000..d45b92f --- /dev/null +++ b/SOURCES/0416-glusterd-ignore-RPC-events-when-glusterd-is-shutting.patch @@ -0,0 +1,63 @@ +From 04e697b79edd55680a319e6fdb5983a1e5686db9 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Wed, 3 Oct 2018 16:34:54 +0530 +Subject: [PATCH 416/444] glusterd: ignore RPC events when glusterd is shutting + down + +When glusterd receives a SIGTERM while it receives RPC +connect/disconnect/destroy events, the thread might lead to a crash +while accessing rcu_read_lock () as the clean up thread might have +already freed up the resources. This is more observable when glusterd +comes up with upgrade mode = on during upgrade process. + +The solution is to ignore these events if glusterd is already in the +middle of cleanup_and_exit (). 
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/21330/ + +>Fixes: bz#1635593 +>Change-Id: I12831d31c2f689d4deb038b83b9421bd5cce26d9 +>Signed-off-by: Atin Mukherjee + +Change-Id: I12831d31c2f689d4deb038b83b9421bd5cce26d9 +BUG: 1635071 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/154848 +Reviewed-by: Sanju Rakonde +Tested-by: Sanju Rakonde +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 861ff17..bf37e70 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -6340,6 +6340,7 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + glusterd_peerctx_t *peerctx = NULL; + gf_boolean_t quorum_action = _gf_false; + glusterd_volinfo_t *volinfo = NULL; ++ glusterfs_ctx_t *ctx = NULL; + uuid_t uuid; + + peerctx = mydata; +@@ -6355,7 +6356,14 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + GF_FREE (peerctx); + return 0; + } +- ++ ctx = this->ctx; ++ GF_VALIDATE_OR_GOTO (this->name, ctx, out); ++ if (ctx->cleanup_started) { ++ gf_log (this->name, GF_LOG_INFO, "glusterd already received a " ++ "SIGTERM, dropping the event %d for peer %s", event, ++ peerctx->peername); ++ return 0; ++ } + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); +-- +1.8.3.1 + diff --git a/SOURCES/0417-cli-Add-warning-message-while-converting-to-replica-.patch b/SOURCES/0417-cli-Add-warning-message-while-converting-to-replica-.patch new file mode 100644 index 0000000..58f9366 --- /dev/null +++ b/SOURCES/0417-cli-Add-warning-message-while-converting-to-replica-.patch @@ -0,0 +1,190 @@ +From 1a24a7942fe9ecccaf29ae9bc125cd9b08fc8906 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 5 Nov 2018 17:57:55 +0530 +Subject: [PATCH 417/444] cli: Add warning message while converting to replica + 2 configuration + +Backport of: https://review.gluster.org/#/c/glusterfs/+/21136/ + +Currently while creating replica 2 volume we display a warning message +of ending up in split-brain. But while converting an existing volume +from other configuration to replica 2 by add-brick or remove-brick +operations we do not show any such messages. +With this fix in add-brick and remove-brick cases also we will display +the same warning message and prompt for confirmation if the configuration +changes to replica 2. 
+ +Change-Id: Id7b1a40e80fca3e1043b802fa5f7c3b656ef2228 +BUG: 1579758 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/154947 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Atin Mukherjee +--- + cli/src/cli-cmd-parser.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- + cli/src/cli-cmd-volume.c | 11 +++++++---- + cli/src/cli.h | 10 +++++----- + 3 files changed, 55 insertions(+), 14 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 7917d66..3745fb4 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1774,8 +1774,8 @@ out: + } + + int32_t +-cli_cmd_volume_add_brick_parse (const char **words, int wordcount, +- dict_t **options, int *ret_type) ++cli_cmd_volume_add_brick_parse (struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, int *ret_type) + { + dict_t *dict = NULL; + char *volname = NULL; +@@ -1790,6 +1790,8 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount, + int index; + gf_boolean_t is_force = _gf_false; + int wc = wordcount; ++ gf_answer_t answer = GF_ANSWER_NO; ++ const char *question = NULL; + + GF_ASSERT (words); + GF_ASSERT (options); +@@ -1854,6 +1856,23 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount, + goto out; + index = 7; + } ++ ++ if (count == 2) { ++ if (strcmp (words[wordcount - 1], "force")) { ++ question = "Replica 2 volumes are prone to " ++ "split-brain. Use Arbiter or " ++ "Replica 3 to avoid this.\n" ++ "Do you still want to continue?\n"; ++ answer = cli_cmd_get_confirmation (state, ++ question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log ("cli", GF_LOG_ERROR, "Add brick" ++ " cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } ++ } + } else if ((strcmp (w, "stripe")) == 0) { + type = GF_CLUSTER_TYPE_STRIPE; + count = strtol (words[4], NULL, 0); +@@ -2061,9 +2080,9 @@ out: + } + + int32_t +-cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, +- dict_t **options, int *question, +- int *brick_count) ++cli_cmd_volume_remove_brick_parse (struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, ++ int *question, int *brick_count) + { + dict_t *dict = NULL; + char *volname = NULL; +@@ -2081,6 +2100,8 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, + char *w = NULL; + int32_t command = GF_OP_CMD_NONE; + long count = 0; ++ gf_answer_t answer = GF_ANSWER_NO; ++ const char *ques = NULL; + + GF_ASSERT (words); + GF_ASSERT (options); +@@ -2115,6 +2136,23 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, + goto out; + } + ++ if (count == 2) { ++ if (strcmp (words[wordcount - 1], "force")) { ++ ques = "Replica 2 volumes are prone to " ++ "split-brain. 
Use Arbiter or Replica 3 " ++ "to avoid this.\n" ++ "Do you still want to continue?\n"; ++ answer = cli_cmd_get_confirmation (state, ++ ques); ++ if (GF_ANSWER_NO == answer) { ++ gf_log ("cli", GF_LOG_ERROR, "Remove " ++ "brick cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } ++ } ++ + ret = dict_set_int32 (dict, "replica-count", count); + if (ret) + goto out; +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index a1f0840..32efa73 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -1021,7 +1021,8 @@ cli_cmd_volume_add_brick_cbk (struct cli_state *state, + if (!frame) + goto out; + +- ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options, 0); ++ ret = cli_cmd_volume_add_brick_parse (state, words, wordcount, &options, ++ 0); + if (ret) { + cli_usage_out (word->pattern); + parse_error = 1; +@@ -1151,7 +1152,8 @@ do_cli_cmd_volume_attach_tier (struct cli_state *state, + if (!frame) + goto out; + +- ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options, &type); ++ ret = cli_cmd_volume_add_brick_parse (state, words, wordcount, &options, ++ &type); + if (ret) { + cli_usage_out (word->pattern); + parse_error = 1; +@@ -2032,8 +2034,9 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state, + if (!frame) + goto out; + +- ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options, +- &need_question, &brick_count); ++ ret = cli_cmd_volume_remove_brick_parse (state, words, wordcount, ++ &options, &need_question, ++ &brick_count); + if (ret) { + cli_usage_out (word->pattern); + parse_error = 1; +diff --git a/cli/src/cli.h b/cli/src/cli.h +index c9bf93d..109dcd4 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -264,8 +264,8 @@ cli_cmd_get_state_parse (struct cli_state *state, const char **words, + int wordcount, dict_t **options, char **op_errstr); + + int32_t +-cli_cmd_volume_add_brick_parse (const char **words, int wordcount, +- dict_t **options, int *type); ++cli_cmd_volume_add_brick_parse (struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, int *type); + + int32_t + cli_cmd_volume_detach_tier_parse (const char **words, int wordcount, +@@ -280,9 +280,9 @@ cli_cmd_volume_old_tier_parse (const char **words, int wordcount, + dict_t **options); + + int32_t +-cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, +- dict_t **options, int *question, +- int *brick_count); ++cli_cmd_volume_remove_brick_parse (struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, ++ int *question, int *brick_count); + + int32_t + cli_cmd_volume_replace_brick_parse (const char **words, int wordcount, +-- +1.8.3.1 + diff --git a/SOURCES/0418-cli-correct-rebalance-status-elapsed-check.patch b/SOURCES/0418-cli-correct-rebalance-status-elapsed-check.patch new file mode 100644 index 0000000..9aa3154 --- /dev/null +++ b/SOURCES/0418-cli-correct-rebalance-status-elapsed-check.patch @@ -0,0 +1,58 @@ +From 414d6d378b7d63b172859f619bd3ffb72bd3f434 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Tue, 8 Aug 2017 23:11:10 +0530 +Subject: [PATCH 418/444] cli: correct rebalance status elapsed check + +Check that elapsed time has crossed 10 mins for at least +one rebalance process before displaying the estimates. 
+ +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/18000/ + +> Change-Id: Ib357a6f0d0125a178e94ede1e31514fdc6ce3593 +> BUG: 1479528 +> Signed-off-by: N Balachandran + +Change-Id: Ic4606acad991b9369c6b674691e0ec15621c6932 +BUG: 1479446 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/154929 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + cli/src/cli-rpc-ops.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 10f772c..5623950 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -1616,6 +1616,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, + gf_boolean_t down = _gf_false; + gf_boolean_t fix_layout = _gf_false; + uint64_t max_time = 0; ++ uint64_t max_elapsed = 0; + uint64_t time_left = 0; + gf_boolean_t show_estimates = _gf_false; + +@@ -1758,6 +1759,9 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, + gf_log ("cli", GF_LOG_TRACE, + "failed to get time left"); + ++ if (elapsed > max_elapsed) ++ max_elapsed = elapsed; ++ + if (time_left > max_time) + max_time = time_left; + +@@ -1818,7 +1822,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, + if (!show_estimates) { + goto out; + } +- if (elapsed <= REBAL_ESTIMATE_START_TIME) { ++ if (max_elapsed <= REBAL_ESTIMATE_START_TIME) { + cli_out ("The estimated time for rebalance to complete " + "will be unavailable for the first 10 " + "minutes."); +-- +1.8.3.1 + diff --git a/SOURCES/0419-glusterfs-During-reconfigure-set-log-level-per-xlato.patch b/SOURCES/0419-glusterfs-During-reconfigure-set-log-level-per-xlato.patch new file mode 100644 index 0000000..757266c --- /dev/null +++ b/SOURCES/0419-glusterfs-During-reconfigure-set-log-level-per-xlato.patch @@ -0,0 +1,84 @@ +From 56fb13d05cb4465c14cc231bab1296a48c33c57d Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 6 Nov 2018 09:06:34 +0530 +Subject: [PATCH 419/444] glusterfs: During reconfigure set log-level per + xlator level + +Problem: In brick_mux environment, while a user has enabled brick-log-level + for anyone volume, it automatically enables for other volumes + also those are attached with same brick. + +Solution: A log-level option is automatically enabled for other volumes + because log-level saved in glusterfsd_ctx and ctx is common for + volumes those are attached with same brick. To resolve it + set log level for all children xlator's at the time of the graph + reconfigure at io-stat xlator. 
+ +> Change-Id: Id9a6efa05d286e0bea2d47f49292d084e7bb2fcf +> fixes: bz#1640495 +> (Reviwed on upstream link https://review.gluster.org/#/c/glusterfs/+/20488/) +> (Cherry pick from commit c34e4161f3cb6539ec83a9020f3d27eb4759a975) + +Change-Id: I1dd57c52997f16e8a05f982c6c05bb4f758e8bd3 +BUG: 1598407 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/155021 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/debug/io-stats/src/io-stats.c | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index 0f71334..aade097 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -3812,6 +3812,35 @@ ios_set_log_format_code (struct ios_conf *conf) + conf->dump_format = IOS_DUMP_TYPE_SAMPLES; + } + ++void ++xlator_set_loglevel(xlator_t *this, int log_level) ++{ ++ glusterfs_ctx_t *ctx = NULL; ++ glusterfs_graph_t *active = NULL; ++ xlator_t *top = NULL; ++ xlator_t *trav = this; ++ ++ ctx = this->ctx; ++ GF_ASSERT(ctx); ++ active = ctx->active; ++ top = active->first; ++ ++ if (strcmp(top->type, "protocol/server") || (log_level == -1)) ++ return; ++ ++ /* Set log-level for server xlator */ ++ top->loglevel = log_level; ++ ++ /* Set log-level for parent xlator */ ++ if (this->parents) ++ this->parents->xlator->loglevel = log_level; ++ ++ while (trav) { ++ trav->loglevel = log_level; ++ trav = trav->next; ++ } ++} ++ + int + reconfigure (xlator_t *this, dict_t *options) + { +@@ -3867,7 +3896,8 @@ reconfigure (xlator_t *this, dict_t *options) + GF_OPTION_RECONF ("log-level", log_str, options, str, out); + if (log_str) { + log_level = glusterd_check_log_level (log_str); +- gf_log_set_loglevel (log_level); ++ /* Set loglevel for all children and server xlators */ ++ xlator_set_loglevel(this, log_level); + } + + GF_OPTION_RECONF ("logger", logger_str, options, str, out); +-- +1.8.3.1 + diff --git a/SOURCES/0420-Modify-log-message-DH-ciphers-are-disabled-from-ERRO.patch b/SOURCES/0420-Modify-log-message-DH-ciphers-are-disabled-from-ERRO.patch new file mode 100644 index 0000000..71e539c --- /dev/null +++ b/SOURCES/0420-Modify-log-message-DH-ciphers-are-disabled-from-ERRO.patch @@ -0,0 +1,40 @@ +From 9a6ad46e3d7ae9ac683ef790c12937fee8f1143c Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 6 Nov 2018 09:31:50 +0530 +Subject: [PATCH 420/444] Modify log message 'DH ciphers are disabled' from + ERROR to INFO + +Per the latest comment in bz#1398237 this message is confusing for users +because it suggests an error where none exists. 
+ +> Fixes: bz#1626319 +> Change-Id: I2f05999da157b11e225bf3d95edb597e964f9923 +> Signed-off-by: Omar Kohl +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21108/) + +Change-Id: I154cdd6e33e17d426bcba10fe17fceceba047b16 +BUG: 1632563 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/155023 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 243d49c..8a08177 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4378,7 +4378,7 @@ socket_init (rpc_transport_t *this) + #endif + + if ((bio = BIO_new_file(dh_param, "r")) == NULL) { +- gf_log(this->name,GF_LOG_ERROR, ++ gf_log(this->name, GF_LOG_INFO, + "failed to open %s, " + "DH ciphers are disabled", dh_param); + } +-- +1.8.3.1 + diff --git a/SOURCES/0421-rpc-handle-EAGAIN-when-SSL_ERROR_SYSCALL-is-returned.patch b/SOURCES/0421-rpc-handle-EAGAIN-when-SSL_ERROR_SYSCALL-is-returned.patch new file mode 100644 index 0000000..623fd14 --- /dev/null +++ b/SOURCES/0421-rpc-handle-EAGAIN-when-SSL_ERROR_SYSCALL-is-returned.patch @@ -0,0 +1,66 @@ +From ce2c9ea016ffa20bf291264a012cc14102040900 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Mon, 10 Sep 2018 13:48:18 +0530 +Subject: [PATCH 421/444] rpc: handle EAGAIN when SSL_ERROR_SYSCALL is returned + +Problem: +A return value of ENODATA was forcibly returned in the case where +SSL_get_error(r) returned SSL_ERROR_SYSCALL. Sometimes SSL_ERROR_SYSCALL +is a transient error which is identified by setting errno to EAGAIN. +EAGAIN is not a fatal error and indicates that the syscall needs to be +retried. + +Solution: +Bubble up the errno in case SSL_get_error(r) returns SSL_ERROR_SYSCALL +and let the upper layers handle it appropriately. + +mainline: +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/20993 +> fixes: bz#1622405 +> Change-Id: I76eff278378930ee79abbf9fa267a7e77356eed6 +> BUG: 1622405 + +Change-Id: I76eff278378930ee79abbf9fa267a7e77356eed6 +BUG: 1622308 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/154868 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 8a08177..34a937f 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -209,6 +209,7 @@ ssl_do (rpc_transport_t *this, void *buf, size_t len, SSL_trinary_func *func) + int r = (-1); + struct pollfd pfd = {-1,}; + socket_private_t *priv = NULL; ++ int myerrno = -1; + + GF_VALIDATE_OR_GOTO(this->name,this->private,out); + priv = this->private; +@@ -276,10 +277,16 @@ ssl_do (rpc_transport_t *this, void *buf, size_t len, SSL_trinary_func *func) + } + break; + case SSL_ERROR_SYSCALL: ++ myerrno = errno; + /* This is what we get when remote disconnects. 
*/ + gf_log(this->name,GF_LOG_DEBUG, +- "syscall error (probably remote disconnect)"); +- errno = ENODATA; ++ "syscall error (probably remote disconnect)" ++ " errno:%d(%s)", errno, strerror(errno)); ++ /* sometimes, errno is set to EAGAIN in this case ++ * so let the upper layers do what they need to do ++ * with it ++ */ ++ errno = myerrno; + goto out; + default: + errno = EIO; +-- +1.8.3.1 + diff --git a/SOURCES/0422-glusterd-raise-default-transport.listen-backlog.patch b/SOURCES/0422-glusterd-raise-default-transport.listen-backlog.patch new file mode 100644 index 0000000..dd95bc3 --- /dev/null +++ b/SOURCES/0422-glusterd-raise-default-transport.listen-backlog.patch @@ -0,0 +1,46 @@ +From ccac7336bb6fa667b4f9b51426440d898ff3d184 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Mon, 5 Nov 2018 19:38:08 +0530 +Subject: [PATCH 422/444] glusterd: raise default transport.listen-backlog + +Problem: +data center setups with large number of bricks with replication +causes a flood of connections from bricks and self-heal daemons +to glusterd causing connections to be dropped due to insufficient +listener socket backlog queue length + +Solution: +raise default value of transport.listen-backlog to 1024 + +mainline: +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/21482 +> Change-Id: I879e4161a88f1e30875046dff232499a8e2e6c51 +> fixes: bz#1642850 +> Signed-off-by: Milind Changire + +Change-Id: I879e4161a88f1e30875046dff232499a8e2e6c51 +BUG: 1642854 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/154959 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index d71a9c1..5e641fd 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -73,7 +73,7 @@ + #endif + + #define GLUSTERD_MAX_SNAP_NAME 255 +-#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 10 ++#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 1024 + #define ZR_MOUNTPOINT_OPT "mountpoint" + #define ZR_ATTR_TIMEOUT_OPT "attribute-timeout" + #define ZR_ENTRY_TIMEOUT_OPT "entry-timeout" +-- +1.8.3.1 + diff --git a/SOURCES/0423-glusterd-acquire-lock-to-update-volinfo-structure.patch b/SOURCES/0423-glusterd-acquire-lock-to-update-volinfo-structure.patch new file mode 100644 index 0000000..8b23e30 --- /dev/null +++ b/SOURCES/0423-glusterd-acquire-lock-to-update-volinfo-structure.patch @@ -0,0 +1,150 @@ +From 216ac7a1bd22db08cc02d7b8688a3338e78c71cd Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 11 Sep 2018 14:19:42 +0530 +Subject: [PATCH 423/444] glusterd: acquire lock to update volinfo structure + +Problem: With commit cb0339f92, we are using a separate syntask +for restart_bricks. There can be a situation where two threads +are accessing the same volinfo structure at the same time and +updating volinfo structure. This can lead volinfo to have +inconsistent values and assertion failures because of unexpected +values. + +Solution: While updating the volinfo structure, acquire a +store_volinfo_lock, and release the lock only when the thread +completed its critical section part. 
+ +> Fixes: bz#1627610 +> Signed-off-by: Sanju Rakonde +> Change-Id: I545e4e2368e3285d8f7aa28081ff4448abb72f5d + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21150/ + +Change-Id: I545e4e2368e3285d8f7aa28081ff4448abb72f5d +BUG: 1631418 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/154885 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 67 +++++++++++++------------ + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 + + xlators/mgmt/glusterd/src/glusterd.h | 3 ++ + 3 files changed, 40 insertions(+), 32 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 015f6c2..37542e7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -1796,46 +1796,49 @@ glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t a + + GF_ASSERT (volinfo); + +- glusterd_perform_volinfo_version_action (volinfo, ac); +- ret = glusterd_store_create_volume_dir (volinfo); +- if (ret) +- goto out; +- +- ret = glusterd_store_create_volume_run_dir (volinfo); +- if (ret) +- goto out; ++ pthread_mutex_lock(&volinfo->store_volinfo_lock); ++ { ++ glusterd_perform_volinfo_version_action(volinfo, ac); ++ ret = glusterd_store_create_volume_dir(volinfo); ++ if (ret) ++ goto unlock; + +- ret = glusterd_store_create_vol_shandle_on_absence (volinfo); +- if (ret) +- goto out; ++ ret = glusterd_store_create_volume_run_dir(volinfo); ++ if (ret) ++ goto unlock; + +- ret = glusterd_store_create_nodestate_sh_on_absence (volinfo); +- if (ret) +- goto out; ++ ret = glusterd_store_create_vol_shandle_on_absence(volinfo); ++ if (ret) ++ goto unlock; + +- ret = glusterd_store_perform_volume_store (volinfo); +- if (ret) +- goto out; ++ ret = glusterd_store_create_nodestate_sh_on_absence(volinfo); ++ if (ret) ++ goto unlock; + +- ret = glusterd_store_volume_atomic_update (volinfo); +- if (ret) { +- glusterd_perform_volinfo_version_action (volinfo, +- GLUSTERD_VOLINFO_VER_AC_DECREMENT); +- goto out; +- } ++ ret = glusterd_store_perform_volume_store(volinfo); ++ if (ret) ++ goto unlock; + +- ret = glusterd_store_perform_node_state_store (volinfo); +- if (ret) +- goto out; ++ ret = glusterd_store_volume_atomic_update(volinfo); ++ if (ret) { ++ glusterd_perform_volinfo_version_action(volinfo, ++ GLUSTERD_VOLINFO_VER_AC_DECREMENT); ++ goto unlock; ++ } + +- /* checksum should be computed at the end */ +- ret = glusterd_compute_cksum (volinfo, _gf_false); +- if (ret) +- goto out; ++ ret = glusterd_store_perform_node_state_store(volinfo); ++ if (ret) ++ goto unlock; + +-out: ++ /* checksum should be computed at the end */ ++ ret = glusterd_compute_cksum(volinfo, _gf_false); ++ if (ret) ++ goto unlock; ++ } ++unlock: ++ pthread_mutex_unlock(&volinfo->store_volinfo_lock); + if (ret) +- glusterd_store_volume_cleanup_tmp (volinfo); ++ glusterd_store_volume_cleanup_tmp(volinfo); + + gf_msg_debug (THIS->name, 0, "Returning %d", ret); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 87b7acc..b91a516 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2198,6 +2198,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) + goto out; + } + ++ pthread_mutex_init(&volinfo->store_volinfo_lock, NULL); ++ + ret = dict_get_str (dict, "volname", &volname); + + if (ret) 
{ +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 8c70d48..edd41aa 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -478,6 +478,9 @@ struct glusterd_volinfo_ { + gf_boolean_t stage_deleted; /* volume has passed staging + * for delete operation + */ ++ pthread_mutex_t store_volinfo_lock; /* acquire lock for ++ * updating the volinfo ++ */ + }; + + typedef enum gd_snap_status_ { +-- +1.8.3.1 + diff --git a/SOURCES/0424-cluster-afr-Delegate-metadata-heal-with-pending-xatt.patch b/SOURCES/0424-cluster-afr-Delegate-metadata-heal-with-pending-xatt.patch new file mode 100644 index 0000000..2584cb4 --- /dev/null +++ b/SOURCES/0424-cluster-afr-Delegate-metadata-heal-with-pending-xatt.patch @@ -0,0 +1,272 @@ +From 68b0db385ce968547349b187222b9a9401faee12 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Mon, 27 Aug 2018 11:46:33 +0530 +Subject: [PATCH 424/444] cluster/afr: Delegate metadata heal with pending + xattrs to SHD + +Problem: +When metadata-self-heal is triggered on the mount, it blocks +lookup until metadata-self-heal completes. But that can lead +to hangs when lot of clients are accessing a directory which +needs metadata heal and all of them trigger heals waiting +for other clients to complete heal. + +Fix: +Only when the heal is needed but the pending xattrs are not set, +trigger metadata heal that could block lookup. This is the only +case where different clients may give different metadata to the +clients without heals, which should be avoided. + +BUG: 1619357 +Upstream Patch: https://review.gluster.org/c/glusterfs/+/21086 +Change-Id: I6089e9fda0770a83fb287941b229c882711f4e66 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/155028 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/afr/client-side-heal.t | 28 ++++++++++------ + tests/bugs/glusterfs/bug-906646.t | 10 ++++-- + xlators/cluster/afr/src/afr-common.c | 44 ++++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-common.c | 38 ---------------------- + xlators/cluster/afr/src/afr.h | 3 ++ + 5 files changed, 72 insertions(+), 51 deletions(-) + +diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t +index eba7dc2..1e93361 100755 +--- a/tests/basic/afr/client-side-heal.t ++++ b/tests/basic/afr/client-side-heal.t +@@ -17,6 +17,7 @@ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + echo "some data" > $M0/datafile + EXPECT 0 echo $? + TEST touch $M0/mdatafile ++TEST touch $M0/mdatafile-backend-direct-modify + TEST mkdir $M0/dir + + #Kill a brick and perform I/O to have pending heals. +@@ -29,6 +30,7 @@ EXPECT 0 echo $? + + #pending metadata heal + TEST chmod +x $M0/mdatafile ++TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify + + #pending entry heal. Also causes pending metadata/data heals on file{1..5} + TEST touch $M0/dir/file{1..5} +@@ -40,9 +42,12 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + + #Medatada heal via explicit lookup must not happen +-TEST ls $M0/mdatafile ++TEST getfattr -d -m. -e hex $M0/mdatafile ++TEST ls $M0/mdatafile-backend-direct-modify + +-#Inode refresh must not trigger data and entry heals. ++TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]] ++ ++#Inode refresh must not trigger data metadata and entry heals. 
+ #To trigger inode refresh for sure, the volume is unmounted and mounted each time. + #Check that data heal does not happen. + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +@@ -52,7 +57,6 @@ TEST cat $M0/datafile + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + TEST ls $M0/dir +- + #No heal must have happened + EXPECT 8 get_pending_heal_count $V0 + +@@ -61,21 +65,25 @@ TEST $CLI volume set $V0 cluster.data-self-heal on + TEST $CLI volume set $V0 cluster.metadata-self-heal on + TEST $CLI volume set $V0 cluster.entry-self-heal on + +-#Metadata heal is triggered by lookup without need for inode refresh. +-TEST ls $M0/mdatafile +-EXPECT 7 get_pending_heal_count $V0 +- +-#Inode refresh must trigger data and entry heals. ++#Inode refresh must trigger data metadata and entry heals. + #To trigger inode refresh for sure, the volume is unmounted and mounted each time. + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST ls $M0/mdatafile-backend-direct-modify ++ ++TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]] ++ ++ ++TEST getfattr -d -m. -e hex $M0/mdatafile ++EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0 ++ + TEST cat $M0/datafile + EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0 + + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + TEST ls $M0/dir +-EXPECT 5 get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0 + + TEST cat $M0/dir/file1 + TEST cat $M0/dir/file2 +@@ -83,5 +91,5 @@ TEST cat $M0/dir/file3 + TEST cat $M0/dir/file4 + TEST cat $M0/dir/file5 + +-EXPECT 0 get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0 + cleanup; +diff --git a/tests/bugs/glusterfs/bug-906646.t b/tests/bugs/glusterfs/bug-906646.t +index 45c85d9..37b8fe5 100644 +--- a/tests/bugs/glusterfs/bug-906646.t ++++ b/tests/bugs/glusterfs/bug-906646.t +@@ -13,7 +13,6 @@ TEST pidof glusterd + TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11 + TEST $CLI volume start $V0 + +-TEST $CLI volume set $V0 cluster.self-heal-daemon off + TEST $CLI volume set $V0 cluster.background-self-heal-count 0 + + ## Mount FUSE with caching disabled +@@ -82,10 +81,15 @@ EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name" + # restart the brick process + TEST $CLI volume start $V0 force + +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 `expr $brick_id - 1` ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3 + +-cat $pth >/dev/null ++TEST $CLI volume heal $V0 + ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + # check backends - xattr should not be present anywhere + EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name" + EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name" +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index e8107c9..e74fdec 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c 
+@@ -2571,6 +2571,42 @@ out: + return 0; + } + ++gf_boolean_t ++afr_is_pending_set (xlator_t *this, dict_t *xdata, int type) ++{ ++ int idx = -1; ++ afr_private_t *priv = NULL; ++ void *pending_raw = NULL; ++ int *pending_int = NULL; ++ int i = 0; ++ ++ priv = this->private; ++ idx = afr_index_for_transaction_type (type); ++ ++ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) { ++ if (pending_raw) { ++ pending_int = pending_raw; ++ ++ if (ntoh32 (pending_int[idx])) ++ return _gf_true; ++ } ++ } ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (dict_get_ptr (xdata, priv->pending_key[i], ++ &pending_raw)) ++ continue; ++ if (!pending_raw) ++ continue; ++ pending_int = pending_raw; ++ ++ if (ntoh32 (pending_int[idx])) ++ return _gf_true; ++ } ++ ++ return _gf_false; ++} ++ + static gf_boolean_t + afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this) + { +@@ -2597,6 +2633,14 @@ afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this) + continue; + } + ++ if (afr_is_pending_set (this, replies[i].xdata, ++ AFR_METADATA_TRANSACTION)) { ++ /* Let shd do the heal so that lookup is not blocked ++ * on getting metadata lock/doing the heal */ ++ start = _gf_false; ++ break; ++ } ++ + if (gf_uuid_compare (stbuf.ia_gfid, replies[i].poststat.ia_gfid)) { + start = _gf_false; + break; +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index d04f11d..c6ee75b 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -2182,44 +2182,6 @@ afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, + return 0; + } + +- +-gf_boolean_t +-afr_is_pending_set (xlator_t *this, dict_t *xdata, int type) +-{ +- int idx = -1; +- afr_private_t *priv = NULL; +- void *pending_raw = NULL; +- int *pending_int = NULL; +- int i = 0; +- +- priv = this->private; +- idx = afr_index_for_transaction_type (type); +- +- if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) { +- if (pending_raw) { +- pending_int = pending_raw; +- +- if (ntoh32 (pending_int[idx])) +- return _gf_true; +- } +- } +- +- for (i = 0; i < priv->child_count; i++) { +- if (dict_get_ptr (xdata, priv->pending_key[i], +- &pending_raw)) +- continue; +- if (!pending_raw) +- continue; +- pending_int = pending_raw; +- +- if (ntoh32 (pending_int[idx])) +- return _gf_true; +- } +- +- return _gf_false; +-} +- +- + gf_boolean_t + afr_is_data_set (xlator_t *this, dict_t *xdata) + { +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 2e6d995..af9dbc8 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -1225,4 +1225,7 @@ afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode); + + gf_boolean_t + afr_is_symmetric_error (call_frame_t *frame, xlator_t *this); ++ ++gf_boolean_t ++afr_is_pending_set (xlator_t *this, dict_t *xdata, int type); + #endif /* __AFR_H__ */ +-- +1.8.3.1 + diff --git a/SOURCES/0425-cluster-afr-Delegate-name-heal-when-possible.patch b/SOURCES/0425-cluster-afr-Delegate-name-heal-when-possible.patch new file mode 100644 index 0000000..193538e --- /dev/null +++ b/SOURCES/0425-cluster-afr-Delegate-name-heal-when-possible.patch @@ -0,0 +1,352 @@ +From 8a3c0fb64c8798ecf5a3635fe0922e3cfd476817 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Mon, 27 Aug 2018 12:40:16 +0530 +Subject: [PATCH 425/444] cluster/afr: Delegate name-heal when possible + +Problem: +When name-self-heal is triggered on the mount, 
it blocks +lookup until name-self-heal completes. But that can lead +to hangs when lot of clients are accessing a directory which +needs name heal and all of them trigger heals waiting +for other clients to complete heal. + +Fix: +When a name-heal is needed but quorum number of names have the +file and pending xattrs exist on the parent, then better to +delegate the heal to SHD which will be completed as part of +entry-heal of the parent directory. We could also do the same +for quorum-number of names not present but we don't have +any known use-case where this is a frequent occurrence so +not changing that part at the moment. When there is a gfid +mismatch or missing gfid it is important to complete the heal +so that next rename doesn't assume everything is fine and +perform a rename etc + +BUG: 1619357 +Upstream Patch: https://review.gluster.org/c/glusterfs/+/21087 +Change-Id: I8b002c85dffc6eb6f2833e742684a233daefeb2c +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/155029 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/afr.rc | 8 ++ + tests/basic/afr/name-self-heal.t | 112 +++++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 100 ++++++++++++++++++------ + xlators/cluster/afr/src/afr-self-heal-name.c | 12 ++- + 4 files changed, 205 insertions(+), 27 deletions(-) + create mode 100644 tests/basic/afr/name-self-heal.t + +diff --git a/tests/afr.rc b/tests/afr.rc +index 1fd0310..a1e8a44 100644 +--- a/tests/afr.rc ++++ b/tests/afr.rc +@@ -89,3 +89,11 @@ function count_index_entries() + { + ls $1/.glusterfs/indices/xattrop | wc -l + } ++ ++function get_quorum_type() ++{ ++ local m="$1" ++ local v="$2" ++ local repl_id="$3" ++ cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}' ++} +diff --git a/tests/basic/afr/name-self-heal.t b/tests/basic/afr/name-self-heal.t +new file mode 100644 +index 0000000..50fc2ec +--- /dev/null ++++ b/tests/basic/afr/name-self-heal.t +@@ -0,0 +1,112 @@ ++#!/bin/bash ++#Self-heal tests ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++cleanup; ++ ++#Check that when quorum is not enabled name-heal happens correctly ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume start $V0 ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++TEST touch $M0/a ++TEST touch $M0/c ++TEST kill_brick $V0 $H0 $B0/brick0 ++TEST touch $M0/b ++TEST rm -f $M0/a ++TEST rm -f $M0/c ++TEST touch $M0/c #gfid mismatch case ++c_gfid=$(gf_get_gfid_xattr $B0/brick1/c) ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++TEST ! stat $M0/a ++TEST ! stat $B0/brick0/a ++TEST ! 
stat $B0/brick1/a ++ ++TEST stat $M0/b ++TEST stat $B0/brick0/b ++TEST stat $B0/brick1/b ++TEST [[ "$(gf_get_gfid_xattr $B0/brick0/b)" == "$(gf_get_gfid_xattr $B0/brick1/b)" ]] ++ ++TEST stat $M0/c ++TEST stat $B0/brick0/c ++TEST stat $B0/brick1/c ++TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]] ++ ++cleanup; ++ ++#Check that when quorum is enabled name-heal happens as expected ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2} ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume start $V0 ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++TEST touch $M0/a ++TEST touch $M0/c ++TEST kill_brick $V0 $H0 $B0/brick0 ++TEST touch $M0/b ++TEST rm -f $M0/a ++TEST rm -f $M0/c ++TEST touch $M0/c #gfid mismatch case ++c_gfid=$(gf_get_gfid_xattr $B0/brick1/c) ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++TEST ! stat $M0/a ++TEST ! stat $B0/brick0/a ++TEST ! stat $B0/brick1/a ++TEST ! stat $B0/brick2/a ++ ++TEST stat $M0/b ++TEST ! stat $B0/brick0/b #Name heal shouldn't be triggered ++TEST stat $B0/brick1/b ++TEST stat $B0/brick2/b ++ ++TEST stat $M0/c ++TEST stat $B0/brick0/c ++TEST stat $B0/brick1/c ++TEST stat $B0/brick2/c ++TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]] ++ ++TEST $CLI volume set $V0 cluster.quorum-type none ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "none" get_quorum_type $M0 $V0 0 ++TEST stat $M0/b ++TEST stat $B0/brick0/b #Name heal should be triggered ++TEST stat $B0/brick1/b ++TEST stat $B0/brick2/b ++TEST [[ "$(gf_get_gfid_xattr $B0/brick0/b)" == "$(gf_get_gfid_xattr $B0/brick1/b)" ]] ++TEST $CLI volume set $V0 cluster.quorum-type auto ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "auto" get_quorum_type $M0 $V0 0 ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++#Missing parent xattrs cases ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST $CLI volume heal $V0 disable ++#In cases where a good parent doesn't have pending xattrs and a file, ++#name-heal will be triggered ++TEST gf_rm_file_and_gfid_link $B0/brick1 c ++TEST stat $M0/c ++TEST stat $B0/brick0/c ++TEST stat $B0/brick1/c ++TEST stat $B0/brick2/c ++TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]] ++cleanup +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index e74fdec..ce2b17a 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2302,8 +2302,6 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) + */ + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) { +- if (priv->child_up[i]) +- can_interpret = _gf_false; + continue; + } + +@@ -2742,21 +2740,52 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this) + afr_private_t *priv = NULL; + call_frame_t *heal = NULL; + int i = 0, first = -1; +- gf_boolean_t need_heal = _gf_false; ++ gf_boolean_t name_state_mismatch = _gf_false; + struct afr_reply *replies = NULL; + int 
ret = 0; ++ unsigned char *par_readables = NULL; ++ unsigned char *success = NULL; ++ int32_t op_errno = 0; ++ uuid_t gfid = {0}; + + local = frame->local; + replies = local->replies; + priv = this->private; ++ par_readables = alloca0(priv->child_count); ++ success = alloca0(priv->child_count); ++ ++ ret = afr_inode_read_subvol_get (local->loc.parent, this, par_readables, ++ NULL, NULL); ++ if (ret < 0 || AFR_COUNT (par_readables, priv->child_count) == 0) { ++ /* In this case set par_readables to all 1 so that name_heal ++ * need checks at the end of this function will flag missing ++ * entry when name state mismatches*/ ++ memset (par_readables, 1, priv->child_count); ++ } + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + ++ if (replies[i].op_ret == 0) { ++ if (uuid_is_null (gfid)) { ++ gf_uuid_copy (gfid, ++ replies[i].poststat.ia_gfid); ++ } ++ success[i] = 1; ++ } else { ++ if ((replies[i].op_errno != ENOTCONN) && ++ (replies[i].op_errno != ENOENT) && ++ (replies[i].op_errno != ESTALE)) { ++ op_errno = replies[i].op_errno; ++ } ++ } ++ ++ /*gfid is missing, needs heal*/ + if ((replies[i].op_ret == -1) && +- (replies[i].op_errno == ENODATA)) +- need_heal = _gf_true; ++ (replies[i].op_errno == ENODATA)) { ++ goto name_heal; ++ } + + if (first == -1) { + first = i; +@@ -2764,30 +2793,53 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this) + } + + if (replies[i].op_ret != replies[first].op_ret) { +- need_heal = _gf_true; +- break; ++ name_state_mismatch = _gf_true; + } + +- if (gf_uuid_compare (replies[i].poststat.ia_gfid, +- replies[first].poststat.ia_gfid)) { +- need_heal = _gf_true; +- break; +- } ++ if (replies[i].op_ret == 0) { ++ /* Rename after this lookup may succeed if we don't do ++ * a name-heal and the destination may not have pending xattrs ++ * to indicate which name is good and which is bad so always do ++ * this heal*/ ++ if (gf_uuid_compare (replies[i].poststat.ia_gfid, ++ gfid)) { ++ goto name_heal; ++ } ++ } + } + +- if (need_heal) { +- heal = afr_frame_create (this, NULL); +- if (!heal) +- goto metadata_heal; +- +- ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap, +- afr_refresh_selfheal_done, heal, frame); +- if (ret) { +- AFR_STACK_DESTROY (heal); +- goto metadata_heal; ++ if (name_state_mismatch) { ++ if (!priv->quorum_count) ++ goto name_heal; ++ if (!afr_has_quorum (success, this)) ++ goto name_heal; ++ if (op_errno) ++ goto name_heal; ++ for (i = 0; i < priv->child_count; i++) { ++ if (!replies[i].valid) ++ continue; ++ if (par_readables[i] && replies[i].op_ret < 0 && ++ replies[i].op_errno != ENOTCONN) { ++ goto name_heal; ++ } + } +- return ret; +- } ++ } ++ ++ goto metadata_heal; ++ ++name_heal: ++ heal = afr_frame_create (this, NULL); ++ if (!heal) ++ goto metadata_heal; ++ ++ ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap, ++ afr_refresh_selfheal_done, heal, frame); ++ if (ret) { ++ AFR_STACK_DESTROY (heal); ++ goto metadata_heal; ++ } ++ return ret; ++ + metadata_heal: + ret = afr_lookup_metadata_heal_check (frame, this); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index bcd0e60..0a5be29 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -634,20 +634,26 @@ afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this, + continue; + + if ((replies[i].op_ret == -1) && +- (replies[i].op_errno == ENODATA)) ++ (replies[i].op_errno == ENODATA)) { + 
*need_heal = _gf_true; ++ break; ++ } + + if (first_idx == -1) { + first_idx = i; + continue; + } + +- if (replies[i].op_ret != replies[first_idx].op_ret) ++ if (replies[i].op_ret != replies[first_idx].op_ret) { + *need_heal = _gf_true; ++ break; ++ } + + if (gf_uuid_compare (replies[i].poststat.ia_gfid, +- replies[first_idx].poststat.ia_gfid)) ++ replies[first_idx].poststat.ia_gfid)) { + *need_heal = _gf_true; ++ break; ++ } + } + + if (inode) +-- +1.8.3.1 + diff --git a/SOURCES/0426-features-shard-Make-operations-on-internal-directori.patch b/SOURCES/0426-features-shard-Make-operations-on-internal-directori.patch new file mode 100644 index 0000000..aa8e5a9 --- /dev/null +++ b/SOURCES/0426-features-shard-Make-operations-on-internal-directori.patch @@ -0,0 +1,582 @@ +From 9be984ac2b71423b72ab3b1fa45b4d77a263ce1e Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Wed, 28 Mar 2018 12:09:27 +0530 +Subject: [PATCH 426/444] features/shard: Make operations on internal + directories generic + +> Upstream: https://review.gluster.org/19892 +> BUG: 1568521 +> Change-Id: Iea7ad2102220c6d415909f8caef84167ce2d6818 + +Change-Id: Iea7ad2102220c6d415909f8caef84167ce2d6818 +BUG: 1520882 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/154860 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +--- + xlators/features/shard/src/shard.c | 295 +++++++++++++++++++++++++------------ + xlators/features/shard/src/shard.h | 4 + + 2 files changed, 206 insertions(+), 93 deletions(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index f5fb181..5ff04df 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -546,30 +546,55 @@ shard_call_count_return (call_frame_t *frame) + return call_count; + } + ++static char * ++shard_internal_dir_string (shard_internal_dir_type_t type) ++{ ++ char *str = NULL; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ str = ".shard"; ++ break; ++ default: ++ break; ++ } ++ return str; ++} ++ + static int +-shard_init_dot_shard_loc (xlator_t *this, shard_local_t *local) ++shard_init_internal_dir_loc (xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) + { +- int ret = -1; +- loc_t *dot_shard_loc = NULL; ++ int ret = -1; ++ char *bname = NULL; ++ loc_t *internal_dir_loc = NULL; + + if (!local) + return -1; + +- dot_shard_loc = &local->dot_shard_loc; +- dot_shard_loc->inode = inode_new (this->itable); +- dot_shard_loc->parent = inode_ref (this->itable->root); +- ret = inode_path (dot_shard_loc->parent, GF_SHARD_DIR, +- (char **)&dot_shard_loc->path); +- if (ret < 0 || !(dot_shard_loc->inode)) { ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ internal_dir_loc = &local->dot_shard_loc; ++ bname = GF_SHARD_DIR; ++ break; ++ default: ++ break; ++ } ++ ++ internal_dir_loc->inode = inode_new (this->itable); ++ internal_dir_loc->parent = inode_ref (this->itable->root); ++ ret = inode_path (internal_dir_loc->parent, bname, ++ (char **)&internal_dir_loc->path); ++ if (ret < 0 || !(internal_dir_loc->inode)) { + gf_msg (this->name, GF_LOG_ERROR, 0, + SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", GF_SHARD_DIR); ++ "Inode path failed on %s", bname); + goto out; + } + +- dot_shard_loc->name = strrchr (dot_shard_loc->path, '/'); +- if (dot_shard_loc->name) +- dot_shard_loc->name++; ++ internal_dir_loc->name = strrchr (internal_dir_loc->path, '/'); ++ if (internal_dir_loc->name) ++ internal_dir_loc->name++; + + ret = 0; 
+ out: +@@ -1029,28 +1054,42 @@ out: + } + + static inode_t * +-shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode, +- struct iatt *buf) ++shard_link_internal_dir_inode (shard_local_t *local, inode_t *inode, ++ struct iatt *buf, shard_internal_dir_type_t type) + { + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; ++ char *bname = NULL; ++ inode_t **priv_inode = NULL; + + priv = THIS->private; + +- linked_inode = inode_link (inode, inode->table->root, ".shard", buf); ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ bname = ".shard"; ++ priv_inode = &priv->dot_shard_inode; ++ break; ++ default: ++ break; ++ } ++ ++ linked_inode = inode_link (inode, inode->table->root, bname, buf); + inode_lookup (linked_inode); +- priv->dot_shard_inode = linked_inode; ++ *priv_inode = linked_inode; + return linked_inode; + } + + + int +-shard_refresh_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) ++shard_refresh_internal_dir_cbk (call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) + { +- shard_local_t *local = NULL; ++ shard_local_t *local = NULL; ++ inode_t *linked_inode = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t) cookie; + + local = frame->local; + +@@ -1061,27 +1100,37 @@ shard_refresh_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } + + /* To-Do: Fix refcount increment per call to +- * shard_link_dot_shard_inode(). ++ * shard_link_internal_dir_inode(). + */ +- shard_link_dot_shard_inode (local, inode, buf); +- shard_inode_ctx_set_refreshed_flag (inode, this); ++ linked_inode = shard_link_internal_dir_inode (local, inode, buf, type); ++ shard_inode_ctx_set_refreshed_flag (linked_inode, this); + out: + shard_common_resolve_shards (frame, this, local->post_res_handler); + return 0; + } + + int +-shard_refresh_dot_shard (call_frame_t *frame, xlator_t *this) ++shard_refresh_internal_dir (call_frame_t *frame, xlator_t *this, ++ shard_internal_dir_type_t type) + { + loc_t loc = {0,}; + inode_t *inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; ++ uuid_t gfid = {0,}; + + local = frame->local; + priv = this->private; + +- inode = inode_find (this->itable, priv->dot_shard_gfid); ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy (gfid, priv->dot_shard_gfid); ++ break; ++ default: ++ break; ++ } ++ ++ inode = inode_find (this->itable, gfid); + + if (!shard_inode_ctx_needs_lookup (inode, this)) { + local->op_ret = 0; +@@ -1092,10 +1141,11 @@ shard_refresh_dot_shard (call_frame_t *frame, xlator_t *this) + * call to inode_find() + */ + loc.inode = inode; +- gf_uuid_copy (loc.gfid, priv->dot_shard_gfid); ++ gf_uuid_copy (loc.gfid, gfid); + +- STACK_WIND (frame, shard_refresh_dot_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &loc, NULL); ++ STACK_WIND_COOKIE (frame, shard_refresh_internal_dir_cbk, ++ (void *)(long) type, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &loc, NULL); + loc_wipe (&loc); + + return 0; +@@ -1106,13 +1156,14 @@ out: + } + + int +-shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) ++shard_lookup_internal_dir_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) + { +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t) cookie; + + local = frame->local; + +@@ -1124,17 +1175,17 @@ shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + if (!IA_ISDIR (buf->ia_type)) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, +- SHARD_MSG_DOT_SHARD_NODIR, "/.shard already exists and " +- "is not a directory. Please remove /.shard from all " +- "bricks and try again"); ++ SHARD_MSG_DOT_SHARD_NODIR, "%s already exists and " ++ "is not a directory. Please remove it from all bricks " ++ "and try again", shard_internal_dir_string (type)); + local->op_ret = -1; + local->op_errno = EIO; + goto unwind; + } + +- link_inode = shard_link_dot_shard_inode (local, inode, buf); ++ link_inode = shard_link_internal_dir_inode (local, inode, buf, type); + if (link_inode != inode) { +- shard_refresh_dot_shard (frame, this); ++ shard_refresh_internal_dir (frame, this, type); + } else { + shard_inode_ctx_set_refreshed_flag (link_inode, this); + shard_common_resolve_shards (frame, this, +@@ -1148,18 +1199,26 @@ unwind: + } + + int +-shard_lookup_dot_shard (call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler) ++shard_lookup_internal_dir (call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler, ++ shard_internal_dir_type_t type) + { + int ret = -1; + dict_t *xattr_req = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; + + local = frame->local; + priv = this->private; + local->post_res_handler = post_res_handler; + ++ gfid = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ + xattr_req = dict_new (); + if (!xattr_req) { + local->op_ret = -1; +@@ -1167,26 +1226,38 @@ shard_lookup_dot_shard (call_frame_t *frame, xlator_t *this, + goto err; + } + +- ret = dict_set_static_bin (xattr_req, "gfid-req", priv->dot_shard_gfid, +- 16); ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy (*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ default: ++ break; ++ } ++ ++ ret = dict_set_bin (xattr_req, "gfid-req", *gfid, 16); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, +- "Failed to set gfid of /.shard into dict"); ++ "Failed to set gfid of %s into dict", ++ shard_internal_dir_string (type)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; ++ } else { ++ free_gfid = _gf_false; + } + +- STACK_WIND (frame, shard_lookup_dot_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &local->dot_shard_loc, +- xattr_req); +- ++ STACK_WIND_COOKIE (frame, shard_lookup_internal_dir_cbk, ++ (void *) (long) type, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + dict_unref (xattr_req); + return 0; + + err: + if (xattr_req) + dict_unref (xattr_req); ++ if (free_gfid) ++ GF_FREE (gfid); + post_res_handler (frame, this); + return 0; + } +@@ -2203,14 +2274,17 @@ shard_truncate_begin (call_frame_t *frame, xlator_t *this) + local->dot_shard_loc.inode = inode_find (this->itable, + priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { +- ret = shard_init_dot_shard_loc (this, local); ++ ret = 
shard_init_internal_dir_loc (this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto err; +- shard_lookup_dot_shard (frame, this, +- shard_post_resolve_truncate_handler); ++ shard_lookup_internal_dir (frame, this, ++ shard_post_resolve_truncate_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_truncate_handler; +- shard_refresh_dot_shard (frame, this); ++ shard_refresh_internal_dir (frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; + +@@ -2682,14 +2756,17 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->dot_shard_loc.inode = inode_find (this->itable, + priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { +- ret = shard_init_dot_shard_loc (this, local); ++ ret = shard_init_internal_dir_loc (this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto unwind; +- shard_lookup_dot_shard (frame, this, +- shard_post_resolve_unlink_handler); ++ shard_lookup_internal_dir (frame, this, ++ shard_post_resolve_unlink_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_unlink_handler; +- shard_refresh_dot_shard (frame, this); ++ shard_refresh_internal_dir (frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } + + return 0; +@@ -3048,14 +3125,17 @@ shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this) + local->dot_shard_loc.inode = inode_find (this->itable, + priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { +- ret = shard_init_dot_shard_loc (this, local); ++ ret = shard_init_internal_dir_loc (this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto out; +- shard_lookup_dot_shard (frame, this, +- shard_post_resolve_unlink_handler); ++ shard_lookup_internal_dir (frame, this, ++ shard_post_resolve_unlink_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_unlink_handler; +- shard_refresh_dot_shard (frame, this); ++ shard_refresh_internal_dir (frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } + + return 0; +@@ -3811,14 +3891,17 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this) + local->dot_shard_loc.inode = inode_find (this->itable, + priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { +- ret = shard_init_dot_shard_loc (this, local); ++ ret = shard_init_internal_dir_loc (this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto err; +- shard_lookup_dot_shard (frame, this, +- shard_post_resolve_readv_handler); ++ shard_lookup_internal_dir (frame, this, ++ shard_post_resolve_readv_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_readv_handler; +- shard_refresh_dot_shard (frame, this); ++ shard_refresh_internal_dir (frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; + +@@ -4249,8 +4332,9 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame, + } + + int +-shard_mkdir_dot_shard (call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler); ++shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type); + int + shard_common_inode_write_post_resolve_handler (call_frame_t *frame, + xlator_t *this) +@@ -4323,26 +4407,28 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, + + if (!local->dot_shard_loc.inode) { + /*change handler*/ +- shard_mkdir_dot_shard (frame, this, +- shard_common_inode_write_post_resolve_handler); ++ 
shard_mkdir_internal_dir (frame, this, ++ shard_common_inode_write_post_resolve_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + /*change handler*/ + local->post_res_handler = + shard_common_inode_write_post_resolve_handler; +- shard_refresh_dot_shard (frame, this); ++ shard_refresh_internal_dir (frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; + } + + int +-shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) ++shard_mkdir_internal_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) + { +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t) cookie; + + local = frame->local; + +@@ -4354,17 +4440,19 @@ shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie, + local->op_errno = op_errno; + goto unwind; + } else { +- gf_msg_debug (this->name, 0, "mkdir on /.shard failed " +- "with EEXIST. Attempting lookup now"); +- shard_lookup_dot_shard (frame, this, +- local->post_res_handler); ++ gf_msg_debug (this->name, 0, "mkdir on %s failed " ++ "with EEXIST. Attempting lookup now", ++ shard_internal_dir_string (type)); ++ shard_lookup_internal_dir (frame, this, ++ local->post_res_handler, ++ type); + return 0; + } + } + +- link_inode = shard_link_dot_shard_inode (local, inode, buf); ++ link_inode = shard_link_internal_dir_inode (local, inode, buf, type); + if (link_inode != inode) { +- shard_refresh_dot_shard (frame, this); ++ shard_refresh_internal_dir (frame, this, type); + } else { + shard_inode_ctx_set_refreshed_flag (link_inode, this); + shard_common_resolve_shards (frame, this, +@@ -4377,40 +4465,59 @@ unwind: + } + + int +-shard_mkdir_dot_shard (call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler) ++shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type) + { + int ret = -1; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + dict_t *xattr_req = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; + + local = frame->local; + priv = this->private; + + local->post_res_handler = handler; ++ gfid = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy (*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ default: ++ break; ++ } + + xattr_req = dict_new (); + if (!xattr_req) + goto err; + +- ret = shard_init_dot_shard_loc (this, local); ++ ret = shard_init_internal_dir_loc (this, local, type); + if (ret) + goto err; + +- ret = dict_set_static_bin (xattr_req, "gfid-req", priv->dot_shard_gfid, +- 16); ++ ret = dict_set_bin (xattr_req, "gfid-req", *gfid, 16); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, +- "Failed to set gfid-req for /.shard"); ++ "Failed to set gfid-req for %s", ++ shard_internal_dir_string (type)); + goto err; ++ } else { ++ free_gfid = _gf_false; + } + + SHARD_SET_ROOT_FS_ID (frame, local); + +- STACK_WIND (frame, shard_mkdir_dot_shard_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, +- 
&local->dot_shard_loc, 0755, 0, xattr_req); ++ STACK_WIND_COOKIE (frame, shard_mkdir_internal_dir_cbk, ++ (void *)(long) type, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mkdir, loc, 0755, 0, ++ xattr_req); + dict_unref (xattr_req); + return 0; + +@@ -4419,6 +4526,8 @@ err: + dict_unref (xattr_req); + local->op_ret = -1; + local->op_errno = ENOMEM; ++ if (free_gfid) ++ GF_FREE (gfid); + handler (frame, this); + return 0; + } +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 75d39a1..a1adb6a 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -278,4 +278,8 @@ typedef struct shard_inode_ctx { + inode_t *base_inode; + } shard_inode_ctx_t; + ++typedef enum { ++ SHARD_INTERNAL_DIR_DOT_SHARD = 1, ++} shard_internal_dir_type_t; ++ + #endif /* __SHARD_H__ */ +-- +1.8.3.1 + diff --git a/SOURCES/0427-features-shard-Add-option-to-barrier-parallel-lookup.patch b/SOURCES/0427-features-shard-Add-option-to-barrier-parallel-lookup.patch new file mode 100644 index 0000000..282045d --- /dev/null +++ b/SOURCES/0427-features-shard-Add-option-to-barrier-parallel-lookup.patch @@ -0,0 +1,291 @@ +From 64b238d3a5caf7bdb32bca25946f84e0afe9bc7a Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Tue, 17 Apr 2018 22:14:20 +0530 +Subject: [PATCH 427/444] features/shard: Add option to barrier parallel lookup + and unlink of shards + +> Upstream: https://review.gluster.org/19915 +> BUG: 1568521 +> Change-Id: Ib0f90a5f62abdfa89cda7bef9f3ff99f349ec332 + +Also move the common parallel unlink callback for GF_FOP_TRUNCATE and +GF_FOP_FTRUNCATE into a separate function. + +Change-Id: Ib0f90a5f62abdfa89cda7bef9f3ff99f349ec332 +BUG: 1520882 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/154861 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +Tested-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/shard/src/shard.c | 113 ++++++++++++++++++++++++++++--------- + xlators/features/shard/src/shard.h | 4 ++ + 2 files changed, 89 insertions(+), 28 deletions(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 5ff04df..268ba20 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -475,6 +475,7 @@ shard_local_wipe (shard_local_t *local) + + count = local->num_blocks; + ++ syncbarrier_destroy (&local->barrier); + loc_wipe (&local->loc); + loc_wipe (&local->dot_shard_loc); + loc_wipe (&local->loc2); +@@ -861,6 +862,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this, + + priv = this->private; + local = frame->local; ++ local->call_count = 0; + shard_idx_iter = local->first_block; + res_inode = local->resolver_base_inode; + +@@ -1780,6 +1782,37 @@ shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata); + ++void ++shard_unlink_block_inode (shard_local_t *local, int shard_block_num); ++ ++int ++shard_truncate_htol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) ++{ ++ int call_count = 0; ++ int shard_block_num = (long) cookie; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } ++ ++ shard_unlink_block_inode (local, shard_block_num); ++done: ++ call_count = 
shard_call_count_return (frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID (frame, local); ++ shard_truncate_last_shard (frame, this, local->inode_list[0]); ++ } ++ return 0; ++} ++ + int + shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode) + { +@@ -1839,10 +1872,9 @@ shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode) + continue; + } + if (wind_failed) { +- shard_unlink_shards_do_cbk (frame, +- (void *)(long) cur_block, +- this, -1, ENOMEM, NULL, +- NULL, NULL); ++ shard_truncate_htol_cbk (frame, (void *)(long) cur_block, ++ this, -1, ENOMEM, NULL, NULL, ++ NULL); + goto next; + } + +@@ -1860,10 +1892,9 @@ shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode) + local->op_errno = ENOMEM; + loc_wipe (&loc); + wind_failed = _gf_true; +- shard_unlink_shards_do_cbk (frame, +- (void *)(long) cur_block, +- this, -1, ENOMEM, NULL, +- NULL, NULL); ++ shard_truncate_htol_cbk (frame, (void *)(long) cur_block, ++ this, -1, ENOMEM, NULL, NULL, ++ NULL); + goto next; + } + loc.name = strrchr (loc.path, '/'); +@@ -1871,7 +1902,7 @@ shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode) + loc.name++; + loc.inode = inode_ref (local->inode_list[i]); + +- STACK_WIND_COOKIE (frame, shard_unlink_shards_do_cbk, ++ STACK_WIND_COOKIE (frame, shard_truncate_htol_cbk, + (void *) (long) cur_block, FIRST_CHILD(this), + FIRST_CHILD (this)->fops->unlink, &loc, + 0, NULL); +@@ -2022,13 +2053,18 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie, + + done: + call_count = shard_call_count_return (frame); +- if (call_count == 0) { +- if (!local->first_lookup_done) +- local->first_lookup_done = _gf_true; +- if (local->op_ret < 0) +- goto unwind; +- else +- local->pls_fop_handler (frame, this); ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wake (&local->barrier); ++ return 0; ++ } else { ++ if (call_count == 0) { ++ if (!local->first_lookup_done) ++ local->first_lookup_done = _gf_true; ++ if (local->op_ret < 0) ++ goto unwind; ++ else ++ local->pls_fop_handler (frame, this); ++ } + } + return 0; + +@@ -2074,6 +2110,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode, + { + int i = 0; + int ret = 0; ++ int count = 0; + int call_count = 0; + int32_t shard_idx_iter = 0; + int last_block = 0; +@@ -2087,10 +2124,12 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode, + + priv = this->private; + local = frame->local; +- call_count = local->call_count; ++ count = call_count = local->call_count; + shard_idx_iter = local->first_block; + last_block = local->last_block; + local->pls_fop_handler = handler; ++ if (local->lookup_shards_barriered) ++ local->barrier.waitfor = local->call_count; + + while (shard_idx_iter <= last_block) { + if (local->inode_list[i]) { +@@ -2162,7 +2201,8 @@ next: + if (!--call_count) + break; + } +- ++ if (local->lookup_shards_barriered) ++ syncbarrier_wait (&local->barrier, count); + return 0; + } + +@@ -2400,6 +2440,9 @@ shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + + frame->local = local; + ++ ret = syncbarrier_init (&local->barrier); ++ if (ret) ++ goto err; + loc_copy (&local->loc, loc); + local->offset = offset; + local->block_size = block_size; +@@ -2450,6 +2493,9 @@ shard_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + goto err; + + frame->local = local; ++ ret = syncbarrier_init (&local->barrier); ++ if (ret) ++ goto err; + local->fd = fd_ref (fd); + 
local->offset = offset; + local->block_size = block_size; +@@ -2881,18 +2927,19 @@ shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + done: + call_count = shard_call_count_return (frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID (frame, local); ++ if (local->unlink_shards_barriered) { ++ syncbarrier_wake (&local->barrier); ++ } else { + +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_cbk (frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_cbk (frame, this); +- else +- shard_truncate_last_shard (frame, this, +- local->inode_list[0]); +- } ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID (frame, local); + ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_cbk (frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_cbk (frame, this); ++ } ++ } + return 0; + } + +@@ -2952,6 +2999,8 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode) + local->call_count = call_count = count; + cur_block = 1; + SHARD_SET_ROOT_FS_ID (frame, local); ++ if (local->unlink_shards_barriered) ++ local->barrier.waitfor = count; + + /* Ignore the base file and start iterating from the first block shard. + */ +@@ -3006,6 +3055,8 @@ next: + if (!--call_count) + break; + } ++ if (local->unlink_shards_barriered) ++ syncbarrier_wait (&local->barrier, count); + + return 0; + } +@@ -3947,6 +3998,9 @@ shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + + frame->local = local; + ++ ret = syncbarrier_init (&local->barrier); ++ if (ret) ++ goto err; + local->fd = fd_ref (fd); + local->block_size = block_size; + local->offset = offset; +@@ -5414,6 +5468,9 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this, + + frame->local = local; + ++ ret = syncbarrier_init (&local->barrier); ++ if (ret) ++ goto out; + local->xattr_req = (xdata) ? 
dict_ref (xdata) : dict_new (); + if (!local->xattr_req) + goto out; +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index a1adb6a..225caa0 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -15,6 +15,7 @@ + #include "xlator.h" + #include "compat-errno.h" + #include "shard-messages.h" ++#include "syncop.h" + + #define GF_SHARD_DIR ".shard" + #define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB) +@@ -257,6 +258,9 @@ typedef struct shard_local { + } lock; + inode_t *resolver_base_inode; + gf_boolean_t first_lookup_done; ++ syncbarrier_t barrier; ++ gf_boolean_t lookup_shards_barriered; ++ gf_boolean_t unlink_shards_barriered; + } shard_local_t; + + typedef struct shard_inode_ctx { +-- +1.8.3.1 + diff --git a/SOURCES/0428-libglusterfs-syncop-Handle-barrier_-init-destroy-in-.patch b/SOURCES/0428-libglusterfs-syncop-Handle-barrier_-init-destroy-in-.patch new file mode 100644 index 0000000..a6136c0 --- /dev/null +++ b/SOURCES/0428-libglusterfs-syncop-Handle-barrier_-init-destroy-in-.patch @@ -0,0 +1,99 @@ +From c285acf172d42271d87eb069045ea70bce97b0b1 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Mon, 23 Apr 2018 21:04:58 +0530 +Subject: [PATCH 428/444] libglusterfs/syncop: Handle barrier_{init/destroy} in + error cases + +> Upstream: https://review.gluster.org/19927 +> BUG: 1568521 +> Change-Id: I53e60cfcaa7f8edfa5eca47307fa99f10ee64505 + +Change-Id: I53e60cfcaa7f8edfa5eca47307fa99f10ee64505 +BUG: 1520882 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/154862 +Tested-by: Krutika Dhananjay +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +Tested-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/syncop.c | 30 ++++++++++++++++++++++++++---- + libglusterfs/src/syncop.h | 1 + + 2 files changed, 27 insertions(+), 4 deletions(-) + +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index ac40a1d..81d73b2 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -1087,30 +1087,52 @@ synclock_unlock (synclock_t *lock) + int + syncbarrier_init (struct syncbarrier *barrier) + { ++ int ret = 0; + if (!barrier) { + errno = EINVAL; + return -1; + } + +- pthread_cond_init (&barrier->cond, 0); ++ ret = pthread_cond_init (&barrier->cond, 0); ++ if (ret) { ++ errno = ret; ++ return -1; ++ } + barrier->count = 0; + barrier->waitfor = 0; + INIT_LIST_HEAD (&barrier->waitq); + +- return pthread_mutex_init (&barrier->guard, 0); ++ ret = pthread_mutex_init (&barrier->guard, 0); ++ if (ret) { ++ (void)pthread_cond_destroy (&barrier->cond); ++ errno = ret; ++ return -1; ++ } ++ barrier->initialized = _gf_true; ++ return 0; + } + + + int + syncbarrier_destroy (struct syncbarrier *barrier) + { ++ int ret = 0; ++ int ret1 = 0; + if (!barrier) { + errno = EINVAL; + return -1; + } + +- pthread_cond_destroy (&barrier->cond); +- return pthread_mutex_destroy (&barrier->guard); ++ if (barrier->initialized) { ++ ret = pthread_cond_destroy (&barrier->cond); ++ ret1 = pthread_mutex_destroy (&barrier->guard); ++ barrier->initialized = _gf_false; ++ } ++ if (ret || ret1) { ++ errno = ret?ret:ret1; ++ return -1; ++ } ++ return 0; + } + + +diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h +index 5b5ad4e..9ab5ee8 100644 +--- a/libglusterfs/src/syncop.h ++++ b/libglusterfs/src/syncop.h +@@ -134,6 +134,7 @@ typedef struct synclock synclock_t; + + + struct syncbarrier { ++ gf_boolean_t initialized; /*Set on successful initialization*/ + 
pthread_mutex_t guard; /* guard the remaining members, pair @cond */ + pthread_cond_t cond; /* waiting non-synctasks */ + struct list_head waitq; /* waiting synctasks */ +-- +1.8.3.1 + diff --git a/SOURCES/0429-features-shard-Introducing-.shard-.remove_me-for-ato.patch b/SOURCES/0429-features-shard-Introducing-.shard-.remove_me-for-ato.patch new file mode 100644 index 0000000..94cd4a6 --- /dev/null +++ b/SOURCES/0429-features-shard-Introducing-.shard-.remove_me-for-ato.patch @@ -0,0 +1,2749 @@ +From b92aedc0b10d3c7b6150b8f18c950bf95494bc5f Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Thu, 29 Mar 2018 17:21:32 +0530 +Subject: [PATCH 429/444] features/shard: Introducing ".shard/.remove_me" for + atomic shard deletion (part 1) + +> Upstream: https://review.gluster.org/19929 +> BUG: 1568521 +> Change-Id: Ia1d238b721a3e99f951a73abbe199e4245f51a3a + +PROBLEM: +Shards are deleted synchronously when a sharded file is unlinked or +when a sharded file participating as the dst in a rename() is going to +be replaced. The problem with this approach is it makes the operation +really slow, sometimes causing the application to time out, especially +with large files. + +SOLUTION: +To make this operation atomic, we introduce a ".remove_me" directory. +Now renames and unlinks will simply involve two steps: +1. creating an empty file under .remove_me named after the gfid of the file +participating in unlink/rename +2. carrying out the actual rename/unlink +A synctask is created (more on that in part 2) to scan this directory +after every unlink/rename operation (or upon a volume mount) and clean +up all shards associated with it. All of this happens in the background. +The task takes care to delete the shards associated with the gfid in +.remove_me only if this gfid doesn't exist in backend, ensuring that the +file was successfully renamed/unlinked and its shards can be discarded now +safely. 
+ +Change-Id: Ia1d238b721a3e99f951a73abbe199e4245f51a3a +BUG: 1520882 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/154863 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +--- + libglusterfs/src/common-utils.h | 1 + + tests/bugs/shard/bug-1245547.t | 4 +- + tests/bugs/shard/bug-1568521-EEXIST.t | 79 ++ + tests/bugs/shard/bug-shard-discard.t | 16 +- + tests/bugs/shard/shard-inode-refcount-test.t | 2 +- + tests/bugs/shard/unlinks-and-renames.t | 118 ++- + xlators/features/shard/src/shard-mem-types.h | 1 + + xlators/features/shard/src/shard-messages.h | 9 +- + xlators/features/shard/src/shard.c | 1384 ++++++++++++++++++-------- + xlators/features/shard/src/shard.h | 103 +- + 10 files changed, 1250 insertions(+), 467 deletions(-) + create mode 100644 tests/bugs/shard/bug-1568521-EEXIST.t + +diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h +index e64dea3..c804ed5 100644 +--- a/libglusterfs/src/common-utils.h ++++ b/libglusterfs/src/common-utils.h +@@ -121,6 +121,7 @@ void trap (void); + /* Shard */ + #define GF_XATTR_SHARD_FILE_SIZE "trusted.glusterfs.shard.file-size" + #define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806" ++#define DOT_SHARD_REMOVE_ME_GFID "77dd5a45-dbf5-4592-b31b-b440382302e9" + + /* Lease: buffer length for stringified lease id + * Format: 4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum +diff --git a/tests/bugs/shard/bug-1245547.t b/tests/bugs/shard/bug-1245547.t +index c19b2a6..3c46785 100644 +--- a/tests/bugs/shard/bug-1245547.t ++++ b/tests/bugs/shard/bug-1245547.t +@@ -25,11 +25,11 @@ TEST touch $M0/bar + TEST truncate -s 10G $M0/bar + #Unlink on such a file should succeed. + TEST unlink $M0/bar +-# ++ + #Create a file 'baz' with holes. + TEST touch $M0/baz + TEST truncate -s 10G $M0/baz + #Rename with a sharded existing dest that has holes must succeed. + TEST mv -f $M0/foo $M0/baz + +-cleanup; ++cleanup +diff --git a/tests/bugs/shard/bug-1568521-EEXIST.t b/tests/bugs/shard/bug-1568521-EEXIST.t +new file mode 100644 +index 0000000..e4c3d41 +--- /dev/null ++++ b/tests/bugs/shard/bug-1568521-EEXIST.t +@@ -0,0 +1,79 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume start $V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++# Unlink a temporary file to trigger creation of .remove_me ++TEST touch $M0/tmp ++TEST unlink $M0/tmp ++ ++TEST stat $B0/${V0}0/.shard/.remove_me ++TEST stat $B0/${V0}1/.shard/.remove_me ++ ++TEST dd if=/dev/zero of=$M0/dir/file bs=1024 count=9216 ++gfid_file=$(get_gfid_string $M0/dir/file) ++ ++# Create marker file from the backend to simulate ENODATA. 
++touch $B0/${V0}0/.shard/.remove_me/$gfid_file ++touch $B0/${V0}1/.shard/.remove_me/$gfid_file ++ ++# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case ++# and confirm that the correct values are set when the actual unlink takes place ++ ++TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_file ++TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_file ++ ++TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_file ++TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_file ++ ++# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT ++sleep 2 ++ ++TEST unlink $M0/dir/file ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_file ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_file ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_file ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_file ++ ++############################## ++### Repeat test for rename ### ++############################## ++ ++TEST touch $M0/src ++TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216 ++gfid_dst=$(get_gfid_string $M0/dir/dst) ++ ++# Create marker file from the backend to simulate ENODATA. 
++touch $B0/${V0}0/.shard/.remove_me/$gfid_dst ++touch $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case ++# and confirm that the correct values are set when the actual unlink takes place ++ ++TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst ++TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst ++TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT ++sleep 2 ++ ++TEST mv -f $M0/src $M0/dir/dst ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++cleanup +diff --git a/tests/bugs/shard/bug-shard-discard.t b/tests/bugs/shard/bug-shard-discard.t +index 72d8586..884d9e7 100644 +--- a/tests/bugs/shard/bug-shard-discard.t ++++ b/tests/bugs/shard/bug-shard-discard.t +@@ -42,14 +42,14 @@ EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1` + + # Now unlink the file. And ensure that all shards associated with the file are cleaned up + TEST unlink $M0/foo +-TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1 +-TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1 +-TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1 +-TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1 +-TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2 +-TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2 +-TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2 +-TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1 ++#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1 ++#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2 ++#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2 ++#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2 + TEST ! 
stat $M0/foo + + #clean up everything +diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t +index 03e0cc9..c92dc07 100644 +--- a/tests/bugs/shard/shard-inode-refcount-test.t ++++ b/tests/bugs/shard/shard-inode-refcount-test.t +@@ -18,7 +18,7 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23 + + ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0) + TEST rm -f $M0/one-plus-five-shards +-EXPECT `expr $ACTIVE_INODES_BEFORE - 5` get_mount_active_size_value $V0 ++#EXPECT `expr $ACTIVE_INODES_BEFORE - 4` get_mount_active_size_value $V0 + + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $CLI volume stop $V0 +diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t +index a8f188b..997c397 100644 +--- a/tests/bugs/shard/unlinks-and-renames.t ++++ b/tests/bugs/shard/unlinks-and-renames.t +@@ -32,7 +32,17 @@ TEST truncate -s 5M $M0/dir/foo + TEST ! stat $B0/${V0}0/.shard + TEST ! stat $B0/${V0}1/.shard + # Test to ensure that unlink doesn't fail due to absence of /.shard ++gfid_foo=$(get_gfid_string $M0/dir/foo) + TEST unlink $M0/dir/foo ++TEST stat $B0/${V0}0/.shard/.remove_me ++TEST stat $B0/${V0}1/.shard/.remove_me ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo ++ ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo + + ################################################## + ##### Unlink of a sharded file without holes ##### +@@ -46,14 +56,20 @@ TEST stat $B0/${V0}1/.shard/$gfid_new.1 + TEST stat $B0/${V0}0/.shard/$gfid_new.2 + TEST stat $B0/${V0}1/.shard/$gfid_new.2 + TEST unlink $M0/dir/new +-TEST ! stat $B0/${V0}0/.shard/$gfid_new.1 +-TEST ! stat $B0/${V0}1/.shard/$gfid_new.1 +-TEST ! stat $B0/${V0}0/.shard/$gfid_new.2 +-TEST ! stat $B0/${V0}1/.shard/$gfid_new.2 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_new.1 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_new.1 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_new.2 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_new.2 + TEST ! stat $M0/dir/new + TEST ! stat $B0/${V0}0/dir/new + TEST ! stat $B0/${V0}1/dir/new ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_new ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_new + ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_new ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_new ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_new ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_new + ####################################### + ##### Unlink with /.shard present ##### + ####################################### +@@ -67,18 +83,32 @@ TEST unlink $M0/dir/foo + TEST ! stat $B0/${V0}0/dir/foo + TEST ! stat $B0/${V0}1/dir/foo + TEST ! 
stat $M0/dir/foo ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo ++ ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo + + ############################################################# + ##### Unlink of a file with only one block (the zeroth) ##### + ############################################################# + TEST touch $M0/dir/foo ++gfid_foo=$(get_gfid_string $M0/dir/foo) + TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 count=1024 +-# Test to ensure that unlink of a sparse file works fine. ++# Test to ensure that unlink of a file with only base shard works fine. + TEST unlink $M0/dir/foo + TEST ! stat $B0/${V0}0/dir/foo + TEST ! stat $B0/${V0}1/dir/foo + TEST ! stat $M0/dir/foo ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo + ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo + #################################################### + ##### Unlink of a sharded file with hard-links ##### + #################################################### +@@ -94,6 +124,8 @@ TEST stat $B0/${V0}1/.shard/$gfid_original.2 + TEST ln $M0/dir/original $M0/link + # Now delete the original file. + TEST unlink $M0/dir/original ++TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_original ++TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_original + # Ensure the shards are still intact. + TEST stat $B0/${V0}0/.shard/$gfid_original.1 + TEST stat $B0/${V0}1/.shard/$gfid_original.1 +@@ -105,15 +137,22 @@ TEST stat $B0/${V0}0/link + TEST stat $B0/${V0}1/link + # Now delete the last link. + TEST unlink $M0/link ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_original ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_original + # Ensure that the shards are all cleaned up. +-TEST ! stat $B0/${V0}0/.shard/$gfid_original.1 +-TEST ! stat $B0/${V0}1/.shard/$gfid_original.1 +-TEST ! stat $B0/${V0}0/.shard/$gfid_original.2 +-TEST ! stat $B0/${V0}1/.shard/$gfid_original.2 +-TEST ! stat $M0/link ++#TEST ! stat $B0/${V0}0/.shard/$gfid_original.1 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_original.1 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_original.2 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_original.2 ++#TEST ! stat $M0/link + TEST ! stat $B0/${V0}0/link + TEST ! 
stat $B0/${V0}1/link + ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_original ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_original ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_original ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_original ++ + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $CLI volume stop $V0 + TEST $CLI volume delete $V0 +@@ -140,6 +179,7 @@ TEST touch $M0/dir/dst + ##### Rename with /.shard absent ##### + ###################################### + TEST truncate -s 5M $M0/dir/dst ++gfid_dst=$(get_gfid_string $M0/dir/dst) + TEST ! stat $B0/${V0}0/.shard + TEST ! stat $B0/${V0}1/.shard + # Test to ensure that rename doesn't fail due to absence of /.shard +@@ -150,6 +190,13 @@ TEST ! stat $B0/${V0}0/dir/src + TEST ! stat $B0/${V0}1/dir/src + TEST stat $B0/${V0}0/dir/dst + TEST stat $B0/${V0}1/dir/dst ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst + + ################################################## + ##### Rename to a sharded file without holes ##### +@@ -165,16 +212,23 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.1 + TEST stat $B0/${V0}0/.shard/$gfid_dst.2 + TEST stat $B0/${V0}1/.shard/$gfid_dst.2 + TEST mv -f $M0/dir/src $M0/dir/dst +-TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 +-TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 +-TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 +-TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 + TEST ! stat $M0/dir/src + TEST stat $M0/dir/dst + TEST ! stat $B0/${V0}0/dir/src + TEST ! 
stat $B0/${V0}1/dir/src + TEST stat $B0/${V0}0/dir/dst + TEST stat $B0/${V0}1/dir/dst ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst + + ################################################### + ##### Rename of dst file with /.shard present ##### +@@ -182,7 +236,8 @@ TEST stat $B0/${V0}1/dir/dst + TEST unlink $M0/dir/dst + TEST touch $M0/dir/src + TEST truncate -s 5M $M0/dir/dst +-# Test to ensure that unlink of a sparse file works fine. ++gfid_dst=$(get_gfid_string $M0/dir/dst) ++# Test to ensure that rename into a sparse file works fine. + TEST mv -f $M0/dir/src $M0/dir/dst + TEST ! stat $M0/dir/src + TEST stat $M0/dir/dst +@@ -190,6 +245,13 @@ TEST ! stat $B0/${V0}0/dir/src + TEST ! stat $B0/${V0}1/dir/src + TEST stat $B0/${V0}0/dir/dst + TEST stat $B0/${V0}1/dir/dst ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst + + ############################################################### + ##### Rename of dst file with only one block (the zeroth) ##### +@@ -197,7 +259,8 @@ TEST stat $B0/${V0}1/dir/dst + TEST unlink $M0/dir/dst + TEST touch $M0/dir/src + TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=1024 +-# Test to ensure that unlink of a sparse file works fine. ++gfid_dst=$(get_gfid_string $M0/dir/dst) ++# Test to ensure that rename into a file with only base shard works fine. + TEST mv -f $M0/dir/src $M0/dir/dst + TEST ! stat $M0/dir/src + TEST stat $M0/dir/dst +@@ -205,6 +268,13 @@ TEST ! stat $B0/${V0}0/dir/src + TEST ! 
stat $B0/${V0}1/dir/src + TEST stat $B0/${V0}0/dir/dst + TEST stat $B0/${V0}1/dir/dst ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst + + ######################################################## + ##### Rename to a dst sharded file with hard-links ##### +@@ -231,18 +301,26 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.2 + TEST ! stat $M0/dir/src + TEST ! stat $B0/${V0}0/dir/src + TEST ! stat $B0/${V0}1/dir/src ++TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_dst ++TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst + # Now rename another file to the last link. + TEST touch $M0/dir/src2 + TEST mv -f $M0/dir/src2 $M0/link + # Ensure that the shards are all cleaned up. +-TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 +-TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 +-TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 +-TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 ++#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 ++#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 + TEST ! stat $M0/dir/src2 + TEST ! stat $B0/${V0}0/dir/src2 + TEST ! stat $B0/${V0}1/dir/src2 ++TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst ++TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst + ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst + # Rename with non-existent dst and a sharded src + TEST touch $M0/dir/src + TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216 +diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h +index 77f0cee..fea66aa 100644 +--- a/xlators/features/shard/src/shard-mem-types.h ++++ b/xlators/features/shard/src/shard-mem-types.h +@@ -18,6 +18,7 @@ enum gf_shard_mem_types_ { + gf_shard_mt_inode_ctx_t, + gf_shard_mt_iovec, + gf_shard_mt_int64_t, ++ gf_shard_mt_uint64_t, + gf_shard_mt_end + }; + #endif +diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h +index 8e61630..0267f8a 100644 +--- a/xlators/features/shard/src/shard-messages.h ++++ b/xlators/features/shard/src/shard-messages.h +@@ -40,7 +40,7 @@ + */ + + #define GLFS_COMP_BASE_SHARD GLFS_MSGID_COMP_SHARD +-#define GLFS_NUM_MESSAGES 19 ++#define GLFS_NUM_MESSAGES 20 + #define GLFS_MSGID_END (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1) + + #define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages" +@@ -187,5 +187,12 @@ + */ + #define SHARD_MSG_MEMALLOC_FAILED 
(GLFS_COMP_BASE_SHARD + 19) + ++/*! ++ * @messageid 133020 ++ * @diagnosis ++ * @recommendedaction ++*/ ++#define SHARD_MSG_FOP_FAILED (GLFS_COMP_BASE_SHARD + 20) ++ + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" + #endif /* !_SHARD_MESSAGES_H_ */ +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 268ba20..492341c 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -117,9 +117,6 @@ __shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf, + if (valid & SHARD_MASK_BLOCK_SIZE) + ctx->block_size = block_size; + +- if (!stbuf) +- return 0; +- + if (valid & SHARD_MASK_PROT) + ctx->stat.ia_prot = stbuf->ia_prot; + +@@ -179,7 +176,35 @@ shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf, + } + + int +-__shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this) ++__shard_inode_ctx_set_refresh_flag (inode_t *inode, xlator_t *this) ++{ ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; ++ ++ ret = __shard_inode_ctx_get (inode, this, &ctx); ++ if (ret) ++ return ret; ++ ++ ctx->refresh = _gf_true; ++ ++ return 0; ++} ++int ++shard_inode_ctx_set_refresh_flag (inode_t *inode, xlator_t *this) ++{ ++ int ret = -1; ++ ++ LOCK (&inode->lock); ++ { ++ ret = __shard_inode_ctx_set_refresh_flag (inode, this); ++ } ++ UNLOCK (&inode->lock); ++ ++ return ret; ++} ++ ++int ++__shard_inode_ctx_mark_dir_refreshed (inode_t *inode, xlator_t *this) + { + int ret = -1; + shard_inode_ctx_t *ctx = NULL; +@@ -193,13 +218,13 @@ __shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this) + } + + int +-shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this) ++shard_inode_ctx_mark_dir_refreshed (inode_t *inode, xlator_t *this) + { + int ret = -1; + + LOCK (&inode->lock); + { +- ret = __shard_inode_ctx_set_refreshed_flag (inode, this); ++ ret = __shard_inode_ctx_mark_dir_refreshed (inode, this); + } + UNLOCK (&inode->lock); + +@@ -478,9 +503,15 @@ shard_local_wipe (shard_local_t *local) + syncbarrier_destroy (&local->barrier); + loc_wipe (&local->loc); + loc_wipe (&local->dot_shard_loc); ++ loc_wipe (&local->dot_shard_rm_loc); + loc_wipe (&local->loc2); + loc_wipe (&local->tmp_loc); ++ loc_wipe (&local->int_inodelk.loc); ++ loc_wipe (&local->int_entrylk.loc); ++ loc_wipe (&local->newloc); + ++ if (local->int_entrylk.basename) ++ GF_FREE (local->int_entrylk.basename); + if (local->fd) + fd_unref (local->fd); + +@@ -504,6 +535,10 @@ shard_local_wipe (shard_local_t *local) + iobref_unref (local->iobref); + if (local->list_inited) + gf_dirent_free (&local->entries_head); ++ if (local->inodelk_frame) ++ SHARD_STACK_DESTROY (local->inodelk_frame); ++ if (local->entrylk_frame) ++ SHARD_STACK_DESTROY (local->entrylk_frame); + } + + int +@@ -554,7 +589,10 @@ shard_internal_dir_string (shard_internal_dir_type_t type) + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: +- str = ".shard"; ++ str = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ str = GF_SHARD_REMOVE_ME_DIR; + break; + default: + break; +@@ -566,10 +604,13 @@ static int + shard_init_internal_dir_loc (xlator_t *this, shard_local_t *local, + shard_internal_dir_type_t type) + { +- int ret = -1; +- char *bname = NULL; +- loc_t *internal_dir_loc = NULL; ++ int ret = -1; ++ char *bname = NULL; ++ inode_t *parent = NULL; ++ loc_t *internal_dir_loc = NULL; ++ shard_priv_t *priv = NULL; + ++ priv = this->private; + if (!local) + return -1; + +@@ -577,13 +618,19 @@ 
shard_init_internal_dir_loc (xlator_t *this, shard_local_t *local, + case SHARD_INTERNAL_DIR_DOT_SHARD: + internal_dir_loc = &local->dot_shard_loc; + bname = GF_SHARD_DIR; ++ parent = inode_ref (this->itable->root); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ internal_dir_loc = &local->dot_shard_rm_loc; ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ parent = inode_ref (priv->dot_shard_inode); + break; + default: + break; + } + + internal_dir_loc->inode = inode_new (this->itable); +- internal_dir_loc->parent = inode_ref (this->itable->root); ++ internal_dir_loc->parent = parent; + ret = inode_path (internal_dir_loc->parent, bname, + (char **)&internal_dir_loc->path); + if (ret < 0 || !(internal_dir_loc->inode)) { +@@ -706,11 +753,48 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, + } + + int +-shard_common_inode_write_failure_unwind (glusterfs_fop_t fop, +- call_frame_t *frame, int32_t op_ret, +- int32_t op_errno) ++shard_common_failure_unwind (glusterfs_fop_t fop, call_frame_t *frame, ++ int32_t op_ret, int32_t op_errno) + { + switch (fop) { ++ case GF_FOP_LOOKUP: ++ SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_STAT: ++ SHARD_STACK_UNWIND (stat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSTAT: ++ SHARD_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_TRUNCATE: ++ SHARD_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_FTRUNCATE: ++ SHARD_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_MKNOD: ++ SHARD_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, ++ NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_LINK: ++ SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, NULL, ++ NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_CREATE: ++ SHARD_STACK_UNWIND (create, frame, op_ret, op_errno, NULL, ++ NULL, NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_UNLINK: ++ SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_RENAME: ++ SHARD_STACK_UNWIND (rename, frame, op_ret, op_errno, NULL, NULL, ++ NULL, NULL, NULL, NULL); ++ break; + case GF_FOP_WRITE: + SHARD_STACK_UNWIND (writev, frame, op_ret, op_errno, + NULL, NULL, NULL); +@@ -727,6 +811,45 @@ shard_common_inode_write_failure_unwind (glusterfs_fop_t fop, + SHARD_STACK_UNWIND (discard, frame, op_ret, op_errno, + NULL, NULL, NULL); + break; ++ case GF_FOP_READ: ++ SHARD_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, -1, ++ NULL, NULL, NULL); ++ break; ++ case GF_FOP_FSYNC: ++ SHARD_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_REMOVEXATTR: ++ SHARD_STACK_UNWIND (removexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FREMOVEXATTR: ++ SHARD_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FGETXATTR: ++ SHARD_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL, ++ NULL); ++ break; ++ case GF_FOP_GETXATTR: ++ SHARD_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL, ++ NULL); ++ break; ++ case GF_FOP_FSETXATTR: ++ SHARD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETXATTR: ++ SHARD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETATTR: ++ SHARD_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_FSETATTR: ++ SHARD_STACK_UNWIND (fsetattr, 
frame, op_ret, op_errno, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_SEEK: ++ SHARD_STACK_UNWIND (seek, frame, op_ret, op_errno, 0, NULL); ++ break; + default: + gf_msg (THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); +@@ -866,7 +989,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this, + shard_idx_iter = local->first_block; + res_inode = local->resolver_base_inode; + +- if (local->op_ret < 0) ++ if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + while (shard_idx_iter <= local->last_block) { +@@ -1063,19 +1186,26 @@ shard_link_internal_dir_inode (shard_local_t *local, inode_t *inode, + shard_priv_t *priv = NULL; + char *bname = NULL; + inode_t **priv_inode = NULL; ++ inode_t *parent = NULL; + + priv = THIS->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: +- bname = ".shard"; ++ bname = GF_SHARD_DIR; + priv_inode = &priv->dot_shard_inode; ++ parent = inode->table->root; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ priv_inode = &priv->dot_shard_rm_inode; ++ parent = priv->dot_shard_inode; + break; + default: + break; + } + +- linked_inode = inode_link (inode, inode->table->root, bname, buf); ++ linked_inode = inode_link (inode, parent, bname, buf); + inode_lookup (linked_inode); + *priv_inode = linked_inode; + return linked_inode; +@@ -1105,7 +1235,7 @@ shard_refresh_internal_dir_cbk (call_frame_t *frame, void *cookie, + * shard_link_internal_dir_inode(). + */ + linked_inode = shard_link_internal_dir_inode (local, inode, buf, type); +- shard_inode_ctx_set_refreshed_flag (linked_inode, this); ++ shard_inode_ctx_mark_dir_refreshed (linked_inode, this); + out: + shard_common_resolve_shards (frame, this, local->post_res_handler); + return 0; +@@ -1128,6 +1258,9 @@ shard_refresh_internal_dir (call_frame_t *frame, xlator_t *this, + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy (gfid, priv->dot_shard_gfid); + break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy (gfid, priv->dot_shard_rm_gfid); ++ break; + default: + break; + } +@@ -1189,7 +1322,7 @@ shard_lookup_internal_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this + if (link_inode != inode) { + shard_refresh_internal_dir (frame, this, type); + } else { +- shard_inode_ctx_set_refreshed_flag (link_inode, this); ++ shard_inode_ctx_mark_dir_refreshed (link_inode, this); + shard_common_resolve_shards (frame, this, + local->post_res_handler); + } +@@ -1233,6 +1366,10 @@ shard_lookup_internal_dir (call_frame_t *frame, xlator_t *this, + gf_uuid_copy (*gfid, priv->dot_shard_gfid); + loc = &local->dot_shard_loc; + break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy (*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; + default: + break; + } +@@ -1383,13 +1520,9 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + + STACK_WIND (frame, shard_lookup_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->lookup, loc, local->xattr_req); +- + return 0; +- +- + err: +- SHARD_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, +- NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_LOOKUP, frame, -1, op_errno); + return 0; + + } +@@ -1610,11 +1743,9 @@ shard_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + + STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); +- + return 0; +- + err: +- SHARD_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL); ++ 
shard_common_failure_unwind (GF_FOP_STAT, frame, -1, ENOMEM); + return 0; + } + +@@ -1668,9 +1799,8 @@ shard_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); + return 0; +- + err: +- SHARD_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_FSTAT, frame, -1, ENOMEM); + return 0; + } + +@@ -1728,14 +1858,9 @@ shard_truncate_last_shard_cbk (call_frame_t *frame, void *cookie, + shard_update_file_size (frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; +- + err: +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND (truncate, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); +- else +- SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -1946,12 +2071,8 @@ shard_post_lookup_shards_truncate_handler (call_frame_t *frame, xlator_t *this) + local = frame->local; + + if (local->op_ret < 0) { +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND (truncate, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); +- else +- SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -2232,16 +2353,9 @@ shard_post_resolve_truncate_handler (call_frame_t *frame, xlator_t *this) + shard_post_update_size_truncate_handler); + return 0; + } else { +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND (truncate, frame, +- local->op_ret, +- local->op_errno, NULL, NULL, +- NULL); +- else +- SHARD_STACK_UNWIND (ftruncate, frame, +- local->op_ret, +- local->op_errno, NULL, NULL, +- NULL); ++ shard_common_failure_unwind (local->fop, frame, ++ local->op_ret, ++ local->op_errno); + return 0; + } + } +@@ -2329,14 +2443,8 @@ shard_truncate_begin (call_frame_t *frame, xlator_t *this) + return 0; + + err: +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, +- NULL); +- else +- SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, +- NULL); +- +- return 0; ++ shard_common_failure_unwind (local->fop, frame, -1, ENOMEM); ++ return 0; + } + + int +@@ -2348,13 +2456,8 @@ shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this) + local = frame->local; + + if (local->op_ret < 0) { +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND (truncate, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); +- else +- SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); +- ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -2457,7 +2560,7 @@ shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + return 0; + + err: +- SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_TRUNCATE, frame, -1, ENOMEM); + return 0; + } + +@@ -2512,8 +2615,7 @@ shard_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + shard_post_lookup_truncate_handler); + return 0; + err: +- +- SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_FTRUNCATE, frame, -1, ENOMEM); + return 0; + } + +@@ -2531,7 
+2633,7 @@ shard_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (op_ret == -1) + goto unwind; + +- ret = shard_inode_ctx_set (inode, this, buf, ntoh64 (local->block_size), ++ ret = shard_inode_ctx_set (inode, this, buf, local->block_size, + SHARD_ALL_MASK); + if (ret) + gf_msg (this->name, GF_LOG_WARNING, 0, +@@ -2549,25 +2651,27 @@ int + shard_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) + { ++ shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + ++ priv = this->private; + local = mem_get0 (this->local_pool); + if (!local) + goto err; + + frame->local = local; ++ local->block_size = priv->block_size; + if (!__is_gsyncd_on_shard_dir (frame, loc)) { +- SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err); ++ SHARD_INODE_CREATE_INIT (this, local->block_size, xdata, loc, 0, ++ 0, err); + } + + STACK_WIND (frame, shard_mknod_cbk, FIRST_CHILD (this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, + xdata); + return 0; +- + err: +- SHARD_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, +- NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_MKNOD, frame, -1, ENOMEM); + return 0; + + } +@@ -2594,8 +2698,7 @@ shard_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + preparent, postparent, xdata); + return 0; + err: +- SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, NULL, NULL, +- NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_LINK, frame, op_ret, op_errno); + return 0; + } + +@@ -2660,10 +2763,8 @@ shard_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + shard_lookup_base_file (frame, this, &local->loc, + shard_post_lookup_link_handler); + return 0; +- + err: +- SHARD_STACK_UNWIND (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, +- NULL); ++ shard_common_failure_unwind (GF_FOP_LINK, frame, -1, ENOMEM); + return 0; + } + +@@ -2678,13 +2779,8 @@ shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this) + local = frame->local; + + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { +- if (local->fop == GF_FOP_UNLINK) +- SHARD_STACK_UNWIND (unlink, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); +- else +- SHARD_STACK_UNWIND (rename, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL, +- NULL, NULL, NULL); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + local->op_ret = 0; +@@ -2724,13 +2820,9 @@ shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this) + shard_rename_cbk (frame, this); + return 0; + } else { +- if (local->fop == GF_FOP_UNLINK) +- SHARD_STACK_UNWIND (unlink, frame, +- local->op_ret, +- local->op_errno, NULL, NULL, +- NULL); +- else +- shard_rename_cbk (frame, this); ++ shard_common_failure_unwind (local->fop, frame, ++ local->op_ret, ++ local->op_errno); + return 0; + } + } +@@ -2745,103 +2837,6 @@ shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this) + return 0; + } + +-int +-shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- uint32_t link_count = 0; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- local = frame->local; +- priv = this->private; +- +- if (op_ret < 0) { +- SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- return 0; +- } +- +- /* Because link() does not create links for all 
but the +- * base shard, unlink() must delete these shards only when the +- * link count is 1. We can return safely now. +- */ +- if ((xdata) && (!dict_get_uint32 (xdata, GET_LINK_COUNT, &link_count)) +- && (link_count > 1)) +- goto unwind; +- +- local->first_block = get_lowest_block (0, local->block_size); +- local->last_block = get_highest_block (0, local->prebuf.ia_size, +- local->block_size); +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = local->loc.inode; +- +- /* num_blocks = 1 implies that the file has not crossed its +- * shard block size. So unlink boils down to unlinking just the +- * base file. We can safely return now. +- */ +- if (local->num_blocks == 1) +- goto unwind; +- +- local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto unwind; +- +- /* Save the xdata and preparent and postparent iatts now. This will be +- * used at the time of unwinding the call to the parent xl. +- */ +- local->preoldparent = *preparent; +- local->postoldparent = *postparent; +- if (xdata) +- local->xattr_rsp = dict_ref (xdata); +- +- local->dot_shard_loc.inode = inode_find (this->itable, +- priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc (this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto unwind; +- shard_lookup_internal_dir (frame, this, +- shard_post_resolve_unlink_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_unlink_handler; +- shard_refresh_internal_dir (frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- +- return 0; +- +-unwind: +- SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, +- postparent, xdata); +- return 0; +-} +- +-int +-shard_unlink_base_file (call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (dict_set_uint32 (local->xattr_req, GET_LINK_COUNT, 0)) +- gf_msg (this->name, GF_LOG_WARNING, 0, +- SHARD_MSG_DICT_SET_FAILED, "Failed to set " +- GET_LINK_COUNT" in dict"); +- +- /* To-Do: Request open-fd count on base file */ +- STACK_WIND (frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, +- local->xattr_req); +- return 0; +-} +- + void + shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + { +@@ -3062,160 +3057,754 @@ next: + } + + int +-shard_post_lookup_unlink_handler (call_frame_t *frame, xlator_t *this) ++shard_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) + { +- shard_local_t *local = NULL; ++ if (op_ret) ++ gf_msg (this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY (frame); ++ return 0; ++} + +- local = frame->local; ++int ++shard_unlock_inodelk (call_frame_t *frame, xlator_t *this) ++{ ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *lock = NULL; + +- if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (unlink, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); +- return 0; +- } ++ local = frame->local; ++ lk_frame = local->inodelk_frame; ++ lk_local = lk_frame->local; ++ local->inodelk_frame = NULL; ++ loc = &local->int_inodelk.loc; ++ lock = &lk_local->int_inodelk; ++ lock->flock.l_type = F_UNLCK; + +- shard_unlink_base_file (frame, this); ++ STACK_WIND (lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, ++ F_SETLK, &lock->flock, NULL); ++ local->int_inodelk.acquired_lock = _gf_false; + return 0; + } + + int +-shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, +- dict_t *xdata) ++shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata); ++int ++shard_rename_src_base_file (call_frame_t *frame, xlator_t *this) + { +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++ int ret = 0; ++ loc_t *dst_loc = NULL; ++ loc_t tmp_loc = {0,}; ++ shard_local_t *local = frame->local; + +- ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { +- gf_msg (this->name, GF_LOG_ERROR, 0, +- SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa (loc->inode->gfid)); +- goto err; +- } ++ if (local->dst_block_size) { ++ tmp_loc.parent = inode_ref (local->loc2.parent); ++ ret = inode_path (tmp_loc.parent, local->loc2.name, ++ (char **)&tmp_loc.path); ++ if (ret < 0) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_INODE_PATH_FAILED, "Inode path failed" ++ " on pargfid=%s bname=%s", ++ uuid_utoa (tmp_loc.parent->gfid), ++ local->loc2.name); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); +- return 0; ++ tmp_loc.name = strrchr (tmp_loc.path, '/'); ++ if (tmp_loc.name) ++ tmp_loc.name++; ++ dst_loc = &tmp_loc; ++ } else { ++ dst_loc = &local->loc2; + } + +- local = mem_get0 (this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy (&local->loc, loc); +- local->xflag = xflag; +- local->xattr_req = (xdata) ? 
dict_ref (xdata) : dict_new (); +- local->block_size = block_size; +- local->resolver_base_inode = loc->inode; +- local->fop = GF_FOP_UNLINK; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- +- shard_lookup_base_file (frame, this, &local->loc, +- shard_post_lookup_unlink_handler); ++ /* To-Do: Request open-fd count on dst base file */ ++ STACK_WIND (frame, shard_rename_src_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, ++ local->xattr_req); ++ loc_wipe (&tmp_loc); + return 0; + err: +- SHARD_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL); ++ loc_wipe (&tmp_loc); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; +- + } + + int +-shard_rename_cbk (call_frame_t *frame, xlator_t *this) ++shard_unlink_base_file (call_frame_t *frame, xlator_t *this); ++ ++int ++shard_set_size_attrs_on_marker_file_cbk (call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, ++ dict_t *xdata) + { ++ shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + ++ priv = this->private; + local = frame->local; ++ if (op_ret < 0) { ++ gf_msg (this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_FOP_FAILED, "Xattrop on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string (local->fop), local->newloc.name); ++ goto err; ++ } + +- SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->preoldparent, +- &local->postoldparent, &local->prenewparent, +- &local->postnewparent, local->xattr_rsp); ++ inode_unlink (local->newloc.inode, priv->dot_shard_rm_inode, ++ local->newloc.name); ++ ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file (frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file (frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind (local->fop, frame, op_ret, op_errno); + return 0; + } + + int +-shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this) ++shard_set_size_attrs_on_marker_file (call_frame_t *frame, xlator_t *this) + { +- int ret = -1; +- uint32_t link_count = 0; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ dict_t *xdata = NULL; ++ shard_local_t *local = NULL; + + local = frame->local; +- priv = this->private; +- +- local->first_block = get_lowest_block (0, local->dst_block_size); +- local->last_block = get_highest_block (0, local->postbuf.ia_size, +- local->dst_block_size); +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = local->loc2.inode; ++ xdata = dict_new (); ++ if (!xdata) ++ goto err; + +- if ((local->xattr_rsp) && +- (!dict_get_uint32 (local->xattr_rsp, GET_LINK_COUNT, &link_count)) +- && (link_count > 1)) { +- shard_rename_cbk (frame, this); +- return 0; ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ SHARD_INODE_CREATE_INIT (this, bs, xdata, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ STACK_WIND (frame, shard_set_size_attrs_on_marker_file_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, ++ &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); ++ dict_unref (xdata); ++ return 0; ++err: ++ if (xdata) ++ dict_unref (xdata); ++ shard_common_failure_unwind (local->fop, frame, -1, op_errno); ++ return 0; ++} ++ ++int ++shard_lookup_marker_file_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ if (op_ret < 0) { ++ gf_msg (this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_FOP_FAILED, "Lookup on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string (local->fop), local->newloc.name); ++ goto err; + } + +- if (local->num_blocks == 1) { +- shard_rename_cbk (frame, this); ++ linked_inode = inode_link (inode, priv->dot_shard_rm_inode, ++ local->newloc.name, buf); ++ inode_unref (local->newloc.inode); ++ local->newloc.inode = linked_inode; ++ shard_set_size_attrs_on_marker_file (frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind (local->fop, frame, op_ret, op_errno); ++ return 0; ++} ++ ++int ++shard_lookup_marker_file (call_frame_t *frame, xlator_t *this) ++{ ++ int op_errno = ENOMEM; ++ dict_t *xattr_req = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ xattr_req = shard_create_gfid_dict (local->xattr_req); ++ if (!xattr_req) ++ goto err; ++ ++ STACK_WIND (frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); ++ dict_unref (xattr_req); ++ return 0; ++err: ++ if (xattr_req) ++ dict_unref (xattr_req); ++ shard_common_failure_unwind (local->fop, frame, -1, op_errno); ++ return 0; ++} ++ ++int ++shard_create_marker_file_under_remove_me_cbk (call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, ++ struct iatt *preparent, ++ struct iatt *postparent, ++ dict_t *xdata) ++{ ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ SHARD_UNSET_ROOT_FS_ID (frame, local); ++ if (op_ret < 0) { ++ if ((op_errno != EEXIST) && (op_errno != ENODATA)) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg (this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_FOP_FAILED, "Marker file creation " ++ "failed while performing %s; entry gfid=%s", ++ gf_fop_string (local->fop), local->newloc.name); ++ goto err; ++ } else { ++ shard_lookup_marker_file (frame, this); ++ return 0; ++ } ++ } ++ ++ linked_inode = inode_link (inode, priv->dot_shard_rm_inode, ++ local->newloc.name, buf); ++ inode_unref (local->newloc.inode); ++ local->newloc.inode = linked_inode; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file (frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file (frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind (local->fop, frame, -1, local->op_errno); ++ return 0; ++} ++ ++int ++shard_create_marker_file_under_remove_me (call_frame_t *frame, xlator_t *this, ++ loc_t *loc) ++{ ++ int ret = 0; ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ char g1[64] = {0,}; ++ char g2[64] = {0,}; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ SHARD_SET_ROOT_FS_ID (frame, local); ++ ++ xattr_req = shard_create_gfid_dict (local->xattr_req); ++ if (!xattr_req) ++ goto err; ++ ++ local->newloc.inode = inode_new (this->itable); ++ local->newloc.parent = inode_ref (priv->dot_shard_rm_inode); ++ ret = inode_path (local->newloc.parent, uuid_utoa (loc->inode->gfid), ++ (char 
**)&local->newloc.path); ++ if (ret < 0) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_INODE_PATH_FAILED, "Inode path failed on " ++ "pargfid=%s bname=%s", ++ uuid_utoa_r (priv->dot_shard_rm_gfid, g1), ++ uuid_utoa_r (loc->inode->gfid, g2)); ++ goto err; ++ } ++ local->newloc.name = strrchr (local->newloc.path, '/'); ++ if (local->newloc.name) ++ local->newloc.name++; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ ++ SHARD_INODE_CREATE_INIT (this, bs, xattr_req, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ ++ STACK_WIND (frame, shard_create_marker_file_under_remove_me_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, ++ &local->newloc, 0, 0, 0644, xattr_req); ++ dict_unref (xattr_req); ++ return 0; ++ ++err: ++ if (xattr_req) ++ dict_unref (xattr_req); ++ shard_create_marker_file_under_remove_me_cbk (frame, 0, this, -1, ++ op_errno, NULL, NULL, ++ NULL, NULL, NULL); ++ return 0; ++} ++ ++int ++shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); ++ ++int ++shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) ++{ ++ int ret = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } else { ++ local->preoldparent = *preparent; ++ local->postoldparent = *postparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref (xdata); ++ } ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk (frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ } ++ ++ ret = shard_unlock_inodelk (frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ shard_unlink_cbk (frame, this); ++ return 0; ++} ++ ++int ++shard_unlink_base_file (call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = frame->local; ++ ++ /* To-Do: Request open-fd count on base file */ ++ STACK_WIND (frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, ++ local->xattr_req); ++ return 0; ++} ++ ++int ++shard_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ if (op_ret) ++ gf_msg (this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY (frame); ++ return 0; ++} ++ ++int ++shard_unlock_entrylk (call_frame_t *frame, xlator_t *this) ++{ ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_entrylk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->entrylk_frame; ++ lk_local = lk_frame->local; ++ local->entrylk_frame = NULL; ++ lock = &lk_local->int_entrylk; ++ loc = &lock->loc; ++ ++ STACK_WIND (lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, loc, ++ lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ++ ENTRYLK_WRLCK, NULL); ++ local->int_entrylk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int ++shard_post_entrylk_fop_handler (call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_create_marker_file_under_remove_me (frame, this, ++ &local->int_inodelk.loc); ++ break; ++ default: ++ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-entrylk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int ++shard_acquire_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind (main_local->fop, main_frame, ++ op_ret, op_errno); + return 0; + } ++ main_local->int_entrylk.acquired_lock = _gf_true; ++ shard_post_entrylk_fop_handler (main_frame, this); ++ return 0; ++} + +- local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto out; ++int ++shard_acquire_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, ++ uuid_t gfid) ++{ ++ char gfid_str[GF_UUID_BUF_SIZE] = {0,}; ++ shard_local_t *local = NULL; ++ shard_local_t *entrylk_local = NULL; ++ shard_entrylk_t *int_entrylk = NULL; ++ call_frame_t *entrylk_frame = NULL; + +- local->dot_shard_loc.inode = inode_find (this->itable, +- priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc (this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto out; +- shard_lookup_internal_dir (frame, this, +- shard_post_resolve_unlink_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); ++ entrylk_frame = create_frame (this, this->ctx->pool); ++ if (!entrylk_frame) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ SHARD_MSG_MEMALLOC_FAILED, "Failed to create new frame " ++ "to lock marker file"); ++ goto err; ++ } ++ ++ entrylk_local = mem_get0 (this->local_pool); ++ if (!entrylk_local) { ++ STACK_DESTROY (entrylk_frame->root); ++ goto err; ++ } ++ ++ local = frame->local; ++ entrylk_frame->local = entrylk_local; ++ entrylk_local->main_frame = frame; ++ int_entrylk = &entrylk_local->int_entrylk; ++ ++ int_entrylk->loc.inode = inode_ref (inode); ++ set_lk_owner_from_ptr (&entrylk_frame->root->lk_owner, ++ entrylk_frame->root); ++ local->entrylk_frame = entrylk_frame; ++ gf_uuid_unparse (gfid, gfid_str); ++ int_entrylk->basename = gf_strdup (gfid_str); ++ ++ STACK_WIND (entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), ++ 
FIRST_CHILD(this)->fops->entrylk, this->name, ++ &int_entrylk->loc, int_entrylk->basename, ENTRYLK_LOCK, ++ ENTRYLK_WRLCK, NULL); ++ return 0; ++err: ++ shard_common_failure_unwind (local->fop, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_post_lookup_base_shard_rm_handler (call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind (local->fop, frame, -1, ++ local->op_errno); ++ return 0; ++ } ++ ++ if (local->prebuf.ia_nlink > 1) { ++ gf_msg_debug (this->name, 0, "link count on %s > 1:%d, " ++ "performing rename()/unlink()", ++ local->int_inodelk.loc.path, local->prebuf.ia_nlink); ++ if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file (frame, this); ++ else if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file (frame, this); + } else { +- local->post_res_handler = shard_post_resolve_unlink_handler; +- shard_refresh_internal_dir (frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD); ++ gf_msg_debug (this->name, 0, "link count on %s = 1, creating " ++ "file under .remove_me", local->int_inodelk.loc.path); ++ shard_acquire_entrylk (frame, this, priv->dot_shard_rm_inode, ++ local->prebuf.ia_gfid); + } ++ return 0; ++} + ++int ++shard_post_inodelk_fop_handler (call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_lookup_base_file (frame, this, &local->int_inodelk.loc, ++ shard_post_lookup_base_shard_rm_handler); ++ break; ++ default: ++ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-inodelk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } + return 0; ++} + +-out: +- SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, +- NULL, NULL); ++int ++shard_acquire_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind (main_local->fop, main_frame, ++ op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_inodelk.acquired_lock = _gf_true; ++ shard_post_inodelk_fop_handler (main_frame, this); + return 0; + } + + int +-shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this) ++shard_acquire_inodelk (call_frame_t *frame, xlator_t *this, loc_t *loc) ++{ ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *int_inodelk = NULL; ++ ++ lk_frame = create_frame (this, this->ctx->pool); ++ if (!lk_frame) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ SHARD_MSG_MEMALLOC_FAILED, "Failed to create new frame " ++ "to lock base shard"); ++ goto err; ++ } ++ lk_local = mem_get0 (this->local_pool); ++ if (!lk_local) { ++ STACK_DESTROY (lk_frame->root); ++ goto err; ++ } ++ ++ local = frame->local; ++ lk_frame->local = lk_local; ++ lk_local->main_frame = frame; ++ int_inodelk = &lk_local->int_inodelk; ++ ++ int_inodelk->flock.l_len = 0; ++ int_inodelk->flock.l_start = 0; ++ int_inodelk->domain = this->name; ++ int_inodelk->flock.l_type = F_WRLCK; ++ loc_copy (&local->int_inodelk.loc, loc); ++ set_lk_owner_from_ptr (&lk_frame->root->lk_owner, 
lk_frame->root); ++ local->inodelk_frame = lk_frame; ++ ++ STACK_WIND (lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, ++ &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); ++ return 0; ++err: ++ shard_common_failure_unwind (local->fop, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_post_mkdir_rm_handler (call_frame_t *frame, xlator_t *this) + { ++ loc_t *loc = NULL; + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (rename, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL, NULL, +- NULL, NULL); ++ shard_common_failure_unwind (local->fop, frame, -1, ++ local->op_errno); + return 0; + } ++ if (local->fop == GF_FOP_UNLINK) ++ loc = &local->loc; ++ else if (local->fop == GF_FOP_RENAME) ++ loc = &local->loc2; ++ shard_acquire_inodelk (frame, this, loc); ++ return 0; ++} + +- if (local->dst_block_size) +- shard_rename_unlink_dst_shards_do (frame, this); +- else +- shard_rename_cbk (frame, this); ++int ++shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type); ++int ++shard_pre_mkdir_rm_handler (call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind (local->fop, frame, -1, ++ local->op_errno); ++ return 0; ++ } ++ shard_mkdir_internal_dir (frame, this, shard_post_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ return 0; ++} ++ ++void ++shard_begin_rm_resolution (call_frame_t *frame, xlator_t *this) ++{ ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ local->dot_shard_rm_loc.inode = inode_find (this->itable, ++ priv->dot_shard_rm_gfid); ++ if (!local->dot_shard_rm_loc.inode) { ++ local->dot_shard_loc.inode = inode_find (this->itable, ++ priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ shard_mkdir_internal_dir (frame, this, ++ shard_pre_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_pre_mkdir_rm_handler; ++ shard_refresh_internal_dir (frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ } else { ++ local->post_res_handler = shard_post_mkdir_rm_handler; ++ shard_refresh_internal_dir (frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ } ++} ++ ++int ++shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa (loc->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ return 0; ++ } ++ ++ local = mem_get0 (this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ loc_copy (&local->loc, loc); ++ local->xflag = xflag; ++ local->xattr_req = (xdata) ? 
dict_ref (xdata) : dict_new (); ++ local->block_size = block_size; ++ local->resolver_base_inode = loc->inode; ++ local->fop = GF_FOP_UNLINK; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ ++ local->resolve_not = _gf_true; ++ shard_begin_rm_resolution (frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind (GF_FOP_UNLINK, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int ++shard_rename_cbk (call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->preoldparent, ++ &local->postoldparent, &local->prenewparent, ++ &local->postnewparent, local->xattr_rsp); ++ return 0; ++} ++ ++int ++shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this) ++{ ++ shard_rename_cbk (frame, this); + return 0; + } + +@@ -3226,6 +3815,7 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) + { ++ int ret = 0; + shard_local_t *local = NULL; + + local = frame->local; +@@ -3235,6 +3825,11 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->op_errno = op_errno; + goto err; + } ++ /* Set ctx->refresh to TRUE to force a lookup on disk when ++ * shard_lookup_base_file() is called next to refresh the hard link ++ * count in ctx ++ */ ++ shard_inode_ctx_set_refresh_flag (local->int_inodelk.loc.inode, this); + + local->prebuf = *buf; + local->preoldparent = *preoldparent; +@@ -3244,40 +3839,37 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (xdata) + local->xattr_rsp = dict_ref (xdata); + +- /* Now the base file is looked up to gather the ia_size and ia_blocks.*/ ++ if (local->dst_block_size) { ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk (frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ } + ++ ret = shard_unlock_inodelk (frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ goto err; ++ } ++ } ++ ++ /* Now the base file of src, if sharded, is looked up to gather ia_size ++ * and ia_blocks.*/ + if (local->block_size) { + local->tmp_loc.inode = inode_new (this->itable); + gf_uuid_copy (local->tmp_loc.gfid, (local->loc.inode)->gfid); + shard_lookup_base_file (frame, this, &local->tmp_loc, + shard_post_rename_lookup_handler); + } else { +- shard_rename_unlink_dst_shards_do (frame, this); ++ shard_rename_cbk (frame, this); + } +- + return 0; + err: +- SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, +- NULL, NULL, NULL, NULL, NULL); +- return 0; +-} +- +-int +-shard_rename_src_base_file (call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (dict_set_uint32 (local->xattr_req, GET_LINK_COUNT, 0)) +- gf_msg (this->name, GF_LOG_WARNING, 0, +- SHARD_MSG_DICT_SET_FAILED, "Failed to set " +- GET_LINK_COUNT" in dict"); +- +- /* To-Do: Request open-fd count on dst base file */ +- STACK_WIND (frame, shard_rename_src_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, &local->loc, &local->loc2, +- local->xattr_req); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -3289,9 +3881,8 @@ shard_post_lookup_dst_base_file_handler (call_frame_t *frame, xlator_t *this) + local = frame->local; + + if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (rename, frame, local->op_ret, +- 
local->op_errno, NULL, NULL, NULL, NULL, +- NULL, NULL); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -3332,6 +3923,7 @@ shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + if (newloc->inode) + ret = shard_inode_ctx_get_block_size (newloc->inode, this, + &dst_block_size); ++ + /* The following stack_wind covers the case where: + * a. the src file is not sharded and dst doesn't exist, OR + * b. the src and dst both exist but are not sharded. +@@ -3361,26 +3953,26 @@ shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + local->dst_block_size = dst_block_size; + if (!this->itable) + this->itable = (local->loc.inode)->table; ++ local->resolve_not = _gf_true; + +- if (local->dst_block_size) +- /* The if block covers the case where the dst file exists and is +- * sharded. So it is important to look up this inode, record its +- * size, before renaming src to dst, so as to NOT lose this +- * information. +- */ +- shard_lookup_base_file (frame, this, &local->loc2, +- shard_post_lookup_dst_base_file_handler); +- else +- /* The following block covers the case where the dst either +- * doesn't exist or is NOT sharded. In this case, shard xlator +- * would go ahead and rename src to dst. +- */ ++ /* The following if-block covers the case where the dst file exists ++ * and is sharded. ++ */ ++ if (local->dst_block_size) { ++ shard_begin_rm_resolution (frame, this); ++ } else { ++ /* The following block covers the case where the dst either doesn't ++ * exist or is NOT sharded but the src is sharded. In this case, shard ++ * xlator would go ahead and rename src to dst. Once done, it would also ++ * lookup the base shard of src to get the ia_size and ia_blocks xattr ++ * values. 
++ */ + shard_rename_src_base_file (frame, this); ++ } + return 0; + + err: +- SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, +- NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_RENAME, frame, -1, ENOMEM); + return 0; + + } +@@ -3400,8 +3992,8 @@ shard_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (op_ret == -1) + goto unwind; + +- ret = shard_inode_ctx_set (inode, this, stbuf, +- ntoh64 (local->block_size), SHARD_ALL_MASK); ++ ret = shard_inode_ctx_set (inode, this, stbuf, local->block_size, ++ SHARD_ALL_MASK); + if (ret) + gf_msg (this->name, GF_LOG_WARNING, 0, + SHARD_MSG_INODE_CTX_SET_FAILED, "Failed to set inode " +@@ -3417,28 +4009,29 @@ int + shard_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) + { ++ shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + ++ priv = this->private; + local = mem_get0 (this->local_pool); + if (!local) + goto err; + + frame->local = local; ++ local->block_size = priv->block_size; + + if (!__is_gsyncd_on_shard_dir (frame, loc)) { +- SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err); ++ SHARD_INODE_CREATE_INIT (this, local->block_size, xdata, loc, 0, ++ 0, err); + } + + STACK_WIND (frame, shard_create_cbk, FIRST_CHILD (this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, + fd, xdata); + return 0; +- + err: +- SHARD_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, +- NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_CREATE, frame, -1, ENOMEM); + return 0; +- + } + + int +@@ -3523,9 +4116,9 @@ out: + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID (frame, local); + if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (readv, frame, local->op_ret, +- local->op_errno, NULL, 0, NULL, +- NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_READ, frame, ++ local->op_ret, ++ local->op_errno); + } else { + if (xdata) + local->xattr_rsp = dict_ref (xdata); +@@ -3792,8 +4385,8 @@ shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this) + local = frame->local; + + if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (readv, frame, local->op_ret, +- local->op_errno, NULL, 0, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -3815,8 +4408,8 @@ shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this) + local = frame->local; + + if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (readv, frame, local->op_ret, +- local->op_errno, NULL, 0, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -3839,9 +4432,9 @@ shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this) + + if (local->op_ret < 0) { + if (local->op_errno != ENOENT) { +- SHARD_STACK_UNWIND (readv, frame, local->op_ret, +- local->op_errno, NULL, 0, NULL, +- NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_READ, frame, ++ local->op_ret, ++ local->op_errno); + return 0; + } else { + struct iovec vec = {0,}; +@@ -3878,8 +4471,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this) + local = frame->local; + + if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (readv, frame, local->op_ret, +- local->op_errno, NULL, 0, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -3955,10 +4548,8 @@ shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this) + 
SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; +- + err: +- SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, +- NULL); ++ shard_common_failure_unwind (GF_FOP_READ, frame, -1, ENOMEM); + return 0; + } + +@@ -4018,8 +4609,7 @@ shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + shard_post_lookup_readv_handler); + return 0; + err: +- SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, +- NULL); ++ shard_common_failure_unwind (GF_FOP_READ, frame, -1, ENOMEM); + return 0; + } + +@@ -4032,9 +4622,8 @@ shard_common_inode_write_post_update_size_handler (call_frame_t *frame, + local = frame->local; + + if (local->op_ret < 0) { +- shard_common_inode_write_failure_unwind (local->fop, frame, +- local->op_ret, +- local->op_errno); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + } else { + shard_common_inode_write_success_unwind (local->fop, frame, + local->written_size); +@@ -4139,9 +4728,8 @@ shard_common_inode_write_do_cbk (call_frame_t *frame, void *cookie, + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID (frame, local); + if (local->op_ret < 0) { +- shard_common_inode_write_failure_unwind (fop, frame, +- local->op_ret, +- local->op_errno); ++ shard_common_failure_unwind (fop, frame, local->op_ret, ++ local->op_errno); + } else { + shard_get_delta_size_from_inode_ctx (local, + local->fd->inode, +@@ -4343,9 +4931,8 @@ shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame, + local = frame->local; + + if (local->op_ret < 0) { +- shard_common_inode_write_failure_unwind (local->fop, frame, +- local->op_ret, +- local->op_errno); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -4368,9 +4955,8 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame, + local = frame->local; + + if (local->op_ret < 0) { +- shard_common_inode_write_failure_unwind (local->fop, frame, +- local->op_ret, +- local->op_errno); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -4386,10 +4972,6 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame, + } + + int +-shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type); +-int + shard_common_inode_write_post_resolve_handler (call_frame_t *frame, + xlator_t *this) + { +@@ -4398,9 +4980,8 @@ shard_common_inode_write_post_resolve_handler (call_frame_t *frame, + local = frame->local; + + if (local->op_ret < 0) { +- shard_common_inode_write_failure_unwind (local->fop, frame, +- local->op_ret, +- local->op_errno); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -4423,9 +5004,8 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, + shard_priv_t *priv = this->private; + + if (local->op_ret < 0) { +- shard_common_inode_write_failure_unwind (local->fop, frame, +- local->op_ret, +- local->op_errno); ++ shard_common_failure_unwind (local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -4443,8 +5023,7 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, + local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) { +- shard_common_inode_write_failure_unwind (local->fop, frame, +- -1, ENOMEM); ++ shard_common_failure_unwind (local->fop, frame, -1, ENOMEM); + return 0; + } + +@@ 
-4508,7 +5087,7 @@ shard_mkdir_internal_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (link_inode != inode) { + shard_refresh_internal_dir (frame, this, type); + } else { +- shard_inode_ctx_set_refreshed_flag (link_inode, this); ++ shard_inode_ctx_mark_dir_refreshed (link_inode, this); + shard_common_resolve_shards (frame, this, + local->post_res_handler); + } +@@ -4544,6 +5123,10 @@ shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, + gf_uuid_copy (*gfid, priv->dot_shard_gfid); + loc = &local->dot_shard_loc; + break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy (*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; + default: + break; + } +@@ -4702,8 +5285,8 @@ out: + return 0; + + if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (fsync, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); + } else { + shard_get_timestamps_from_inode_ctx (local, base_inode, this); + SHARD_STACK_UNWIND (fsync, frame, local->op_ret, +@@ -4733,8 +5316,8 @@ shard_post_lookup_fsync_handler (call_frame_t *frame, xlator_t *this) + INIT_LIST_HEAD (©); + + if (local->op_ret < 0) { +- SHARD_STACK_UNWIND (fsync, frame, local->op_ret, +- local->op_errno, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); + return 0; + } + +@@ -4847,7 +5430,7 @@ shard_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + shard_post_lookup_fsync_handler); + return 0; + err: +- SHARD_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_FSYNC, frame, -1, ENOMEM); + return 0; + } + +@@ -5069,9 +5652,8 @@ shard_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + return 0; +- + out: +- SHARD_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); ++ shard_common_failure_unwind (GF_FOP_REMOVEXATTR, frame, -1, op_errno); + return 0; + } + +@@ -5095,9 +5677,8 @@ shard_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; +- + out: +- SHARD_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL); ++ shard_common_failure_unwind (GF_FOP_FREMOVEXATTR, frame, -1, op_errno); + return 0; + } + +@@ -5135,9 +5716,8 @@ shard_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + STACK_WIND (frame, shard_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; +- + out: +- SHARD_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_FGETXATTR, frame, -1, op_errno); + return 0; + } + +@@ -5176,9 +5756,8 @@ shard_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + STACK_WIND (frame, shard_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; +- + out: +- SHARD_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_GETXATTR, frame, -1, op_errno); + return 0; + } + +@@ -5197,9 +5776,8 @@ shard_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; +- + out: +- SHARD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); ++ shard_common_failure_unwind (GF_FOP_FSETXATTR, frame, -1, op_errno); + return 0; + } + +@@ -5218,9 +5796,8 @@ shard_setxattr 
(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + return 0; +- + out: +- SHARD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); ++ shard_common_failure_unwind (GF_FOP_SETXATTR, frame, -1, op_errno); + return 0; + } + +@@ -5335,11 +5912,9 @@ shard_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + STACK_WIND (frame, shard_common_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, + local->xattr_req); +- + return 0; +- + err: +- SHARD_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_SETATTR, frame, -1, ENOMEM); + return 0; + } + +@@ -5398,9 +5973,8 @@ shard_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, + local->xattr_req); + return 0; +- + err: +- SHARD_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_FSETATTR, frame, -1, ENOMEM); + return 0; + } + +@@ -5502,7 +6076,7 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this, + shard_common_inode_write_post_lookup_handler); + return 0; + out: +- shard_common_inode_write_failure_unwind (fop, frame, -1, ENOMEM); ++ shard_common_failure_unwind (fop, frame, -1, ENOMEM); + return 0; + } + +@@ -5527,9 +6101,8 @@ shard_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, + shard_common_inode_write_begin (frame, this, GF_FOP_FALLOCATE, fd, NULL, + 0, offset, keep_size, len, NULL, xdata); + return 0; +- + out: +- SHARD_STACK_UNWIND (fallocate, frame, -1, ENOTSUP, NULL, NULL, NULL); ++ shard_common_failure_unwind (GF_FOP_FALLOCATE, frame, -1, ENOTSUP); + return 0; + } + +@@ -5558,7 +6131,7 @@ shard_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + /* TBD */ + gf_msg (this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, + "seek called on %s.", uuid_utoa (fd->inode->gfid)); +- SHARD_STACK_UNWIND (seek, frame, -1, ENOTSUP, 0, NULL); ++ shard_common_failure_unwind (GF_FOP_SEEK, frame, -1, ENOTSUP); + return 0; + } + +@@ -5619,6 +6192,7 @@ init (xlator_t *this) + goto out; + } + gf_uuid_parse (SHARD_ROOT_GFID, priv->dot_shard_gfid); ++ gf_uuid_parse (DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); + + this->private = priv; + LOCK_INIT (&priv->lock); +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 225caa0..1783ff6 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -18,6 +18,7 @@ + #include "syncop.h" + + #define GF_SHARD_DIR ".shard" ++#define GF_SHARD_REMOVE_ME_DIR ".remove_me" + #define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB) + #define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB) + #define SHARD_XATTR_PREFIX "trusted.glusterfs.shard." +@@ -55,6 +56,12 @@ + #define get_highest_block(off, len, shard_size) \ + (((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size)) + ++int ++shard_unlock_inodelk (call_frame_t *frame, xlator_t *this); ++ ++int ++shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); ++ + #define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) do { \ + if ((loc->name && !strcmp (GF_SHARD_DIR, loc->name)) && \ + (((loc->parent) && \ +@@ -79,39 +86,57 @@ + } \ + } while (0) + +-#define SHARD_STACK_UNWIND(fop, frame, params ...) 
do { \ +- shard_local_t *__local = NULL; \ +- if (frame) { \ +- __local = frame->local; \ +- frame->local = NULL; \ +- } \ +- STACK_UNWIND_STRICT (fop, frame, params); \ +- if (__local) { \ +- shard_local_wipe (__local); \ +- mem_put (__local); \ +- } \ ++#define SHARD_STACK_UNWIND(fop, frame, params ...) do { \ ++ shard_local_t *__local = NULL; \ ++ if (frame) { \ ++ __local = frame->local; \ ++ if (__local && __local->int_inodelk.acquired_lock) \ ++ shard_unlock_inodelk (frame, frame->this); \ ++ if (__local && __local->int_entrylk.acquired_lock) \ ++ shard_unlock_entrylk (frame, frame->this); \ ++ frame->local = NULL; \ ++ } \ ++ STACK_UNWIND_STRICT (fop, frame, params); \ ++ if (__local) { \ ++ shard_local_wipe (__local); \ ++ mem_put (__local); \ ++ } \ + } while (0) + ++#define SHARD_STACK_DESTROY(frame) \ ++ do { \ ++ shard_local_t *__local = NULL; \ ++ __local = frame->local; \ ++ frame->local = NULL; \ ++ STACK_DESTROY (frame->root); \ ++ if (__local) { \ ++ shard_local_wipe (__local); \ ++ mem_put (__local); \ ++ } \ ++ } while (0); ++ + +-#define SHARD_INODE_CREATE_INIT(this, local, xattr_req, loc, label) do { \ ++#define SHARD_INODE_CREATE_INIT(this, block_size, xattr_req, loc, size, \ ++ block_count, label) do { \ + int __ret = -1; \ + int64_t *__size_attr = NULL; \ +- shard_priv_t *__priv = NULL; \ ++ uint64_t *__bs = 0; \ + \ +- __priv = this->private; \ +- \ +- local->block_size = hton64 (__priv->block_size); \ +- __ret = dict_set_static_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, \ +- &local->block_size, \ +- sizeof (local->block_size)); \ ++ __bs = GF_CALLOC (1, sizeof (uint64_t), gf_shard_mt_uint64_t); \ ++ if (!__bs) \ ++ goto label; \ ++ *__bs = hton64 (block_size); \ ++ __ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, __bs, \ ++ sizeof (*__bs)); \ + if (__ret) { \ + gf_msg (this->name, GF_LOG_WARNING, 0, \ + SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \ +- "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, loc->path); \ ++ "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path);\ ++ GF_FREE (__bs); \ + goto label; \ + } \ + \ +- __ret = shard_set_size_attrs (0, 0, &__size_attr); \ ++ __ret = shard_set_size_attrs (size, block_count, &__size_attr); \ + if (__ret) \ + goto label; \ + \ +@@ -120,7 +145,7 @@ + if (__ret) { \ + gf_msg (this->name, GF_LOG_WARNING, 0, \ + SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \ +- "on path %s", GF_XATTR_SHARD_FILE_SIZE, loc->path); \ ++ "on path %s", GF_XATTR_SHARD_FILE_SIZE, (loc)->path); \ + GF_FREE (__size_attr); \ + goto label; \ + } \ +@@ -172,21 +197,34 @@ + } \ + } while (0) + ++/* rm = "remove me" */ + + typedef struct shard_priv { + uint64_t block_size; + uuid_t dot_shard_gfid; ++ uuid_t dot_shard_rm_gfid; + inode_t *dot_shard_inode; ++ inode_t *dot_shard_rm_inode; + gf_lock_t lock; + int inode_count; + struct list_head ilist_head; + } shard_priv_t; + + typedef struct { +- loc_t *loc; +- short type; ++ loc_t loc; + char *domain; +-} shard_lock_t; ++ struct gf_flock flock; ++ gf_boolean_t acquired_lock; ++} shard_inodelk_t; ++ ++typedef struct { ++ loc_t loc; ++ char *domain; ++ char *basename; ++ entrylk_cmd cmd; ++ entrylk_type type; ++ gf_boolean_t acquired_lock; ++} shard_entrylk_t; + + typedef int32_t (*shard_post_fop_handler_t) (call_frame_t *frame, + xlator_t *this); +@@ -200,6 +238,7 @@ typedef int32_t (*shard_post_mknod_fop_handler_t) (call_frame_t *frame, + + typedef int32_t (*shard_post_update_size_fop_handler_t) (call_frame_t *frame, + xlator_t *this); ++ + typedef struct shard_local { + int op_ret; + 
int op_errno; +@@ -227,6 +266,7 @@ typedef struct shard_local { + int delta_blocks; + loc_t loc; + loc_t dot_shard_loc; ++ loc_t dot_shard_rm_loc; + loc_t loc2; + loc_t tmp_loc; + fd_t *fd; +@@ -251,16 +291,18 @@ typedef struct shard_local { + shard_post_resolve_fop_handler_t post_res_handler; + shard_post_mknod_fop_handler_t post_mknod_handler; + shard_post_update_size_fop_handler_t post_update_size_handler; +- struct { +- int lock_count; +- fop_inodelk_cbk_t inodelk_cbk; +- shard_lock_t *shard_lock; +- } lock; ++ shard_inodelk_t int_inodelk; ++ shard_entrylk_t int_entrylk; + inode_t *resolver_base_inode; + gf_boolean_t first_lookup_done; + syncbarrier_t barrier; + gf_boolean_t lookup_shards_barriered; + gf_boolean_t unlink_shards_barriered; ++ gf_boolean_t resolve_not; ++ loc_t newloc; ++ call_frame_t *main_frame; ++ call_frame_t *inodelk_frame; ++ call_frame_t *entrylk_frame; + } shard_local_t; + + typedef struct shard_inode_ctx { +@@ -284,6 +326,7 @@ typedef struct shard_inode_ctx { + + typedef enum { + SHARD_INTERNAL_DIR_DOT_SHARD = 1, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME, + } shard_internal_dir_type_t; + + #endif /* __SHARD_H__ */ +-- +1.8.3.1 + diff --git a/SOURCES/0430-features-shard-Perform-shards-deletion-in-the-backgr.patch b/SOURCES/0430-features-shard-Perform-shards-deletion-in-the-backgr.patch new file mode 100644 index 0000000..cddeddf --- /dev/null +++ b/SOURCES/0430-features-shard-Perform-shards-deletion-in-the-backgr.patch @@ -0,0 +1,1790 @@ +From 93ef66173442aaf4aeaeb161c6d6108eda54014a Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Thu, 12 Apr 2018 15:47:00 +0530 +Subject: [PATCH 430/444] features/shard: Perform shards deletion in the + background + +> Upstream: https://review.gluster.org/19970 +> BUG: 1568521 +> Change-Id: Ia83117230c9dd7d0d9cae05235644f8475e97bc3 + +A synctask is created that would scan the indices from +.shard/.remove_me, to delete the shards associated with the +gfid corresponding to the index bname and the rate of deletion +is controlled by the option features.shard-deletion-rate whose +default value is 100. +The task is launched on two accounts: +1. when shard receives its first-ever lookup on the volume +2. 
when a rename or unlink deleted an inode + +Change-Id: Ia83117230c9dd7d0d9cae05235644f8475e97bc3 +BUG: 1520882 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/154864 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/globals.h | 1 + + tests/bugs/shard/bug-1568521-EEXIST.t | 30 +- + tests/bugs/shard/bug-1568521.t | 53 ++ + tests/bugs/shard/bug-shard-discard.t | 19 +- + tests/bugs/shard/shard-inode-refcount-test.t | 5 +- + tests/bugs/shard/unlinks-and-renames.t | 123 ++-- + xlators/features/shard/src/shard-messages.h | 18 +- + xlators/features/shard/src/shard.c | 816 +++++++++++++++++++----- + xlators/features/shard/src/shard.h | 19 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 5 + + 10 files changed, 829 insertions(+), 260 deletions(-) + create mode 100644 tests/bugs/shard/bug-1568521.t + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 8e218cb..699e73e 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -109,6 +109,7 @@ + + #define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */ + ++#define GD_OP_VERSION_4_2_0 40200 /* Op-version for GlusterFs 4.2.0 */ + + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ +diff --git a/tests/bugs/shard/bug-1568521-EEXIST.t b/tests/bugs/shard/bug-1568521-EEXIST.t +index e4c3d41..7de400d 100644 +--- a/tests/bugs/shard/bug-1568521-EEXIST.t ++++ b/tests/bugs/shard/bug-1568521-EEXIST.t +@@ -5,6 +5,12 @@ + + cleanup + ++function get_file_count { ++ ls $1* | wc -l ++} ++ ++FILE_COUNT_TIME=5 ++ + TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +@@ -41,10 +47,14 @@ TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x000000000050000000000000 + sleep 2 + + TEST unlink $M0/dir/file +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_file +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_file +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_file +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_file ++ ++TEST ! stat $B0/${V0}0/dir/file ++TEST ! 
stat $B0/${V0}1/dir/file ++ ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_file ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_file ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_file ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_file + + ############################## + ### Repeat test for rename ### +@@ -71,9 +81,13 @@ TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x000000000050000000000000 + sleep 2 + + TEST mv -f $M0/src $M0/dir/dst +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++ ++TEST ! stat $B0/${V0}0/src ++TEST ! stat $B0/${V0}1/src ++ ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst + + cleanup +diff --git a/tests/bugs/shard/bug-1568521.t b/tests/bugs/shard/bug-1568521.t +new file mode 100644 +index 0000000..167fb63 +--- /dev/null ++++ b/tests/bugs/shard/bug-1568521.t +@@ -0,0 +1,53 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++ ++ ++function delete_files { ++ local mountpoint=$1; ++ local success=0; ++ local value=$2 ++ for i in {1..500}; do ++ unlink $mountpoint/file-$i 2>/dev/null 1>/dev/null ++ if [ $? -eq 0 ]; then ++ echo $2 >> $B0/output.txt ++ fi ++ done ++ echo $success ++} ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 shard-block-size 4MB ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1 ++ ++for i in {1..500}; do ++ dd if=/dev/urandom of=$M0/file-$i bs=1M count=2 ++done ++ ++for i in {1..500}; do ++ stat $M1/file-$i > /dev/null ++done ++ ++delete_files $M0 0 & ++delete_files $M1 1 & ++wait ++ ++success1=$(grep 0 $B0/output.txt | wc -l); ++success2=$(grep 1 $B0/output.txt | wc -l); ++ ++echo "Success1 is $success1"; ++echo "Success2 is $success2"; ++ ++success_total=$((success1 + success2)); ++ ++EXPECT 500 echo $success_total ++ ++cleanup +diff --git a/tests/bugs/shard/bug-shard-discard.t b/tests/bugs/shard/bug-shard-discard.t +index 884d9e7..910ade1 100644 +--- a/tests/bugs/shard/bug-shard-discard.t ++++ b/tests/bugs/shard/bug-shard-discard.t +@@ -5,6 +5,12 @@ + + cleanup + ++FILE_COUNT_TIME=5 ++ ++function get_shard_count { ++ ls $1/$2.* | wc -l ++} ++ + TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3} +@@ -42,14 +48,11 @@ EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1` + + # Now unlink the file. And ensure that all shards associated with the file are cleaned up + TEST unlink $M0/foo +-#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1 +-#TEST ! 
stat $B0/${V0}1/.shard/$gfid_foo.1 +-#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1 +-#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1 +-#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2 +-#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2 +-#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2 ++ ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}0/.shard $gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}1/.shard $gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}2/.shard $gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}3/.shard $gfid_foo + TEST ! stat $M0/foo + + #clean up everything +diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t +index c92dc07..087c8ba 100644 +--- a/tests/bugs/shard/shard-inode-refcount-test.t ++++ b/tests/bugs/shard/shard-inode-refcount-test.t +@@ -5,6 +5,8 @@ + + cleanup + ++SHARD_COUNT_TIME=5 ++ + TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 $H0:$B0/${V0}0 +@@ -18,7 +20,8 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23 + + ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0) + TEST rm -f $M0/one-plus-five-shards +-#EXPECT `expr $ACTIVE_INODES_BEFORE - 4` get_mount_active_size_value $V0 ++# Expect 5 inodes less. But one inode more than before because .remove_me would be created. ++EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 + + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $CLI volume stop $V0 +diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t +index 997c397..6e5164f 100644 +--- a/tests/bugs/shard/unlinks-and-renames.t ++++ b/tests/bugs/shard/unlinks-and-renames.t +@@ -9,6 +9,12 @@ cleanup + # and rename fops in sharding and make sure they work fine. + # + ++FILE_COUNT_TIME=5 ++ ++function get_file_count { ++ ls $1* | wc -l ++} ++ + ################################################# + ################### UNLINK ###################### + ################################################# +@@ -36,13 +42,8 @@ gfid_foo=$(get_gfid_string $M0/dir/foo) + TEST unlink $M0/dir/foo + TEST stat $B0/${V0}0/.shard/.remove_me + TEST stat $B0/${V0}1/.shard/.remove_me +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo +- +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo + + ################################################## + ##### Unlink of a sharded file without holes ##### +@@ -56,20 +57,14 @@ TEST stat $B0/${V0}1/.shard/$gfid_new.1 + TEST stat $B0/${V0}0/.shard/$gfid_new.2 + TEST stat $B0/${V0}1/.shard/$gfid_new.2 + TEST unlink $M0/dir/new +-#TEST ! stat $B0/${V0}0/.shard/$gfid_new.1 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_new.1 +-#TEST ! 
stat $B0/${V0}0/.shard/$gfid_new.2 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_new.2 ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_new + TEST ! stat $M0/dir/new + TEST ! stat $B0/${V0}0/dir/new + TEST ! stat $B0/${V0}1/dir/new +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_new +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_new ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new + +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_new +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_new +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_new +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_new + ####################################### + ##### Unlink with /.shard present ##### + ####################################### +@@ -83,13 +78,8 @@ TEST unlink $M0/dir/foo + TEST ! stat $B0/${V0}0/dir/foo + TEST ! stat $B0/${V0}1/dir/foo + TEST ! stat $M0/dir/foo +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo +- +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo + + ############################################################# + ##### Unlink of a file with only one block (the zeroth) ##### +@@ -102,13 +92,9 @@ TEST unlink $M0/dir/foo + TEST ! stat $B0/${V0}0/dir/foo + TEST ! stat $B0/${V0}1/dir/foo + TEST ! 
stat $M0/dir/foo +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo + +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo +-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo + #################################################### + ##### Unlink of a sharded file with hard-links ##### + #################################################### +@@ -137,22 +123,15 @@ TEST stat $B0/${V0}0/link + TEST stat $B0/${V0}1/link + # Now delete the last link. + TEST unlink $M0/link +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_original +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_original ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_original ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_original + # Ensure that the shards are all cleaned up. +-#TEST ! stat $B0/${V0}0/.shard/$gfid_original.1 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_original.1 +-#TEST ! stat $B0/${V0}0/.shard/$gfid_original.2 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_original.2 +-#TEST ! stat $M0/link ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_original ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_original ++TEST ! stat $M0/link + TEST ! stat $B0/${V0}0/link + TEST ! stat $B0/${V0}1/link + +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_original +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_original +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_original +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_original +- + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $CLI volume stop $V0 + TEST $CLI volume delete $V0 +@@ -190,13 +169,8 @@ TEST ! stat $B0/${V0}0/dir/src + TEST ! 
stat $B0/${V0}1/dir/src + TEST stat $B0/${V0}0/dir/dst + TEST stat $B0/${V0}1/dir/dst +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst +- +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + + ################################################## + ##### Rename to a sharded file without holes ##### +@@ -212,23 +186,16 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.1 + TEST stat $B0/${V0}0/.shard/$gfid_dst.2 + TEST stat $B0/${V0}1/.shard/$gfid_dst.2 + TEST mv -f $M0/dir/src $M0/dir/dst +-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 +-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst + TEST ! stat $M0/dir/src + TEST stat $M0/dir/dst + TEST ! stat $B0/${V0}0/dir/src + TEST ! stat $B0/${V0}1/dir/src + TEST stat $B0/${V0}0/dir/dst + TEST stat $B0/${V0}1/dir/dst +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst +- +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + + ################################################### + ##### Rename of dst file with /.shard present ##### +@@ -245,13 +212,8 @@ TEST ! stat $B0/${V0}0/dir/src + TEST ! 
stat $B0/${V0}1/dir/src + TEST stat $B0/${V0}0/dir/dst + TEST stat $B0/${V0}1/dir/dst +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst +- +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + + ############################################################### + ##### Rename of dst file with only one block (the zeroth) ##### +@@ -268,13 +230,8 @@ TEST ! stat $B0/${V0}0/dir/src + TEST ! stat $B0/${V0}1/dir/src + TEST stat $B0/${V0}0/dir/dst + TEST stat $B0/${V0}1/dir/dst +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst +- +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + + ######################################################## + ##### Rename to a dst sharded file with hard-links ##### +@@ -307,20 +264,18 @@ TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst + TEST touch $M0/dir/src2 + TEST mv -f $M0/dir/src2 $M0/link + # Ensure that the shards are all cleaned up. +-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 +-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 +-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst ++TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1 ++TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1 ++TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2 ++TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2 + TEST ! stat $M0/dir/src2 + TEST ! stat $B0/${V0}0/dir/src2 + TEST ! 
stat $B0/${V0}1/dir/src2 +-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst +-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst ++EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst +-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst + # Rename with non-existent dst and a sharded src + TEST touch $M0/dir/src + TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216 +diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h +index 0267f8a..bc04e5e 100644 +--- a/xlators/features/shard/src/shard-messages.h ++++ b/xlators/features/shard/src/shard-messages.h +@@ -40,7 +40,7 @@ + */ + + #define GLFS_COMP_BASE_SHARD GLFS_MSGID_COMP_SHARD +-#define GLFS_NUM_MESSAGES 20 ++#define GLFS_NUM_MESSAGES 22 + #define GLFS_MSGID_END (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1) + + #define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages" +@@ -58,7 +58,7 @@ + * @diagnosis + * @recommendedaction + */ +-#define SHARD_MSG_DICT_SET_FAILED (GLFS_COMP_BASE_SHARD + 2) ++#define SHARD_MSG_DICT_OP_FAILED (GLFS_COMP_BASE_SHARD + 2) + + + /*! +@@ -194,5 +194,19 @@ + */ + #define SHARD_MSG_FOP_FAILED (GLFS_COMP_BASE_SHARD + 20) + ++/*! ++ * @messageid 133021 ++ * @diagnosis ++ * @recommendedaction ++*/ ++#define SHARD_MSG_SHARDS_DELETION_FAILED (GLFS_COMP_BASE_SHARD + 21) ++ ++/*! ++ * @messageid 133022 ++ * @diagnosis ++ * @recommendedaction ++*/ ++#define SHARD_MSG_SHARDS_DELETION_COMPLETED (GLFS_COMP_BASE_SHARD + 22) ++ + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" + #endif /* !_SHARD_MESSAGES_H_ */ +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 492341c..2faf711 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -677,7 +677,8 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, + * keep it alive by holding a ref on it. + */ + inode_ref (linked_inode); +- gf_uuid_copy (ctx->base_gfid, base_inode->gfid); ++ if (base_inode) ++ gf_uuid_copy (ctx->base_gfid, base_inode->gfid); + ctx->block_num = block_num; + list_add_tail (&ctx->ilist, &priv->ilist_head); + priv->inode_count++; +@@ -738,7 +739,8 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, + * keep it alive by holding a ref on it. 
+ */ + inode_ref (linked_inode); +- gf_uuid_copy (ctx->base_gfid, base_inode->gfid); ++ if (base_inode) ++ gf_uuid_copy (ctx->base_gfid, base_inode->gfid); + ctx->block_num = block_num; + ctx->base_inode = base_inode; + list_add_tail (&ctx->ilist, &priv->ilist_head); +@@ -977,6 +979,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this, + int i = -1; + uint32_t shard_idx_iter = 0; + char path[PATH_MAX] = {0,}; ++ uuid_t gfid = {0,}; + inode_t *inode = NULL; + inode_t *res_inode = NULL; + inode_t *fsync_inode = NULL; +@@ -988,6 +991,10 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this, + local->call_count = 0; + shard_idx_iter = local->first_block; + res_inode = local->resolver_base_inode; ++ if (res_inode) ++ gf_uuid_copy (gfid, res_inode->gfid); ++ else ++ gf_uuid_copy (gfid, local->base_gfid); + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; +@@ -1000,7 +1007,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this, + continue; + } + +- shard_make_block_abspath (shard_idx_iter, res_inode->gfid, path, ++ shard_make_block_abspath (shard_idx_iter, gfid, path, + sizeof(path)); + + inode = NULL; +@@ -1147,7 +1154,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, + 8 * 4); + if (ret) { +- gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict. gfid=%s", + GF_XATTR_SHARD_FILE_SIZE, uuid_utoa (inode->gfid)); + GF_FREE (size_attr); +@@ -1376,7 +1383,7 @@ shard_lookup_internal_dir (call_frame_t *frame, xlator_t *this, + + ret = dict_set_bin (xattr_req, "gfid-req", *gfid, 16); + if (ret) { +- gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set gfid of %s into dict", + shard_internal_dir_string (type)); + local->op_ret = -1; +@@ -1431,10 +1438,49 @@ shard_inode_ctx_update (inode_t *inode, xlator_t *this, dict_t *xdata, + } + + int ++shard_delete_shards (void *opaque); ++ ++int ++shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data); ++ ++int ++shard_start_background_deletion (xlator_t *this) ++{ ++ int ret = 0; ++ call_frame_t *cleanup_frame = NULL; ++ ++ cleanup_frame = create_frame (this, this->ctx->pool); ++ if (!cleanup_frame) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ SHARD_MSG_MEMALLOC_FAILED, "Failed to create " ++ "new frame to delete shards"); ++ return -ENOMEM; ++ } ++ ++ ret = synctask_new (this->ctx->env, shard_delete_shards, ++ shard_delete_shards_cbk, cleanup_frame, ++ cleanup_frame); ++ if (ret < 0) { ++ gf_msg (this->name, GF_LOG_WARNING, errno, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "failed to create task to do background " ++ "cleanup of shards"); ++ STACK_DESTROY (cleanup_frame->root); ++ } ++ return ret; ++} ++ ++int + shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) + { ++ int ret = 0; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t i_start_cleanup = _gf_false; ++ ++ priv = this->private; ++ + if (op_ret < 0) + goto unwind; + +@@ -1460,6 +1506,25 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + (void) shard_inode_ctx_update (inode, this, xdata, buf); + ++ LOCK (&priv->lock); ++ { ++ if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) { ++ 
priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS; ++ i_start_cleanup = _gf_true; ++ } ++ } ++ UNLOCK (&priv->lock); ++ ++ if (i_start_cleanup) { ++ ret = shard_start_background_deletion (this); ++ if (ret) { ++ LOCK (&priv->lock); ++ { ++ priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; ++ } ++ UNLOCK (&priv->lock); ++ } ++ } + unwind: + SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, + xdata, postparent); +@@ -1475,6 +1540,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + uint64_t block_size = 0; + shard_local_t *local = NULL; + ++ this->itable = loc->inode->table; + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + SHARD_ENTRY_FOP_CHECK (loc, op_errno, err); + } +@@ -1496,7 +1562,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, +- SHARD_MSG_DICT_SET_FAILED, "Failed to set dict" ++ SHARD_MSG_DICT_OP_FAILED, "Failed to set dict" + " value: key:%s for path %s", + GF_XATTR_SHARD_BLOCK_SIZE, loc->path); + goto err; +@@ -1508,7 +1574,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, +- SHARD_MSG_DICT_SET_FAILED, ++ SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s for path %s.", + GF_XATTR_SHARD_FILE_SIZE, loc->path); + goto err; +@@ -1901,12 +1967,6 @@ shard_truncate_last_shard (call_frame_t *frame, xlator_t *this, inode_t *inode) + return 0; + } + +-int +-shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata); +- + void + shard_unlink_block_inode (shard_local_t *local, int shard_block_num); + +@@ -1941,17 +2001,17 @@ done: + int + shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode) + { +- int i = 1; +- int ret = -1; +- int call_count = 0; +- uint32_t cur_block = 0; +- uint32_t last_block = 0; +- char path[PATH_MAX] = {0,}; +- char *bname = NULL; +- loc_t loc = {0,}; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; ++ int i = 1; ++ int ret = -1; ++ int call_count = 0; ++ uint32_t cur_block = 0; ++ uint32_t last_block = 0; ++ char path[PATH_MAX] = {0,}; ++ char *bname = NULL; ++ loc_t loc = {0,}; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; + + local = frame->local; + priv = this->private; +@@ -2086,6 +2146,7 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode, + { + int list_index = 0; + char block_bname[256] = {0,}; ++ uuid_t gfid = {0,}; + inode_t *linked_inode = NULL; + xlator_t *this = NULL; + inode_t *fsync_inode = NULL; +@@ -2093,9 +2154,12 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode, + + this = THIS; + priv = this->private; ++ if (local->loc.inode) ++ gf_uuid_copy (gfid, local->loc.inode->gfid); ++ else ++ gf_uuid_copy (gfid, local->base_gfid); + +- shard_make_block_bname (block_num, (local->loc.inode)->gfid, +- block_bname, sizeof (block_bname)); ++ shard_make_block_bname (block_num, gfid, block_bname, sizeof (block_bname)); + + shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK); + linked_inode = inode_link (inode, priv->dot_shard_inode, block_bname, +@@ -2125,9 +2189,14 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie, + { + int call_count = 0; + int shard_block_num = 
(long) cookie; ++ uuid_t gfid = {0,}; + shard_local_t *local = NULL; + + local = frame->local; ++ if (local->resolver_base_inode) ++ gf_uuid_copy (gfid, local->resolver_base_inode->gfid); ++ else ++ gf_uuid_copy (gfid, local->base_gfid); + + if (op_ret < 0) { + /* Ignore absence of shards in the backend in truncate fop. */ +@@ -2162,9 +2231,7 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie, + gf_msg (this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_LOOKUP_SHARD_FAILED, "Lookup on shard %d " + "failed. Base file gfid = %s", shard_block_num, +- (local->fop == GF_FOP_RENAME) ? +- uuid_utoa (local->loc2.inode->gfid) +- : uuid_utoa (local->loc.inode->gfid)); ++ uuid_utoa (gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; +@@ -2173,25 +2240,18 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie, + shard_link_block_inode (local, shard_block_num, inode, buf); + + done: +- call_count = shard_call_count_return (frame); + if (local->lookup_shards_barriered) { + syncbarrier_wake (&local->barrier); + return 0; + } else { ++ call_count = shard_call_count_return (frame); + if (call_count == 0) { + if (!local->first_lookup_done) + local->first_lookup_done = _gf_true; +- if (local->op_ret < 0) +- goto unwind; +- else +- local->pls_fop_handler (frame, this); ++ local->pls_fop_handler (frame, this); + } + } + return 0; +- +-unwind: +- local->pls_fop_handler (frame, this); +- return 0; + } + + dict_t* +@@ -2237,6 +2297,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode, + int last_block = 0; + char path[PATH_MAX] = {0,}; + char *bname = NULL; ++ uuid_t gfid = {0,}; + loc_t loc = {0,}; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; +@@ -2252,6 +2313,11 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode, + if (local->lookup_shards_barriered) + local->barrier.waitfor = local->call_count; + ++ if (inode) ++ gf_uuid_copy (gfid, inode->gfid); ++ else ++ gf_uuid_copy (gfid, local->base_gfid); ++ + while (shard_idx_iter <= last_block) { + if (local->inode_list[i]) { + i++; +@@ -2267,7 +2333,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode, + goto next; + } + +- shard_make_block_abspath (shard_idx_iter, inode->gfid, path, ++ shard_make_block_abspath (shard_idx_iter, gfid, path, + sizeof(path)); + + bname = strrchr (path, '/') + 1; +@@ -2279,7 +2345,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode, + gf_msg (this->name, GF_LOG_ERROR, 0, + SHARD_MSG_INODE_PATH_FAILED, "Inode path failed" + " on %s, base file gfid = %s", bname, +- uuid_utoa (inode->gfid)); ++ uuid_utoa (gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe (&loc); +@@ -2322,8 +2388,10 @@ next: + if (!--call_count) + break; + } +- if (local->lookup_shards_barriered) ++ if (local->lookup_shards_barriered) { + syncbarrier_wait (&local->barrier, count); ++ local->pls_fop_handler (frame, this); ++ } + return 0; + } + +@@ -2779,8 +2847,9 @@ shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this) + local = frame->local; + + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { +- shard_common_failure_unwind (local->fop, frame, local->op_ret, +- local->op_errno); ++ gf_msg (this->name, GF_LOG_ERROR, local->op_errno, ++ SHARD_MSG_FOP_FAILED, "failed to delete shards of %s", ++ uuid_utoa (local->resolver_base_inode->gfid)); + return 0; + } + local->op_ret = 0; +@@ -2791,41 +2860,12 @@ 
shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this) + } + + int +-shard_rename_cbk (call_frame_t *frame, xlator_t *this); +- +-int32_t +-shard_unlink_cbk (call_frame_t *frame, xlator_t *this); +- +-int + shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this) + { + shard_local_t *local = NULL; + + local = frame->local; +- +- if (local->op_ret < 0) { +- if (local->op_errno == ENOENT) { +- /* If lookup on /.shard fails with ENOENT, it probably +- * means that the file is being unlinked before it +- * could grow beyond its first block. In this case, +- * unlink boils down to unlinking the base file and +- * unwinding the call. +- */ +- local->op_ret = 0; +- local->first_block = local->last_block = 0; +- local->num_blocks = 1; +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_cbk (frame, this); +- else +- shard_rename_cbk (frame, this); +- return 0; +- } else { +- shard_common_failure_unwind (local->fop, frame, +- local->op_ret, +- local->op_errno); +- return 0; +- } +- } ++ local->lookup_shards_barriered = _gf_true; + + if (!local->call_count) + shard_unlink_shards_do (frame, this, +@@ -2841,6 +2881,7 @@ void + shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + { + char block_bname[256] = {0,}; ++ uuid_t gfid = {0,}; + inode_t *inode = NULL; + inode_t *base_inode = NULL; + xlator_t *this = NULL; +@@ -2854,12 +2895,17 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + + inode = local->inode_list[shard_block_num - local->first_block]; + base_inode = local->resolver_base_inode; ++ if (base_inode) ++ gf_uuid_copy (gfid, base_inode->gfid); ++ else ++ gf_uuid_copy (gfid, local->base_gfid); + +- shard_make_block_bname (shard_block_num, (local->loc.inode)->gfid, ++ shard_make_block_bname (shard_block_num, gfid, + block_bname, sizeof (block_bname)); + + LOCK(&priv->lock); +- LOCK(&base_inode->lock); ++ if (base_inode) ++ LOCK(&base_inode->lock); + LOCK(&inode->lock); + { + __shard_inode_ctx_get (inode, this, &ctx); +@@ -2870,14 +2916,18 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + unlink_unref_forget = _gf_true; + } + if (ctx->fsync_needed) { +- inode_unref (base_inode); ++ if (base_inode) ++ inode_unref (base_inode); + list_del_init (&ctx->to_fsync_list); +- __shard_inode_ctx_get (base_inode, this, &base_ictx); +- base_ictx->fsync_count--; ++ if (base_inode) { ++ __shard_inode_ctx_get (base_inode, this, &base_ictx); ++ base_ictx->fsync_count--; ++ } + } + } + UNLOCK(&inode->lock); +- UNLOCK(&base_inode->lock); ++ if (base_inode) ++ UNLOCK(&base_inode->lock); + if (unlink_unref_forget) { + inode_unlink (inode, priv->dot_shard_inode, block_bname); + inode_unref (inode); +@@ -2887,7 +2937,18 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + } + + int +-shard_rename_cbk (call_frame_t *frame, xlator_t *this); ++shard_rename_cbk (call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->preoldparent, ++ &local->postoldparent, &local->prenewparent, ++ &local->postnewparent, local->xattr_rsp); ++ return 0; ++} + + int32_t + shard_unlink_cbk (call_frame_t *frame, xlator_t *this) +@@ -2906,7 +2967,6 @@ shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) + { +- int call_count = 0; + int shard_block_num = (long) cookie; + shard_local_t 
*local = NULL; + +@@ -2919,22 +2979,8 @@ shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } + + shard_unlink_block_inode (local, shard_block_num); +- + done: +- call_count = shard_call_count_return (frame); +- if (local->unlink_shards_barriered) { +- syncbarrier_wake (&local->barrier); +- } else { +- +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID (frame, local); +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_cbk (frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_cbk (frame, this); +- } +- } ++ syncbarrier_wake (&local->barrier); + return 0; + } + +@@ -2944,11 +2990,11 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode) + int i = 0; + int ret = -1; + int count = 0; +- int call_count = 0; +- uint32_t last_block = 0; + uint32_t cur_block = 0; ++ uint32_t cur_block_idx = 0;/*this is idx into inode_list[] array */ + char *bname = NULL; + char path[PATH_MAX] = {0,}; ++ uuid_t gfid = {0,}; + loc_t loc = {0,}; + gf_boolean_t wind_failed = _gf_false; + shard_local_t *local = NULL; +@@ -2957,16 +3003,12 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode) + priv = this->private; + local = frame->local; + +- /* local->num_blocks includes the base file block. This function only +- * deletes the shards under /.shard. So subtract num_blocks by 1. +- */ +- local->call_count = call_count = local->num_blocks - 1; +- last_block = local->last_block; ++ if (inode) ++ gf_uuid_copy (gfid, inode->gfid); ++ else ++ gf_uuid_copy (gfid, local->base_gfid); + +- /* Ignore the inode associated with the base file and start counting +- * from 1. +- */ +- for (i = 1; i < local->num_blocks; i++) { ++ for (i = 0; i < local->num_blocks; i++) { + if (!local->inode_list[i]) + continue; + count++; +@@ -2975,35 +3017,21 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode) + if (!count) { + /* callcount = 0 implies that all of the shards that need to be + * unlinked are non-existent (in other words the file is full of +- * holes). So shard xlator can simply return the fop to its +- * parent now. ++ * holes). + */ + gf_msg_debug (this->name, 0, "All shards that need to be " + "unlinked are non-existent: %s", +- uuid_utoa (inode->gfid)); +- local->num_blocks = 1; +- if (local->fop == GF_FOP_UNLINK) { +- shard_unlink_cbk (frame, this); +- } else if (local->fop == GF_FOP_RENAME) { +- gf_msg_debug (this->name, 0, "Resuming rename()"); +- shard_rename_cbk (frame, this); +- } ++ uuid_utoa (gfid)); + return 0; + } + +- local->call_count = call_count = count; +- cur_block = 1; + SHARD_SET_ROOT_FS_ID (frame, local); +- if (local->unlink_shards_barriered) +- local->barrier.waitfor = count; ++ local->barrier.waitfor = count; ++ cur_block = cur_block_idx + local->first_block; + +- /* Ignore the base file and start iterating from the first block shard. 
+- */ +- while (cur_block <= last_block) { +- if (!local->inode_list[cur_block]) { +- cur_block++; +- continue; +- } ++ while (cur_block_idx < local->num_blocks) { ++ if (!local->inode_list[cur_block_idx]) ++ goto next; + + if (wind_failed) { + shard_unlink_shards_do_cbk (frame, +@@ -3013,8 +3041,7 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode) + goto next; + } + +- shard_make_block_abspath (cur_block, inode->gfid, path, +- sizeof (path)); ++ shard_make_block_abspath (cur_block, gfid, path, sizeof (path)); + bname = strrchr (path, '/') + 1; + loc.parent = inode_ref (priv->dot_shard_inode); + ret = inode_path (loc.parent, bname, (char **) &(loc.path)); +@@ -3022,7 +3049,7 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode) + gf_msg (this->name, GF_LOG_ERROR, 0, + SHARD_MSG_INODE_PATH_FAILED, "Inode path failed" + " on %s, base file gfid = %s", bname, +- uuid_utoa (inode->gfid)); ++ uuid_utoa (gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe (&loc); +@@ -3037,26 +3064,505 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode) + loc.name = strrchr (loc.path, '/'); + if (loc.name) + loc.name++; +- loc.inode = inode_ref (local->inode_list[cur_block]); ++ loc.inode = inode_ref (local->inode_list[cur_block_idx]); + + STACK_WIND_COOKIE (frame, shard_unlink_shards_do_cbk, + (void *) (long) cur_block, FIRST_CHILD(this), + FIRST_CHILD (this)->fops->unlink, &loc, + local->xflag, local->xattr_req); + loc_wipe (&loc); +- + next: + cur_block++; +- if (!--call_count) +- break; ++ cur_block_idx++; + } +- if (local->unlink_shards_barriered) +- syncbarrier_wait (&local->barrier, count); ++ syncbarrier_wait (&local->barrier, count); ++ SHARD_UNSET_ROOT_FS_ID (frame, local); ++ return 0; ++} ++ ++int ++shard_regulated_shards_deletion (call_frame_t *cleanup_frame, xlator_t *this, ++ int now, int first_block, gf_dirent_t *entry) ++{ ++ int i = 0; ++ int ret = 0; ++ shard_local_t *local = NULL; ++ uuid_t gfid = {0,}; ++ ++ local = cleanup_frame->local; ++ ++ local->inode_list = GF_CALLOC (now, sizeof (inode_t *), ++ gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ return -ENOMEM; ++ ++ local->first_block = first_block; ++ local->last_block = first_block + now - 1; ++ local->num_blocks = now; ++ gf_uuid_parse (entry->d_name, gfid); ++ gf_uuid_copy (local->base_gfid, gfid); ++ local->resolver_base_inode = inode_find (this->itable, gfid); ++ local->call_count = 0; ++ syncbarrier_init (&local->barrier); ++ ++ shard_common_resolve_shards (cleanup_frame, this, ++ shard_post_resolve_unlink_handler); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (local->inode_list[i]) ++ inode_unref (local->inode_list[i]); ++ } ++ GF_FREE (local->inode_list); ++ local->inode_list = NULL; ++ if (local->op_ret) ++ ret = -local->op_errno; ++ syncbarrier_destroy (&local->barrier); ++ inode_unref (local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ STACK_RESET (cleanup_frame->root); ++ return ret; ++} ++ ++ ++int ++__shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) ++{ ++ int ret = 0; ++ int shard_count = 0; ++ int first_block = 0; ++ int now = 0; ++ uint64_t size = 0; ++ uint64_t block_size = 0; ++ uint64_t size_array[4] = {0,}; ++ void *bsize = NULL; ++ void *size_attr = NULL; ++ dict_t *xattr_rsp = NULL; ++ loc_t loc = {0,}; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; + ++ priv = this->private; ++ local = 
cleanup_frame->local; ++ ret = dict_reset (local->xattr_req); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64 (local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", ++ GF_XATTR_SHARD_BLOCK_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64 (local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, ++ 8 * 4); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", ++ GF_XATTR_SHARD_FILE_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.inode = inode_ref (inode); ++ loc.parent = inode_ref (priv->dot_shard_rm_inode); ++ ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr (loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL, ++ local->xattr_req, &xattr_rsp); ++ if (ret) ++ goto err; ++ ++ ret = dict_get_ptr (xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", ++ GF_XATTR_SHARD_BLOCK_SIZE); ++ goto err; ++ } ++ block_size = ntoh64 (*((uint64_t *)bsize)); ++ ++ ret = dict_get_ptr (xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", ++ GF_XATTR_SHARD_FILE_SIZE); ++ goto err; ++ } ++ ++ memcpy (size_array, size_attr, sizeof (size_array)); ++ size = ntoh64 (size_array[0]); ++ ++ shard_count = (size / block_size) - 1; ++ if (shard_count < 0) { ++ gf_msg_debug (this->name, 0, "Size of %s hasn't grown beyond " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", entry->d_name); ++ /* File size < shard-block-size, so nothing to delete */ ++ ret = 0; ++ goto delete_marker; ++ } ++ if ((size % block_size) > 0) ++ shard_count++; ++ ++ if (shard_count == 0) { ++ gf_msg_debug (this->name, 0, "Size of %s is exactly equal to " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } ++ gf_msg_debug (this->name, 0, "base file = %s, " ++ "shard-block-size=%"PRIu64", file-size=%"PRIu64", " ++ "shard_count=%d", entry->d_name, block_size, size, ++ shard_count); ++ ++ /* Perform a gfid-based lookup to see if gfid corresponding to marker ++ * file's base name exists. ++ */ ++ loc_wipe (&loc); ++ loc.inode = inode_new (this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ gf_uuid_parse (entry->d_name, loc.gfid); ++ ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (!ret) { ++ gf_msg_debug (this->name, 0, "Base shard corresponding to gfid " ++ "%s is present. Skipping shard deletion. 
" ++ "Returning", entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } ++ ++ first_block = 1; ++ ++ while (shard_count) { ++ if (shard_count < local->deletion_rate) { ++ now = shard_count; ++ shard_count = 0; ++ } else { ++ now = local->deletion_rate; ++ shard_count -= local->deletion_rate; ++ } ++ ++ gf_msg_debug (this->name, 0, "deleting %d shards starting from " ++ "block %d of gfid %s", now, first_block, ++ entry->d_name); ++ ret = shard_regulated_shards_deletion (cleanup_frame, this, ++ now, first_block, ++ entry); ++ if (ret) ++ goto err; ++ first_block += now; ++ } ++ ++delete_marker: ++ loc_wipe (&loc); ++ loc.inode = inode_ref (inode); ++ loc.parent = inode_ref (priv->dot_shard_rm_inode); ++ ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.name = strrchr (loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_unlink (FIRST_CHILD(this), &loc, NULL, NULL); ++ if (ret) ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_SHARDS_DELETION_FAILED, "Failed to delete %s " ++ "from /%s", entry->d_name, GF_SHARD_REMOVE_ME_DIR); ++err: ++ if (xattr_rsp) ++ dict_unref (xattr_rsp); ++ loc_wipe (&loc); ++ return ret; ++} ++ ++int ++shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) ++{ ++ int ret = -1; ++ loc_t loc = {0,}; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ loc.inode = inode_ref (priv->dot_shard_rm_inode); ++ ++ ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc, ++ entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, ++ NULL); ++ if (ret) ++ goto out; ++ { ++ ret = __shard_delete_shards_of_entry (cleanup_frame, this, ++ entry, inode); ++ } ++ syncop_entrylk (FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); ++out: ++ loc_wipe (&loc); ++ return ret; ++} ++ ++int ++shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data) ++{ ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ ++ this = frame->this; ++ priv = this->private; ++ ++ if (ret < 0) { ++ gf_msg (this->name, GF_LOG_WARNING, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Background deletion of shards failed"); ++ priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; ++ } else { ++ priv->first_lookup = SHARD_FIRST_LOOKUP_DONE; ++ } ++ SHARD_STACK_DESTROY (frame); + return 0; + } + + int ++shard_resolve_internal_dir (xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) ++{ ++ int ret = 0; ++ char *bname = NULL; ++ loc_t *loc = NULL; ++ shard_priv_t *priv = NULL; ++ uuid_t gfid = {0,}; ++ struct iatt stbuf = {0,}; ++ ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ loc = &local->dot_shard_loc; ++ gf_uuid_copy (gfid, priv->dot_shard_gfid); ++ bname = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ loc = &local->dot_shard_rm_loc; ++ gf_uuid_copy (gfid, priv->dot_shard_rm_gfid); ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ ++ loc->inode = inode_find (this->itable, gfid); ++ if (!loc->inode) { ++ ret = shard_init_internal_dir_loc (this, local, type); ++ if (ret) ++ goto err; ++ ret = dict_reset (local->xattr_req); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, 0, ++ SHARD_MSG_DICT_OP_FAILED, "Failed to reset " ++ "dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ret = 
dict_set_static_bin (local->xattr_req, "gfid-req", gfid, ++ 16); ++ ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL, ++ local->xattr_req, NULL); ++ if (ret < 0) { ++ if (ret != -ENOENT) ++ gf_msg (this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Lookup on %s failed, exiting", bname); ++ goto err; ++ } else { ++ shard_link_internal_dir_inode (local, ++ loc->inode, &stbuf, ++ type); ++ } ++ } ++ ret = 0; ++err: ++ return ret; ++} ++ ++int ++shard_lookup_marker_entry (xlator_t *this, shard_local_t *local, ++ gf_dirent_t *entry) ++{ ++ int ret = 0; ++ loc_t loc = {0,}; ++ ++ loc.inode = inode_new (this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.parent = inode_ref (local->fd->inode); ++ ++ ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr (loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (ret < 0) { ++ goto err; ++ } ++ entry->inode = inode_ref (loc.inode); ++ ret = 0; ++err: ++ loc_wipe (&loc); ++ return ret; ++} ++ ++int ++shard_delete_shards (void *opaque) ++{ ++ int ret = 0; ++ off_t offset = 0; ++ loc_t loc = {0,}; ++ inode_t *link_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ gf_dirent_t entries; ++ gf_dirent_t *entry = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ ++ this = THIS; ++ priv = this->private; ++ INIT_LIST_HEAD (&entries.list); ++ ++ cleanup_frame = opaque; ++ ++ local = mem_get0 (this->local_pool); ++ if (!local) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ SHARD_MSG_MEMALLOC_FAILED, "Failed to create local to " ++ "delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ cleanup_frame->local = local; ++ ++ local->xattr_req = dict_new (); ++ if (!local->xattr_req) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ local->deletion_rate = priv->deletion_rate; ++ ++ ret = shard_resolve_internal_dir (this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret == -ENOENT) { ++ gf_msg_debug (this->name, 0, ".shard absent. Nothing to" ++ " delete. Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } ++ ++ ret = shard_resolve_internal_dir (this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ if (ret == -ENOENT) { ++ gf_msg_debug (this->name, 0, ".remove_me absent. " ++ "Nothing to delete. 
Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } ++ ++ local->fd = fd_anonymous (local->dot_shard_rm_loc.inode); ++ if (!local->fd) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072, ++ offset, &entries, local->xattr_req, ++ NULL))) { ++ if (ret > 0) ++ ret = 0; ++ list_for_each_entry (entry, &entries.list, list) { ++ offset = entry->d_off; ++ ++ if (!strcmp (entry->d_name, ".") || ++ !strcmp (entry->d_name, "..")) ++ continue; ++ ++ if (!entry->inode) { ++ ret = shard_lookup_marker_entry (this, local, ++ entry); ++ if (ret < 0) ++ continue; ++ } ++ link_inode = inode_link (entry->inode, local->fd->inode, ++ entry->d_name, &entry->d_stat); ++ ++ gf_msg_debug (this->name, 0, "Initiating deletion of " ++ "shards of gfid %s", entry->d_name); ++ ret = shard_delete_shards_of_entry (cleanup_frame, this, ++ entry, link_inode); ++ inode_unlink (link_inode, local->fd->inode, ++ entry->d_name); ++ inode_unref (link_inode); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to clean up shards of gfid %s", ++ entry->d_name); ++ continue; ++ } ++ gf_msg (this->name, GF_LOG_INFO, 0, ++ SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted " ++ "shards of gfid=%s from backend", ++ entry->d_name); ++ } ++ gf_dirent_free (&entries); ++ if (ret) ++ break; ++ } ++ ret = 0; ++err: ++ loc_wipe (&loc); ++ return ret; ++} ++ ++int + shard_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + { +@@ -3394,7 +3900,10 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->postoldparent = *postparent; + if (xdata) + local->xattr_rsp = dict_ref (xdata); ++ if (local->cleanup_required) ++ shard_start_background_deletion (this); + } ++ + if (local->entrylk_frame) { + ret = shard_unlock_entrylk (frame, this); + if (ret < 0) { +@@ -3408,6 +3917,7 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->op_ret = -1; + local->op_errno = -ret; + } ++ + shard_unlink_cbk (frame, this); + return 0; + } +@@ -3576,6 +4086,7 @@ shard_post_lookup_base_shard_rm_handler (call_frame_t *frame, xlator_t *this) + } else { + gf_msg_debug (this->name, 0, "link count on %s = 1, creating " + "file under .remove_me", local->int_inodelk.loc.path); ++ local->cleanup_required = _gf_true; + shard_acquire_entrylk (frame, this, priv->dot_shard_rm_inode, + local->prebuf.ia_gfid); + } +@@ -3788,20 +4299,6 @@ err: + } + + int +-shard_rename_cbk (call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->preoldparent, +- &local->postoldparent, &local->prenewparent, +- &local->postnewparent, local->xattr_rsp); +- return 0; +-} +- +-int + shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this) + { + shard_rename_cbk (frame, this); +@@ -3854,6 +4351,8 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->op_errno = -ret; + goto err; + } ++ if (local->cleanup_required) ++ shard_start_background_deletion (this); + } + + /* Now the base file of src, if sharded, is looked up to gather ia_size +@@ -4822,7 +5321,7 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this) + + if (dict_set_uint32 (local->xattr_req, + GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { +- gf_msg (this->name, GF_LOG_ERROR, 0, 
SHARD_MSG_DICT_SET_FAILED, ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set "GLUSTERFS_WRITE_UPDATE_ATOMIC" into " + "dict: %s", uuid_utoa (fd->inode->gfid)); + local->op_ret = -1; +@@ -5141,7 +5640,7 @@ shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this, + + ret = dict_set_bin (xattr_req, "gfid-req", *gfid, 16); + if (ret) { +- gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, ++ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set gfid-req for %s", + shard_internal_dir_string (type)); + goto err; +@@ -6186,6 +6685,8 @@ init (xlator_t *this) + + GF_OPTION_INIT ("shard-block-size", priv->block_size, size_uint64, out); + ++ GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out); ++ + this->local_pool = mem_pool_new (shard_local_t, 128); + if (!this->local_pool) { + ret = -1; +@@ -6241,6 +6742,8 @@ reconfigure (xlator_t *this, dict_t *options) + GF_OPTION_RECONF ("shard-block-size", priv->block_size, options, size, + out); + ++ GF_OPTION_RECONF ("shard-deletion-rate", priv->deletion_rate, options, ++ uint32, out); + ret = 0; + + out: +@@ -6364,5 +6867,12 @@ struct volume_options options[] = { + .description = "The size unit used to break a file into multiple " + "chunks", + }, ++ { .key = {"shard-deletion-rate"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "100", ++ .min = 100, ++ .max = INT_MAX, ++ .description = "The number of shards to send deletes on at a time", ++ }, + { .key = {NULL} }, + }; +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 1783ff6..5de098a 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -130,9 +130,9 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); + sizeof (*__bs)); \ + if (__ret) { \ + gf_msg (this->name, GF_LOG_WARNING, 0, \ +- SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \ ++ SHARD_MSG_DICT_OP_FAILED, "Failed to set key: %s " \ + "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path);\ +- GF_FREE (__bs); \ ++ GF_FREE (__bs); \ + goto label; \ + } \ + \ +@@ -144,7 +144,7 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); + __size_attr, 8 * 4); \ + if (__ret) { \ + gf_msg (this->name, GF_LOG_WARNING, 0, \ +- SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \ ++ SHARD_MSG_DICT_OP_FAILED, "Failed to set key: %s " \ + "on path %s", GF_XATTR_SHARD_FILE_SIZE, (loc)->path); \ + GF_FREE (__size_attr); \ + goto label; \ +@@ -160,7 +160,7 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); + local->op_ret = -1; \ + local->op_errno = ENOMEM; \ + gf_msg (this->name, GF_LOG_WARNING, 0, \ +- SHARD_MSG_DICT_SET_FAILED, "Failed to set dict value:"\ ++ SHARD_MSG_DICT_OP_FAILED, "Failed to set dict value:"\ + " key:%s for %s.", GF_XATTR_SHARD_FILE_SIZE, \ + uuid_utoa (gfid)); \ + goto label; \ +@@ -197,6 +197,12 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); + } \ + } while (0) + ++typedef enum { ++ SHARD_FIRST_LOOKUP_PENDING = 0, ++ SHARD_FIRST_LOOKUP_IN_PROGRESS, ++ SHARD_FIRST_LOOKUP_DONE, ++} shard_first_lookup_state_t; ++ + /* rm = "remove me" */ + + typedef struct shard_priv { +@@ -208,6 +214,8 @@ typedef struct shard_priv { + gf_lock_t lock; + int inode_count; + struct list_head ilist_head; ++ uint32_t deletion_rate; ++ shard_first_lookup_state_t first_lookup; + } shard_priv_t; + + typedef struct { +@@ -303,6 +311,9 @@ typedef struct shard_local { + call_frame_t *main_frame; + call_frame_t *inodelk_frame; + 
call_frame_t *entrylk_frame; ++ uint32_t deletion_rate; ++ gf_boolean_t cleanup_required; ++ uuid_t base_gfid; + } shard_local_t; + + typedef struct shard_inode_ctx { +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 5a697cf..4357562 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3298,6 +3298,11 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, ++ { .key = "features.shard-deletion-rate", ++ .voltype = "features/shard", ++ .op_version = GD_OP_VERSION_4_2_0, ++ .flags = OPT_FLAG_CLIENT_OPT ++ }, + { .key = "features.scrub-throttle", + .voltype = "features/bit-rot", + .value = "lazy", +-- +1.8.3.1 + diff --git a/SOURCES/0431-glusterd-Reset-op-version-for-features.shard-deletio.patch b/SOURCES/0431-glusterd-Reset-op-version-for-features.shard-deletio.patch new file mode 100644 index 0000000..73eaf67 --- /dev/null +++ b/SOURCES/0431-glusterd-Reset-op-version-for-features.shard-deletio.patch @@ -0,0 +1,58 @@ +From c06048a218b4a2e56f72b05b4f9f5842eec611e4 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Wed, 12 Sep 2018 21:41:35 +0530 +Subject: [PATCH 431/444] glusterd: Reset op-version for + "features.shard-deletion-rate" + +The op-version for the "features.shard-deletion-rate" option was set to +4.2.0 in the upstream patch and backported at +e75be952569eb69325d5f505f7ab94aace31be52. +This commit reverts the op-version for this option to 3.13.3. + +Label: DOWNSTREAM ONLY + +Change-Id: Ie3d12f3119ad7a4b40d81bd8bd6ed591658e8371 +BUG: 1520882 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/154865 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/globals.h | 3 ++- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 699e73e..97c4fad 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -109,11 +109,12 @@ + + #define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */ + +-#define GD_OP_VERSION_4_2_0 40200 /* Op-version for GlusterFs 4.2.0 */ ++#define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/ ++#define GD_OP_VERSION_3_13_4 31304 /* Op-version for RHGS-3.4-Batch Update-2*/ + + #include "xlator.h" + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 4357562..a825f52 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3300,7 +3300,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + }, + { .key = "features.shard-deletion-rate", + .voltype = "features/shard", +- .op_version = GD_OP_VERSION_4_2_0, ++ .op_version = GD_OP_VERSION_3_13_4, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "features.scrub-throttle", +-- +1.8.3.1 + diff --git a/SOURCES/0432-features-shard-Fix-crash-and-test-case-in-RENAME-fop.patch b/SOURCES/0432-features-shard-Fix-crash-and-test-case-in-RENAME-fop.patch new file mode 100644 index 0000000..82a43f5 --- /dev/null +++ 
b/SOURCES/0432-features-shard-Fix-crash-and-test-case-in-RENAME-fop.patch @@ -0,0 +1,250 @@ +From 212e89f8b257463ace8093dfc72253f515adb234 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Thu, 2 Aug 2018 21:48:34 +0530 +Subject: [PATCH 432/444] features/shard: Fix crash and test case in RENAME fop + +> Upstream: https://review.gluster.org/20623 +> BUG: 1611692 +> Change-Id: Iaf85a5ee3dff8b01a76e11972f10f2bb9dcbd407 + +Setting the refresh flag in inode ctx in shard_rename_src_cbk() +is applicable only when the dst file exists and is sharded and +has a hard link > 1 at the time of rename. + +But this piece of code is exercised even when dst doesn't exist. +In this case, the mount crashes because local->int_inodelk.loc.inode +is NULL. + +Change-Id: Iaf85a5ee3dff8b01a76e11972f10f2bb9dcbd407 +BUG: 1520882 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/154866 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/shard/unlinks-and-renames.t | 96 ++++++++++++++++++++-------------- + xlators/features/shard/src/shard.c | 7 ++- + 2 files changed, 61 insertions(+), 42 deletions(-) + +diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t +index 6e5164f..990ca69 100644 +--- a/tests/bugs/shard/unlinks-and-renames.t ++++ b/tests/bugs/shard/unlinks-and-renames.t +@@ -31,9 +31,10 @@ TEST mkdir $M0/dir + TEST touch $M0/dir/foo + TEST touch $M0/dir/new + +-###################################### +-##### Unlink with /.shard absent ##### +-###################################### ++########################################## ++##### 01. Unlink with /.shard absent ##### ++########################################## ++ + TEST truncate -s 5M $M0/dir/foo + TEST ! stat $B0/${V0}0/.shard + TEST ! stat $B0/${V0}1/.shard +@@ -45,9 +46,10 @@ TEST stat $B0/${V0}1/.shard/.remove_me + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo + +-################################################## +-##### Unlink of a sharded file without holes ##### +-################################################## ++###################################################### ++##### 02. Unlink of a sharded file without holes ##### ++###################################################### ++ + # Create a 9M sharded file + TEST dd if=/dev/zero of=$M0/dir/new bs=1024 count=9216 + gfid_new=$(get_gfid_string $M0/dir/new) +@@ -65,9 +67,10 @@ TEST ! stat $B0/${V0}1/dir/new + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new + +-####################################### +-##### Unlink with /.shard present ##### +-####################################### ++########################################### ++##### 03. Unlink with /.shard present ##### ++########################################### ++ + TEST truncate -s 5M $M0/dir/foo + gfid_foo=$(get_gfid_string $M0/dir/foo) + # Ensure its shards are absent. +@@ -81,9 +84,10 @@ TEST ! 
stat $M0/dir/foo + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo + +-############################################################# +-##### Unlink of a file with only one block (the zeroth) ##### +-############################################################# ++################################################################# ++##### 04. Unlink of a file with only one block (the zeroth) ##### ++################################################################# ++ + TEST touch $M0/dir/foo + gfid_foo=$(get_gfid_string $M0/dir/foo) + TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 count=1024 +@@ -95,9 +99,10 @@ TEST ! stat $M0/dir/foo + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo + +-#################################################### +-##### Unlink of a sharded file with hard-links ##### +-#################################################### ++######################################################## ++##### 05. Unlink of a sharded file with hard-links ##### ++######################################################## ++ + # Create a 9M sharded file + TEST dd if=/dev/zero of=$M0/dir/original bs=1024 count=9216 + gfid_original=$(get_gfid_string $M0/dir/original) +@@ -154,9 +159,10 @@ TEST mkdir $M0/dir + TEST touch $M0/dir/src + TEST touch $M0/dir/dst + +-###################################### +-##### Rename with /.shard absent ##### +-###################################### ++########################################## ++##### 06. Rename with /.shard absent ##### ++########################################## ++ + TEST truncate -s 5M $M0/dir/dst + gfid_dst=$(get_gfid_string $M0/dir/dst) + TEST ! stat $B0/${V0}0/.shard +@@ -172,9 +178,10 @@ TEST stat $B0/${V0}1/dir/dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + +-################################################## +-##### Rename to a sharded file without holes ##### +-################################################## ++###################################################### ++##### 07. Rename to a sharded file without holes ##### ++###################################################### ++ + TEST unlink $M0/dir/dst + TEST touch $M0/dir/src + # Create a 9M sharded file +@@ -197,9 +204,10 @@ TEST stat $B0/${V0}1/dir/dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + +-################################################### +-##### Rename of dst file with /.shard present ##### +-################################################### ++####################################################### ++##### 08. 
Rename of dst file with /.shard present ##### ++####################################################### ++ + TEST unlink $M0/dir/dst + TEST touch $M0/dir/src + TEST truncate -s 5M $M0/dir/dst +@@ -215,9 +223,10 @@ TEST stat $B0/${V0}1/dir/dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + +-############################################################### +-##### Rename of dst file with only one block (the zeroth) ##### +-############################################################### ++################################################################### ++##### 09. Rename of dst file with only one block (the zeroth) ##### ++################################################################### ++ + TEST unlink $M0/dir/dst + TEST touch $M0/dir/src + TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=1024 +@@ -233,9 +242,10 @@ TEST stat $B0/${V0}1/dir/dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + +-######################################################## +-##### Rename to a dst sharded file with hard-links ##### +-######################################################## ++############################################################ ++##### 10. Rename to a dst sharded file with hard-links ##### ++############################################################ ++ + TEST unlink $M0/dir/dst + TEST touch $M0/dir/src + # Create a 9M sharded file +@@ -276,7 +286,10 @@ TEST ! stat $B0/${V0}1/dir/src2 + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst + EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst + +-# Rename with non-existent dst and a sharded src ++############################################################## ++##### 11. Rename with non-existent dst and a sharded src ##### ++##############################################################l ++ + TEST touch $M0/dir/src + TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216 + gfid_src=$(get_gfid_string $M0/dir/src) +@@ -286,7 +299,7 @@ TEST stat $B0/${V0}1/.shard/$gfid_src.1 + TEST stat $B0/${V0}0/.shard/$gfid_src.2 + TEST stat $B0/${V0}1/.shard/$gfid_src.2 + # Now rename src to the dst. +-TEST mv $M0/dir/src $M0/dir/dst ++TEST mv $M0/dir/src $M0/dir/dst2 + + TEST stat $B0/${V0}0/.shard/$gfid_src.1 + TEST stat $B0/${V0}1/.shard/$gfid_src.1 +@@ -295,23 +308,26 @@ TEST stat $B0/${V0}1/.shard/$gfid_src.2 + TEST ! stat $M0/dir/src + TEST ! stat $B0/${V0}0/dir/src + TEST ! stat $B0/${V0}1/dir/src +-TEST stat $M0/dir/dst +-TEST stat $B0/${V0}0/dir/dst +-TEST stat $B0/${V0}1/dir/dst ++TEST stat $M0/dir/dst2 ++TEST stat $B0/${V0}0/dir/dst2 ++TEST stat $B0/${V0}1/dir/dst2 ++ ++############################################################################# ++##### 12. Rename with non-existent dst and a sharded src with no shards ##### ++############################################################################# + +-# Rename with non-existent dst and a sharded src with no shards + TEST touch $M0/dir/src + TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=1024 + gfid_src=$(get_gfid_string $M0/dir/src) + TEST ! stat $B0/${V0}0/.shard/$gfid_src.1 + TEST ! stat $B0/${V0}1/.shard/$gfid_src.1 + # Now rename src to the dst. +-TEST mv $M0/dir/src $M0/dir/dst ++TEST mv $M0/dir/src $M0/dir/dst1 + TEST ! stat $M0/dir/src + TEST ! 
stat $B0/${V0}0/dir/src + TEST ! stat $B0/${V0}1/dir/src +-TEST stat $M0/dir/dst +-TEST stat $B0/${V0}0/dir/dst +-TEST stat $B0/${V0}1/dir/dst ++TEST stat $M0/dir/dst1 ++TEST stat $B0/${V0}0/dir/dst1 ++TEST stat $B0/${V0}1/dir/dst1 + + cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 2faf711..6066a54 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -4324,9 +4324,12 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } + /* Set ctx->refresh to TRUE to force a lookup on disk when + * shard_lookup_base_file() is called next to refresh the hard link +- * count in ctx ++ * count in ctx. Note that this is applicable only to the case where ++ * the rename dst is already existent and sharded. + */ +- shard_inode_ctx_set_refresh_flag (local->int_inodelk.loc.inode, this); ++ if ((local->dst_block_size) && (!local->cleanup_required)) ++ shard_inode_ctx_set_refresh_flag (local->int_inodelk.loc.inode, ++ this); + + local->prebuf = *buf; + local->preoldparent = *preoldparent; +-- +1.8.3.1 + diff --git a/SOURCES/0433-mgmt-glusterd-use-proper-path-to-the-volfile.patch b/SOURCES/0433-mgmt-glusterd-use-proper-path-to-the-volfile.patch new file mode 100644 index 0000000..9c1d8a2 --- /dev/null +++ b/SOURCES/0433-mgmt-glusterd-use-proper-path-to-the-volfile.patch @@ -0,0 +1,153 @@ +From fd9e0103cd5c3f2962e063dbc3083c451b7e592b Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat +Date: Thu, 4 Oct 2018 14:27:45 -0400 +Subject: [PATCH 433/444] mgmt/glusterd: use proper path to the volfile + + > Upstream: https://review.gluster.org/#/c/glusterfs/+/21314/ + > BUG: 1635050 + > Change-Id: I28b2dfa5d9b379fe943db92c2fdfea879a6a594e + +NOTE: This patch is actually directly applied from the patch that + was sent to the release-4.1 branch. The master branch patch + will have merge conflicts due to the clang format changes done + there. This is the patch which this commit is a backport of. + + upstream(4.1): https://review.gluster.org/#/c/glusterfs/+/21348/ + Chane-ID: I28b2dfa5d9b379fe943db92c2fdfea879a6a594e + +Till now, glusterd was generating the volfile path for the snapshot +volume's bricks like this. + +/snaps// + +But in reality, the path to the brick volfile for a snapshot volume is + +/snaps/// + +The above workaround was used to distinguish between a mount command used +to mount the snapshot volume, and a brick of the snapshot volume, so that +based on what is actually happening, glusterd can return the proper volfile +(client volfile for the former and the brick volfile for the latter). But, +this was causing problems for snapshot restore when brick multiplexing is +enabled. Because, with brick multiplexing, it tries to find the volfile +and sends GETSPEC rpc call to glusterd using the 2nd style of path i.e. + +/snaps/// + +So, when the snapshot brick (which is multiplexed) sends a GETSPEC rpc +request to glusterd for obtaining the brick volume file, glusterd was +returning the client volume file of the snapshot volume instead of the +brick volume file. 
+ +Change-Id: I28b2dfa5d9b379fe943db92c2fdfea879a6a594e +BUG: 1636291 +Signed-off-by: Raghavendra Bhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/155129 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../snapview-server/src/snapview-server-helpers.c | 5 +++-- + xlators/mgmt/glusterd/src/glusterd-handshake.c | 20 ++++++++++++++++++-- + xlators/mgmt/glusterd/src/glusterd-utils.c | 9 +++++---- + 3 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c +index 2ad74ef..4c2edc6 100644 +--- a/xlators/features/snapview-server/src/snapview-server-helpers.c ++++ b/xlators/features/snapview-server/src/snapview-server-helpers.c +@@ -481,8 +481,9 @@ __svs_initialise_snapshot_volume (xlator_t *this, const char *name, + goto out; + } + +- snprintf (volname, sizeof (volname), "/snaps/%s/%s", +- dirent->name, dirent->snap_volname); ++ snprintf (volname, sizeof (volname), "/snaps/%s/%s/%s", ++ dirent->name, dirent->snap_volname, ++ dirent->snap_volname); + + + fs = glfs_new (volname); +diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c +index d5594d0..b2a9b20 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c +@@ -52,6 +52,7 @@ get_snap_volname_and_volinfo (const char *volpath, char **volname, + char *vol = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; ++ char *volfile_token = NULL; + + this = THIS; + GF_ASSERT (this); +@@ -101,12 +102,27 @@ get_snap_volname_and_volinfo (const char *volpath, char **volname, + */ + ret = glusterd_volinfo_find (volname_token, volinfo); + if (ret) { +- *volname = gf_strdup (volname_token); ++ gf_msg (this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "failed to get the volinfo for the volume %s", ++ volname_token); ++ ++ /* Get the actual volfile name */ ++ volfile_token = strtok_r (NULL, "/", &save_ptr); ++ *volname = gf_strdup (volfile_token); + if (NULL == *volname) { + ret = -1; + goto out; + } + ++ /* ++ * Ideally, this should succeed as volname_token now ++ * contains the name of the snap volume (i.e. name of ++ * the volume that represents the snapshot). ++ * But, if for some reason, volinfo for the snap volume ++ * is not found, then try to get from the name of the ++ * volfile. Name of the volfile is like this. 
++ * ...vol ++ */ + ret = glusterd_snap_volinfo_find (volname_token, snap, + volinfo); + if (ret) { +@@ -115,7 +131,7 @@ get_snap_volname_and_volinfo (const char *volpath, char **volname, + if (!vol) { + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INVALID_ENTRY, "Invalid " +- "volname (%s)", volname_token); ++ "volname (%s)", volfile_token); + goto out; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 04fae63..7179a68 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2068,10 +2068,10 @@ retry: + } + + if (volinfo->is_snap_volume) { +- snprintf (volfile, PATH_MAX,"/%s/%s/%s.%s.%s", ++ snprintf (volfile, PATH_MAX, "/%s/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, volinfo->volname, +- brickinfo->hostname, exp_path); ++ volinfo->volname, brickinfo->hostname, exp_path); + } else { + snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, + brickinfo->hostname, exp_path); +@@ -5676,10 +5676,11 @@ attach_brick (xlator_t *this, + GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf); + + if (volinfo->is_snap_volume) { +- snprintf (full_id, sizeof(full_id), "/%s/%s/%s.%s.%s", ++ snprintf (full_id, sizeof(full_id), "/%s/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, +- volinfo->volname, brickinfo->hostname, unslashed); ++ volinfo->volname, volinfo->volname, ++ brickinfo->hostname, unslashed); + } else { + snprintf (full_id, sizeof(full_id), "%s.%s.%s", + volinfo->volname, brickinfo->hostname, unslashed); +-- +1.8.3.1 + diff --git a/SOURCES/0434-cluster-afr-s-uuid_is_null-gf_uuid_is_null.patch b/SOURCES/0434-cluster-afr-s-uuid_is_null-gf_uuid_is_null.patch new file mode 100644 index 0000000..22c3883 --- /dev/null +++ b/SOURCES/0434-cluster-afr-s-uuid_is_null-gf_uuid_is_null.patch @@ -0,0 +1,32 @@ +From f0914172f481bb32b202612b080f7902ac31ad30 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Tue, 6 Nov 2018 21:32:55 +0530 +Subject: [PATCH 434/444] cluster/afr: s/uuid_is_null/gf_uuid_is_null + +BUG: 1619357 +Upstream-patch: https://review.gluster.org/c/glusterfs/+/21571 +Change-Id: I006116d329ac96268db132ae3aac06cc2be70e75 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/155128 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + xlators/cluster/afr/src/afr-common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index ce2b17a..10d9620 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2768,7 +2768,7 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this) + continue; + + if (replies[i].op_ret == 0) { +- if (uuid_is_null (gfid)) { ++ if (gf_uuid_is_null (gfid)) { + gf_uuid_copy (gfid, + replies[i].poststat.ia_gfid); + } +-- +1.8.3.1 + diff --git a/SOURCES/0435-geo-rep-Fix-traceback-with-symlink-metadata-sync.patch b/SOURCES/0435-geo-rep-Fix-traceback-with-symlink-metadata-sync.patch new file mode 100644 index 0000000..5399ba4 --- /dev/null +++ b/SOURCES/0435-geo-rep-Fix-traceback-with-symlink-metadata-sync.patch @@ -0,0 +1,93 @@ +From 7e7ffc4cc56b6b6ed460a49344082c3c25c1a23d Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Mon, 5 Nov 2018 11:46:41 +0530 +Subject: [PATCH 435/444] geo-rep: Fix traceback with symlink metadata sync + +While syncing metadata, 'os.chmod', 'os.chown', 
+'os.utime' should be used without de-reference. +But python supports only 'os.chown' without +de-reference. That's mostly because Linux +doesn't support 'chmod' on symlink file itself +but it does support 'chown'. + +So while syncing metadata ops, if it's symlink +we should only sync 'chown' and not do 'chmod' +and 'utime'. It will lead to tracebacks with +errors like EROFS, EPERM, ACCESS, ENOENT. +All the three errors (EPERM, ACCESS, ENOENT) +were handled except EROFS. But the way it was +handled was not fool proof. The operation is +tried and failure was handled based on the errors. +All the errors with symlink file for 'chown', +'utime' had to be passed to safe errors list of +'errno_wrap'. This patch handles it better by +avoiding 'chmod' and 'utime' if it's symlink +file. + +Backport of: + > Patch: https://review.gluster.org/21546 + > fixes: bz#1646104 + > Change-Id: Ic354206455cdc7ab2a87d741d81f4efe1f19d77d + > Signed-off-by: Kotresh HR + +BUG: 1645916 +Change-Id: Ic354206455cdc7ab2a87d741d81f4efe1f19d77d +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/155049 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Sunny Kumar +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/resource.py | 26 +++++++++++--------------- + 1 file changed, 11 insertions(+), 15 deletions(-) + +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index eb696f3..b289b3b 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -790,10 +790,8 @@ class Server(object): + # 'lchown' 'lchmod' 'utime with no-deference' blindly. + # But since 'lchmod' and 'utime with no de-reference' is + # not supported in python3, we have to rely on 'chmod' +- # and 'utime with de-reference'. But 'chmod' +- # de-reference the symlink and gets ENOENT, EACCES, +- # EPERM errors, hence ignoring those errors if it's on +- # symlink file. ++ # and 'utime with de-reference'. Hence avoiding 'chmod' ++ # and 'utime' if it's symlink file. 
+ + is_symlink = False + cmd_ret = errno_wrap(os.lchown, [go, uid, gid], [ENOENT], +@@ -801,19 +799,17 @@ class Server(object): + if isinstance(cmd_ret, int): + continue + +- cmd_ret = errno_wrap(os.chmod, [go, mode], +- [ENOENT, EACCES, EPERM], [ESTALE, EINVAL]) +- if isinstance(cmd_ret, int): +- is_symlink = os.path.islink(go) +- if not is_symlink: ++ is_symlink = os.path.islink(go) ++ ++ if not is_symlink: ++ cmd_ret = errno_wrap(os.chmod, [go, mode], ++ [ENOENT, EACCES, EPERM], [ESTALE, EINVAL]) ++ if isinstance(cmd_ret, int): + failures.append((e, cmd_ret, "chmod")) + +- cmd_ret = errno_wrap(os.utime, [go, (atime, mtime)], +- [ENOENT, EACCES, EPERM], [ESTALE, EINVAL]) +- if isinstance(cmd_ret, int): +- if not is_symlink: +- is_symlink = os.path.islink(go) +- if not is_symlink: ++ cmd_ret = errno_wrap(os.utime, [go, (atime, mtime)], ++ [ENOENT, EACCES, EPERM], [ESTALE, EINVAL]) ++ if isinstance(cmd_ret, int): + failures.append((e, cmd_ret, "utime")) + return failures + +-- +1.8.3.1 + diff --git a/SOURCES/0436-geo-rep-Fix-issue-in-gfid-conflict-resolution.patch b/SOURCES/0436-geo-rep-Fix-issue-in-gfid-conflict-resolution.patch new file mode 100644 index 0000000..33b8721 --- /dev/null +++ b/SOURCES/0436-geo-rep-Fix-issue-in-gfid-conflict-resolution.patch @@ -0,0 +1,204 @@ +From f42b8789cdcd93cb9fa93f35ed067268ce75f789 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Thu, 25 Oct 2018 03:23:56 -0400 +Subject: [PATCH 436/444] geo-rep: Fix issue in gfid-conflict-resolution + +Problem: +During gfid-conflict-resolution, geo-rep crashes +with 'ValueError: list.remove(x): x not in list' + +Cause and Analysis: +During gfid-conflict-resolution, the entry blob is +passed back to master along with additional +information to verify it's integrity. If everything +looks fine, the entry creation is ignored and is +deleted from the original list. But it is crashing +during removal of entry from the list saying entry +not in list. The reason is that the stat information +in the entry blob was modified and sent back to +master if present. + +Fix: +Send back the correct stat information for +gfid-conflict-resolution. + +Backport of: + > Patch: https://review.gluster.org/21483 + > fixes: bz#1642865 + > Change-Id: I47a6aa60b2a495465aa9314eebcb4085f0b1c4fd + > Signed-off-by: Kotresh HR + +BUG: 1640347 +Change-Id: I47a6aa60b2a495465aa9314eebcb4085f0b1c4fd +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/155038 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Sunny Kumar +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/resource.py | 42 +++++++++++++++++++--------------- + 1 file changed, 24 insertions(+), 18 deletions(-) + +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index b289b3b..f16066e 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -456,7 +456,7 @@ class Server(object): + st['uid'], st['gid'], + gf, st['mode'], bn, lnk) + +- def entry_purge(op, entry, gfid, e): ++ def entry_purge(op, entry, gfid, e, uid, gid): + # This is an extremely racy code and needs to be fixed ASAP. + # The GFID check here is to be sure that the pargfid/bname + # to be purged is the GFID gotten from the changelog. 
+@@ -470,7 +470,7 @@ class Server(object): + return + + if not matching_disk_gfid(gfid, entry): +- collect_failure(e, EEXIST) ++ collect_failure(e, EEXIST, uid, gid) + return + + if op == 'UNLINK': +@@ -486,7 +486,7 @@ class Server(object): + if er == ENOTEMPTY: + return er + +- def collect_failure(e, cmd_ret, dst=False): ++ def collect_failure(e, cmd_ret, uid, gid, dst=False): + slv_entry_info = {} + slv_entry_info['gfid_mismatch'] = False + slv_entry_info['name_mismatch'] = False +@@ -499,6 +499,11 @@ class Server(object): + if cmd_ret is None: + return False + ++ if e.get("stat", {}): ++ # Copy actual UID/GID value back to entry stat ++ e['stat']['uid'] = uid ++ e['stat']['gid'] = gid ++ + if cmd_ret == EEXIST: + if dst: + en = e['entry1'] +@@ -559,7 +564,7 @@ class Server(object): + + errno_wrap(os.rmdir, [path], [ENOENT, ESTALE], [EBUSY]) + +- def rename_with_disk_gfid_confirmation(gfid, entry, en): ++ def rename_with_disk_gfid_confirmation(gfid, entry, en, uid, gid): + if not matching_disk_gfid(gfid, entry): + logging.error(lf("RENAME ignored: source entry does not match " + "with on-disk gfid", +@@ -567,14 +572,13 @@ class Server(object): + gfid=gfid, + disk_gfid=get_gfid_from_mnt(entry), + target=en)) +- collect_failure(e, EEXIST) ++ collect_failure(e, EEXIST, uid, gid) + return + + cmd_ret = errno_wrap(os.rename, + [entry, en], + [ENOENT, EEXIST], [ESTALE, EBUSY]) +- collect_failure(e, cmd_ret) +- ++ collect_failure(e, cmd_ret, uid, gid) + + for e in entries: + blob = None +@@ -595,7 +599,7 @@ class Server(object): + if op in ['RMDIR', 'UNLINK']: + # Try once, if rmdir failed with ENOTEMPTY + # then delete recursively. +- er = entry_purge(op, entry, gfid, e) ++ er = entry_purge(op, entry, gfid, e, uid, gid) + if isinstance(er, int): + if er == ENOTEMPTY and op == 'RMDIR': + # Retry if ENOTEMPTY, ESTALE +@@ -632,7 +636,7 @@ class Server(object): + cmd_ret = errno_wrap(os.link, + [slink, entry], + [ENOENT, EEXIST], [ESTALE]) +- collect_failure(e, cmd_ret) ++ collect_failure(e, cmd_ret, uid, gid) + elif op == 'MKDIR': + en = e['entry'] + slink = os.path.join(pfx, gfid) +@@ -676,7 +680,7 @@ class Server(object): + cmd_ret = errno_wrap(os.link, + [slink, entry], + [ENOENT, EEXIST], [ESTALE]) +- collect_failure(e, cmd_ret) ++ collect_failure(e, cmd_ret, uid, gid) + elif op == 'SYMLINK': + en = e['entry'] + st = lstat(entry) +@@ -684,7 +688,7 @@ class Server(object): + blob = entry_pack_symlink(gfid, bname, e['link'], + e['stat']) + elif not matching_disk_gfid(gfid, en): +- collect_failure(e, EEXIST) ++ collect_failure(e, EEXIST, uid, gid) + elif op == 'RENAME': + en = e['entry1'] + # The matching disk gfid check validates two things +@@ -704,7 +708,7 @@ class Server(object): + blob = entry_pack_symlink(gfid, bname, + e['link'], e['stat']) + elif not matching_disk_gfid(gfid, en): +- collect_failure(e, EEXIST, True) ++ collect_failure(e, EEXIST, uid, gid, True) + else: + slink = os.path.join(pfx, gfid) + st = lstat(slink) +@@ -716,12 +720,13 @@ class Server(object): + else: + cmd_ret = errno_wrap(os.link, [slink, en], + [ENOENT, EEXIST], [ESTALE]) +- collect_failure(e, cmd_ret) ++ collect_failure(e, cmd_ret, uid, gid) + else: + st = lstat(entry) + st1 = lstat(en) + if isinstance(st1, int): +- rename_with_disk_gfid_confirmation(gfid, entry, en) ++ rename_with_disk_gfid_confirmation(gfid, entry, en, ++ uid, gid) + else: + if st.st_ino == st1.st_ino: + # we have a hard link, we can now unlink source +@@ -746,15 +751,16 @@ class Server(object): + else: + raise + elif not 
matching_disk_gfid(gfid, en): +- collect_failure(e, EEXIST, True) ++ collect_failure(e, EEXIST, uid, gid, True) + else: +- rename_with_disk_gfid_confirmation(gfid, entry, en) ++ rename_with_disk_gfid_confirmation(gfid, entry, en, ++ uid, gid) + if blob: + cmd_ret = errno_wrap(Xattr.lsetxattr, + [pg, 'glusterfs.gfid.newfile', blob], + [EEXIST, ENOENT], + [ESTALE, EINVAL, EBUSY]) +- failed = collect_failure(e, cmd_ret) ++ collect_failure(e, cmd_ret, uid, gid) + + # If UID/GID is different than zero that means we are trying + # create Entry with different UID/GID. Create Entry with +@@ -763,7 +769,7 @@ class Server(object): + path = os.path.join(pfx, gfid) + cmd_ret = errno_wrap(os.lchown, [path, uid, gid], [ENOENT], + [ESTALE, EINVAL]) +- collect_failure(e, cmd_ret) ++ collect_failure(e, cmd_ret, uid, gid) + + return failures + +-- +1.8.3.1 + diff --git a/SOURCES/0437-geo-rep-Add-more-intelligence-to-automatic-error-han.patch b/SOURCES/0437-geo-rep-Add-more-intelligence-to-automatic-error-han.patch new file mode 100644 index 0000000..d05394e --- /dev/null +++ b/SOURCES/0437-geo-rep-Add-more-intelligence-to-automatic-error-han.patch @@ -0,0 +1,144 @@ +From 85da98b9c54889139822b5c3d351a0249abf75b0 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Fri, 26 Oct 2018 03:45:46 -0400 +Subject: [PATCH 437/444] geo-rep: Add more intelligence to automatic error + handling + +Geo-rep's automatic error handling does gfid conflict +resolution. But if there are ENOENT errors because the +parent is not synced to slave, it doesn' handle them. +This patch adds the intelligence to create missing +parent directories on slave. It can create the missing +directories upto the depth of 10. + +Backport of: + > Patch: https://review.gluster.org/21498 + > fixes: bz#1643402 + > Change-Id: Ic97ed1fa5899c087e404d559e04f7963ed7bb54c + > Signed-off-by: Kotresh HR + +BUG: 1638069 +Change-Id: Ic97ed1fa5899c087e404d559e04f7963ed7bb54c +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/155039 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Sunny Kumar +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/master.py | 68 ++++++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 22 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index cd135df..bdb4da2 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -693,7 +693,7 @@ class GMasterChangelogMixin(GMasterCommon): + TYPE_ENTRY = "E " + + MAX_EF_RETRIES = 10 +- MAX_OE_RETRIES = 5 ++ MAX_OE_RETRIES = 10 + + # flat directory hierarchy for gfid based access + FLAT_DIR_HIERARCHY = '.' +@@ -836,11 +836,12 @@ class GMasterChangelogMixin(GMasterCommon): + # The file exists on master but with different name. + # Probably renamed and got missed during xsync crawl. 
+ elif failure[2]['slave_isdir']: +- realpath = os.readlink(os.path.join(gconf.local_path, +- ".glusterfs", +- slave_gfid[0:2], +- slave_gfid[2:4], +- slave_gfid)) ++ realpath = os.readlink(os.path.join( ++ gconf.local_path, ++ ".glusterfs", ++ slave_gfid[0:2], ++ slave_gfid[2:4], ++ slave_gfid)) + dst_entry = os.path.join(pfx, realpath.split('/')[-2], + realpath.split('/')[-1]) + src_entry = pbname +@@ -881,25 +882,37 @@ class GMasterChangelogMixin(GMasterCommon): + gfid=failure[2]['slave_gfid'], + entry=pbname)) + elif failure[1] == ENOENT: +- # Ignore ENOENT error for fix_entry_ops aka retry_count > 1 +- if retry_count > 1: +- logging.info(lf('ENOENT error while fixing entry ops. ' +- 'Safe to ignore, take out entry', ++ if op in ['RENAME']: ++ pbname = failure[0]['entry1'] ++ else: ++ pbname = failure[0]['entry'] ++ ++ pargfid = pbname.split('/')[1] ++ st = lstat(os.path.join(pfx, pargfid)) ++ # Safe to ignore the failure as master doesn't contain ++ # parent directory. ++ if isinstance(st, int): ++ logging.info(lf('Fixing ENOENT error in slave. Parent ' ++ 'does not exist on master. Safe to ' ++ 'ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) + entries.remove(failure[0]) +- elif op in ('MKNOD', 'CREATE', 'MKDIR'): +- pargfid = pbname.split('/')[1] +- st = lstat(os.path.join(pfx, pargfid)) +- # Safe to ignore the failure as master doesn't contain +- # parent directory. +- if isinstance(st, int): +- logging.info(lf('Fixing ENOENT error in slave. Parent ' +- 'does not exist on master. Safe to ' +- 'ignore, take out entry', +- retry_count=retry_count, +- entry=repr(failure))) +- entries.remove(failure[0]) ++ else: ++ logging.info(lf('Fixing ENOENT error in slave. Create ' ++ 'parent directory on slave.', ++ retry_count=retry_count, ++ entry=repr(failure))) ++ realpath = os.readlink(os.path.join(gconf.local_path, ++ ".glusterfs", ++ pargfid[0:2], ++ pargfid[2:4], ++ pargfid)) ++ dir_entry = os.path.join(pfx, realpath.split('/')[-2], ++ realpath.split('/')[-1]) ++ fix_entry_ops.append( ++ edct('MKDIR', gfid=pargfid, entry=dir_entry, ++ mode=st.st_mode, uid=st.st_uid, gid=st.st_gid)) + + if fix_entry_ops: + # Process deletions of entries whose gfids are mismatched +@@ -1077,6 +1090,11 @@ class GMasterChangelogMixin(GMasterCommon): + os.path.join(pfx, ec[self.POS_ENTRY1 - 1])) + entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en, + stat=st, link=rl)) ++ # If src doesn't exist while doing rename, destination ++ # is created. If data is not followed by rename, this ++ # remains zero byte file on slave. Hence add data entry ++ # for renames ++ datas.add(os.path.join(pfx, gfid)) + else: + # stat() to get mode and other information + if not matching_disk_gfid(gfid, en): +@@ -1100,6 +1118,12 @@ class GMasterChangelogMixin(GMasterCommon): + rl = None + entries.append(edct(ty, stat=st, entry=en, gfid=gfid, + link=rl)) ++ # If src doesn't exist while doing link, destination ++ # is created based on file type. If data is not ++ # followed by link, this remains zero byte file on ++ # slave. 
Hence add data entry for links ++ if rl is None: ++ datas.add(os.path.join(pfx, gfid)) + elif ty == 'SYMLINK': + rl = errno_wrap(os.readlink, [en], [ENOENT], + [ESTALE, EINTR]) +-- +1.8.3.1 + diff --git a/SOURCES/0438-cluster-dht-In-rename-unlink-after-creating-linkto-f.patch b/SOURCES/0438-cluster-dht-In-rename-unlink-after-creating-linkto-f.patch new file mode 100644 index 0000000..7c9ffe8 --- /dev/null +++ b/SOURCES/0438-cluster-dht-In-rename-unlink-after-creating-linkto-f.patch @@ -0,0 +1,365 @@ +From e76a777f3820e62948256a45a38d5e97f3eb08a9 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Tue, 28 Aug 2018 12:00:33 +0530 +Subject: [PATCH 438/444] cluster/dht: In rename, unlink after creating linkto + file + +The linkto file creation for the dst was done in parallel with +the unlink of the old src linkto. If these operations reached +the brick out of order, we end up with a dst linkto file without +a .glusterfs handle. + +Fixed by unlinking only after the linkto file creation has +completed. + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21023/ + +> Change-Id: I4246f7655f5bc180f5ded7fd34d263b7828a8110 +> fixes: bz#1621981 +> Signed-off-by: N Balachandran + +Change-Id: Ia845a68bb314997cadab57887a84dff9373400c4 +BUG: 1622001 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/154933 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +--- + tests/bugs/posix/bug-1619720.t | 1 + + xlators/cluster/dht/src/dht-rename.c | 254 ++++++++++++++++++----------------- + 2 files changed, 133 insertions(+), 122 deletions(-) + +diff --git a/tests/bugs/posix/bug-1619720.t b/tests/bugs/posix/bug-1619720.t +index 5e0d0f7..bfd304d 100755 +--- a/tests/bugs/posix/bug-1619720.t ++++ b/tests/bugs/posix/bug-1619720.t +@@ -48,6 +48,7 @@ TEST mv $M0/tmp/file-2 $M0/tmp/file-3 + + TEST mv -f $M0/tmp/file-1 $M0/tmp/file-3 + ++ + TEST getfattr -n $pgfid_xattr_name $B0/${V0}0/tmp/file-3 + TEST getfattr -n $pgfid_xattr_name $B0/${V0}1/tmp/file-3 + +diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c +index 1d0c2bb..378cb0a 100644 +--- a/xlators/cluster/dht/src/dht-rename.c ++++ b/xlators/cluster/dht/src/dht-rename.c +@@ -849,8 +849,8 @@ dht_rename_cleanup (call_frame_t *frame) + if (src_cached == dst_cached) + goto nolinks; + +- if (local->linked && (dst_hashed != src_hashed )&& +- (dst_hashed != src_cached)) { ++ if (local->linked && (dst_hashed != src_hashed) && ++ (dst_hashed != src_cached)) { + call_cnt++; + } + +@@ -935,6 +935,120 @@ nolinks: + + + int ++dht_rename_unlink (call_frame_t *frame, xlator_t *this) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *src_hashed = NULL; ++ xlator_t *src_cached = NULL; ++ xlator_t *dst_hashed = NULL; ++ xlator_t *dst_cached = NULL; ++ xlator_t *rename_subvol = NULL; ++ dict_t *xattr = NULL; ++ ++ local = frame->local; ++ ++ src_hashed = local->src_hashed; ++ src_cached = local->src_cached; ++ dst_hashed = local->dst_hashed; ++ dst_cached = local->dst_cached; ++ ++ local->call_cnt = 0; ++ ++ /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk ++ * is called. since rename has already happened on rename_subvol, ++ * unlink shouldn't be sent for oldpath (either linkfile or cached-file) ++ * on rename_subvol. 
*/ ++ if (src_cached == dst_cached) ++ rename_subvol = src_cached; ++ else ++ rename_subvol = dst_hashed; ++ ++ /* TODO: delete files in background */ ++ ++ if (src_cached != dst_hashed && src_cached != dst_cached) ++ local->call_cnt++; ++ ++ if (src_hashed != rename_subvol && src_hashed != src_cached) ++ local->call_cnt++; ++ ++ if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) ++ local->call_cnt++; ++ ++ if (local->call_cnt == 0) ++ goto unwind; ++ ++ DHT_MARK_FOP_INTERNAL (xattr); ++ ++ if (src_cached != dst_hashed && src_cached != dst_cached) { ++ dict_t *xattr_new = NULL; ++ ++ xattr_new = dict_copy_with_ref (xattr, NULL); ++ ++ gf_msg_trace (this->name, 0, ++ "deleting old src datafile %s @ %s", ++ local->loc.path, src_cached->name); ++ ++ if (gf_uuid_compare (local->loc.pargfid, ++ local->loc2.pargfid) == 0) { ++ DHT_MARKER_DONT_ACCOUNT(xattr_new); ++ } ++ ++ DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc, ++ &local->loc2); ++ STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, src_cached, ++ src_cached, src_cached->fops->unlink, ++ &local->loc, 0, xattr_new); ++ ++ dict_unref (xattr_new); ++ xattr_new = NULL; ++ } ++ ++ if (src_hashed != rename_subvol && src_hashed != src_cached) { ++ dict_t *xattr_new = NULL; ++ ++ xattr_new = dict_copy_with_ref (xattr, NULL); ++ ++ gf_msg_trace (this->name, 0, ++ "deleting old src linkfile %s @ %s", ++ local->loc.path, src_hashed->name); ++ ++ DHT_MARKER_DONT_ACCOUNT(xattr_new); ++ ++ STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, src_hashed, ++ src_hashed, src_hashed->fops->unlink, ++ &local->loc, 0, xattr_new); ++ ++ dict_unref (xattr_new); ++ xattr_new = NULL; ++ } ++ ++ if (dst_cached && ++ (dst_cached != dst_hashed) && ++ (dst_cached != src_cached)) { ++ gf_msg_trace (this->name, 0, ++ "deleting old dst datafile %s @ %s", ++ local->loc2.path, dst_cached->name); ++ ++ STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, dst_cached, ++ dst_cached, dst_cached->fops->unlink, ++ &local->loc2, 0, xattr); ++ } ++ if (xattr) ++ dict_unref (xattr); ++ return 0; ++ ++unwind: ++ WIPE (&local->preoldparent); ++ WIPE (&local->postoldparent); ++ WIPE (&local->preparent); ++ WIPE (&local->postparent); ++ ++ dht_rename_done (frame, this); ++ ++ return 0; ++} ++ ++int + dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, +@@ -947,6 +1061,7 @@ dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + prev = cookie; + local = frame->local; + ++ /* TODO: Handle this case in lookup-optimize */ + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_CREATE_LINK_FAILED, +@@ -958,8 +1073,8 @@ dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->linked = _gf_false; + dht_linkfile_attr_heal (frame, this); + } +- DHT_STACK_DESTROY (frame); + ++ dht_rename_unlink (frame, this); + return 0; + } + +@@ -973,19 +1088,14 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + { + dht_local_t *local = NULL; + xlator_t *prev = NULL; +- xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; +- xlator_t *rename_subvol = NULL; +- call_frame_t *link_frame = NULL; +- dht_local_t *link_local = NULL; +- dict_t *xattr = NULL; ++ loc_t link_loc = {0}; + + local = frame->local; + prev = cookie; + +- src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + 
dst_cached = local->dst_cached; +@@ -1043,31 +1153,6 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->xattr = dict_copy_with_ref (xdata, local->xattr); + } + +- if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) { +- link_frame = copy_frame (frame); +- if (!link_frame) { +- goto err; +- } +- +- /* fop value sent as maxvalue because it is not used +- anywhere in this case */ +- link_local = dht_local_init (link_frame, &local->loc2, NULL, +- GF_FOP_MAXVALUE); +- if (!link_local) { +- goto err; +- } +- +- if (link_local->loc.inode) +- inode_unref (link_local->loc.inode); +- link_local->loc.inode = inode_ref (local->loc.inode); +- gf_uuid_copy (link_local->gfid, local->loc.inode->gfid); +- +- dht_linkfile_create (link_frame, dht_rename_links_create_cbk, +- this, src_cached, dst_hashed, +- &link_local->loc); +- } +- +-err: + /* Merge attrs only from src_cached. In case there of src_cached != + * dst_hashed, this ignores linkfile attrs. */ + if (prev == src_cached) { +@@ -1080,98 +1165,23 @@ err: + dht_iatt_merge (this, &local->postparent, postnewparent, prev); + } + ++ /* Create the linkto file for the dst file */ ++ if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) { + +- /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk +- * is called. since rename has already happened on rename_subvol, +- * unlink should not be sent for oldpath (either linkfile or cached-file) +- * on rename_subvol. */ +- if (src_cached == dst_cached) +- rename_subvol = src_cached; +- else +- rename_subvol = dst_hashed; +- +- /* TODO: delete files in background */ +- +- if (src_cached != dst_hashed && src_cached != dst_cached) +- local->call_cnt++; +- +- if (src_hashed != rename_subvol && src_hashed != src_cached) +- local->call_cnt++; +- +- if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) +- local->call_cnt++; +- +- if (local->call_cnt == 0) +- goto unwind; +- +- DHT_MARK_FOP_INTERNAL (xattr); +- +- if (src_cached != dst_hashed && src_cached != dst_cached) { +- dict_t *xattr_new = NULL; +- +- xattr_new = dict_copy_with_ref (xattr, NULL); +- +- gf_msg_trace (this->name, 0, +- "deleting old src datafile %s @ %s", +- local->loc.path, src_cached->name); +- +- if (gf_uuid_compare (local->loc.pargfid, +- local->loc2.pargfid) == 0) { +- DHT_MARKER_DONT_ACCOUNT(xattr_new); +- } +- +- DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc, +- &local->loc2); +- STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, src_cached, +- src_cached, src_cached->fops->unlink, +- &local->loc, 0, xattr_new); +- +- dict_unref (xattr_new); +- xattr_new = NULL; +- } +- +- if (src_hashed != rename_subvol && src_hashed != src_cached) { +- dict_t *xattr_new = NULL; +- +- xattr_new = dict_copy_with_ref (xattr, NULL); +- +- gf_msg_trace (this->name, 0, +- "deleting old src linkfile %s @ %s", +- local->loc.path, src_hashed->name); +- +- DHT_MARKER_DONT_ACCOUNT(xattr_new); +- +- STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, src_hashed, +- src_hashed, src_hashed->fops->unlink, +- &local->loc, 0, xattr_new); +- +- dict_unref (xattr_new); +- xattr_new = NULL; +- } ++ loc_copy (&link_loc, &local->loc2); ++ if (link_loc.inode) ++ inode_unref (link_loc.inode); ++ link_loc.inode = inode_ref (local->loc.inode); ++ gf_uuid_copy (local->gfid, local->loc.inode->gfid); ++ gf_uuid_copy (link_loc.gfid, local->loc.inode->gfid); + +- if (dst_cached +- && (dst_cached != dst_hashed) +- && (dst_cached != src_cached)) { +- gf_msg_trace (this->name, 0, +- "deleting old dst datafile %s 
@ %s", +- local->loc2.path, dst_cached->name); +- +- STACK_WIND_COOKIE (frame, dht_rename_unlink_cbk, dst_cached, +- dst_cached, dst_cached->fops->unlink, +- &local->loc2, 0, xattr); ++ dht_linkfile_create (frame, dht_rename_links_create_cbk, ++ this, src_cached, dst_hashed, ++ &link_loc); ++ return 0; + } +- if (xattr) +- dict_unref (xattr); +- return 0; +- +-unwind: +- WIPE (&local->preoldparent); +- WIPE (&local->postoldparent); +- WIPE (&local->preparent); +- WIPE (&local->postparent); +- +- dht_rename_done (frame, this); + ++ dht_rename_unlink (frame, this); + return 0; + + cleanup: +-- +1.8.3.1 + diff --git a/SOURCES/0439-cluster-dht-fixes-to-unlinking-invalid-linkto-file.patch b/SOURCES/0439-cluster-dht-fixes-to-unlinking-invalid-linkto-file.patch new file mode 100644 index 0000000..e508553 --- /dev/null +++ b/SOURCES/0439-cluster-dht-fixes-to-unlinking-invalid-linkto-file.patch @@ -0,0 +1,70 @@ +From 07ae526af10de814d174189ff41709cf781ace9c Mon Sep 17 00:00:00 2001 +From: Raghavendra Gowdappa +Date: Tue, 30 Oct 2018 12:15:35 +0530 +Subject: [PATCH 439/444] cluster/dht: fixes to unlinking invalid linkto file + +If unlinking of an invalid linkto file failed in lookup-everywhere +codepath, lookup was failed with EIO. The rational as per the comment +was, + + + +/* When dht_lookup_everywhere is performed, one cached + *and one hashed file was found and hashed file does + *not point to the above mentioned cached node. So it + *was considered as stale and an unlink was performed. + *But unlink fails. So may be rebalance is in progress. + *now ideally we have two data-files. One obtained during + *lookup_everywhere and one where unlink-failed. So + *at this point in time we cannot decide which one to + *choose because there are chances of first cached + *file is truncated after rebalance and if it is chosen + *as cached node, application will fail. So return EIO. +*/ + + + +However, this reasoning is only valid when +* op_errno is EBUSY, indicating rebalance is in progress +* op_errno is ENOTCONN as wecannot determine what was the status of + file on brick. + +Hence this patch doesn't fail lookup unless unlink fails with an +either EBUSY or ENOTCONN + +>Change-Id: Ife55f3d97fe557f3db05beae0c2d786df31e8e55 +>Fixes: bz#1635145 +>Signed-off-by: Raghavendra Gowdappa + +Change-Id: Ife55f3d97fe557f3db05beae0c2d786df31e8e55 +BUG: 1634649 +upstream patch: http://review.gluster.org/r/Ife55f3d97fe557f3db05beae0c2d786df31e8e55 +Signed-off-by: Raghavendra Gowdappa +Reviewed-on: https://code.engineering.redhat.com/gerrit/155102 +Tested-by: RHGS Build Bot +Reviewed-by: Nithya Balachandran +--- + xlators/cluster/dht/src/dht-common.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 0984f8f..d3a0c8b 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -2104,10 +2104,9 @@ dht_lookup_unlink_of_false_linkto_cbk (call_frame_t *frame, void *cookie, + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { +- +- if (op_ret == 0) { +- dht_lookup_everywhere_done (frame, this); +- } else { ++ if ((op_ret == 0) || ((op_errno != EBUSY) && (op_errno != ENOTCONN))) { ++ dht_lookup_everywhere_done (frame, this); ++ } else { + /*When dht_lookup_everywhere is performed, one cached + *and one hashed file was found and hashed file does + *not point to the above mentioned cached node. 
So it +-- +1.8.3.1 + diff --git a/SOURCES/0440-features-locks-Use-pthread_mutex_unlock-instead-of-p.patch b/SOURCES/0440-features-locks-Use-pthread_mutex_unlock-instead-of-p.patch new file mode 100644 index 0000000..62b4f4f --- /dev/null +++ b/SOURCES/0440-features-locks-Use-pthread_mutex_unlock-instead-of-p.patch @@ -0,0 +1,34 @@ +From efd713e5f9067a743f532923c529416fc5f5189e Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Wed, 7 Nov 2018 13:06:07 +0530 +Subject: [PATCH 440/444] features/locks:Use pthread_mutex_unlock() instead of + pthread_mutex_lock() + +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/21579/ + +Change-Id: I85ea6e351f07cc289245cfb501f027942ee3176c +BUG: 1647675 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/155326 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/locks/src/posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index b434a08..2cc2837 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -2966,7 +2966,7 @@ pl_metalk (call_frame_t *frame, xlator_t *this, inode_t *inode) + ret = -1; + } + } +- pthread_mutex_lock (&pl_inode->mutex); ++ pthread_mutex_unlock (&pl_inode->mutex); + + if (ret == -1) { + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0441-features-shard-Make-lru-limit-of-inode-list-configur.patch b/SOURCES/0441-features-shard-Make-lru-limit-of-inode-list-configur.patch new file mode 100644 index 0000000..019251b --- /dev/null +++ b/SOURCES/0441-features-shard-Make-lru-limit-of-inode-list-configur.patch @@ -0,0 +1,201 @@ +From 7b12a7ea7a6b4945ad52f218b187ca440dfbef63 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 20 Jul 2018 10:52:22 +0530 +Subject: [PATCH 441/444] features/shard: Make lru limit of inode list + configurable + +> Upstream: https://review.gluster.org/20544 +> BUG: 1605056 +> Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295 + +Currently this lru limit is hard-coded to 16384. This patch makes it +configurable to make it easier to hit the lru limit and enable testing +of different cases that arise when the limit is reached. + +The option is features.shard-lru-limit. It is by design allowed to +be configured only in init() but not in reconfigure(). This is to avoid +all the complexity associated with eviction of least recently used shards +when the list is shrunk. 
+ +Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295 +BUG: 1603118 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/155126 +Reviewed-by: Xavi Hernandez +Tested-by: RHGS Build Bot +--- + libglusterfs/src/globals.h | 4 ++- + tests/bugs/shard/configure-lru-limit.t | 48 +++++++++++++++++++++++++ + xlators/features/shard/src/shard.c | 19 ++++++++-- + xlators/features/shard/src/shard.h | 3 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 ++++ + 5 files changed, 75 insertions(+), 5 deletions(-) + create mode 100644 tests/bugs/shard/configure-lru-limit.t + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 97c4fad..555f44b 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -43,7 +43,7 @@ + */ + #define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly + should not change */ +-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_3 /* MAX VERSION is the maximum ++#define GD_OP_VERSION_MAX GD_OP_VERSION_4_2_0 /* MAX VERSION is the maximum + count in VME table, should + keep changing with + introduction of newer +@@ -111,6 +111,8 @@ + + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + ++#define GD_OP_VERSION_4_2_0 40200 /* Op-version for GlusterFS 4.2.0 */ ++ + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/ +diff --git a/tests/bugs/shard/configure-lru-limit.t b/tests/bugs/shard/configure-lru-limit.t +new file mode 100644 +index 0000000..a8ba8ed +--- /dev/null ++++ b/tests/bugs/shard/configure-lru-limit.t +@@ -0,0 +1,48 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 features.shard-lru-limit 25 ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++# Perform a write that would cause 25 shards to be created, 24 of them under .shard ++TEST dd if=/dev/zero of=$M0/foo bs=1M count=100 ++ ++statedump=$(generate_mount_statedump $V0) ++sleep 1 ++EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1) ++ ++# Base shard is never added to this list. So all other shards should make up for 24 inodes in lru list ++EXPECT "24" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1) ++ ++rm -f $statedump ++ ++# Test to ensure there's no "reconfiguration" of the value once set. 
++TEST $CLI volume set $V0 features.shard-lru-limit 30 ++statedump=$(generate_mount_statedump $V0) ++sleep 1 ++EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1) ++rm -f $statedump ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++statedump=$(generate_mount_statedump $V0) ++sleep 1 ++EXPECT "30" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1) ++rm -f $statedump ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 6066a54..eb32168 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -668,7 +668,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, + shard_inode_ctx_get (linked_inode, this, &ctx); + + if (list_empty (&ctx->ilist)) { +- if (priv->inode_count + 1 <= SHARD_MAX_INODES) { ++ if (priv->inode_count + 1 <= priv->lru_limit) { + /* If this inode was linked here for the first time (indicated + * by empty list), and if there is still space in the priv list, + * add this ctx to the tail of the list. +@@ -6690,6 +6690,8 @@ init (xlator_t *this) + + GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out); + ++ GF_OPTION_INIT ("shard-lru-limit", priv->lru_limit, uint64, out); ++ + this->local_pool = mem_pool_new (shard_local_t, 128); + if (!this->local_pool) { + ret = -1; +@@ -6808,7 +6810,7 @@ shard_priv_dump (xlator_t *this) + gf_uint64_2human_readable (priv->block_size)); + gf_proc_dump_write ("inode-count", "%d", priv->inode_count); + gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head); +- gf_proc_dump_write ("lru-max-limit", "%d", SHARD_MAX_INODES); ++ gf_proc_dump_write ("lru-max-limit", "%d", priv->lru_limit); + + return 0; + } +@@ -6877,5 +6879,18 @@ struct volume_options options[] = { + .max = INT_MAX, + .description = "The number of shards to send deletes on at a time", + }, ++ { .key = {"shard-lru-limit"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "16384", ++ .min = 20, ++ .max = INT_MAX, ++ .description = "The number of resolved shard inodes to keep in " ++ "memory. A higher number means shards that are " ++ "resolved will remain in memory longer, avoiding " ++ "frequent lookups on them when they participate in " ++ "file operations. The option also has a bearing on " ++ "amount of memory consumed by these inodes and their " ++ "internal metadata", ++ }, + { .key = {NULL} }, + }; +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 5de098a..ac3813c 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -23,8 +23,6 @@ + #define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB) + #define SHARD_XATTR_PREFIX "trusted.glusterfs.shard." 
+ #define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size" +-#define SHARD_INODE_LRU_LIMIT 4096 +-#define SHARD_MAX_INODES 16384 + /** + * Bit masks for the valid flag, which is used while updating ctx + **/ +@@ -216,6 +214,7 @@ typedef struct shard_priv { + struct list_head ilist_head; + uint32_t deletion_rate; + shard_first_lookup_state_t first_lookup; ++ uint64_t lru_limit; + } shard_priv_t; + + typedef struct { +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index a825f52..d442fe0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3298,6 +3298,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, ++ { .key = "features.shard-lru-limit", ++ .voltype = "features/shard", ++ .op_version = GD_OP_VERSION_4_2_0, ++ .flags = OPT_FLAG_CLIENT_OPT, ++ .type = NO_DOC, ++ }, + { .key = "features.shard-deletion-rate", + .voltype = "features/shard", + .op_version = GD_OP_VERSION_3_13_4, +-- +1.8.3.1 + diff --git a/SOURCES/0442-glusterd-Reset-op-version-for-features.shard-lru-lim.patch b/SOURCES/0442-glusterd-Reset-op-version-for-features.shard-lru-lim.patch new file mode 100644 index 0000000..f12bdd6 --- /dev/null +++ b/SOURCES/0442-glusterd-Reset-op-version-for-features.shard-lru-lim.patch @@ -0,0 +1,63 @@ +From dd1d565505d1f9c41dd6f151341f9337d89aa7cf Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Tue, 6 Nov 2018 18:44:55 +0530 +Subject: [PATCH 442/444] glusterd: Reset op-version for + "features.shard-lru-limit" + +The op-version for the "features.shard-lru-limit" option was set to +4.2.0 in the upstream patch and backported at +41e7e33c6512e98a1567e5a5532d3898b59cfa98 + +This commit reverts the op-version for this option to 3.13.4. 
+ +Label: DOWNSTREAM ONLY + +Change-Id: I7d3ed6b373851267c78fc6815a83bee2c0906413 +BUG: 1603118 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/155127 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +--- + libglusterfs/src/globals.h | 4 +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 2 insertions(+), 4 deletions(-) + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 555f44b..1bede2e 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -43,7 +43,7 @@ + */ + #define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly + should not change */ +-#define GD_OP_VERSION_MAX GD_OP_VERSION_4_2_0 /* MAX VERSION is the maximum ++#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_4 /* MAX VERSION is the maximum + count in VME table, should + keep changing with + introduction of newer +@@ -111,8 +111,6 @@ + + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + +-#define GD_OP_VERSION_4_2_0 40200 /* Op-version for GlusterFS 4.2.0 */ +- + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index d442fe0..1175f1d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3300,7 +3300,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + }, + { .key = "features.shard-lru-limit", + .voltype = "features/shard", +- .op_version = GD_OP_VERSION_4_2_0, ++ .op_version = GD_OP_VERSION_3_13_4, + .flags = OPT_FLAG_CLIENT_OPT, + .type = NO_DOC, + }, +-- +1.8.3.1 + diff --git a/SOURCES/0443-features-shard-Hold-a-ref-on-base-inode-when-adding-.patch b/SOURCES/0443-features-shard-Hold-a-ref-on-base-inode-when-adding-.patch new file mode 100644 index 0000000..0bfe143 --- /dev/null +++ b/SOURCES/0443-features-shard-Hold-a-ref-on-base-inode-when-adding-.patch @@ -0,0 +1,367 @@ +From 72ce80749fca03ab97a63af79d4e6bc76a49ab64 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 5 Oct 2018 11:32:21 +0530 +Subject: [PATCH 443/444] features/shard: Hold a ref on base inode when adding + a shard to lru list + + > Upstream: https://review.gluster.org/21454 + > BUG: 1605056 + > Change-Id: Ic15ca41444dd04684a9458bd4a526b1d3e160499 + +In __shard_update_shards_inode_list(), previously shard translator +was not holding a ref on the base inode whenever a shard was added to +the lru list. But if the base shard is forgotten and destroyed either +by fuse due to memory pressure or due to the file being deleted at some +point by a different client with this client still containing stale +shards in its lru list, the client would crash at the time of locking +lru_base_inode->lock owing to illegal memory access. + +So now the base shard is ref'd into the inode ctx of every shard that +is added to lru list until it gets lru'd out. + +The patch also handles the case where none of the shards associated +with a file that is about to be deleted are part of the LRU list and +where an unlink at the beginning of the operation destroys the base +inode (because there are no refkeepers) and hence all of the shards +that are about to be deleted will be resolved without the existence +of a base shard in-memory. This, if not handled properly, could lead +to a crash. 
+ +Change-Id: Ic15ca41444dd04684a9458bd4a526b1d3e160499 +BUG: 1603118 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/155318 +Reviewed-by: Xavi Hernandez +Tested-by: RHGS Build Bot +--- + tests/bugs/shard/bug-1605056-2.t | 34 +++++++++++++++ + tests/bugs/shard/bug-1605056.t | 63 ++++++++++++++++++++++++++++ + tests/bugs/shard/shard-inode-refcount-test.t | 2 +- + tests/volume.rc | 12 ++++-- + xlators/features/shard/src/shard.c | 48 +++++++++++++++------ + 5 files changed, 141 insertions(+), 18 deletions(-) + create mode 100644 tests/bugs/shard/bug-1605056-2.t + create mode 100644 tests/bugs/shard/bug-1605056.t + +diff --git a/tests/bugs/shard/bug-1605056-2.t b/tests/bugs/shard/bug-1605056-2.t +new file mode 100644 +index 0000000..a9c10fe +--- /dev/null ++++ b/tests/bugs/shard/bug-1605056-2.t +@@ -0,0 +1,34 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 features.shard-lru-limit 25 ++TEST $CLI volume set $V0 performance.write-behind off ++ ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++# Perform a write that would cause 25 shards to be created under .shard ++TEST dd if=/dev/zero of=$M0/foo bs=1M count=104 ++ ++# Write into another file bar to ensure all of foo's shards are evicted from lru list of $M0 ++TEST dd if=/dev/zero of=$M0/bar bs=1M count=104 ++ ++# Delete foo from $M0. If there's a bug, the mount will crash. ++TEST unlink $M0/foo ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/tests/bugs/shard/bug-1605056.t b/tests/bugs/shard/bug-1605056.t +new file mode 100644 +index 0000000..c2329ea +--- /dev/null ++++ b/tests/bugs/shard/bug-1605056.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++SHARD_COUNT_TIME=5 ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 features.shard-lru-limit 25 ++TEST $CLI volume set $V0 performance.write-behind off ++ ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1 ++ ++# Perform a write that would cause 25 shards to be created under .shard ++TEST dd if=/dev/zero of=$M0/foo bs=1M count=104 ++ ++# Read the file from $M1, indirectly filling up the lru list. ++TEST `cat $M1/foo > /dev/null` ++statedump=$(generate_mount_statedump $V0 $M1) ++sleep 1 ++EXPECT "25" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1) ++rm -f $statedump ++ ++# Delete foo from $M0. ++TEST unlink $M0/foo ++ ++# Send stat on foo from $M1 to force $M1 to "forget" inode associated with foo. ++# Now the ghost shards associated with "foo" are still in lru list of $M1. ++TEST ! stat $M1/foo ++ ++# Let's force the ghost shards of "foo" out of lru list by looking up more shards ++# through I/O on a file named "bar" from $M1. This should crash if the base inode ++# had been destroyed by now. 
++ ++TEST dd if=/dev/zero of=$M1/bar bs=1M count=104 ++ ++############################################### ++#### Now for some inode ref-leak tests ... #### ++############################################### ++ ++# Expect there to be 29 active inodes - 26 belonging to "bar", 1 for .shard, ++# 1 for .shard/remove_me and 1 for '/' ++EXPECT_WITHIN $SHARD_COUNT_TIME `expr 26 + 3` get_mount_active_size_value $V0 $M1 ++ ++TEST rm -f $M1/bar ++EXPECT_WITHIN $SHARD_COUNT_TIME 3 get_mount_active_size_value $V0 $M1 ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t +index 087c8ba..3fd181b 100644 +--- a/tests/bugs/shard/shard-inode-refcount-test.t ++++ b/tests/bugs/shard/shard-inode-refcount-test.t +@@ -21,7 +21,7 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23 + ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0) + TEST rm -f $M0/one-plus-five-shards + # Expect 5 inodes less. But one inode more than before because .remove_me would be created. +-EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 ++EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 $M0 + + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $CLI volume stop $V0 +diff --git a/tests/volume.rc b/tests/volume.rc +index bba7e4e..6a983fd 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -93,7 +93,8 @@ function remove_brick_status_completed_field { + + function get_mount_process_pid { + local vol=$1 +- ps auxww | grep glusterfs | grep -E "volfile-id[ =]/?$vol " | awk '{print $2}' | head -1 ++ local mnt=$2 ++ ps auxww | grep glusterfs | grep -E "volfile-id[ =]/?$vol .*$mnt" | awk '{print $2}' | head -1 + } + + function get_nfs_pid () +@@ -126,7 +127,8 @@ function generate_statedump { + + function generate_mount_statedump { + local vol=$1 +- generate_statedump $(get_mount_process_pid $vol) ++ local mnt=$2 ++ generate_statedump $(get_mount_process_pid $vol $mnt) + } + + function cleanup_mount_statedump { +@@ -850,7 +852,8 @@ function get_active_fd_count { + + function get_mount_active_size_value { + local vol=$1 +- local statedump=$(generate_mount_statedump $vol) ++ local mount=$2 ++ local statedump=$(generate_mount_statedump $vol $mount) + sleep 1 + local val=$(grep "active_size" $statedump | cut -f2 -d'=' | tail -1) + rm -f $statedump +@@ -859,7 +862,8 @@ function get_mount_active_size_value { + + function get_mount_lru_size_value { + local vol=$1 +- local statedump=$(generate_mount_statedump $vol) ++ local mount=$2 ++ local statedump=$(generate_mount_statedump $vol $mount) + sleep 1 + local val=$(grep "lru_size" $statedump | cut -f2 -d'=' | tail -1) + rm -f $statedump +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index eb32168..fb88315 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -651,7 +651,8 @@ out: + + inode_t * + __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, +- inode_t *base_inode, int block_num) ++ inode_t *base_inode, int block_num, ++ uuid_t gfid) + { + char block_bname[256] = {0,}; + inode_t *lru_inode = NULL; +@@ -679,10 +680,13 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, + inode_ref (linked_inode); + if 
(base_inode) + gf_uuid_copy (ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; + list_add_tail (&ctx->ilist, &priv->ilist_head); + priv->inode_count++; +- ctx->base_inode = base_inode; ++ if (base_inode) ++ ctx->base_inode = inode_ref (base_inode); + } else { + /*If on the other hand there is no available slot for this inode + * in the list, delete the lru inode from the head of the list, +@@ -701,6 +705,8 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, + * deleted from fsync list and fsync'd in a new frame, + * and then unlinked in memory and forgotten. + */ ++ if (!lru_base_inode) ++ goto after_fsync_check; + LOCK (&lru_base_inode->lock); + LOCK (&lru_inode->lock); + { +@@ -715,6 +721,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, + UNLOCK (&lru_inode->lock); + UNLOCK (&lru_base_inode->lock); + ++after_fsync_check: + if (!do_fsync) { + shard_make_block_bname (lru_inode_ctx->block_num, + lru_inode_ctx->base_gfid, +@@ -729,20 +736,31 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, + inode_forget (lru_inode, 0); + } else { + fsync_inode = lru_inode; +- inode_unref (lru_base_inode); ++ if (lru_base_inode) ++ inode_unref (lru_base_inode); + } + /* The following unref corresponds to the ref + * held by inode_find() above. + */ + inode_unref (lru_inode); ++ ++ /* The following unref corresponds to the ref held on ++ * the base shard at the time of adding shard inode to ++ * lru list ++ */ ++ if (lru_base_inode) ++ inode_unref (lru_base_inode); + /* For as long as an inode is in lru list, we try to + * keep it alive by holding a ref on it. + */ + inode_ref (linked_inode); + if (base_inode) + gf_uuid_copy (ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy (ctx->base_gfid, gfid); + ctx->block_num = block_num; +- ctx->base_inode = base_inode; ++ if (base_inode) ++ ctx->base_inode = inode_ref (base_inode); + list_add_tail (&ctx->ilist, &priv->ilist_head); + } + } else { +@@ -1027,7 +1045,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this, + fsync_inode = __shard_update_shards_inode_list (inode, + this, + res_inode, +- shard_idx_iter); ++ shard_idx_iter, gfid); + } + UNLOCK(&priv->lock); + shard_idx_iter++; +@@ -2173,7 +2191,8 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode, + fsync_inode = __shard_update_shards_inode_list (linked_inode, + this, + local->loc.inode, +- block_num); ++ block_num, ++ gfid); + } + UNLOCK(&priv->lock); + if (fsync_inode) +@@ -2881,6 +2900,7 @@ void + shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + { + char block_bname[256] = {0,}; ++ int unref_base_inode = 0; + uuid_t gfid = {0,}; + inode_t *inode = NULL; + inode_t *base_inode = NULL; +@@ -2894,11 +2914,12 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + priv = this->private; + + inode = local->inode_list[shard_block_num - local->first_block]; +- base_inode = local->resolver_base_inode; ++ shard_inode_ctx_get (inode, this, &ctx); ++ base_inode = ctx->base_inode; + if (base_inode) + gf_uuid_copy (gfid, base_inode->gfid); + else +- gf_uuid_copy (gfid, local->base_gfid); ++ gf_uuid_copy (gfid, ctx->base_gfid); + + shard_make_block_bname (shard_block_num, gfid, + block_bname, sizeof (block_bname)); +@@ -2912,17 +2933,16 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + if (!list_empty (&ctx->ilist)) { + list_del_init (&ctx->ilist); + 
priv->inode_count--; ++ unref_base_inode++; + GF_ASSERT (priv->inode_count >= 0); + unlink_unref_forget = _gf_true; + } + if (ctx->fsync_needed) { +- if (base_inode) +- inode_unref (base_inode); ++ unref_base_inode++; + list_del_init (&ctx->to_fsync_list); +- if (base_inode) { ++ if (base_inode) + __shard_inode_ctx_get (base_inode, this, &base_ictx); +- base_ictx->fsync_count--; +- } ++ base_ictx->fsync_count--; + } + } + UNLOCK(&inode->lock); +@@ -2933,6 +2953,8 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + inode_unref (inode); + inode_forget (inode, 0); + } ++ if (base_inode && unref_base_inode) ++ inode_ref_reduce_by_n (base_inode, unref_base_inode); + UNLOCK(&priv->lock); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0444-features-shard-fix-formatting-warning.patch b/SOURCES/0444-features-shard-fix-formatting-warning.patch new file mode 100644 index 0000000..8e29c61 --- /dev/null +++ b/SOURCES/0444-features-shard-fix-formatting-warning.patch @@ -0,0 +1,31 @@ +From 6dbeac0371f3f2b42c0b428ba9f95b4ac3bc889d Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Thu, 8 Nov 2018 18:42:26 +0100 +Subject: [PATCH 444/444] features/shard: fix formatting warning + +BUG: 1603118 +Change-Id: I6191351f824901a45416ffe7610ad2b964645012 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/155395 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/shard/src/shard.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index fb88315..5b72399 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -6832,7 +6832,7 @@ shard_priv_dump (xlator_t *this) + gf_uint64_2human_readable (priv->block_size)); + gf_proc_dump_write ("inode-count", "%d", priv->inode_count); + gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head); +- gf_proc_dump_write ("lru-max-limit", "%d", priv->lru_limit); ++ gf_proc_dump_write ("lru-max-limit", "%" PRIu64, priv->lru_limit); + + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0445-glusterd-don-t-call-svcs_reconfigure-for-all-volumes.patch b/SOURCES/0445-glusterd-don-t-call-svcs_reconfigure-for-all-volumes.patch new file mode 100644 index 0000000..90ad5de --- /dev/null +++ b/SOURCES/0445-glusterd-don-t-call-svcs_reconfigure-for-all-volumes.patch @@ -0,0 +1,75 @@ +From 15d1f5b80b1eeb9c8f7d85c72247ffc4ef704267 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Fri, 9 Nov 2018 12:44:20 +0530 +Subject: [PATCH 445/450] glusterd: don't call svcs_reconfigure for all volumes + during op-version bump up + +With having large number of volumes in a configuration having +svcs_reconfigure () called for every volumes makes cluster.op-version bump up to +time out. Instead call svcs_reconfigure () only once. 
+ +> Change-Id: Ic6a133d77113c992a4dbeaf7f5663b7ffcbb0ae9 +> Fixes: bz#1648237 +> Signed-off-by: Atin Mukherjee + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21608/ + +Change-Id: Ic6a133d77113c992a4dbeaf7f5663b7ffcbb0ae9 +BUG: 1648210 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/156190 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 716d3f2..8d767cc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2622,6 +2622,7 @@ glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict, + glusterd_volinfo_t *volinfo = NULL; + glusterd_svc_t *svc = NULL; + gf_boolean_t start_nfs_svc = _gf_false; ++ gf_boolean_t svcs_reconfigure = _gf_false; + + conf = this->private; + ret = dict_get_str (dict, "key1", &key); +@@ -2717,15 +2718,16 @@ glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict, + } + if (GLUSTERD_STATUS_STARTED + == volinfo->status) { +- ret = glusterd_svcs_reconfigure (); +- if (ret) { +- gf_msg (this->name, +- GF_LOG_ERROR, 0, +- GD_MSG_SVC_RESTART_FAIL, +- "Unable to restart " +- "services"); +- goto out; +- } ++ svcs_reconfigure = _gf_true; ++ } ++ } ++ if (svcs_reconfigure) { ++ ret = glusterd_svcs_reconfigure(); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_SVC_RESTART_FAIL, ++ "Unable to restart services"); ++ goto out; + } + } + if (start_nfs_svc) { +@@ -2758,7 +2760,6 @@ glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict, + ret = dict_set_str (dup_opt, key, value); + if (ret) + goto out; +- + ret = glusterd_get_next_global_opt_version_str (conf->opts, + &next_version); + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0446-core-Portmap-entries-showing-stale-brick-entries-whe.patch b/SOURCES/0446-core-Portmap-entries-showing-stale-brick-entries-whe.patch new file mode 100644 index 0000000..451c01d --- /dev/null +++ b/SOURCES/0446-core-Portmap-entries-showing-stale-brick-entries-whe.patch @@ -0,0 +1,91 @@ +From 1de7497540b8428187df5048a1b8e82c2feec604 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 19 Nov 2018 13:00:57 +0530 +Subject: [PATCH 446/450] core: Portmap entries showing stale brick entries + when bricks are down + +Problem: pmap is showing stale brick entries after down the brick + because of glusterd_brick_rpc_notify call gf_is_service_running + before call pmap_registry_remove to ensure about brick instance. 
+ +Solutiom: 1) Change the condition in gf_is_pid_running to ensure about + process existence, use open instead of access to achieve + the same + 2) Call search_brick_path_from_proc in __glusterd_brick_rpc_notify + along with gf_is_service_running + +> Change-Id: Ia663ac61c01fdee6c12f47c0300cdf93f19b6a19 +> fixes: bz#1646892 +> Signed-off-by: Mohit Agrawal +> (Cherry picked from commit bcf1e8b07491b48c5372924dbbbad5b8391c6d81) +> (Reviwed on upstream link https://review.gluster.org/#/c/glusterfs/+/21568/) + +BUG: 1649651 +Change-Id: I06b0842d5e3ffc909304529311709064237ccc94 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/156326 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/common-utils.c | 5 ++++- + xlators/mgmt/glusterd/src/glusterd-handler.c | 7 +++++-- + xlators/mgmt/glusterd/src/glusterd-utils.h | 2 ++ + 3 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index 54ef875..dd6cdb3 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -3986,13 +3986,16 @@ gf_boolean_t + gf_is_pid_running (int pid) + { + char fname[32] = {0,}; ++ int fd = -1; + + snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid); + +- if (sys_access (fname , R_OK) != 0) { ++ fd = sys_open(fname, O_RDONLY, 0); ++ if (fd < 0) { + return _gf_false; + } + ++ sys_close(fd); + return _gf_true; + + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index bf37e70..a129afc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -6193,11 +6193,14 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + /* In case of an abrupt shutdown of a brick PMAP_SIGNOUT + * event is not received by glusterd which can lead to a + * stale port entry in glusterd, so forcibly clean up +- * the same if the process is not running ++ * the same if the process is not running sometime ++ * gf_is_service_running true so to ensure about brick instance ++ * call search_brick_path_from_proc + */ + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, + brickinfo, conf); +- if (!gf_is_service_running (pidfile, &pid)) { ++ if (!gf_is_service_running (pidfile, &pid) || ++ !search_brick_path_from_proc(pid, brickinfo->path)) { + ret = pmap_registry_remove ( + THIS, brickinfo->port, + brickinfo->path, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index ffcc636..8e5320d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -897,4 +897,6 @@ glusterd_get_index_basepath (glusterd_brickinfo_t *brickinfo, char *buffer, + gf_boolean_t + glusterd_is_profile_on (glusterd_volinfo_t *volinfo); + ++char * ++search_brick_path_from_proc(pid_t brick_pid, char *brickpath); + #endif +-- +1.8.3.1 + diff --git a/SOURCES/0447-cluster-ec-Don-t-update-trusted.ec.version-if-fop-su.patch b/SOURCES/0447-cluster-ec-Don-t-update-trusted.ec.version-if-fop-su.patch new file mode 100644 index 0000000..8475e23 --- /dev/null +++ b/SOURCES/0447-cluster-ec-Don-t-update-trusted.ec.version-if-fop-su.patch @@ -0,0 +1,55 @@ +From afff5f5aaab363afebb8fd359af2b8403b992930 Mon Sep 17 00:00:00 2001 +From: Ashish Pandey +Date: Thu, 6 Sep 2018 11:20:32 +0530 +Subject: [PATCH 447/450] cluster/ec: Don't update trusted.ec.version if fop + succeeds + +If a fop has succeeded on all the 
bricks and trying to release +the lock, there is no need to update the version for the +file/entry. All it will do is to increase the version from +x to x+1 on all the bricks. + +If this update (x to x+1) fails on some brick, this will indicate +that the entry is unhealthy while in realty everything is fine +with the entry. + +Avoiding this update will help to not to send one xattrop +at the end of the fops. Which will decrease the chances +of entries being in unhealthy state and also improve the +performance. + +upstream patch : https://review.gluster.org/#/c/glusterfs/+/21105 + +Change-Id: Id9fca6bd2991425db6ed7d1f36af27027accb636 +BUG: 1626350 +Signed-off-by: Ashish Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/156342 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-common.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 6d0eb62..a7a8234 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2372,6 +2372,15 @@ ec_update_info(ec_lock_link_t *link) + if (ctx->dirty[1] != 0) { + dirty[1] = -1; + } ++ /*If everything is fine and we already ++ *have version xattr set on entry, there ++ *is no need to update version again*/ ++ if (ctx->pre_version[0]) { ++ version[0] = 0; ++ } ++ if (ctx->pre_version[1]) { ++ version[1] = 0; ++ } + } else { + link->optimistic_changelog = _gf_false; + ec_set_dirty_flag (link, ctx, dirty); +-- +1.8.3.1 + diff --git a/SOURCES/0448-core-Resolve-memory-leak-at-the-time-of-graph-init.patch b/SOURCES/0448-core-Resolve-memory-leak-at-the-time-of-graph-init.patch new file mode 100644 index 0000000..b8980f0 --- /dev/null +++ b/SOURCES/0448-core-Resolve-memory-leak-at-the-time-of-graph-init.patch @@ -0,0 +1,68 @@ +From 597826a5fa4e307a23615a03031d2df0f739652f Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 20 Nov 2018 15:55:31 +0530 +Subject: [PATCH 448/450] core: Resolve memory leak at the time of graph init + +Problem: Memory leak when graph init fails as during volfile + exchange between brick and glusterd + +Solution: Fix the error code path in glusterfs_graph_init + +> Change-Id: If62bee61283fccb7fd60abc6ea217cfac12358fa +> fixes: bz#1651431 +> Signed-off-by: Mohit Agrawal +> (cherry pick from commit 751b14f2bfd40e08ad395ccd98c6eb0a41ac4e91) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21658/) + +Change-Id: I29fd290e71754214cc242eac0cc9461d18abec81 +BUG: 1650138 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/156358 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd.c | 11 +++++++---- + xlators/mgmt/glusterd/src/glusterd.c | 4 ---- + 2 files changed, 7 insertions(+), 8 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 6b7adc4..262a0c1 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -2383,11 +2383,14 @@ out: + if (fp) + fclose (fp); + +- if (ret && !ctx->active) { +- glusterfs_graph_destroy (graph); ++ if (ret) { ++ if (graph && (ctx && (ctx->active != graph))) ++ glusterfs_graph_destroy (graph); + /* there is some error in setting up the first graph itself */ +- emancipate (ctx, ret); +- cleanup_and_exit (ret); ++ if (!ctx->active) { ++ emancipate (ctx, ret); ++ cleanup_and_exit (ret); ++ } + } + + return ret; +diff --git 
a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index 076019f..ca17526 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1120,10 +1120,6 @@ glusterd_init_uds_listener (xlator_t *this) + strncpy (sockfile, sock_data->data, UNIX_PATH_MAX); + } + +- options = dict_new (); +- if (!options) +- goto out; +- + ret = rpcsvc_transport_unix_options_build (&options, sockfile); + if (ret) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0449-glusterd-mux-Optimize-brick-disconnect-handler-code.patch b/SOURCES/0449-glusterd-mux-Optimize-brick-disconnect-handler-code.patch new file mode 100644 index 0000000..3364960 --- /dev/null +++ b/SOURCES/0449-glusterd-mux-Optimize-brick-disconnect-handler-code.patch @@ -0,0 +1,397 @@ +From afcb244f1264af8b0df42b5c79905fd52f01b924 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Thu, 15 Nov 2018 13:18:36 +0530 +Subject: [PATCH 449/450] glusterd/mux: Optimize brick disconnect handler code + +Removed unnecessary iteration during brick disconnect +handler when multiplex is enabled. + + >Change-Id: I62dd3337b7e7da085da5d76aaae206e0b0edff9f + >fixes: bz#1650115 + >Signed-off-by: Mohammed Rafi KC +upstream patch : https://review.gluster.org/#/c/glusterfs/+/21651/ + +Change-Id: I62dd3337b7e7da085da5d76aaae206e0b0edff9f +BUG: 1649651 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/156327 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 74 ++++------------ + xlators/mgmt/glusterd/src/glusterd-utils.c | 122 +++++++++++++-------------- + xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +- + xlators/mgmt/glusterd/src/glusterd.h | 21 +++-- + 4 files changed, 87 insertions(+), 133 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index a129afc..cab0dec 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -6046,37 +6046,6 @@ out: + + static int gd_stale_rpc_disconnect_log; + +-static int +-glusterd_mark_bricks_stopped_by_proc (glusterd_brick_proc_t *brick_proc) { +- glusterd_brickinfo_t *brickinfo = NULL; +- glusterd_brickinfo_t *brickinfo_tmp = NULL; +- glusterd_volinfo_t *volinfo = NULL; +- int ret = -1; +- +- cds_list_for_each_entry (brickinfo, &brick_proc->bricks, brick_list) { +- ret = glusterd_get_volinfo_from_brick (brickinfo->path, +- &volinfo); +- if (ret) { +- gf_msg (THIS->name, GF_LOG_ERROR, 0, +- GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo" +- " from brick(%s)", brickinfo->path); +- goto out; +- } +- cds_list_for_each_entry (brickinfo_tmp, &volinfo->bricks, +- brick_list) { +- if (strcmp (brickinfo->path, +- brickinfo_tmp->path) == 0) { +- glusterd_set_brick_status (brickinfo_tmp, +- GF_BRICK_STOPPED); +- brickinfo_tmp->start_triggered = _gf_false; +- } +- } +- } +- return 0; +-out: +- return ret; +-} +- + int + __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +@@ -6087,7 +6056,6 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; +- int temp = 0; + int32_t pid = -1; + glusterd_brickinfo_t *brickinfo_tmp = NULL; + glusterd_brick_proc_t *brick_proc = NULL; +@@ -6218,33 +6186,21 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + } + } + +- if 
(is_brick_mx_enabled()) { +- cds_list_for_each_entry (brick_proc, &conf->brick_procs, +- brick_proc_list) { +- cds_list_for_each_entry (brickinfo_tmp, +- &brick_proc->bricks, +- brick_list) { +- if (strcmp (brickinfo_tmp->path, +- brickinfo->path) == 0) { +- ret = glusterd_mark_bricks_stopped_by_proc +- (brick_proc); +- if (ret) { +- gf_msg(THIS->name, +- GF_LOG_ERROR, 0, +- GD_MSG_BRICK_STOP_FAIL, +- "Unable to stop " +- "bricks of process" +- " to which brick(%s)" +- " belongs", +- brickinfo->path); +- goto out; +- } +- temp = 1; +- break; +- } +- } +- if (temp == 1) +- break; ++ if (is_brick_mx_enabled() && glusterd_is_brick_started(brickinfo)) { ++ brick_proc = brickinfo->brick_proc; ++ if (!brick_proc) ++ break; ++ cds_list_for_each_entry(brickinfo_tmp, &brick_proc->bricks, ++ mux_bricks) ++ { ++ glusterd_set_brick_status(brickinfo_tmp, GF_BRICK_STOPPED); ++ brickinfo_tmp->start_triggered = _gf_false; ++ /* When bricks are stopped, ports also need to ++ * be cleaned up ++ */ ++ pmap_registry_remove( ++ THIS, brickinfo_tmp->port, brickinfo_tmp->path, ++ GF_PMAP_PORT_BRICKSERVER, NULL, _gf_true); + } + } else { + glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 7179a68..ec7e27a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1088,6 +1088,7 @@ glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) + goto out; + + CDS_INIT_LIST_HEAD (&new_brickinfo->brick_list); ++ CDS_INIT_LIST_HEAD (&new_brickinfo->mux_bricks); + pthread_mutex_init (&new_brickinfo->restart_mutex, NULL); + *brickinfo = new_brickinfo; + +@@ -1978,6 +1979,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, + struct rpc_clnt *rpc = NULL; + rpc_clnt_connection_t *conn = NULL; + int pid = -1; ++ glusterd_brick_proc_t *brick_proc = NULL; + + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); +@@ -2188,15 +2190,20 @@ retry: + goto out; + } + +- ret = glusterd_brick_process_add_brick (brickinfo); ++ ret = glusterd_brickprocess_new(&brick_proc); + if (ret) { +- gf_msg (this->name, GF_LOG_ERROR, 0, +- GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s " +- "to brick process failed.", brickinfo->hostname, +- brickinfo->path); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICKPROC_NEW_FAILED, ++ "Failed to create new brick process instance"); + goto out; + } + ++ brick_proc->port = brickinfo->port; ++ cds_list_add_tail(&brick_proc->brick_proc_list, &priv->brick_procs); ++ brickinfo->brick_proc = brick_proc; ++ cds_list_add_tail(&brickinfo->mux_bricks, &brick_proc->bricks); ++ brickinfo->brick_proc = brick_proc; ++ brick_proc->brick_count++; ++ + connect: + ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); + if (ret) { +@@ -2328,9 +2335,6 @@ glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; +- glusterd_brickinfo_t *brickinfoiter = NULL; +- glusterd_brick_proc_t *brick_proc_tmp = NULL; +- glusterd_brickinfo_t *tmp = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); +@@ -2339,48 +2343,44 @@ glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + +- cds_list_for_each_entry_safe (brick_proc, brick_proc_tmp, +- &priv->brick_procs, brick_proc_list) { +- if (brickinfo->port != 
brick_proc->port) { +- continue; +- } +- +- GF_VALIDATE_OR_GOTO (this->name, (brick_proc->brick_count > 0), out); ++ brick_proc = brickinfo->brick_proc; ++ if (!brick_proc) { ++ if (brickinfo->status != GF_BRICK_STARTED) { ++ /* this function will be called from gluster_pmap_signout and ++ * glusterd_volume_stop_glusterfs. So it is possible to have ++ * brick_proc set as null. ++ */ ++ ret = 0; ++ } ++ goto out; ++ } + +- cds_list_for_each_entry_safe (brickinfoiter, tmp, +- &brick_proc->bricks, brick_list) { +- if (strcmp (brickinfoiter->path, brickinfo->path) == 0) { +- cds_list_del_init (&brickinfoiter->brick_list); ++ GF_VALIDATE_OR_GOTO(this->name, (brick_proc->brick_count > 0), out); + +- GF_FREE (brickinfoiter->logfile); +- GF_FREE (brickinfoiter); +- brick_proc->brick_count--; +- break; +- } +- } ++ cds_list_del_init(&brickinfo->mux_bricks); ++ brick_proc->brick_count--; + +- /* If all bricks have been removed, delete the brick process */ +- if (brick_proc->brick_count == 0) { +- ret = glusterd_brickprocess_delete (brick_proc); +- if (ret) +- goto out; +- } +- break; ++ /* If all bricks have been removed, delete the brick process */ ++ if (brick_proc->brick_count == 0) { ++ ret = glusterd_brickprocess_delete(brick_proc); ++ if (ret) ++ goto out; + } + ++ brickinfo->brick_proc = NULL; + ret = 0; + out: + return ret; + } + + int +-glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo) ++glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, ++ glusterd_brickinfo_t *parent_brickinfo) + { + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; +- glusterd_brickinfo_t *brickinfo_dup = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); +@@ -2389,37 +2389,28 @@ glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo) + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + +- ret = glusterd_brickinfo_new (&brickinfo_dup); +- if (ret) { +- gf_msg ("glusterd", GF_LOG_ERROR, 0, +- GD_MSG_BRICK_NEW_INFO_FAIL, +- "Failed to create new brickinfo"); +- goto out; +- } +- +- ret = glusterd_brickinfo_dup (brickinfo, brickinfo_dup); +- if (ret) { +- gf_msg (this->name, GF_LOG_ERROR, 0, +- GD_MSG_BRICK_SET_INFO_FAIL, "Failed to dup brickinfo"); +- goto out; +- } +- +- ret = glusterd_brick_proc_for_port (brickinfo->port, &brick_proc); +- if (ret) { +- ret = glusterd_brickprocess_new (&brick_proc); ++ if (!parent_brickinfo) { ++ ret = glusterd_brick_proc_for_port(brickinfo->port, ++ &brick_proc); + if (ret) { +- gf_msg (this->name, GF_LOG_ERROR, 0, +- GD_MSG_BRICKPROC_NEW_FAILED, "Failed to create " +- "new brick process instance"); +- goto out; ++ ret = glusterd_brickprocess_new (&brick_proc); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_BRICKPROC_NEW_FAILED, ++ "Failed to create " ++ "new brick process instance"); ++ goto out; ++ } ++ brick_proc->port = brickinfo->port; ++ cds_list_add_tail(&brick_proc->brick_proc_list, ++ &priv->brick_procs); + } +- +- brick_proc->port = brickinfo->port; +- +- cds_list_add_tail (&brick_proc->brick_proc_list, &priv->brick_procs); ++ } else { ++ ret = 0; ++ brick_proc = parent_brickinfo->brick_proc; + } +- +- cds_list_add_tail (&brickinfo_dup->brick_list, &brick_proc->bricks); ++ cds_list_add_tail(&brickinfo->mux_bricks, &brick_proc->bricks); ++ brickinfo->brick_proc = brick_proc; + brick_proc->brick_count++; + out: + return ret; +@@ -2538,6 +2529,7 @@ glusterd_volume_stop_glusterfs 
(glusterd_volinfo_t *volinfo, + + brickinfo->status = GF_BRICK_STOPPED; + brickinfo->start_triggered = _gf_false; ++ brickinfo->brick_proc = NULL; + if (del_brick) + glusterd_delete_brick (volinfo, brickinfo); + out: +@@ -5704,7 +5696,8 @@ attach_brick (xlator_t *this, + goto out; + } + brickinfo->port = other_brick->port; +- ret = glusterd_brick_process_add_brick (brickinfo); ++ ret = glusterd_brick_process_add_brick(brickinfo ++ , other_brick); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, +@@ -6259,7 +6252,8 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + (void) glusterd_brick_connect (volinfo, brickinfo, + socketpath); + +- ret = glusterd_brick_process_add_brick (brickinfo); ++ ret = glusterd_brick_process_add_brick (brickinfo, ++ NULL); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 8e5320d..69bb8c8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -179,7 +179,8 @@ int32_t + glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo); + + int +-glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo); ++glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, ++ glusterd_brickinfo_t *parent_brickinfo); + + int + glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo); +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index edd41aa..3dfbf9c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -211,6 +211,15 @@ typedef enum gf_brick_status { + GF_BRICK_STARTING + } gf_brick_status_t; + ++struct glusterd_brick_proc { ++ int port; ++ uint32_t brick_count; ++ struct cds_list_head brick_proc_list; ++ struct cds_list_head bricks; ++}; ++ ++typedef struct glusterd_brick_proc glusterd_brick_proc_t; ++ + struct glusterd_brickinfo { + char hostname[1024]; + char path[PATH_MAX]; +@@ -249,19 +258,13 @@ struct glusterd_brickinfo { + gf_boolean_t port_registered; + gf_boolean_t start_triggered; + pthread_mutex_t restart_mutex; ++ glusterd_brick_proc_t *brick_proc; /* Information regarding mux bricks */ ++ struct cds_list_head mux_bricks; ++ /* List to store the bricks in brick_proc*/ + }; + + typedef struct glusterd_brickinfo glusterd_brickinfo_t; + +-struct glusterd_brick_proc { +- int port; +- uint32_t brick_count; +- struct cds_list_head brick_proc_list; +- struct cds_list_head bricks; +-}; +- +-typedef struct glusterd_brick_proc glusterd_brick_proc_t; +- + struct gf_defrag_brickinfo_ { + char *name; + int files; +-- +1.8.3.1 + diff --git a/SOURCES/0450-glusterd-fix-Resource-leak-coverity-issue.patch b/SOURCES/0450-glusterd-fix-Resource-leak-coverity-issue.patch new file mode 100644 index 0000000..98b1fe7 --- /dev/null +++ b/SOURCES/0450-glusterd-fix-Resource-leak-coverity-issue.patch @@ -0,0 +1,65 @@ +From 92b94a92d2ab3a0d392c0ba6c412bc20144de956 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 20 Nov 2018 18:35:58 +0530 +Subject: [PATCH 450/450] glusterd: fix Resource leak coverity issue + +Problem: In commit bcf1e8b07491b48c5372924dbbbad5b8391c6d81 code + was missed to free path return by function search_brick_path_from_proc + +> This patch fixes CID: +> 1396668: Resource leak +> (Cherry pick from commit 818e60ac9269c49396480a151c049042af5b2929) +> (Reviewed on link 
https://review.gluster.org/#/c/glusterfs/+/21630/) + +> Change-Id: I4888c071c1058023c7e138a8bcb94ec97305fadf +> fixes: bz#1646892 + +Change-Id: I9b0a9f4257b74e65c9f8c8686a6b124445f64d64 +BUG: 1649651 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/156334 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index cab0dec..7486f51 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -6060,6 +6060,7 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + glusterd_brickinfo_t *brickinfo_tmp = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + char pidfile[PATH_MAX] = {0}; ++ char *brickpath = NULL; + + brickid = mydata; + if (!brickid) +@@ -6167,8 +6168,11 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + */ + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, + brickinfo, conf); +- if (!gf_is_service_running (pidfile, &pid) || +- !search_brick_path_from_proc(pid, brickinfo->path)) { ++ gf_is_service_running(pidfile, &pid); ++ if (pid > 0) ++ brickpath = search_brick_path_from_proc(pid, ++ brickinfo->path); ++ if (!gf_is_service_running (pidfile, &pid) || !brickpath) { + ret = pmap_registry_remove ( + THIS, brickinfo->port, + brickinfo->path, +@@ -6186,6 +6190,9 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + } + } + ++ if (brickpath) ++ GF_FREE (brickpath); ++ + if (is_brick_mx_enabled() && glusterd_is_brick_started(brickinfo)) { + brick_proc = brickinfo->brick_proc; + if (!brick_proc) +-- +1.8.3.1 + diff --git a/SOURCES/0451-core-Resolve-memory-leak-at-the-time-of-graph-init.patch b/SOURCES/0451-core-Resolve-memory-leak-at-the-time-of-graph-init.patch new file mode 100644 index 0000000..c8f8415 --- /dev/null +++ b/SOURCES/0451-core-Resolve-memory-leak-at-the-time-of-graph-init.patch @@ -0,0 +1,88 @@ +From 63aa90525f8f408526ee5e16c42dcc976245eca7 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 22 Nov 2018 18:28:35 +0530 +Subject: [PATCH 451/451] core: Resolve memory leak at the time of graph init + +Problem: In the commit 751b14f2bfd40e08ad395ccd98c6eb0a41ac4e91 + one code path is missed to avoid leak at the time + of calling graph init + +Solution: Before destroying graph call xlator fini to avoid leak for + server-side xlators those call init during graph init + +> Credit: Pranith Kumar Karampuri +> fixes: bz#1651431 + +> Change-Id: I6e7cff0d792ab9d954524b28667e94f2d9ec19a2 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 12285e76e8f93ef6f6aa2611869bd1f40955dc9e) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21695/) + +Change-Id: Ie81635622552d43f41bbbaf810c5009a2c772a31 +BUG: 1650138 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/156609 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd.c | 36 +++++++++++++++++++++++++++++++++--- + 1 file changed, 33 insertions(+), 3 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 262a0c1..2e43cdb 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -2338,6 +2338,23 @@ out: + } + #endif + ++int ++glusterfs_graph_fini(glusterfs_graph_t *graph) ++{ ++ xlator_t *trav 
= NULL; ++ ++ trav = graph->first; ++ ++ while (trav) { ++ if (trav->init_succeeded) { ++ trav->fini(trav); ++ trav->init_succeeded = 0; ++ } ++ trav = trav->next; ++ } ++ ++ return 0; ++} + + int + glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp) +@@ -2384,10 +2401,23 @@ out: + fclose (fp); + + if (ret) { +- if (graph && (ctx && (ctx->active != graph))) +- glusterfs_graph_destroy (graph); ++ /* TODO This code makes to generic for all graphs ++ client as well as servers.For now it destroys ++ graph only for server-side xlators not for client-side ++ xlators, before destroying a graph call xlator fini for ++ xlators those call xlator_init to avoid leak ++ */ ++ if (graph) { ++ xl = graph->first; ++ if ((ctx && (ctx->active != graph)) && ++ (xl && !strcmp(xl->type, "protocol/server"))) { ++ glusterfs_graph_fini(graph); ++ glusterfs_graph_destroy(graph); ++ } ++ } ++ + /* there is some error in setting up the first graph itself */ +- if (!ctx->active) { ++ if (!ctx || !ctx->active) { + emancipate (ctx, ret); + cleanup_and_exit (ret); + } +-- +1.8.3.1 + diff --git a/SOURCES/0452-glusterd-make-max-bricks-per-process-default-value-t.patch b/SOURCES/0452-glusterd-make-max-bricks-per-process-default-value-t.patch new file mode 100644 index 0000000..840e24e --- /dev/null +++ b/SOURCES/0452-glusterd-make-max-bricks-per-process-default-value-t.patch @@ -0,0 +1,38 @@ +From 30908dd034f9289f3f0120a17bb856a1da81422f Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Wed, 21 Nov 2018 07:49:07 +0530 +Subject: [PATCH 452/453] glusterd: make max-bricks-per-process default value + to 250 + +>Change-Id: Ia2c6a10e2b76a4aa8bd4ea97e5ce33bdc813942e +>Fixes: bz#1652118 +>Signed-off-by: Atin Mukherjee + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21701/ + +Change-Id: Ia2c6a10e2b76a4aa8bd4ea97e5ce33bdc813942e +BUG: 1653073 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/157424 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 1175f1d..27d7b20 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3534,7 +3534,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + }, + { .key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", +- .value = "0", ++ .value = "250", + .op_version = GD_OP_VERSION_3_11_1, + .validate_fn = validate_mux_limit, + .type = GLOBAL_DOC, +-- +1.8.3.1 + diff --git a/SOURCES/0453-server-Resolve-memory-leak-path-in-server_init.patch b/SOURCES/0453-server-Resolve-memory-leak-path-in-server_init.patch new file mode 100644 index 0000000..ca11a26 --- /dev/null +++ b/SOURCES/0453-server-Resolve-memory-leak-path-in-server_init.patch @@ -0,0 +1,384 @@ +From 919597d141dd79b34a9c0ef9e52a63cc43320d6c Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 3 Dec 2018 17:05:19 +0530 +Subject: [PATCH 453/453] server: Resolve memory leak path in server_init + +Problem: 1) server_init does not cleanup allocate resources + while it is failed before return error + 2) dict leak at the time of graph destroying + +Solution: 1) free resources in case of server_init is failed + 2) Take dict_ref of graph xlator before destroying + the graph to avoid leak + +> Change-Id: I9e31e156b9ed6bebe622745a8be0e470774e3d15 +> fixes: bz#1654917 +> Cherry pick 
from commit 46c15ea8fa98bb3d92580b192f03863c2e2a2d9c +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21750/ + +Change-Id: I5ba1b37840bcaa4a7fa4c05822c84016a2d89ea2 +BUG: 1650138 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/157445 +Tested-by: RHGS Build Bot +Reviewed-by: Pranith Kumar Karampuri +Reviewed-by: Raghavendra Gowdappa +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd.c | 4 +++ + libglusterfs/src/xlator.c | 31 ++++++++++++++++++ + libglusterfs/src/xlator.h | 3 ++ + rpc/rpc-lib/src/rpc-transport.c | 26 ++++++++++++---- + rpc/rpc-lib/src/rpc-transport.h | 3 ++ + rpc/rpc-lib/src/rpcsvc.c | 40 ++++++++++++++++++++++++ + rpc/rpc-lib/src/rpcsvc.h | 3 ++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 33 ++------------------ + xlators/protocol/server/src/server.c | 50 +++++++++++++++++++++++++++--- + 9 files changed, 151 insertions(+), 42 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 2e43cdb..78f3719 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -2411,6 +2411,10 @@ out: + xl = graph->first; + if ((ctx && (ctx->active != graph)) && + (xl && !strcmp(xl->type, "protocol/server"))) { ++ /* Take dict ref for every graph xlator to avoid dict leak ++ at the time of graph destroying ++ */ ++ gluster_graph_take_reference(graph->first); + glusterfs_graph_fini(graph); + glusterfs_graph_destroy(graph); + } +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 8aa8aa1..340d83d 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1225,3 +1225,34 @@ glusterfs_delete_volfile_checksum (glusterfs_ctx_t *ctx, + + return 0; + } ++ ++/* ++ The function is required to take dict ref for every xlator at graph. ++ At the time of compare graph topology create a graph and populate ++ key values in the dictionary, after finished graph comparison we do destroy ++ the new graph.At the time of construct graph we don't take any reference ++ so to avoid dict leak at the of destroying graph due to ref counter underflow ++ we need to call dict_ref here. 
++ ++*/ ++ ++void ++gluster_graph_take_reference(xlator_t *tree) ++{ ++ xlator_t *trav = tree; ++ xlator_t *prev = tree; ++ ++ if (!tree) { ++ gf_msg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, ++ "Translator tree not found"); ++ return; ++ } ++ ++ while (prev) { ++ trav = prev->next; ++ if (prev->options) ++ dict_ref(prev->options); ++ prev = trav; ++ } ++ return; ++} +diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h +index 1879641..f8f2630 100644 +--- a/libglusterfs/src/xlator.h ++++ b/libglusterfs/src/xlator.h +@@ -1087,4 +1087,7 @@ glusterfs_delete_volfile_checksum (glusterfs_ctx_t *ctx, + const char *volfile_id); + int + xlator_memrec_free (xlator_t *xl); ++ ++void ++gluster_graph_take_reference(xlator_t *tree); + #endif /* _XLATOR_H */ +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index 94880f4..77abf96 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -160,6 +160,25 @@ out: + return msg; + } + ++void ++rpc_transport_cleanup(rpc_transport_t *trans) ++{ ++ if (!trans) ++ return; ++ ++ if (trans->fini) ++ trans->fini(trans); ++ ++ GF_FREE(trans->name); ++ ++ if (trans->xl) ++ pthread_mutex_destroy(&trans->lock); ++ ++ if (trans->dl_handle) ++ dlclose(trans->dl_handle); ++ ++ GF_FREE(trans); ++} + + + rpc_transport_t * +@@ -361,12 +380,7 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name) + + fail: + if (trans) { +- GF_FREE (trans->name); +- +- if (trans->dl_handle) +- dlclose (trans->dl_handle); +- +- GF_FREE (trans); ++ rpc_transport_cleanup(trans); + } + + GF_FREE (name); +diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h +index 33f474e..23246c5 100644 +--- a/rpc/rpc-lib/src/rpc-transport.h ++++ b/rpc/rpc-lib/src/rpc-transport.h +@@ -316,4 +316,7 @@ rpc_transport_unix_options_build (dict_t **options, char *filepath, + + int + rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port); ++ ++void ++rpc_transport_cleanup(rpc_transport_t *); + #endif /* __RPC_TRANSPORT_H__ */ +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 8d0c409..695e9fb 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + #include "xdr-rpcclnt.h" + #include "glusterfs-acl.h" +@@ -1677,6 +1678,7 @@ rpcsvc_create_listener (rpcsvc_t *svc, dict_t *options, char *name) + + listener = rpcsvc_listener_alloc (svc, trans); + if (listener == NULL) { ++ ret = -1; + goto out; + } + +@@ -1684,6 +1686,7 @@ rpcsvc_create_listener (rpcsvc_t *svc, dict_t *options, char *name) + out: + if (!listener && trans) { + rpc_transport_disconnect (trans, _gf_true); ++ rpc_transport_cleanup(trans); + } + + return ret; +@@ -2285,6 +2288,43 @@ rpcsvc_get_throttle (rpcsvc_t *svc) + return svc->throttle; + } + ++/* Function call to cleanup resources for svc ++ */ ++int ++rpcsvc_destroy(rpcsvc_t *svc) ++{ ++ struct rpcsvc_auth_list *auth = NULL; ++ struct rpcsvc_auth_list *tmp = NULL; ++ rpcsvc_listener_t *listener = NULL; ++ rpcsvc_listener_t *next = NULL; ++ int ret = 0; ++ ++ if (!svc) ++ return ret; ++ ++ list_for_each_entry_safe(listener, next, &svc->listeners, list) ++ { ++ rpcsvc_listener_destroy(listener); ++ } ++ ++ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist) ++ { ++ list_del_init(&auth->authlist); ++ GF_FREE(auth); ++ } ++ ++ rpcsvc_program_unregister(svc, &gluster_dump_prog); ++ if (svc->rxpool) { ++ mem_pool_destroy(svc->rxpool); ++ 
svc->rxpool = NULL; ++ } ++ ++ pthread_mutex_destroy(&svc->rpclock); ++ GF_FREE(svc); ++ ++ return ret; ++} ++ + /* The global RPC service initializer. + */ + rpcsvc_t * +diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h +index 34429b4..d3aafac 100644 +--- a/rpc/rpc-lib/src/rpcsvc.h ++++ b/rpc/rpc-lib/src/rpcsvc.h +@@ -610,4 +610,7 @@ rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen); + rpcsvc_vector_sizer + rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum, + uint32_t progver, int procnum); ++ ++extern int ++rpcsvc_destroy(rpcsvc_t *svc); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ec7e27a..b63c95a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -9384,35 +9384,6 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, + + return ret; + } +-/* +- The function is required to take dict ref for every xlator at graph. +- At the time of compare graph topology create a graph and populate +- key values in the dictionary, after finished graph comparison we do destroy +- the new graph.At the time of construct graph we don't take any reference +- so to avoid leak due to ref counter underflow we need to call dict_ref here. +- +-*/ +- +-void +-glusterd_graph_take_reference (xlator_t *tree) +-{ xlator_t *trav = tree; +- xlator_t *prev = tree; +- +- if (!tree) { +- gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, +- "Translator tree not found"); +- return; +- } +- +- while (prev) { +- trav = prev->next; +- if (prev->options) +- dict_ref (prev->options); +- prev = trav; +- } +- return; +-} +- + + + int +@@ -9461,14 +9432,14 @@ glusterd_check_topology_identical (const char *filename1, + if (grph1 == NULL) + goto out; + +- glusterd_graph_take_reference (grph1->first); ++ gluster_graph_take_reference (grph1->first); + + /* create the graph for filename2 */ + grph2 = glusterfs_graph_construct(fp2); + if (grph2 == NULL) + goto out; + +- glusterd_graph_take_reference (grph2->first); ++ gluster_graph_take_reference (grph2->first); + + /* compare the graph topology */ + *identical = is_graph_topology_equal(grph1, grph2); +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index 65d712f..6f510ea 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -1144,12 +1144,54 @@ client_destroy_cbk (xlator_t *this, client_t *client) + return 0; + } + ++void ++server_cleanup(xlator_t *this, server_conf_t *conf) ++{ ++ if (!this || !conf) ++ return; ++ ++ LOCK_DESTROY(&conf->itable_lock); ++ pthread_mutex_destroy(&conf->mutex); ++ ++ if (this->ctx->event_pool) { ++ /* Free the event pool */ ++ (void)event_pool_destroy(this->ctx->event_pool); ++ } ++ ++ if (dict_get(this->options, "config-directory")) { ++ GF_FREE(conf->conf_dir); ++ conf->conf_dir = NULL; ++ } ++ ++ if (conf->child_status) { ++ GF_FREE(conf->child_status); ++ conf->child_status = NULL; ++ } ++ ++ if (this->ctx->statedump_path) { ++ GF_FREE(this->ctx->statedump_path); ++ this->ctx->statedump_path = NULL; ++ } ++ ++ if (conf->auth_modules) { ++ gf_auth_fini(conf->auth_modules); ++ dict_unref(conf->auth_modules); ++ } ++ ++ if (conf->rpc) { ++ (void)rpcsvc_destroy(conf->rpc); ++ conf->rpc = NULL; ++ } ++ ++ GF_FREE(conf); ++ this->private = NULL; ++} ++ + int + init (xlator_t *this) + { + int32_t ret = -1; + server_conf_t *conf = NULL; +- rpcsvc_listener_t *listener = 
NULL; + char *transport_type = NULL; + char *statedump_path = NULL; + int total_transport = 0; +@@ -1226,6 +1268,7 @@ init (xlator_t *this) + ret = gf_auth_init (this, conf->auth_modules); + if (ret) { + dict_unref (conf->auth_modules); ++ conf->auth_modules = NULL; + goto out; + } + +@@ -1378,10 +1421,7 @@ out: + if (this != NULL) { + this->fini (this); + } +- +- if (listener != NULL) { +- rpcsvc_listener_destroy (listener); +- } ++ server_cleanup(this, conf); + } + + return ret; +-- +1.8.3.1 + diff --git a/SOURCES/0454-glusterd-set-cluster.max-bricks-per-process-to-250.patch b/SOURCES/0454-glusterd-set-cluster.max-bricks-per-process-to-250.patch new file mode 100644 index 0000000..174caaf --- /dev/null +++ b/SOURCES/0454-glusterd-set-cluster.max-bricks-per-process-to-250.patch @@ -0,0 +1,40 @@ +From 9648930b5364efd42de7017068b088e6ca8ffe35 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 4 Dec 2018 19:19:58 +0530 +Subject: [PATCH 454/454] glusterd: set cluster.max-bricks-per-process to 250 + +Commit 6821cec changed this default from 0 to 250 in the option table, +however the same wasn't done in the global option table. + +>Change-Id: I6075f2ebc51e839510d6492fb62e706deb2d845b +>Fixes: bz#1652118 +>Signed-off-by: Atin Mukherjee + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21797/ + +Change-Id: I6075f2ebc51e839510d6492fb62e706deb2d845b +BUG: 1653073 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/157686 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 8d767cc..f0c43f4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -84,7 +84,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { + * can be attached per process. + * TBD: Discuss the default value for this. Maybe this should be a + * dynamic value depending on the memory specifications per node */ +- { GLUSTERD_BRICKMUX_LIMIT_KEY, "0"}, ++ { GLUSTERD_BRICKMUX_LIMIT_KEY, "250"}, + /*{ GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/ + { GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"}, + { NULL }, +-- +1.8.3.1 + diff --git a/SOURCES/0455-glusterd-fix-get_mux_limit_per_process-to-read-defau.patch b/SOURCES/0455-glusterd-fix-get_mux_limit_per_process-to-read-defau.patch new file mode 100644 index 0000000..27b74e5 --- /dev/null +++ b/SOURCES/0455-glusterd-fix-get_mux_limit_per_process-to-read-defau.patch @@ -0,0 +1,89 @@ +From cf1ba3e3835b78d4ee60984f63ee9f9421e3e8a0 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Thu, 6 Dec 2018 23:14:57 +0530 +Subject: [PATCH 455/455] glusterd: fix get_mux_limit_per_process to read + default value + +get_mux_limit_per_process () reads the global option dictionary and in +case it doesn't find out a key, assumes that +cluster.max-bricks-per-process option isn't configured however the +default value should be picked up in such case. 
+ +>Change-Id: I35dd8da084adbf59793d58557e818d8e6c17f9f3 +>Fixes: bz#1656951 +>Signed-off-by: Atin Mukherjee + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21819/ + +Change-Id: I35dd8da084adbf59793d58557e818d8e6c17f9f3 +BUG: 1656924 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/157960 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 7 +------ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + xlators/mgmt/glusterd/src/glusterd.h | 1 + + 4 files changed, 4 insertions(+), 8 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index f0c43f4..52a3db0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -84,7 +84,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { + * can be attached per process. + * TBD: Discuss the default value for this. Maybe this should be a + * dynamic value depending on the memory specifications per node */ +- { GLUSTERD_BRICKMUX_LIMIT_KEY, "250"}, ++ { GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE}, + /*{ GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/ + { GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"}, + { NULL }, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index b63c95a..d789c53 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -137,12 +137,7 @@ get_mux_limit_per_process (int *mux_limit) + + ret = dict_get_str (priv->opts, GLUSTERD_BRICKMUX_LIMIT_KEY, &value); + if (ret) { +- gf_msg_debug (this->name, 0, "Limit for number of bricks per " +- "brick process not yet set in dict. 
Returning " +- "limit as 0 denoting that multiplexing can " +- "happen with no limit set."); +- ret = 0; +- goto out; ++ value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE; + } + + ret = gf_string2int (value, &max_bricks_per_proc); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 27d7b20..fb6a81b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3534,7 +3534,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + }, + { .key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", +- .value = "250", ++ .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE, + .op_version = GD_OP_VERSION_3_11_1, + .validate_fn = validate_mux_limit, + .type = GLOBAL_DOC, +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 3dfbf9c..bfa8310 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -55,6 +55,7 @@ + #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage" + #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" + #define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process" ++#define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250" + #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging" + #define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level" + +-- +1.8.3.1 + diff --git a/SOURCES/0457-cluster-dht-sync-brick-root-perms-on-add-brick.patch b/SOURCES/0457-cluster-dht-sync-brick-root-perms-on-add-brick.patch new file mode 100644 index 0000000..cf819fa --- /dev/null +++ b/SOURCES/0457-cluster-dht-sync-brick-root-perms-on-add-brick.patch @@ -0,0 +1,122 @@ +From aad0d32376e6ca56770e5c2d4dc5a1462b1e7167 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Thu, 13 Dec 2018 12:06:10 +0530 +Subject: [PATCH 457/493] cluster/dht: sync brick root perms on add brick + +If a single brick is added to the volume and the +newly added brick is the first to respond to a +dht_revalidate call, its stbuf will not be merged +into local->stbuf as the brick does not yet have +a layout. The is_permission_different check therefore +fails to detect that an attr heal is required as it +only considers the stbuf values from existing bricks. +To fix this, merge all stbuf values into local->stbuf +and use local->prebuf to store the correct directory +attributes. 
+ +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21611/ + +> Change-Id: Ic9e8b04a1ab9ed1248b6b056e3450bbafe32e1bc +> fixes: bz#1648298 +> Signed-off-by: N Balachandran + +Change-Id: I329ce48555d15f741d7247290e749f0800d12df8 +BUG: 1648296 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/158493 +Tested-by: RHGS Build Bot +Reviewed-by: Susant Palai +Reviewed-by: Raghavendra Gowdappa +--- + tests/bugs/distribute/bug-1368012.t | 11 +++++------ + xlators/cluster/dht/src/dht-common.c | 26 ++++++++------------------ + 2 files changed, 13 insertions(+), 24 deletions(-) + +diff --git a/tests/bugs/distribute/bug-1368012.t b/tests/bugs/distribute/bug-1368012.t +index b861554..0b62635 100644 +--- a/tests/bugs/distribute/bug-1368012.t ++++ b/tests/bugs/distribute/bug-1368012.t +@@ -15,7 +15,7 @@ TEST pidof glusterd; + TEST $CLI volume info; + + ## Lets create volume +-TEST $CLI volume create $V0 $H0:/${V0}{1,2}; ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}; + + ## Verify volume is created + EXPECT "$V0" volinfo_field $V0 'Volume Name'; +@@ -36,17 +36,16 @@ TEST chmod 444 $M0 + TEST permission_root=`stat -c "%A" $M0` + TEST echo $permission_root + #Add-brick +-TEST $CLI volume add-brick $V0 $H0:/${V0}3 ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count + + #Allow one lookup to happen +-TEST pushd $M0 +-TEST ls ++TEST ls $M0 + #Generate another lookup + echo 3 > /proc/sys/vm/drop_caches +-TEST ls ++TEST ls $M0 + #check root permission + EXPECT_WITHIN "5" $permission_root get_permission $M0 + #check permission on the new-brick +-EXPECT $permission_root get_permission /${V0}3 ++EXPECT $permission_root get_permission $B0/${V0}3 + cleanup +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index d3a0c8b..2e19036 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1717,14 +1717,17 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (is_dir) { + ret = dht_dir_has_layout (xattr, conf->xattr_name); + if (ret >= 0) { +- if (is_greater_time(local->stbuf.ia_ctime, +- local->stbuf.ia_ctime_nsec, ++ if (is_greater_time(local->prebuf.ia_ctime, ++ local->prebuf.ia_ctime_nsec, + stbuf->ia_ctime, + stbuf->ia_ctime_nsec)) { + /* Choose source */ + local->prebuf.ia_gid = stbuf->ia_gid; + local->prebuf.ia_uid = stbuf->ia_uid; + ++ local->prebuf.ia_ctime = stbuf->ia_ctime; ++ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec; ++ + if (__is_root_gfid (stbuf->ia_gfid)) + local->prebuf.ia_prot = stbuf->ia_prot; + } +@@ -1792,22 +1795,9 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } + } + +- +- /* Update stbuf from the servers where layout is present. This +- * is an indication that the server is not a newly added brick. 
+- * Merging stbuf from newly added brick may result in the added +- * brick being the source of heal for uid/gid */ +- if (!is_dir || (is_dir && +- dht_dir_has_layout (xattr, conf->xattr_name) >= 0) +- || conf->subvolume_cnt == 1) { +- +- dht_iatt_merge (this, &local->stbuf, stbuf, prev); +- dht_iatt_merge (this, &local->postparent, postparent, +- prev); +- } else { +- /* copy the gfid anyway */ +- gf_uuid_copy (local->stbuf.ia_gfid, stbuf->ia_gfid); +- } ++ gf_uuid_copy (local->stbuf.ia_gfid, stbuf->ia_gfid); ++ dht_iatt_merge (this, &local->stbuf, stbuf, prev); ++ dht_iatt_merge (this, &local->postparent, postparent, prev); + + local->op_ret = 0; + +-- +1.8.3.1 + diff --git a/SOURCES/0458-glusterd-fix-crash.patch b/SOURCES/0458-glusterd-fix-crash.patch new file mode 100644 index 0000000..3ffd65e --- /dev/null +++ b/SOURCES/0458-glusterd-fix-crash.patch @@ -0,0 +1,42 @@ +From 3a1484c401d4293531c80532fa96c2f7cfc8aa2d Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 19 Sep 2018 19:49:51 +0530 +Subject: [PATCH 458/493] glusterd: fix crash + +When huge number of volumes are created, glusterd crash is seen. +With the core dump, got to know that mgmt_lock_timer became NULL. +Adding a null check for the same, need to explore about the root +cause. + +>updates: bz#1630922 +>Change-Id: I0770063fcbbbf4b24bef29e94b857b20bdfb5b85 +>Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21228/ + +Change-Id: I0770063fcbbbf4b24bef29e94b857b20bdfb5b85 +BUG: 1599220 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158542 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-locks.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c +index d75452d..d62d9dd 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-locks.c ++++ b/xlators/mgmt/glusterd/src/glusterd-locks.c +@@ -890,7 +890,7 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) + type, name); + + /* Release owner reference which was held during lock */ +- if (mgmt_lock_timer->timer) { ++ if (mgmt_lock_timer && mgmt_lock_timer->timer) { + ret = -1; + mgmt_lock_timer_xl = mgmt_lock_timer->xl; + GF_VALIDATE_OR_GOTO (this->name, mgmt_lock_timer_xl, out); +-- +1.8.3.1 + diff --git a/SOURCES/0459-glfsheal-add-a-nolog-flag.patch b/SOURCES/0459-glfsheal-add-a-nolog-flag.patch new file mode 100644 index 0000000..8e2837f --- /dev/null +++ b/SOURCES/0459-glfsheal-add-a-nolog-flag.patch @@ -0,0 +1,223 @@ +From 1637d5018aeea96efc2916afe162c4905ef2c2d9 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Fri, 14 Dec 2018 12:48:05 +0530 +Subject: [PATCH 459/493] glfsheal: add a '--nolog' flag + +(Upstream master patch: https://review.gluster.org/#/c/glusterfs/+/21501/) + +....and if set, change the log level to GF_LOG_NONE. This is useful for +monitoring applications which invoke the heal info set of commands once +every minute, leading to un-necessary glfsheal* logs in +/var/log/glusterfs/. For example, we can now run + +`gluster volume heal info --nolog` +`gluster volume heal info split-brain --nolog` etc. + +The default log level is still retained at GF_LOG_INFO. + +The patch also changes glfsheal internally to accept '--xml' instead of 'xml'. 
+Note: The --nolog flag is *not* displayed in the help anywhere, for the +sake of consistency in how the other flags are not displayed anywhere in +the help. + +Change-Id: I932d0f79070880b0f9ca87e164d3c2a3b831c8c4 +BUG: 1579293 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/158640 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 8 ++++--- + cli/src/cli.c | 5 ++++ + cli/src/cli.h | 1 + + heal/src/glfs-heal.c | 59 ++++++++++++++++++++++++++++++++++++------------ + 4 files changed, 56 insertions(+), 17 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 32efa73..8fca7eb 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2830,7 +2830,7 @@ cli_launch_glfs_heal (int heal_op, dict_t *options) + switch (heal_op) { + case GF_SHD_OP_INDEX_SUMMARY: + if (global_state->mode & GLUSTER_MODE_XML) { +- runner_add_args (&runner, "xml", NULL); ++ runner_add_args (&runner, "--xml", NULL); + } + break; + case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE: +@@ -2854,7 +2854,7 @@ cli_launch_glfs_heal (int heal_op, dict_t *options) + case GF_SHD_OP_SPLIT_BRAIN_FILES: + runner_add_args (&runner, "split-brain-info", NULL); + if (global_state->mode & GLUSTER_MODE_XML) { +- runner_add_args (&runner, "xml", NULL); ++ runner_add_args (&runner, "--xml", NULL); + } + break; + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE: +@@ -2864,12 +2864,14 @@ cli_launch_glfs_heal (int heal_op, dict_t *options) + case GF_SHD_OP_HEAL_SUMMARY: + runner_add_args (&runner, "info-summary", NULL); + if (global_state->mode & GLUSTER_MODE_XML) { +- runner_add_args (&runner, "xml", NULL); ++ runner_add_args (&runner, "--xml", NULL); + } + break; + default: + ret = -1; + } ++ if (global_state->mode & GLUSTER_MODE_GLFSHEAL_NOLOG) ++ runner_add_args(&runner, "--nolog", NULL); + ret = runner_start (&runner); + if (ret == -1) + goto out; +diff --git a/cli/src/cli.c b/cli/src/cli.c +index b64d4ef..3fd7bc5 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -340,6 +340,11 @@ cli_opt_parse (char *opt, struct cli_state *state) + return 0; + } + ++ if (strcmp(opt, "nolog") == 0) { ++ state->mode |= GLUSTER_MODE_GLFSHEAL_NOLOG; ++ return 0; ++ } ++ + if (strcmp (opt, "wignore-partition") == 0) { + state->mode |= GLUSTER_MODE_WIGNORE_PARTITION; + return 0; +diff --git a/cli/src/cli.h b/cli/src/cli.h +index 109dcd4..104d601 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -61,6 +61,7 @@ typedef enum { + #define GLUSTER_MODE_XML (1 << 2) + #define GLUSTER_MODE_WIGNORE (1 << 3) + #define GLUSTER_MODE_WIGNORE_PARTITION (1 << 4) ++#define GLUSTER_MODE_GLFSHEAL_NOLOG (1 << 5) + + + #define GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(abspath, volname, path) do { \ +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 153cd29..12746dc 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -40,6 +40,9 @@ xmlDocPtr glfsh_doc = NULL; + ret = 0; \ + } while (0) \ + ++#define MODE_XML (1 << 0) ++#define MODE_NO_LOG (1 << 1) ++ + typedef struct num_entries { + uint64_t num_entries; + uint64_t pending_entries; +@@ -1434,6 +1437,28 @@ out: + return ret; + } + ++static void ++parse_flags(int *argc, char **argv, int *flags) ++{ ++ int i = 0; ++ char *opt = NULL; ++ int count = 0; ++ ++ for (i = 0; i < *argc; i++) { ++ opt = strtail(argv[i], "--"); ++ if (!opt) ++ continue; ++ if (strcmp(opt, "nolog") == 0) { ++ *flags |= MODE_NO_LOG; ++ count++; ++ } else if (strcmp(opt, "xml") == 0) { ++ *flags |= 
MODE_XML; ++ count++; ++ } ++ } ++ *argc = *argc - count; ++} ++ + int + glfsh_heal_from_bigger_file_or_mtime (glfs_t *fs, xlator_t *top_subvol, + loc_t *rootloc, char *file, +@@ -1518,6 +1543,8 @@ main (int argc, char **argv) + char *file = NULL; + char *op_errstr = NULL; + gf_xl_afr_op_t heal_op = -1; ++ gf_loglevel_t log_level = GF_LOG_INFO; ++ int flags = 0; + + if (argc < 2) { + printf (USAGE_STR, argv[0]); +@@ -1526,6 +1553,13 @@ main (int argc, char **argv) + } + + volname = argv[1]; ++ ++ parse_flags(&argc, argv, &flags); ++ if (flags & MODE_NO_LOG) ++ log_level = GF_LOG_NONE; ++ if (flags & MODE_XML) ++ is_xml = 1; ++ + switch (argc) { + case 2: + heal_op = GF_SHD_OP_INDEX_SUMMARY; +@@ -1533,9 +1567,6 @@ main (int argc, char **argv) + case 3: + if (!strcmp (argv[2], "split-brain-info")) { + heal_op = GF_SHD_OP_SPLIT_BRAIN_FILES; +- } else if (!strcmp (argv[2], "xml")) { +- heal_op = GF_SHD_OP_INDEX_SUMMARY; +- is_xml = 1; + } else if (!strcmp (argv[2], "granular-entry-heal-op")) { + heal_op = GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE; + } else if (!strcmp (argv[2], "info-summary")) { +@@ -1547,15 +1578,7 @@ main (int argc, char **argv) + } + break; + case 4: +- if ((!strcmp (argv[2], "split-brain-info")) +- && (!strcmp (argv[3], "xml"))) { +- heal_op = GF_SHD_OP_SPLIT_BRAIN_FILES; +- is_xml = 1; +- } else if ((!strcmp (argv[2], "info-summary")) +- && (!strcmp (argv[3], "xml"))) { +- heal_op = GF_SHD_OP_HEAL_SUMMARY; +- is_xml = 1; +- } else if (!strcmp (argv[2], "bigger-file")) { ++ if (!strcmp (argv[2], "bigger-file")) { + heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE; + file = argv[3]; + } else if (!strcmp (argv[2], "latest-mtime")) { +@@ -1592,7 +1615,15 @@ main (int argc, char **argv) + glfsh_output = &glfsh_human_readable; + if (is_xml) { + #if (HAVE_LIB_XML) +- glfsh_output = &glfsh_xml_output; ++ if ((heal_op == GF_SHD_OP_INDEX_SUMMARY) || ++ (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) || ++ (heal_op == GF_SHD_OP_HEAL_SUMMARY)) { ++ glfsh_output = &glfsh_xml_output; ++ } else { ++ printf(USAGE_STR, argv[0]); ++ ret = -1; ++ goto out; ++ } + #else + /*No point doing anything, just fail the command*/ + exit (EXIT_FAILURE); +@@ -1636,7 +1667,7 @@ main (int argc, char **argv) + } + snprintf (logfilepath, sizeof (logfilepath), + DEFAULT_HEAL_LOG_FILE_DIRECTORY"/glfsheal-%s.log", volname); +- ret = glfs_set_logging(fs, logfilepath, GF_LOG_INFO); ++ ret = glfs_set_logging(fs, logfilepath, log_level); + if (ret < 0) { + ret = -errno; + gf_asprintf (&op_errstr, "Failed to set the log file path, " +-- +1.8.3.1 + diff --git a/SOURCES/0460-cli-add-a-warning-confirmation-message-in-peer-detac.patch b/SOURCES/0460-cli-add-a-warning-confirmation-message-in-peer-detac.patch new file mode 100644 index 0000000..8125367 --- /dev/null +++ b/SOURCES/0460-cli-add-a-warning-confirmation-message-in-peer-detac.patch @@ -0,0 +1,74 @@ +From 81b19743a97ebecc188d87fbe04dce59260824f8 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 6 Nov 2018 21:35:36 +0530 +Subject: [PATCH 460/493] cli: add a warning/confirmation message in peer + detach code path + +On a multi node cluster if one of the node is detached which had active +clients mounted through the same server address, this can cause all the +clients to loose any volfile changes. This is due to the lack of infra +in glusterd to let client know the list of IPs and attempt to connect to +other active nodes as failback. Such framework does exist in GD2 but not +in GD1. 
+ +This patch ensures to take a preventive measure to have a warning +message thrown to user to ensure all such connected clients are +remounted through a different IP. + +> Change-Id: I740b01868abbd75bf0a609cfaf00114d4d78aa96 +> Fixes: bz#1647074 +> Signed-off-by: Atin Mukherjee + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21572/ + +Change-Id: I740b01868abbd75bf0a609cfaf00114d4d78aa96 +BUG: 1639568 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158630 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-peer.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c +index 7df60bc..6f3b744 100644 +--- a/cli/src/cli-cmd-peer.c ++++ b/cli/src/cli-cmd-peer.c +@@ -111,13 +111,20 @@ cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word, + int sent = 0; + int parse_error = 0; + cli_local_t *local = NULL; ++ gf_answer_t answer = GF_ANSWER_NO; ++ const char *question = NULL; + + if ((wordcount < 3) || (wordcount > 4)) { + cli_usage_out (word->pattern); + parse_error = 1; + goto out; + } +- ++ question = ++ "All clients mounted through the peer which is getting detached" ++ " need to be remounted using one of the other active peers in " ++ "the trusted storage pool to ensure client gets notification on" ++ " any changes done on the gluster configuration and if the " ++ "same has been done do you want to proceed?"; + proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEPROBE]; + + frame = create_frame (THIS, THIS->ctx->pool); +@@ -149,6 +156,11 @@ cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word, + ret = dict_set_int32 (dict, "flags", flags); + if (ret) + goto out; ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ ret = 0; ++ goto out; ++ } + + CLI_LOCAL_INIT (local, words, frame, dict); + +-- +1.8.3.1 + diff --git a/SOURCES/0461-mount-fuse-Add-support-for-multi-threaded-fuse-reade.patch b/SOURCES/0461-mount-fuse-Add-support-for-multi-threaded-fuse-reade.patch new file mode 100644 index 0000000..aba7f00 --- /dev/null +++ b/SOURCES/0461-mount-fuse-Add-support-for-multi-threaded-fuse-reade.patch @@ -0,0 +1,835 @@ +From 668b55b7dd86b23e635cfb2264bc5e50f4cd888d Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Tue, 9 Jan 2018 15:11:00 +0530 +Subject: [PATCH 461/493] mount/fuse: Add support for multi-threaded fuse + readers + + > Upstream: https://review.gluster.org/19226 + > Github issue #412 + > Change-Id: I94aa1505e5ae6a133683d473e0e4e0edd139b76b + +Usage: Use 'reader-thread-count=' as command line option to +set the thread count at the time of mounting the volume. + +Next task is to make these threads auto-scale based on the load, +instead of having the user remount the volume everytime to change +the thread count. 
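+
+A minimal usage sketch (host, volume and mount point names below are
+placeholders; the count must stay within the 1-64 range this patch
+validates):
+
+    # via the mount helper option added below
+    mount -t glusterfs -o reader-thread-count=4 server1:/testvol /mnt/testvol
+
+    # or directly on the client process
+    glusterfs --reader-thread-count=4 --volfile-server=server1 \
+              --volfile-id=testvol /mnt/testvol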
+ +Change-Id: I94aa1505e5ae6a133683d473e0e4e0edd139b76b +BUG: 1651040 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/158514 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd.c | 26 ++++ + glusterfsd/src/glusterfsd.h | 1 + + libglusterfs/src/glusterfs.h | 1 + + xlators/mount/fuse/src/fuse-bridge.c | 231 ++++++++++++++++++---------- + xlators/mount/fuse/src/fuse-bridge.h | 9 +- + xlators/mount/fuse/src/fuse-helpers.c | 3 + + xlators/mount/fuse/src/fuse-mem-types.h | 1 + + xlators/mount/fuse/utils/mount.glusterfs.in | 7 + + 8 files changed, 196 insertions(+), 83 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 78f3719..03bca24 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -238,6 +238,8 @@ static struct argp_option gf_options[] = { + "Enable localtime logging"}, + {"event-history", ARGP_FUSE_EVENT_HISTORY_KEY, "BOOL", + OPTION_ARG_OPTIONAL, "disable/enable fuse event-history"}, ++ {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER", ++ OPTION_ARG_OPTIONAL, "set fuse reader thread count"}, + {0, 0, 0, 0, "Miscellaneous Options:"}, + {0, } + }; +@@ -557,6 +559,17 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) + goto err; + } + } ++ if (cmd_args->reader_thread_count) { ++ ret = dict_set_uint32 (options, "reader-thread-count", ++ cmd_args->reader_thread_count); ++ if (ret < 0) { ++ gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, ++ "failed to set dict value for key " ++ "reader-thread-count"); ++ goto err; ++ } ++ } ++ + ret = 0; + err: + return ret; +@@ -1307,6 +1320,19 @@ no_oom_api: + argp_failure (state, -1, 0, + "unknown event-history setting \"%s\"", arg); + break; ++ case ARGP_READER_THREAD_COUNT_KEY: ++ if (gf_string2uint32 (arg, &cmd_args->reader_thread_count)) { ++ argp_failure (state, -1, 0, ++ "unknown reader thread count option %s", ++ arg); ++ } else if ((cmd_args->reader_thread_count < 1) || ++ (cmd_args->reader_thread_count > 64)) { ++ argp_failure (state, -1, 0, ++ "Invalid reader thread count %s. 
" ++ "Valid range: [\"1, 64\"]", arg); ++ } ++ ++ break; + } + + return 0; +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index f66947b..75cb1d8 100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -99,6 +99,7 @@ enum argp_option_keys { + ARGP_LOCALTIME_LOGGING_KEY = 177, + ARGP_SUBDIR_MOUNT_KEY = 178, + ARGP_FUSE_EVENT_HISTORY_KEY = 179, ++ ARGP_READER_THREAD_COUNT_KEY = 180, + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index 5e641fd..3e2f426 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -446,6 +446,7 @@ struct _cmd_args { + char *subdir_mount; + + char *event_history; ++ uint32_t reader_thread_count; + }; + typedef struct _cmd_args cmd_args_t; + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index fbb4c53..8d1e3a0 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -665,7 +665,8 @@ fuse_lookup_resume (fuse_state_t *state) + } + + static void +-fuse_lookup (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_lookup (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + char *name = msg; + fuse_state_t *state = NULL; +@@ -693,7 +694,8 @@ do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup) + } + + static void +-fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + + { + struct fuse_forget_in *ffi = msg; +@@ -714,7 +716,8 @@ fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg) + + #if FUSE_KERNEL_MINOR_VERSION >= 16 + static void +-fuse_batch_forget(xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_batch_forget(xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_batch_forget_in *fbfi = msg; + struct fuse_forget_one *ffo = (struct fuse_forget_one *) (fbfi + 1); +@@ -932,7 +935,8 @@ fuse_getattr_resume (fuse_state_t *state) + } + + static void +-fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + #if FUSE_KERNEL_MINOR_VERSION >= 9 + struct fuse_getattr_in *fgi = msg; +@@ -1265,7 +1269,8 @@ fuse_setattr_resume (fuse_state_t *state) + } + + static void +-fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_setattr_in *fsi = msg; + +@@ -1492,7 +1497,8 @@ fuse_access_resume (fuse_state_t *state) + } + + static void +-fuse_access (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_access (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_access_in *fai = msg; + fuse_state_t *state = NULL; +@@ -1566,7 +1572,8 @@ fuse_readlink_resume (fuse_state_t *state) + } + + static void +-fuse_readlink (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_readlink (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + fuse_state_t *state = NULL; + +@@ -1616,7 +1623,8 @@ fuse_mknod_resume (fuse_state_t *state) + } + + static void +-fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_mknod_in *fmi = msg; + char *name = (char *)(fmi 
+ 1); +@@ -1686,7 +1694,8 @@ fuse_mkdir_resume (fuse_state_t *state) + } + + static void +-fuse_mkdir (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_mkdir (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_mkdir_in *fmi = msg; + char *name = (char *)(fmi + 1); +@@ -1738,7 +1747,8 @@ fuse_unlink_resume (fuse_state_t *state) + } + + static void +-fuse_unlink (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_unlink (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + char *name = msg; + fuse_state_t *state = NULL; +@@ -1775,7 +1785,8 @@ fuse_rmdir_resume (fuse_state_t *state) + } + + static void +-fuse_rmdir (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_rmdir (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + char *name = msg; + fuse_state_t *state = NULL; +@@ -1825,7 +1836,8 @@ fuse_symlink_resume (fuse_state_t *state) + } + + static void +-fuse_symlink (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_symlink (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + char *name = msg; + char *linkname = name + strlen (name) + 1; +@@ -1947,7 +1959,8 @@ fuse_rename_resume (fuse_state_t *state) + } + + static void +-fuse_rename (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_rename (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_rename_in *fri = msg; + char *oldname = (char *)(fri + 1); +@@ -1997,7 +2010,8 @@ fuse_link_resume (fuse_state_t *state) + } + + static void +-fuse_link (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_link (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_link_in *fli = msg; + char *name = (char *)(fli + 1); +@@ -2186,7 +2200,8 @@ fuse_create_resume (fuse_state_t *state) + } + + static void +-fuse_create (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_create (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + #if FUSE_KERNEL_MINOR_VERSION >= 12 + struct fuse_create_in *fci = msg; +@@ -2280,7 +2295,8 @@ fuse_open_resume (fuse_state_t *state) + } + + static void +-fuse_open (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_open (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_open_in *foi = msg; + fuse_state_t *state = NULL; +@@ -2357,7 +2373,8 @@ fuse_readv_resume (fuse_state_t *state) + } + + static void +-fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_read_in *fri = msg; + +@@ -2433,8 +2450,6 @@ void + fuse_write_resume (fuse_state_t *state) + { + struct iobref *iobref = NULL; +- struct iobuf *iobuf = NULL; +- + + iobref = iobref_new (); + if (!iobref) { +@@ -2447,8 +2462,7 @@ fuse_write_resume (fuse_state_t *state) + return; + } + +- iobuf = ((fuse_private_t *) (state->this->private))->iobuf; +- iobref_add (iobref, iobuf); ++ iobref_add (iobref, state->iobuf); + + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": WRITE (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", +@@ -2462,7 +2476,8 @@ fuse_write_resume (fuse_state_t *state) + } + + static void +-fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + /* WRITE is special, metadata is attached to in_header, + * and msg is 
the payload as-is. +@@ -2505,6 +2520,7 @@ fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg) + + state->vector.iov_base = msg; + state->vector.iov_len = fwi->size; ++ state->iobuf = iobuf; + + fuse_resolve_and_resume (state, fuse_write_resume); + +@@ -2543,7 +2559,8 @@ fuse_lseek_resume (fuse_state_t *state) + } + + static void +-fuse_lseek (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_lseek (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_lseek_in *ffi = msg; + fuse_state_t *state = NULL; +@@ -2579,7 +2596,8 @@ fuse_flush_resume (fuse_state_t *state) + } + + static void +-fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_flush_in *ffi = msg; + +@@ -2615,7 +2633,8 @@ fuse_internal_release (xlator_t *this, fd_t *fd) + } + + static void +-fuse_release (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_release (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_release_in *fri = msg; + fd_t *fd = NULL; +@@ -2660,7 +2679,8 @@ fuse_fsync_resume (fuse_state_t *state) + } + + static void +-fuse_fsync (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_fsync (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_fsync_in *fsi = msg; + +@@ -2735,7 +2755,8 @@ fuse_opendir_resume (fuse_state_t *state) + } + + static void +-fuse_opendir (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_opendir (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + /* + struct fuse_open_in *foi = msg; +@@ -2877,7 +2898,8 @@ fuse_readdir_resume (fuse_state_t *state) + } + + static void +-fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_read_in *fri = msg; + +@@ -3028,7 +3050,8 @@ fuse_readdirp_resume (fuse_state_t *state) + + + static void +-fuse_readdirp (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_readdirp (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_read_in *fri = msg; + +@@ -3075,7 +3098,8 @@ fuse_fallocate_resume(fuse_state_t *state) + } + + static void +-fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_fallocate_in *ffi = msg; + fuse_state_t *state = NULL; +@@ -3093,7 +3117,8 @@ fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg) + #endif /* FUSE minor version >= 19 */ + + static void +-fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_release_in *fri = msg; + fuse_state_t *state = NULL; +@@ -3134,7 +3159,8 @@ fuse_fsyncdir_resume (fuse_state_t *state) + } + + static void +-fuse_fsyncdir (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_fsyncdir (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_fsync_in *fsi = msg; + +@@ -3221,7 +3247,8 @@ fuse_statfs_resume (fuse_state_t *state) + + + static void +-fuse_statfs (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_statfs (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + fuse_state_t *state = NULL; + +@@ 
-3273,7 +3300,8 @@ fuse_setxattr_resume (fuse_state_t *state) + + + static void +-fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_setxattr_in *fsi = msg; + char *name = (char *)(fsi + 1); +@@ -3604,7 +3632,8 @@ fuse_getxattr_resume (fuse_state_t *state) + + + static void +-fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_getxattr_in *fgxi = msg; + char *name = (char *)(fgxi + 1); +@@ -3710,7 +3739,8 @@ fuse_listxattr_resume (fuse_state_t *state) + + + static void +-fuse_listxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_listxattr (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_getxattr_in *fgxi = msg; + fuse_state_t *state = NULL; +@@ -3766,7 +3796,8 @@ fuse_removexattr_resume (fuse_state_t *state) + + + static void +-fuse_removexattr (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_removexattr (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + char *name = msg; + +@@ -3865,7 +3896,8 @@ fuse_getlk_resume (fuse_state_t *state) + + + static void +-fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_lk_in *fli = msg; + +@@ -3957,7 +3989,8 @@ fuse_setlk_resume (fuse_state_t *state) + + + static void +-fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_lk_in *fli = msg; + +@@ -4056,7 +4089,8 @@ notify_kernel_loop (void *data) + #endif + + static void +-fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + struct fuse_init_in *fini = msg; + struct fuse_init_out fino = {0,}; +@@ -4227,7 +4261,8 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) + + + static void +-fuse_enosys (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_enosys (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + send_fuse_err (this, finh, ENOSYS); + +@@ -4236,7 +4271,8 @@ fuse_enosys (xlator_t *this, fuse_in_header_t *finh, void *msg) + + + static void +-fuse_destroy (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_destroy (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + send_fuse_err (this, finh, 0); + +@@ -4826,6 +4862,7 @@ fuse_graph_sync (xlator_t *this) + new_graph_id = priv->next_graph->id; + priv->next_graph = NULL; + need_first_lookup = 1; ++ priv->handle_graph_switch = _gf_true; + + while (!priv->event_recvd) { + ret = pthread_cond_wait (&priv->sync_cond, +@@ -4854,6 +4891,8 @@ unlock: + { + old_subvol->switched = 1; + winds_on_old_subvol = old_subvol->winds; ++ priv->handle_graph_switch = _gf_false; ++ pthread_cond_broadcast (&priv->migrate_cond); + } + pthread_mutex_unlock (&priv->sync_mutex); + +@@ -4861,6 +4900,13 @@ unlock: + xlator_notify (old_subvol, GF_EVENT_PARENT_DOWN, + old_subvol, NULL); + } ++ } else { ++ pthread_mutex_lock (&priv->sync_mutex); ++ { ++ priv->handle_graph_switch = _gf_false; ++ pthread_cond_broadcast (&priv->migrate_cond); ++ } ++ pthread_mutex_unlock (&priv->sync_mutex); + } + + return 0; +@@ -4897,7 +4943,6 
@@ fuse_thread_proc (void *data) + const size_t msg0_size = sizeof (*finh) + 128; + fuse_handler_t **fuse_ops = NULL; + struct pollfd pfd[2] = {{0,}}; +- gf_boolean_t mount_finished = _gf_false; + + this = data; + priv = this->private; +@@ -4914,32 +4959,40 @@ fuse_thread_proc (void *data) + /* THIS has to be reset here */ + THIS = this; + +- if (!mount_finished) { +- memset(pfd,0,sizeof(pfd)); +- pfd[0].fd = priv->status_pipe[0]; +- pfd[0].events = POLLIN | POLLHUP | POLLERR; +- pfd[1].fd = priv->fd; +- pfd[1].events = POLLIN | POLLHUP | POLLERR; +- if (poll(pfd,2,-1) < 0) { +- gf_log (this->name, GF_LOG_ERROR, +- "poll error %s", strerror(errno)); +- break; +- } +- if (pfd[0].revents & POLLIN) { +- if (fuse_get_mount_status(this) != 0) { ++ pthread_mutex_lock (&priv->sync_mutex); ++ { ++ if (!priv->mount_finished) { ++ memset(pfd, 0, sizeof(pfd)); ++ pfd[0].fd = priv->status_pipe[0]; ++ pfd[0].events = POLLIN | POLLHUP | POLLERR; ++ pfd[1].fd = priv->fd; ++ pfd[1].events = POLLIN | POLLHUP | POLLERR; ++ if (poll(pfd, 2, -1) < 0) { ++ gf_log (this->name, GF_LOG_ERROR, ++ "poll error %s", ++ strerror(errno)); ++ pthread_mutex_unlock (&priv->sync_mutex); + break; + } +- mount_finished = _gf_true; +- } +- else if (pfd[0].revents) { +- gf_log (this->name, GF_LOG_ERROR, +- "mount pipe closed without status"); +- break; +- } +- if (!pfd[1].revents) { +- continue; ++ if (pfd[0].revents & POLLIN) { ++ if (fuse_get_mount_status(this) != 0) { ++ pthread_mutex_unlock (&priv->sync_mutex); ++ break; ++ } ++ priv->mount_finished = _gf_true; ++ } else if (pfd[0].revents) { ++ gf_log (this->name, GF_LOG_ERROR, ++ "mount pipe closed without status"); ++ pthread_mutex_unlock (&priv->sync_mutex); ++ break; ++ } ++ if (!pfd[1].revents) { ++ pthread_mutex_unlock (&priv->sync_mutex); ++ continue; ++ } + } + } ++ pthread_mutex_unlock (&priv->sync_mutex); + + /* + * We don't want to block on readv while we're still waiting +@@ -5034,8 +5087,6 @@ fuse_thread_proc (void *data) + break; + } + +- priv->iobuf = iobuf; +- + /* + * This can be moved around a bit, but it's important to do it + * *after* the readv. Otherwise, a graph switch could occur +@@ -5078,9 +5129,9 @@ fuse_thread_proc (void *data) + + if (finh->opcode >= FUSE_OP_HIGH) + /* turn down MacFUSE specific messages */ +- fuse_enosys (this, finh, msg); ++ fuse_enosys (this, finh, msg, NULL); + else +- fuse_ops[finh->opcode] (this, finh, msg); ++ fuse_ops[finh->opcode] (this, finh, msg, iobuf); + + iobuf_unref (iobuf); + continue; +@@ -5152,8 +5203,6 @@ fuse_priv_dump (xlator_t *this) + private->volfile_size); + gf_proc_dump_write("mount_point", "%s", + private->mount_point); +- gf_proc_dump_write("iobuf", "%p", +- private->iobuf); + gf_proc_dump_write("fuse_thread_started", "%d", + (int)private->fuse_thread_started); + gf_proc_dump_write("direct_io_mode", "%d", +@@ -5279,6 +5328,7 @@ unlock: + int + notify (xlator_t *this, int32_t event, void *data, ...) + { ++ int i = 0; + int32_t ret = 0; + fuse_private_t *private = NULL; + gf_boolean_t start_thread = _gf_false; +@@ -5327,14 +5377,21 @@ notify (xlator_t *this, int32_t event, void *data, ...) 
+ pthread_mutex_unlock (&private->sync_mutex); + + if (start_thread) { +- ret = gf_thread_create (&private->fuse_thread, NULL, +- fuse_thread_proc, this, +- "fuseproc"); +- if (ret != 0) { +- gf_log (this->name, GF_LOG_DEBUG, +- "pthread_create() failed (%s)", +- strerror (errno)); +- break; ++ private->fuse_thread = GF_CALLOC (private->reader_thread_count, ++ sizeof (pthread_t), ++ gf_fuse_mt_pthread_t); ++ for (i = 0; i < private->reader_thread_count; i++) { ++ ++ ret = gf_thread_create (&private->fuse_thread[i], ++ NULL, ++ fuse_thread_proc, this, ++ "fuseproc"); ++ if (ret != 0) { ++ gf_log (this->name, GF_LOG_DEBUG, ++ "pthread_create() failed (%s)", ++ strerror (errno)); ++ break; ++ } + } + } + +@@ -5441,7 +5498,8 @@ static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH]; + + + static void +-fuse_dumper (xlator_t *this, fuse_in_header_t *finh, void *msg) ++fuse_dumper (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) + { + fuse_private_t *priv = NULL; + struct iovec diov[6] = {{0,},}; +@@ -5473,7 +5531,7 @@ fuse_dumper (xlator_t *this, fuse_in_header_t *finh, void *msg) + "failed to dump fuse message (R): %s", + strerror (errno)); + +- priv->fuse_ops0[finh->opcode] (this, finh, msg); ++ priv->fuse_ops0[finh->opcode] (this, finh, msg, NULL); + } + + +@@ -5578,6 +5636,9 @@ init (xlator_t *this_xl) + GF_OPTION_INIT (ZR_ATTR_TIMEOUT_OPT, priv->attribute_timeout, double, + cleanup_exit); + ++ GF_OPTION_INIT ("reader-thread-count", priv->reader_thread_count, uint32, ++ cleanup_exit); ++ + GF_OPTION_INIT (ZR_ENTRY_TIMEOUT_OPT, priv->entry_timeout, double, + cleanup_exit); + +@@ -5793,6 +5854,7 @@ init (xlator_t *this_xl) + + pthread_mutex_init (&priv->fuse_dump_mutex, NULL); + pthread_cond_init (&priv->sync_cond, NULL); ++ pthread_cond_init (&priv->migrate_cond, NULL); + pthread_mutex_init (&priv->sync_mutex, NULL); + priv->event_recvd = 0; + +@@ -5992,5 +6054,12 @@ struct volume_options options[] = { + .description = "This option can be used to enable or disable fuse " + "event history.", + }, ++ { .key = {"reader-thread-count"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "1", ++ .min = 1, ++ .max = 64, ++ .description = "Sets fuse reader thread count.", ++ }, + { .key = {NULL} }, + }; +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 2dfef64..4ca76e9 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -52,7 +52,7 @@ + + typedef struct fuse_in_header fuse_in_header_t; + typedef void (fuse_handler_t) (xlator_t *this, fuse_in_header_t *finh, +- void *msg); ++ void *msg, struct iobuf *iobuf); + + struct fuse_private { + int fd; +@@ -62,7 +62,8 @@ struct fuse_private { + char *mount_point; + struct iobuf *iobuf; + +- pthread_t fuse_thread; ++ pthread_t *fuse_thread; ++ uint32_t reader_thread_count; + char fuse_thread_started; + + uint32_t direct_io_mode; +@@ -140,6 +141,9 @@ struct fuse_private { + + /* whether to run the unmount daemon */ + gf_boolean_t auto_unmount; ++ gf_boolean_t mount_finished; ++ gf_boolean_t handle_graph_switch; ++ pthread_cond_t migrate_cond; + }; + typedef struct fuse_private fuse_private_t; + +@@ -391,6 +395,7 @@ typedef struct { + int32_t fd_no; + + gf_seek_what_t whence; ++ struct iobuf *iobuf; + } fuse_state_t; + + typedef struct { +diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c +index c59ff77..c2d4d0c 100644 +--- a/xlators/mount/fuse/src/fuse-helpers.c ++++ b/xlators/mount/fuse/src/fuse-helpers.c 
+@@ -123,6 +123,9 @@ get_fuse_state (xlator_t *this, fuse_in_header_t *finh) + + pthread_mutex_lock (&priv->sync_mutex); + { ++ while (priv->handle_graph_switch) ++ pthread_cond_wait (&priv->migrate_cond, ++ &priv->sync_mutex); + active_subvol = fuse_active_subvol (state->this); + active_subvol->winds++; + } +diff --git a/xlators/mount/fuse/src/fuse-mem-types.h b/xlators/mount/fuse/src/fuse-mem-types.h +index 2b4b473..721b9a3 100644 +--- a/xlators/mount/fuse/src/fuse-mem-types.h ++++ b/xlators/mount/fuse/src/fuse-mem-types.h +@@ -23,6 +23,7 @@ enum gf_fuse_mem_types_ { + gf_fuse_mt_graph_switch_args_t, + gf_fuse_mt_gids_t, + gf_fuse_mt_invalidate_node_t, ++ gf_fuse_mt_pthread_t, + gf_fuse_mt_end + }; + #endif +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index b39bb98..817619e 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -221,6 +221,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --event-history=$event_history"); + fi + ++ if [ -n "$reader_thread_count" ]; then ++ cmd_line=$(echo "$cmd_line --reader-thread-count=$reader_thread_count"); ++ fi ++ + if [ -n "$volume_name" ]; then + cmd_line=$(echo "$cmd_line --volume-name=$volume_name"); + fi +@@ -496,6 +500,9 @@ with_options() + "event-history") + event_history=$value + ;; ++ "reader-thread-count") ++ reader_thread_count=$value ++ ;; + "no-root-squash") + if [ $value = "yes" ] || + [ $value = "on" ] || +-- +1.8.3.1 + diff --git a/SOURCES/0462-posix-Do-not-log-ENXIO-errors-for-seek-fop.patch b/SOURCES/0462-posix-Do-not-log-ENXIO-errors-for-seek-fop.patch new file mode 100644 index 0000000..c18bd8e --- /dev/null +++ b/SOURCES/0462-posix-Do-not-log-ENXIO-errors-for-seek-fop.patch @@ -0,0 +1,62 @@ +From a064614b60924c4b0b1dbc4dd18278ce18b46db0 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 6 Jul 2018 23:26:41 +0200 +Subject: [PATCH 462/493] posix: Do not log ENXIO errors for seek fop + +When lseek is used with SEEK_DATA and SEEK_HOLE, it's expected that the +last operation fails with ENXIO when offset is beyond the end of file. +In this case it doesn't make sense to report this as an error log message. + +This patch reports ENXIO failure messages for seek fops in debug level +instead of error level. 
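+
+For a quick reproduction of the expected ENXIO case (the file path is a
+placeholder; this assumes a reasonably recent xfsprogs whose xfs_io has the
+seek command):
+
+    # ask for the next data segment at an offset beyond end-of-file;
+    # the underlying lseek(fd, off, SEEK_DATA) fails with ENXIO, which is
+    # the failure this patch now logs at DEBUG instead of ERROR
+    xfs_io -c "seek -d 2097152" /mnt/testvol/file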
+ +> Change-Id: I62a4f61f99b0e4d7ea6a2cdcd40afe15072794ac +> fixes: bz#1598926 +> Signed-off-by: Xavi Hernandez + +Upstream patch: https://review.gluster.org/c/glusterfs/+/20475 +Change-Id: I62a4f61f99b0e4d7ea6a2cdcd40afe15072794ac +BUG: 1598883 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/158531 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/common-utils.c | 6 ++++++ + xlators/storage/posix/src/posix.c | 3 ++- + 2 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index dd6cdb3..25600a9 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -4369,6 +4369,12 @@ fop_log_level (glusterfs_fop_t fop, int op_errno) + if (op_errno == EEXIST) + return GF_LOG_DEBUG; + ++ if (fop == GF_FOP_SEEK) { ++ if (op_errno == ENXIO) { ++ return GF_LOG_DEBUG; ++ } ++ } ++ + return GF_LOG_ERROR; + } + +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index e46fe99..13b4aa6 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -1225,7 +1225,8 @@ posix_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + ret = sys_lseek (pfd->fd, offset, whence); + if (ret == -1) { + err = errno; +- gf_msg (this->name, GF_LOG_ERROR, err, P_MSG_SEEK_FAILED, ++ gf_msg (this->name, fop_log_level(GF_FOP_SEEK, err), err, ++ P_MSG_SEEK_FAILED, + "seek failed on fd %d length %" PRId64 , pfd->fd, + offset); + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0463-build-glusterfs.spec-.in-firewalld-file-doesn-t-use-.patch b/SOURCES/0463-build-glusterfs.spec-.in-firewalld-file-doesn-t-use-.patch new file mode 100644 index 0000000..f6641d5 --- /dev/null +++ b/SOURCES/0463-build-glusterfs.spec-.in-firewalld-file-doesn-t-use-.patch @@ -0,0 +1,58 @@ +From f1d10db3bf315bfc9640a532aa39b9248d55e9c6 Mon Sep 17 00:00:00 2001 +From: Kaleb S KEITHLEY +Date: Thu, 13 Dec 2018 11:24:05 +0530 +Subject: [PATCH 463/493] build: glusterfs.spec(.in) firewalld file doesn't use + %{_prefix} + +.../firewalld/services/glusterfs.xml in %server files section +does not use %{_prefix} + +Other firewalld files, e.g., in the firewalld.src.rpm file use +%{_prefix} + +N.B. the other 'hardcoded' path, "/sbin/mount.glusterfs" in the +%files fuse section must remain as is, there is no macro for +mount utilities to use as there is for, e.g., the %{_sbindir} +macro for /usr/sbin. This doesn't matter for either RHEL6, where +/sbin and /usr/sbin are distinct directories, or for Fedora and +RHEL7, where /sbin is a symlink to /usr/sbin. E.g. see the nfs- +utils.src.rpm where /sbin/mount.nfs is also 'hardcoded'. 
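+
+For reference, the macro form expands to the same location that was
+previously hardcoded, which can be checked with rpm's macro evaluator:
+
+    rpm --eval '%{_prefix}/lib/firewalld/services/glusterfs.xml'
+    # -> /usr/lib/firewalld/services/glusterfs.xml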
+ +mainline: +> Change-Id: I902f47e3c589526c774d0aceb6fc2815abf86b01 +> BUG: 1350793 +> Signed-off-by: Kaleb S KEITHLEY +> Reviewed-on: http://review.gluster.org/14823 +> Smoke: Gluster Build System +> CentOS-regression: Gluster Build System +> NetBSD-regression: NetBSD Build System +> Reviewed-by: Milind Changire +> Reviewed-by: Niels de Vos + +Change-Id: I902f47e3c589526c774d0aceb6fc2815abf86b01 +BUG: 1350745 +Signed-off-by: Kaleb S KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/158502 +Tested-by: RHGS Build Bot +Tested-by: Milind Changire +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index b6b7630..2745b50 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1168,7 +1168,7 @@ exit 0 + %exclude %{_tmpfilesdir}/gluster.conf + %endif + %if ( 0%{?_with_firewalld:1} ) +-%exclude /usr/lib/firewalld/services/glusterfs.xml ++%exclude %{_prefix}/lib/firewalld/services/glusterfs.xml + %endif + %endif + %doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README.md THANKS +-- +1.8.3.1 + diff --git a/SOURCES/0464-build-exclude-packaging-crypt.so.patch b/SOURCES/0464-build-exclude-packaging-crypt.so.patch new file mode 100644 index 0000000..f32c9da --- /dev/null +++ b/SOURCES/0464-build-exclude-packaging-crypt.so.patch @@ -0,0 +1,37 @@ +From 93f35b10446fe14a0fd3e5d318056fe399479386 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Thu, 13 Dec 2018 11:31:20 +0530 +Subject: [PATCH 464/493] build: exclude packaging crypt.so + +exclude packaging crypt.so for FIPS compliance + +Label: DOWNSTREAM ONLY + +BUG: 1653224 +Change-Id: Icbf0d3efc90813c5856237213e6cf25af84e4915 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/158500 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 2745b50..baee2fa 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1198,8 +1198,8 @@ exit 0 + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/trace.so + %if ( ! ( 0%{?rhel} && 0%{?rhel} < 6 ) ) + # RHEL-5 based distributions have a too old openssl +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption +- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/crypt.so ++%exclude %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/crypt.so + %endif + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/access-control.so +-- +1.8.3.1 + diff --git a/SOURCES/0465-build-add-missing-explicit-package-dependencies.patch b/SOURCES/0465-build-add-missing-explicit-package-dependencies.patch new file mode 100644 index 0000000..850ce76 --- /dev/null +++ b/SOURCES/0465-build-add-missing-explicit-package-dependencies.patch @@ -0,0 +1,84 @@ +From ef7af50ae35ac5776057d6355b84ae111d33151e Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Thu, 13 Dec 2018 12:46:56 +0530 +Subject: [PATCH 465/493] build: add missing explicit package dependencies + +Add dependencies for glusterfs-libs, and other packages. +This is an Errata Tool whine. 
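+
+The added dependencies can be spot-checked on the built packages, for
+example (the package file name is a placeholder):
+
+    rpm -qp --requires glusterfs-api-*.rpm | grep glusterfs-libs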
+ +Label: DOWNSTREAM ONLY + +BUG: 1656357 +Change-Id: Ieaadb6e4ffa84d1811aa740f7891855568ecbcbb +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/158501 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index baee2fa..f6a4ab0 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -282,6 +282,7 @@ Summary: GlusterFS api library + Group: System Environment/Daemons + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description api + GlusterFS is a distributed file-system capable of scaling to several +@@ -300,6 +301,7 @@ Group: Development/Libraries + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-devel%{?_isa} = %{version}-%{release} + Requires: libacl-devel ++Requires: %{name}-api%{?_isa} = %{version}-%{release} + + %description api-devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -336,6 +338,8 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + %if ( 0%{!?_without_extra_xlators:1} ) + Requires: %{name}-extra-xlators = %{version}-%{release} + %endif ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++Requires: %{name}-server%{?_isa} = %{version}-%{release} + + %description devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -385,6 +389,7 @@ Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} + + Obsoletes: %{name}-client < %{version}-%{release} + Provides: %{name}-client = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description fuse + GlusterFS is a distributed file-system capable of scaling to several +@@ -454,6 +459,7 @@ BuildRequires: python-ctypes + Requires: python2-gluster = %{version}-%{release} + Requires: rsync + Requires: util-linux ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description geo-replication + GlusterFS is a distributed file-system capable of scaling to several +@@ -526,6 +532,7 @@ BuildRequires: libibverbs-devel + BuildRequires: librdmacm-devel >= 1.0.15 + %endif + Requires: %{name}%{?_isa} = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description rdma + GlusterFS is a distributed file-system capable of scaling to several +@@ -656,6 +663,7 @@ This package provides the glusterfs server daemon. 
+ %package client-xlators + Summary: GlusterFS client-side translators + Group: Applications/File ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description client-xlators + GlusterFS is a distributed file-system capable of scaling to several +-- +1.8.3.1 + diff --git a/SOURCES/0466-extras-Add-group-distributed-virt-for-single-brick-o.patch b/SOURCES/0466-extras-Add-group-distributed-virt-for-single-brick-o.patch new file mode 100644 index 0000000..6d443fc --- /dev/null +++ b/SOURCES/0466-extras-Add-group-distributed-virt-for-single-brick-o.patch @@ -0,0 +1,86 @@ +From 2ef41e27b452f215e56bfc08c8117f8f18c33619 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Wed, 28 Nov 2018 12:39:31 +0530 +Subject: [PATCH 466/493] extras: Add group-distributed-virt for single-brick + ovirt-gluster use-case + + > Upstream: https://review.gluster.org/21735 + > BUG: 1654138 + > Change-Id: I930011327332b7ba30cc76f614efaf5932eb4f3d + +Change-Id: I930011327332b7ba30cc76f614efaf5932eb4f3d +BUG: 1653613 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/158487 +Tested-by: RHGS Build Bot +Reviewed-by: Milind Changire +Reviewed-by: Atin Mukherjee +--- + extras/Makefile.am | 6 +++++- + extras/group-distributed-virt | 10 ++++++++++ + glusterfs.spec.in | 4 ++++ + 3 files changed, 19 insertions(+), 1 deletion(-) + create mode 100644 extras/group-distributed-virt + +diff --git a/extras/Makefile.am b/extras/Makefile.am +index 7b791af..e0e05b5 100644 +--- a/extras/Makefile.am ++++ b/extras/Makefile.am +@@ -12,7 +12,9 @@ SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \ + + confdir = $(sysconfdir)/glusterfs + conf_DATA = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \ +- logger.conf.example glusterfs-georep-logrotate group-virt.example group-metadata-cache group-gluster-block group-nl-cache group-db-workload ++ logger.conf.example glusterfs-georep-logrotate group-virt.example \ ++ group-metadata-cache group-gluster-block group-nl-cache group-db-workload \ ++ group-distributed-virt + + voldir = $(sysconfdir)/glusterfs + vol_DATA = glusterd.vol +@@ -49,3 +51,5 @@ install-data-local: + $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/nl-cache + $(INSTALL_DATA) $(top_srcdir)/extras/group-db-workload \ + $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/db-workload ++ $(INSTALL_DATA) $(top_srcdir)/extras/group-distributed-virt \ ++ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/distributed-virt +diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt +new file mode 100644 +index 0000000..a960b76 +--- /dev/null ++++ b/extras/group-distributed-virt +@@ -0,0 +1,10 @@ ++performance.quick-read=off ++performance.read-ahead=off ++performance.io-cache=off ++performance.low-prio-threads=32 ++network.remote-dio=enable ++features.shard=on ++user.cifs=off ++client.event-threads=4 ++server.event-threads=4 ++performance.client-io-threads=on +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index f6a4ab0..a4accd9 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1522,6 +1522,7 @@ exit 0 + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/metadata-cache + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/gluster-block + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/db-workload ++ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/distributed-virt + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/nl-cache + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind + %dir %attr(0755,-,-) 
%{_sharedstatedir}/glusterd/glusterfind/.keys +@@ -2169,6 +2170,9 @@ fi + %endif + + %changelog ++* Thu Dec 13 2018 Krutika Dhananjay ++- Install /var/lib/glusterd/groups/distributed-virt by default (#1653613) ++ + * Fri Jul 6 2018 Atin Mukherjee + - Added db group profile (#1597506) + +-- +1.8.3.1 + diff --git a/SOURCES/0467-glusterd-glusterd-to-regenerate-volfiles-when-GD_OP_.patch b/SOURCES/0467-glusterd-glusterd-to-regenerate-volfiles-when-GD_OP_.patch new file mode 100644 index 0000000..b5aa151 --- /dev/null +++ b/SOURCES/0467-glusterd-glusterd-to-regenerate-volfiles-when-GD_OP_.patch @@ -0,0 +1,304 @@ +From 647b4d4e8edefd256de2a9f3916763b8cfa8429b Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 20 Nov 2018 12:32:32 +0530 +Subject: [PATCH 467/493] glusterd: glusterd to regenerate volfiles when + GD_OP_VERSION_MAX changes + +While glusterd has an infra to allow post install of spec to bring it up +in the interim upgrade mode to allow all the volfiles to be regenerated +with the latest executable, in container world the same methodology is +not followed as container image always point to the specific gluster rpm +and gluster rpm doesn't go through an upgrade process. + +This fix does the following: +1. If glusterd.upgrade file doesn't exist, regenerate the volfiles +2. If maximum-operating-version read from glusterd.upgrade doesn't match +with GD_OP_VERSION_MAX, glusterd detects it to be a version where new +options are introduced and regenerate the volfiles. + +Tests done: + +1. Bring up glusterd, check if glusterd.upgrade file has been created +with GD_OP_VERSION_MAX value. +2. Post 1, restart glusterd and check glusterd hasn't regenerated the +volfiles as there's is no change in the GD_OP_VERSION_MAX vs the +op_version read from the file. +3. Bump up the GD_OP_VERSION_MAX in the code by 1 and post compilation +restart glusterd where the volfiles should be again regenerated. + +Note: The old way of having volfiles regenerated during an rpm upgrade +is kept as it is for now but eventually this can be sunset later. 
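+
+A rough way to observe the new behaviour after installing a build with a
+higher GD_OP_VERSION_MAX (assuming the default /var/lib/glusterd working
+directory and the standard glusterd log location):
+
+    systemctl restart glusterd
+    ls -l /var/lib/glusterd/glusterd.upgrade
+    grep "Regenerating volfiles" /var/log/glusterfs/glusterd.log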
+ +> Change-Id: I75b49a1601c71e99f6a6bc360dd12dd03a96414b +> Fixes: bz#1651463 +> Signed-off-by: Atin Mukherjee + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21687/ + +Change-Id: I75b49a1601c71e99f6a6bc360dd12dd03a96414b +BUG: 1651460 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158645 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 126 +++++++++++++++++++++++++++-- + xlators/mgmt/glusterd/src/glusterd-store.h | 6 ++ + xlators/mgmt/glusterd/src/glusterd.c | 27 +++++-- + xlators/mgmt/glusterd/src/glusterd.h | 7 ++ + 4 files changed, 154 insertions(+), 12 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 37542e7..f276fef 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -2063,7 +2063,7 @@ glusterd_store_global_info (xlator_t *this) + } + + handle->fd = gf_store_mkstemp (handle); +- if (handle->fd <= 0) { ++ if (handle->fd < 0) { + ret = -1; + goto out; + } +@@ -2081,7 +2081,7 @@ glusterd_store_global_info (xlator_t *this) + goto out; + } + +- snprintf (op_version_str, 15, "%d", conf->op_version); ++ snprintf (op_version_str, sizeof(op_version_str), "%d", conf->op_version); + ret = gf_store_save_value (handle->fd, GD_OP_VERSION_KEY, + op_version_str); + if (ret) { +@@ -2094,12 +2094,8 @@ glusterd_store_global_info (xlator_t *this) + ret = gf_store_rename_tmppath (handle); + out: + if (handle) { +- if (ret && (handle->fd > 0)) ++ if (ret && (handle->fd >= 0)) + gf_store_unlink_tmppath (handle); +- +- if (handle->fd > 0) { +- handle->fd = 0; +- } + } + + if (uuid_str) +@@ -2114,6 +2110,122 @@ out: + } + + int ++glusterd_store_max_op_version(xlator_t *this) ++{ ++ int ret = -1; ++ glusterd_conf_t *conf = NULL; ++ char op_version_str[15] = {0,}; ++ char path[PATH_MAX] = {0,}; ++ gf_store_handle_t *handle = NULL; ++ int32_t len = 0; ++ ++ conf = this->private; ++ ++ len = snprintf(path, PATH_MAX, "%s/%s", conf->workdir, ++ GLUSTERD_UPGRADE_FILE); ++ if ((len < 0) || (len >= PATH_MAX)) { ++ goto out; ++ } ++ ret = gf_store_handle_new(path, &handle); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_STORE_HANDLE_GET_FAIL, "Unable to get store " ++ "handle"); ++ goto out; ++ } ++ ++ /* These options need to be available for all users */ ++ ret = sys_chmod(handle->path, 0644); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, ++ "chmod error for %s", GLUSTERD_UPGRADE_FILE); ++ goto out; ++ } ++ ++ handle->fd = gf_store_mkstemp(handle); ++ if (handle->fd < 0) { ++ ret = -1; ++ goto out; ++ } ++ ++ snprintf(op_version_str, sizeof(op_version_str), "%d", ++ GD_OP_VERSION_MAX); ++ ret = gf_store_save_value(handle->fd, GD_MAX_OP_VERSION_KEY, ++ op_version_str); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL, ++ "Storing op-version failed ret = %d", ret); ++ goto out; ++ } ++ ++ ret = gf_store_rename_tmppath(handle); ++out: ++ if (handle) { ++ if (ret && (handle->fd >= 0)) ++ gf_store_unlink_tmppath(handle); ++ } ++ ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL, ++ "Failed to store max op-version"); ++ if (handle) ++ gf_store_handle_destroy(handle); ++ return ret; ++} ++ ++int ++glusterd_retrieve_max_op_version(xlator_t *this, int *op_version) ++{ ++ char *op_version_str = NULL; ++ glusterd_conf_t *priv = NULL; ++ int ret = -1; ++ int 
tmp_version = 0; ++ char *tmp = NULL; ++ char path[PATH_MAX] = {0,}; ++ gf_store_handle_t *handle = NULL; ++ int32_t len = 0; ++ ++ priv = this->private; ++ ++ len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir, ++ GLUSTERD_UPGRADE_FILE); ++ if ((len < 0) || (len >= PATH_MAX)) { ++ goto out; ++ } ++ ret = gf_store_handle_retrieve(path, &handle); ++ ++ if (ret) { ++ gf_msg_debug(this->name, 0, "Unable to get store handle!"); ++ goto out; ++ } ++ ++ ret = gf_store_retrieve_value(handle, GD_MAX_OP_VERSION_KEY, ++ &op_version_str); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "No previous op_version present"); ++ goto out; ++ } ++ ++ tmp_version = strtol(op_version_str, &tmp, 10); ++ if ((tmp_version <= 0) || (tmp && strlen(tmp) > 1)) { ++ gf_msg(this->name, GF_LOG_WARNING, EINVAL, ++ GD_MSG_UNSUPPORTED_VERSION, "invalid version number"); ++ goto out; ++ } ++ ++ *op_version = tmp_version; ++ ++ ret = 0; ++out: ++ if (op_version_str) ++ GF_FREE(op_version_str); ++ if (handle) ++ gf_store_handle_destroy(handle); ++ return ret; ++} ++ ++int + glusterd_retrieve_op_version (xlator_t *this, int *op_version) + { + char *op_version_str = NULL; +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h +index 383a475..76c5500 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.h ++++ b/xlators/mgmt/glusterd/src/glusterd-store.h +@@ -161,6 +161,12 @@ glusterd_retrieve_op_version (xlator_t *this, int *op_version); + int + glusterd_store_global_info (xlator_t *this); + ++int ++glusterd_retrieve_max_op_version(xlator_t *this, int *op_version); ++ ++int ++glusterd_store_max_op_version(xlator_t *this); ++ + int32_t + glusterd_store_retrieve_options (xlator_t *this); + +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index ca17526..29d5de1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1428,6 +1428,7 @@ init (xlator_t *this) + gf_boolean_t upgrade = _gf_false; + gf_boolean_t downgrade = _gf_false; + char *localtime_logging = NULL; ++ int op_version = 0; + + #ifndef GF_DARWIN_HOST_OS + { +@@ -1976,6 +1977,27 @@ init (xlator_t *this) + } + + GF_ATOMIC_INIT(conf->blockers, 0); ++ ret = glusterd_handle_upgrade_downgrade(this->options, conf, upgrade, ++ downgrade); ++ if (ret) ++ goto out; ++ ++ ret = glusterd_retrieve_max_op_version(this, &op_version); ++ /* first condition indicates file isn't present which means this code ++ * change is hitting for the first time or someone has deleted it from ++ * the backend.second condition is when max op_version differs, in both ++ * cases volfiles should be regenerated ++ */ ++ if (op_version == 0 || op_version != GD_OP_VERSION_MAX) { ++ gf_log(this->name, GF_LOG_INFO, ++ "Regenerating volfiles due to a max op-version mismatch " ++ "or glusterd.upgrade file not being present, op_version " ++ "retrieved: %d, max op_version: %d", op_version, ++ GD_OP_VERSION_MAX); ++ glusterd_recreate_volfiles(conf); ++ ret = glusterd_store_max_op_version(this); ++ } ++ + /* If the peer count is less than 2 then this would be the best time to + * spawn process/bricks that may need (re)starting since last time + * (this) glusterd was up. 
*/ +@@ -1983,11 +2005,6 @@ init (xlator_t *this) + glusterd_launch_synctask (glusterd_spawn_daemons, NULL); + + +- ret = glusterd_handle_upgrade_downgrade (this->options, conf, upgrade, +- downgrade); +- if (ret) +- goto out; +- + ret = glusterd_hooks_spawn_worker (this); + if (ret) + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index bfa8310..cbdca52 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -582,6 +582,9 @@ typedef enum { + + #define GLUSTERD_DEFAULT_PORT GF_DEFAULT_BASE_PORT + #define GLUSTERD_INFO_FILE "glusterd.info" ++#define GLUSTERD_UPGRADE_FILE \ ++ "glusterd.upgrade" /* zero byte file to detect a need for regenerating \ ++ volfiles in container mode */ + #define GLUSTERD_VOLUME_QUOTA_CONFIG "quota.conf" + #define GLUSTERD_VOLUME_DIR_PREFIX "vols" + #define GLUSTERD_PEER_DIR_PREFIX "peers" +@@ -1333,4 +1336,8 @@ glusterd_tier_prevalidate (dict_t *dict, char **op_errstr, + + int + glusterd_options_init (xlator_t *this); ++ ++int32_t ++glusterd_recreate_volfiles(glusterd_conf_t *conf); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/0468-core-move-invalid-port-logs-to-DEBUG-log-level.patch b/SOURCES/0468-core-move-invalid-port-logs-to-DEBUG-log-level.patch new file mode 100644 index 0000000..6f69339 --- /dev/null +++ b/SOURCES/0468-core-move-invalid-port-logs-to-DEBUG-log-level.patch @@ -0,0 +1,40 @@ +From 8eb95b2ebdedd61e7784cf2f18b9564d2d4ed0b9 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Thu, 13 Dec 2018 10:11:58 +0530 +Subject: [PATCH 468/493] core: move invalid port logs to DEBUG log level + +Stop spamming "invalid port" logs in case sysadmin has reserved a large +number of ports. + +manline: +> Change-Id: I244ef7693560cc404b36cadc6b05d92ec0e908d3 +> fixes: bz#1656517 +> Signed-off-by: Milind Changire +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/21809 + +Change-Id: I244ef7693560cc404b36cadc6b05d92ec0e908d3 +BUG: 1656217 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/158483 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/common-utils.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index 25600a9..1243754 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -3373,7 +3373,7 @@ gf_ports_reserved (char *blocked_port, unsigned char *ports, uint32_t ceiling) + if (blocked_port[strlen(blocked_port) -1] == '\n') + blocked_port[strlen(blocked_port) -1] = '\0'; + if (gf_string2int32 (blocked_port, &tmp_port1) == 0) { +- if (tmp_port1 > ceiling ++ if (tmp_port1 > GF_PORT_MAX + || tmp_port1 < 0) { + gf_msg ("glusterfs-socket", GF_LOG_WARNING, 0, + LG_MSG_INVALID_PORT, "invalid port %d", +-- +1.8.3.1 + diff --git a/SOURCES/0469-nfs-set-ctx-for-every-inode-looked-up-nfs3_fh_resolv.patch b/SOURCES/0469-nfs-set-ctx-for-every-inode-looked-up-nfs3_fh_resolv.patch new file mode 100644 index 0000000..a265365 --- /dev/null +++ b/SOURCES/0469-nfs-set-ctx-for-every-inode-looked-up-nfs3_fh_resolv.patch @@ -0,0 +1,48 @@ +From 4a3e8888d7e866137287fced284b71ba152a17ad Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Thu, 29 Nov 2018 19:22:40 +0530 +Subject: [PATCH 469/493] nfs : set ctx for every inode looked up + nfs3_fh_resolve_inode_lookup_cbk() + +The inode ctx for nfs xlator is set with help nfs_fix_generation. 
+But currently gnfs is crashing because inode_ctx is becoming null +nfs3_resolve_inode_hard() is used to perform a lookup on entire +path and looks like function is missing to set the ctx for inode. +This patch will set ctx for the inode which it looked on. + +Upstream reference : +>url: https://review.gluster.org/#/c/glusterfs/+/21749/ +>Change-Id: I464fa7f78df1bae990ebe97de8ccf6d5fb74fc9f +>fixes: bz#1651439 +>Signed-off-by: Jiffin Tony Thottan + +Change-Id: I464fa7f78df1bae990ebe97de8ccf6d5fb74fc9f +BUG: 1633177 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/158676 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/nfs/server/src/nfs3-helpers.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c +index 0b97709..9bc8aff 100644 +--- a/xlators/nfs/server/src/nfs3-helpers.c ++++ b/xlators/nfs/server/src/nfs3-helpers.c +@@ -3660,6 +3660,12 @@ nfs3_fh_resolve_entry_lookup_cbk (call_frame_t *frame, void *cookie, + inode_lookup (linked_inode); + inode_unref (cs->resolvedloc.inode); + cs->resolvedloc.inode = linked_inode; ++ } else { ++ /* nfs3_fh_resolve_entry_hard() use to resolve entire path if needed. ++ * So the ctx for inode obtained from here need to set properly, ++ * otherwise it may result in a crash. ++ */ ++ nfs_fix_generation(this, inode); + } + err: + nfs3_call_resume (cs); +-- +1.8.3.1 + diff --git a/SOURCES/0470-dht-fix-use-after-free-in-dht_rmdir_readdirp_cbk.patch b/SOURCES/0470-dht-fix-use-after-free-in-dht_rmdir_readdirp_cbk.patch new file mode 100644 index 0000000..46e91de --- /dev/null +++ b/SOURCES/0470-dht-fix-use-after-free-in-dht_rmdir_readdirp_cbk.patch @@ -0,0 +1,87 @@ +From 870513f9bade449fa760a81e242102860a0fdc91 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Thu, 13 Dec 2018 10:54:15 +0530 +Subject: [PATCH 470/493] dht: fix use after free in dht_rmdir_readdirp_cbk + +The frame is freed when linkfile exist in dht_rmdir_is_subvol_empty(), +the following message use the freed local. + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21446/ + +> Change-Id: I41191e8bd477f031a2444d5f15e578dc4f086e6b +> Updates: bz#1640489 +> Signed-off-by: Kinglong Mee + +Change-Id: Ia257e1da57cc486ab336e43f8e88187e984c70e2 +BUG: 1654103 +Author: Kinglong Mee +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/158486 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +--- + xlators/cluster/dht/src/dht-common.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 2e19036..ff0099c 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -10175,6 +10175,7 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + xlator_t *prev = NULL; + xlator_t *src = NULL; + int ret = 0; ++ char *path = NULL; + + + local = frame->local; +@@ -10182,6 +10183,11 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + src = prev; + + if (op_ret > 2) { ++ /* dht_rmdir_is_subvol_empty() may free the frame, ++ * copy path for logging. 
++ */ ++ path = gf_strdup(local->loc.path); ++ + ret = dht_rmdir_is_subvol_empty (frame, this, entries, src); + + switch (ret) { +@@ -10192,27 +10198,24 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->loc.path, op_ret); + local->op_ret = -1; + local->op_errno = ENOTEMPTY; +- goto done; ++ break; + default: + /* @ret number of linkfiles are getting unlinked */ + gf_msg_trace (this->name, 0, + "readdir on %s for %s found %d " +- "linkfiles", prev->name, +- local->loc.path, ret); ++ "linkfiles", ++ prev->name, path, ret); + break; + } + + } + +- +- if (ret) { +- return 0; +- } +- +-done: + /* readdirp failed or no linkto files were found on this subvol */ ++ if (!ret) { ++ dht_rmdir_readdirp_done(frame, this); ++ } ++ GF_FREE(path); + +- dht_rmdir_readdirp_done (frame, this); + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0471-glusterd-migrating-profile-commands-to-mgmt_v3-frame.patch b/SOURCES/0471-glusterd-migrating-profile-commands-to-mgmt_v3-frame.patch new file mode 100644 index 0000000..74a29d6 --- /dev/null +++ b/SOURCES/0471-glusterd-migrating-profile-commands-to-mgmt_v3-frame.patch @@ -0,0 +1,507 @@ +From e68845ff7018e5d81d7979684b18e6eda449b088 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Thu, 8 Nov 2018 18:50:18 +0530 +Subject: [PATCH 471/493] glusterd: migrating profile commands to mgmt_v3 + framework + +Current profile commands use the op_state machine framework. +Porting it to use the mgmt_v3 framework. + +The following tests were performed on the patch: +case 1: +1. On a 3 node cluster, created and started 3 volumes +2. Mounted all the three volumes and wrote some data +3. Started profile operation for all the volumes +4. Ran "gluster v status" from N1, + "gluster v profile info" form N2, + "gluster v profile info" from N3 simultaneously in a + loop for around 10000 times +5. Didn't find any cores generated. + +case 2: +1. Repeat the steps 1,2 and 3 from case 1. +2. Ran "gluster v status" from N1, + "gluster v profile info" form N2(terminal 1), + "gluster v profile info" from N2(terminal 2) + simultaneously in a loop. +3. No cores were generated. 
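+
+The concurrent load described above can be approximated with a loop run in
+parallel from two or three peers (volume name and iteration count are
+placeholders):
+
+    gluster volume profile testvol start
+    for i in $(seq 1 10000); do
+        gluster volume status testvol &
+        gluster volume profile testvol info &
+        wait
+    done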
+ +> fixes: bz#1654181 +> Change-Id: I83044cf5aee3970ef94066c89fcc41783ed468a6 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21736/ + +Change-Id: I83044cf5aee3970ef94066c89fcc41783ed468a6 +BUG: 1639476 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158631 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/globals.h | 2 + + xlators/mgmt/glusterd/src/glusterd-handler.c | 18 +- + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 240 +++++++++++++++++++++++++-- + xlators/mgmt/glusterd/src/glusterd-mgmt.h | 6 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.h | 6 + + 6 files changed, 252 insertions(+), 24 deletions(-) + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 1bede2e..d2b0964 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -111,6 +111,8 @@ + + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + ++#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */ ++ + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/ +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 7486f51..90eaa95 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -28,6 +28,7 @@ + #include "glusterd-sm.h" + #include "glusterd-op-sm.h" + #include "glusterd-utils.h" ++#include "glusterd-mgmt.h" + #include "glusterd-server-quorum.h" + #include "glusterd-store.h" + #include "glusterd-locks.h" +@@ -3065,10 +3066,13 @@ __glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) + int32_t op = 0; + char err_str[2048] = {0,}; + xlator_t *this = NULL; ++ glusterd_conf_t *conf = NULL; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { +@@ -3109,12 +3113,18 @@ __glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) + goto out; + } + +- ret = glusterd_op_begin (req, cli_op, dict, err_str, sizeof (err_str)); ++ if (conf->op_version < GD_OP_VERSION_6_0) { ++ gf_msg_debug(this->name, 0, "The cluster is operating at " ++ "version less than %d. 
Falling back to op-sm " ++ "framework.", GD_OP_VERSION_6_0); ++ ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str)); ++ glusterd_friend_sm(); ++ glusterd_op_sm(); ++ } else { ++ ret = glusterd_mgmt_v3_initiate_profile_phases(req, cli_op, dict); ++ } + + out: +- glusterd_friend_sm (); +- glusterd_op_sm (); +- + free (cli_req.dict.dict_val); + + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index d7da3c1..751d6e4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -19,6 +19,7 @@ + #include "glusterd-locks.h" + #include "glusterd-mgmt.h" + #include "glusterd-op-sm.h" ++#include "glusterd-server-quorum.h" + #include "glusterd-volgen.h" + #include "glusterd-store.h" + #include "glusterd-snapshot-utils.h" +@@ -213,6 +214,16 @@ gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + } + break; + ++ case GD_OP_PROFILE_VOLUME: ++ ret = glusterd_op_stage_stats_volume(dict, op_errstr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ GD_MSG_PRE_VALIDATION_FAIL, ++ "prevalidation failed for profile operation."); ++ goto out; ++ } ++ break; ++ + case GD_OP_MAX_OPVERSION: + ret = 0; + break; +@@ -252,6 +263,16 @@ gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + } + break; + } ++ case GD_OP_PROFILE_VOLUME: ++ { ++ ret = gd_brick_op_phase(op, rsp_dict, dict, op_errstr); ++ if (ret) { ++ gf_log(this->name, GF_LOG_WARNING, "%s brickop failed", ++ gd_op_list[op]); ++ goto out; ++ } ++ break; ++ } + default: + break; + } +@@ -406,6 +427,17 @@ gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + break; + + } ++ case GD_OP_PROFILE_VOLUME: ++ { ++ ret = glusterd_op_stats_volume(dict, op_errstr, rsp_dict); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_COMMIT_OP_FAIL, "commit failed " ++ "volume profile operation."); ++ goto out; ++ } ++ break; ++ } + + default: + break; +@@ -847,6 +879,7 @@ glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op, + case GD_OP_DETACH_TIER_STATUS: + case GD_OP_TIER_START_STOP: + case GD_OP_REMOVE_TIER_BRICK: ++ case GD_OP_PROFILE_VOLUME: + break; + case GD_OP_MAX_OPVERSION: + break; +@@ -1039,6 +1072,16 @@ glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict, + goto out; + } + ++ if (op == GD_OP_PROFILE_VOLUME) { ++ ret = glusterd_validate_quorum(this, op, req_dict, op_errstr); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_SERVER_QUORUM_NOT_MET, "Server quorum " ++ "not met. 
Rejecting operation."); ++ goto out; ++ } ++ } ++ + /* Pre Validation on local node */ + ret = gd_mgmt_v3_pre_validate_fn (op, req_dict, op_errstr, + rsp_dict, op_errno); +@@ -1157,6 +1200,7 @@ glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict, + case GD_OP_REPLACE_BRICK: + case GD_OP_RESET_BRICK: + case GD_OP_ADD_TIER_BRICK: ++ case GD_OP_PROFILE_VOLUME: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { +@@ -1309,12 +1353,11 @@ out: + } + + int +-glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict, char **op_errstr, +- uint32_t txn_generation) ++glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *rsp_dict, dict_t *req_dict, ++ char **op_errstr, uint32_t txn_generation) + { + int32_t ret = -1; + int32_t peer_cnt = 0; +- dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; +@@ -1329,14 +1372,6 @@ glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict, char **op_errstr, + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + +- rsp_dict = dict_new (); +- if (!rsp_dict) { +- gf_msg (this->name, GF_LOG_ERROR, 0, +- GD_MSG_DICT_CREATE_FAIL, +- "Failed to create response dictionary"); +- goto out; +- } +- + /* Perform brick op on local node */ + ret = gd_mgmt_v3_brick_op_fn (op, req_dict, op_errstr, + rsp_dict); +@@ -1361,11 +1396,8 @@ glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict, char **op_errstr, + goto out; + } + +- dict_unref (rsp_dict); +- rsp_dict = NULL; +- + /* Sending brick op req to other nodes in the cluster */ +- gd_syncargs_init (&args, NULL); ++ gd_syncargs_init (&args, rsp_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + +@@ -2108,6 +2140,180 @@ out: + } + + int32_t ++glusterd_mgmt_v3_initiate_profile_phases (rpcsvc_request_t *req, ++ glusterd_op_t op, dict_t *dict) ++{ ++ int32_t ret = -1; ++ int32_t op_ret = -1; ++ dict_t *req_dict = NULL; ++ dict_t *tmp_dict = NULL; ++ glusterd_conf_t *conf = NULL; ++ char *op_errstr = NULL; ++ xlator_t *this = NULL; ++ gf_boolean_t is_acquired = _gf_false; ++ uuid_t *originator_uuid = NULL; ++ uint32_t txn_generation = 0; ++ uint32_t op_errno = 0; ++ ++ this = THIS; ++ GF_ASSERT (this); ++ GF_ASSERT (req); ++ GF_ASSERT (dict); ++ conf = this->private; ++ GF_ASSERT (conf); ++ ++ /* Save the peer list generation */ ++ txn_generation = conf->generation; ++ cmm_smp_rmb (); ++ /* This read memory barrier makes sure that this assignment happens here ++ * only and is not reordered and optimized by either the compiler or the ++ * processor. ++ */ ++ ++ /* Save the MY_UUID as the originator_uuid. This originator_uuid ++ * will be used by is_origin_glusterd() to determine if a node ++ * is the originator node for a command. 
*/ ++ originator_uuid = GF_CALLOC (1, sizeof(uuid_t), ++ gf_common_mt_uuid_t); ++ if (!originator_uuid) { ++ ret = -1; ++ goto out; ++ } ++ ++ gf_uuid_copy (*originator_uuid, MY_UUID); ++ ret = dict_set_bin (dict, "originator_uuid", ++ originator_uuid, sizeof (uuid_t)); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_DICT_SET_FAILED, ++ "Failed to set originator_uuid."); ++ GF_FREE (originator_uuid); ++ goto out; ++ } ++ ++ /* Marking the operation as complete synctasked */ ++ ret = dict_set_int32 (dict, "is_synctasked", _gf_true); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_DICT_SET_FAILED, ++ "Failed to set synctasked flag."); ++ goto out; ++ } ++ ++ /* Use a copy at local unlock as cli response will be sent before ++ * the unlock and the volname in the dict might be removed */ ++ tmp_dict = dict_new(); ++ if (!tmp_dict) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_DICT_CREATE_FAIL, "Unable to create dict"); ++ goto out; ++ } ++ dict_copy (dict, tmp_dict); ++ ++ /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ ++ ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr, ++ &op_errno, &is_acquired, ++ txn_generation); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_MGMTV3_LOCKDOWN_FAIL, ++ "mgmt_v3 lockdown failed."); ++ goto out; ++ } ++ ++ /* BUILD PAYLOAD */ ++ ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, LOGSTR_BUILD_PAYLOAD, ++ gd_op_list[op]); ++ if (op_errstr == NULL) ++ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); ++ goto out; ++ } ++ ++ /* PRE-COMMIT VALIDATE PHASE */ ++ ret = glusterd_mgmt_v3_pre_validate (op, req_dict, &op_errstr, ++ &op_errno, txn_generation); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_PRE_VALIDATION_FAIL, "Pre Validation Failed"); ++ goto out; ++ } ++ ++ /* BRICK-OPS */ ++ ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr, ++ txn_generation); ++ if (ret) { ++ gf_log(this->name, GF_LOG_ERROR, "Brick Op Failed"); ++ goto out; ++ } ++ ++ /* COMMIT OP PHASE */ ++ ret = glusterd_mgmt_v3_commit (op, dict, req_dict, &op_errstr, ++ &op_errno, txn_generation); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_COMMIT_OP_FAIL, "Commit Op Failed"); ++ goto out; ++ } ++ ++ /* POST-COMMIT VALIDATE PHASE */ ++ /* As of now, post_validate is not trying to cleanup any failed ++ commands. So as of now, I am sending 0 (op_ret as 0). 
++ */ ++ ret = glusterd_mgmt_v3_post_validate (op, 0, dict, req_dict, &op_errstr, ++ txn_generation); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_POST_VALIDATION_FAIL, "Post Validation Failed"); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ op_ret = ret; ++ /* UNLOCK PHASE FOR PEERS*/ ++ (void) glusterd_mgmt_v3_release_peer_locks (op, dict, op_ret, ++ &op_errstr, is_acquired, ++ txn_generation); ++ ++ /* LOCAL VOLUME(S) UNLOCK */ ++ if (is_acquired) { ++ /* Trying to release multiple mgmt_v3 locks */ ++ ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_MGMTV3_UNLOCK_FAIL, ++ "Failed to release mgmt_v3 locks on localhost"); ++ op_ret = ret; ++ } ++ } ++ ++ if (op_ret && (op_errno == 0)) ++ op_errno = EG_INTRNL; ++ ++ if (op != GD_OP_MAX_OPVERSION) { ++ /* SEND CLI RESPONSE */ ++ glusterd_op_send_cli_response (op, op_ret, op_errno, req, ++ dict, op_errstr); ++ } ++ ++ if (req_dict) ++ dict_unref (req_dict); ++ ++ if (tmp_dict) ++ dict_unref (tmp_dict); ++ ++ if (op_errstr) { ++ GF_FREE (op_errstr); ++ op_errstr = NULL; ++ } ++ ++ return 0; ++} ++ ++int32_t + glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) + { +@@ -2465,7 +2671,7 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + goto out; + } + +- ret = glusterd_mgmt_v3_brick_op (op, req_dict, &op_errstr, ++ ret = glusterd_mgmt_v3_brick_op (op, dict, req_dict, &op_errstr, + txn_generation); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, +@@ -2526,7 +2732,7 @@ unbarrier: + goto out; + } + +- ret = glusterd_mgmt_v3_brick_op (op, req_dict, &op_errstr, ++ ret = glusterd_mgmt_v3_brick_op (op, dict, req_dict, &op_errstr, + txn_generation); + + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h +index 2215f17..eff070d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.h ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h +@@ -37,7 +37,11 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + + int32_t +-glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, ++glusterd_mgmt_v3_initiate_profile_phases(rpcsvc_request_t *req, ++ glusterd_op_t op, dict_t *dict); ++ ++int32_t ++glusterd_mgmt_v3_initiate_snap_phases(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + + int +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 52a3db0..9f76ab3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2032,7 +2032,7 @@ glusterd_op_stage_status_volume (dict_t *dict, char **op_errstr) + return ret; + } + +-static int ++int + glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr) + { + int ret = -1; +@@ -3322,7 +3322,7 @@ glusterd_remove_profile_volume_options (glusterd_volinfo_t *volinfo) + dict_del (volinfo->dict, fd_stats_key); + } + +-static int ++int + glusterd_op_stats_volume (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) + { +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h +index f2aee9c..e64d368 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h +@@ -312,4 +312,10 @@ glusterd_dict_set_volid (dict_t *dict, char *volname, char **op_errstr); + + int32_t + glusterd_tier_op (xlator_t *this, void *data); ++ ++int 
++glusterd_op_stats_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict); ++ ++int ++glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr); + #endif +-- +1.8.3.1 + diff --git a/SOURCES/0472-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch b/SOURCES/0472-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch new file mode 100644 index 0000000..2c54457 --- /dev/null +++ b/SOURCES/0472-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch @@ -0,0 +1,72 @@ +From ff97d22a02c02ad5edced2755a799a587b45cb79 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Mon, 17 Dec 2018 14:07:01 +0530 +Subject: [PATCH 472/493] glusterd: introduce a new op-version for rhgs-3.4.3 + +This patch introduces a new op-version 31305 for rhgs-3.4.3 and +sets the max op-version to 31305. + +For migrating profile commands (commit e68845ff7018e5d81d7979684b18e6eda449b088) +we used GD_OP_VERSION_6_0 in upstream. we are changing +it to GD_OP_VERSION_3_13_5 here. + +Label: DOWNSTREAM ONLY + +Change-Id: Ie3a05c70eb4e406889c468343f54e999b1218f19 +BUG: 1639476 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158795 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/globals.h | 5 ++--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 4 ++-- + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index d2b0964..343263c 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -43,7 +43,7 @@ + */ + #define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly + should not change */ +-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_4 /* MAX VERSION is the maximum ++#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_5 /* MAX VERSION is the maximum + count in VME table, should + keep changing with + introduction of newer +@@ -111,12 +111,11 @@ + + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + +-#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */ +- + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/ + #define GD_OP_VERSION_3_13_4 31304 /* Op-version for RHGS-3.4-Batch Update-2*/ ++#define GD_OP_VERSION_3_13_5 31305 /* Op-version for RHGS-3.4-Batch Update-3*/ + + #include "xlator.h" + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 90eaa95..c71bf3c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3113,10 +3113,10 @@ __glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) + goto out; + } + +- if (conf->op_version < GD_OP_VERSION_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_13_5) { + gf_msg_debug(this->name, 0, "The cluster is operating at " + "version less than %d. 
Falling back to op-sm " +- "framework.", GD_OP_VERSION_6_0); ++ "framework.", GD_OP_VERSION_3_13_5); + ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str)); + glusterd_friend_sm(); + glusterd_op_sm(); +-- +1.8.3.1 + diff --git a/SOURCES/0473-rpc-bump-up-server.event-threads.patch b/SOURCES/0473-rpc-bump-up-server.event-threads.patch new file mode 100644 index 0000000..23548ba --- /dev/null +++ b/SOURCES/0473-rpc-bump-up-server.event-threads.patch @@ -0,0 +1,64 @@ +From 63e8fb2b6a3846c3f3a0e1db6275a8c03dbbc6ff Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Thu, 13 Dec 2018 10:00:45 +0530 +Subject: [PATCH 473/493] rpc: bump up server.event-threads + +Problem: +A single event-thread causes performance issues in the system. + +Solution: +Bump up event-threads to 2 to make the system more performant. +This helps in making the system more responsive and helps avoid the +ping-timer-expiry problem as well. However, setting the event-threads +to 2 is not the only thing required to avoid ping-timer-expiry issues. + +NOTE: +NFS xlator option nfs.event-threads does not yet exist here. + +mainline: +> Change-Id: Idb0fd49e078db3bd5085dd083b0cdc77b59ddb00 +> fixes: bz#1653277 +> Signed-off-by: Milind Changire +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/21719 + +Change-Id: Idb0fd49e078db3bd5085dd083b0cdc77b59ddb00 +BUG: 1652537 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/158482 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/glusterfs.h | 2 +- + xlators/protocol/server/src/server.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index 3e2f426..d06d8cf 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -249,7 +249,7 @@ enum gf_internal_fop_indicator { + + #define GLUSTERFS_RPC_REPLY_SIZE 24 + +-#define STARTING_EVENT_THREADS 1 ++#define STARTING_EVENT_THREADS 2 + + #define DEFAULT_VAR_RUN_DIRECTORY DATADIR "/run/gluster" + #define DEFAULT_GLUSTERFSD_MISC_DIRETORY DATADIR "/lib/misc/glusterfsd" +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index 6f510ea..1046152 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -1932,7 +1932,7 @@ struct volume_options options[] = { + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 1024, +- .default_value = "1", ++ .default_value = "2", + .description = "Specifies the number of event threads to execute " + "in parallel. Larger values would help process" + " responses faster, depending on available processing" +-- +1.8.3.1 + diff --git a/SOURCES/0474-afr-open_ftruncate_cbk-should-read-fd-from-local-con.patch b/SOURCES/0474-afr-open_ftruncate_cbk-should-read-fd-from-local-con.patch new file mode 100644 index 0000000..ffd0177 --- /dev/null +++ b/SOURCES/0474-afr-open_ftruncate_cbk-should-read-fd-from-local-con.patch @@ -0,0 +1,45 @@ +From cdedd41ab825bfe59e8d1739fdea625a51f659f9 Mon Sep 17 00:00:00 2001 +From: Soumya Koduri +Date: Fri, 9 Nov 2018 02:29:52 -0500 +Subject: [PATCH 474/493] afr: open_ftruncate_cbk should read fd from + local->cont.open struct + +afr_open stores the fd as part of its local->cont.open struct +but when it calls ftruncate (if open flags contain O_TRUNC), the +corresponding cbk function (afr_ open_ftruncate_cbk) is +incorrectly referencing uninitialized local->fd. 
This patch fixes +the same. + +Upstream reference: +Change-Id: Icbdedbd1b8cfea11d8f41b6e5c4cb4b44d989aba +> updates: bz#1648687 +> review-url: https://review.gluster.org/#/c/glusterfs/+/21617/ + +BUG: 1655578 +updates: bz#1655578 +Signed-off-by: Soumya Koduri +Change-Id: I9c26eadd811fdd32630227f3130dec28e4b6972b +Reviewed-on: https://code.engineering.redhat.com/gerrit/158799 +Tested-by: RHGS Build Bot +Reviewed-by: Karthik Subrahmanya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-open.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c +index 6c625cc..d820462 100644 +--- a/xlators/cluster/afr/src/afr-open.c ++++ b/xlators/cluster/afr/src/afr-open.c +@@ -62,7 +62,7 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + afr_local_t * local = frame->local; + + AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno, +- local->fd, xdata); ++ local->cont.open.fd, xdata); + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0475-glusterd-perform-store-operation-in-cleanup-lock.patch b/SOURCES/0475-glusterd-perform-store-operation-in-cleanup-lock.patch new file mode 100644 index 0000000..f24e47c --- /dev/null +++ b/SOURCES/0475-glusterd-perform-store-operation-in-cleanup-lock.patch @@ -0,0 +1,175 @@ +From db15e8fe12b7148b2da975d915573cb24c4ee1c9 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Thu, 22 Nov 2018 09:58:52 +0530 +Subject: [PATCH 475/493] glusterd: perform store operation in cleanup lock + +All glusterd store operation and cleanup thread should work under a +critical section to avoid any partial store write. + +> Change-Id: I4f12e738f597a1f925c87ea2f42565dcf9ecdb9d +> Fixes: bz#1652430 +> Signed-off-by: Atin Mukherjee + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21702/ + +Change-Id: I4f12e738f597a1f925c87ea2f42565dcf9ecdb9d +BUG: 1654161 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158804 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + glusterfsd/src/glusterfsd.c | 73 ++++++++++++++++-------------- + libglusterfs/src/glusterfs.h | 1 + + xlators/mgmt/glusterd/src/glusterd-store.c | 10 +++- + 3 files changed, 49 insertions(+), 35 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 03bca24..57effbd 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1395,43 +1395,46 @@ cleanup_and_exit (int signum) + if (ctx->cleanup_started) + return; + +- ctx->cleanup_started = 1; ++ pthread_mutex_lock(&ctx->cleanup_lock); ++ { ++ ctx->cleanup_started = 1; + +- /* signout should be sent to all the bricks in case brick mux is enabled +- * and multiple brick instances are attached to this process +- */ +- if (ctx->active) { +- top = ctx->active->first; +- for (trav_p = &top->children; *trav_p; +- trav_p = &(*trav_p)->next) { +- victim = (*trav_p)->xlator; +- glusterfs_mgmt_pmap_signout (ctx, victim->name); ++ /* signout should be sent to all the bricks in case brick mux ++ * is enabled and multiple brick instances are attached to this ++ * process ++ */ ++ if (ctx->active) { ++ top = ctx->active->first; ++ for (trav_p = &top->children; *trav_p; ++ trav_p = &(*trav_p)->next) { ++ victim = (*trav_p)->xlator; ++ glusterfs_mgmt_pmap_signout (ctx, victim->name); ++ } ++ } else { ++ glusterfs_mgmt_pmap_signout (ctx, NULL); + } +- } else { +- glusterfs_mgmt_pmap_signout (ctx, NULL); +- } + +- /* below 
part is a racy code where the rpcsvc object is freed. +- * But in another thread (epoll thread), upon poll error in the +- * socket the transports are cleaned up where again rpcsvc object +- * is accessed (which is already freed by the below function). +- * Since the process is about to be killed dont execute the function +- * below. +- */ +- /* if (ctx->listener) { */ +- /* (void) glusterfs_listener_stop (ctx); */ +- /* } */ ++ /* below part is a racy code where the rpcsvc object is freed. ++ * But in another thread (epoll thread), upon poll error in the ++ * socket the transports are cleaned up where again rpcsvc object ++ * is accessed (which is already freed by the below function). ++ * Since the process is about to be killed dont execute the ++ * function below. ++ */ ++ /* if (ctx->listener) { */ ++ /* (void) glusterfs_listener_stop (ctx); */ ++ /* } */ + +- /* Call fini() of FUSE xlator first: +- * so there are no more requests coming and +- * 'umount' of mount point is done properly */ +- trav = ctx->master; +- if (trav && trav->fini) { +- THIS = trav; +- trav->fini (trav); +- } ++ /* Call fini() of FUSE xlator first: ++ * so there are no more requests coming and ++ * 'umount' of mount point is done properly */ ++ trav = ctx->master; ++ if (trav && trav->fini) { ++ THIS = trav; ++ trav->fini (trav); ++ } + +- glusterfs_pidfile_cleanup (ctx); ++ glusterfs_pidfile_cleanup (ctx); + + #if 0 + /* TODO: Properly do cleanup_and_exit(), with synchronization */ +@@ -1442,8 +1445,9 @@ cleanup_and_exit (int signum) + } + #endif + +- trav = NULL; +- ++ trav = NULL; ++ } ++ pthread_mutex_unlock(&ctx->cleanup_lock); + /* NOTE: Only the least significant 8 bits i.e (signum & 255) + will be available to parent process on calling exit() */ + exit(abs(signum)); +@@ -1598,6 +1602,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) + goto out; + + pthread_mutex_init (&ctx->notify_lock, NULL); ++ pthread_mutex_init(&ctx->cleanup_lock, NULL); + pthread_cond_init (&ctx->notify_cond, NULL); + + ctx->clienttable = gf_clienttable_alloc(); +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index d06d8cf..c12e94e 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -574,6 +574,7 @@ struct _glusterfs_ctx { + char btbuf[GF_BACKTRACE_LEN]; + + pthread_mutex_t notify_lock; ++ pthread_mutex_t cleanup_lock; + pthread_cond_t notify_cond; + int notifying; + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index f276fef..b3c4d9a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -1792,10 +1792,17 @@ out: + int32_t + glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac) + { +- int32_t ret = -1; ++ int32_t ret = -1; ++ glusterfs_ctx_t *ctx = NULL; ++ xlator_t *this = NULL; + ++ this = THIS; ++ GF_ASSERT(this); ++ ctx = this->ctx; ++ GF_ASSERT(ctx); + GF_ASSERT (volinfo); + ++ pthread_mutex_lock(&ctx->cleanup_lock); + pthread_mutex_lock(&volinfo->store_volinfo_lock); + { + glusterd_perform_volinfo_version_action(volinfo, ac); +@@ -1837,6 +1844,7 @@ glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t a + } + unlock: + pthread_mutex_unlock(&volinfo->store_volinfo_lock); ++ pthread_mutex_unlock(&ctx->cleanup_lock); + if (ret) + glusterd_store_volume_cleanup_tmp(volinfo); + +-- +1.8.3.1 + diff --git a/SOURCES/0476-afr-add-checks-for-allowing-lookups.patch 
b/SOURCES/0476-afr-add-checks-for-allowing-lookups.patch new file mode 100644 index 0000000..6adfb0f --- /dev/null +++ b/SOURCES/0476-afr-add-checks-for-allowing-lookups.patch @@ -0,0 +1,610 @@ +From e3f6fc1ccff95145b94aea9405cd136ada9000bc Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Wed, 16 Aug 2017 18:01:17 +0530 +Subject: [PATCH 476/493] afr: add checks for allowing lookups + +Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/17673/ + +Problem: +In an arbiter volume, lookup was being served from one of the sink +bricks (source brick was down). shard uses the iatt values from lookup cbk +to calculate the size and block count, which in this case were incorrect +values. shard_local_t->last_block was thus initialised to -1, resulting +in an infinite while loop in shard_common_resolve_shards(). + +Fix: +Use client quorum logic to allow or fail the lookups from afr if there +are no readable subvolumes. So in replica-3 or arbiter vols, if there is +no good copy or if quorum is not met, fail lookup with ENOTCONN. + +With this fix, we are also removing support for quorum-reads xlator +option. So if quorum is not met, neither read nor write txns are allowed +and we fail the fop with ENOTCONN. + +Change-Id: Ic65c00c24f77ece007328b421494eee62a505fa0 +BUG: 1362129 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/158650 +Tested-by: RHGS Build Bot +Reviewed-by: Karthik Subrahmanya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/afr/quorum.t | 23 ---- + tests/bugs/replicate/bug-977797.t | 2 + + xlators/cluster/afr/src/afr-common.c | 244 +++++++++++++++++++++------------ + xlators/cluster/afr/src/afr-read-txn.c | 3 +- + xlators/cluster/afr/src/afr.c | 7 +- + xlators/cluster/afr/src/afr.h | 1 - + 6 files changed, 164 insertions(+), 116 deletions(-) + +diff --git a/tests/basic/afr/quorum.t b/tests/basic/afr/quorum.t +index 252e254..58116ba 100644 +--- a/tests/basic/afr/quorum.t ++++ b/tests/basic/afr/quorum.t +@@ -31,11 +31,7 @@ TEST $CLI volume set $V0 cluster.quorum-count 2 + TEST test_write + TEST kill_brick $V0 $H0 $B0/${V0}1 + TEST ! test_write +-EXPECT "abc" cat $M0/b +-TEST $CLI volume set $V0 cluster.quorum-reads on +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads + TEST ! cat $M0/b +-TEST $CLI volume reset $V0 cluster.quorum-reads + + TEST $CLI volume set $V0 cluster.quorum-type auto + EXPECT auto volume_option $V0 cluster.quorum-type +@@ -44,11 +40,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + TEST test_write + TEST kill_brick $V0 $H0 $B0/${V0}1 + TEST ! test_write +-EXPECT "abc" cat $M0/b +-TEST $CLI volume set $V0 cluster.quorum-reads on +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads + TEST ! cat $M0/b +-TEST $CLI volume reset $V0 cluster.quorum-reads + + TEST $CLI volume set $V0 cluster.quorum-type none + EXPECT none volume_option $V0 cluster.quorum-type +@@ -57,11 +49,6 @@ TEST test_write + TEST $CLI volume reset $V0 cluster.quorum-type + TEST test_write + EXPECT "abc" cat $M0/b +-TEST $CLI volume set $V0 cluster.quorum-reads on +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads +-EXPECT "abc" cat $M0/b +-TEST $CLI volume reset $V0 cluster.quorum-reads +- + + cleanup; + TEST glusterd; +@@ -86,24 +73,14 @@ TEST $CLI volume set $V0 cluster.quorum-count 3 + TEST test_write + TEST kill_brick $V0 $H0 $B0/${V0}1 + TEST ! 
test_write +-EXPECT "abc" cat $M0/b +-TEST $CLI volume set $V0 cluster.quorum-reads on +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads + TEST ! cat $M0/b +-TEST $CLI volume reset $V0 cluster.quorum-reads +- + + TEST $CLI volume set $V0 cluster.quorum-type auto + EXPECT auto volume_option $V0 cluster.quorum-type + TEST test_write + TEST kill_brick $V0 $H0 $B0/${V0}3 + TEST ! test_write +-EXPECT "abc" cat $M0/b +-TEST $CLI volume set $V0 cluster.quorum-reads on +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads + TEST ! cat $M0/b +-TEST $CLI volume reset $V0 cluster.quorum-reads +- + + TEST $CLI volume set $V0 cluster.quorum-type none + EXPECT none volume_option $V0 cluster.quorum-type +diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t +index ea9a98a..fee8205 100755 +--- a/tests/bugs/replicate/bug-977797.t ++++ b/tests/bugs/replicate/bug-977797.t +@@ -53,6 +53,8 @@ TEST chmod 757 $M0/a/file + TEST $CLI volume start $V0 force + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1; + ++#Trigger entry heal of $M0/a ++getfattr -n user.nosuchattr $M0/a + dd if=$M0/a/file of=/dev/null bs=1024k + #read fails, but heal is triggered. + TEST [ $? -ne 0 ] +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 10d9620..231de9d 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -47,9 +47,7 @@ + int32_t + afr_quorum_errno (afr_private_t *priv) + { +- if (priv->quorum_reads) +- return ENOTCONN; +- return EROFS; ++ return ENOTCONN; + } + + int +@@ -1154,8 +1152,6 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode, + return ret; + } + +- +- + int + afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque) + { +@@ -1726,6 +1722,29 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this, + return ret; + } + ++void ++afr_readables_intersect_get (inode_t *inode, xlator_t *this, int *event, ++ unsigned char *intersection) ++{ ++ afr_private_t *priv = NULL; ++ unsigned char *data_readable = NULL; ++ unsigned char *metadata_readable = NULL; ++ unsigned char *intersect = NULL; ++ ++ priv = this->private; ++ data_readable = alloca0 (priv->child_count); ++ metadata_readable = alloca0 (priv->child_count); ++ intersect = alloca0 (priv->child_count); ++ ++ afr_inode_read_subvol_get (inode, this, data_readable, ++ metadata_readable, event); ++ ++ AFR_INTERSECT (intersect, data_readable, metadata_readable, ++ priv->child_count); ++ if (intersection) ++ memcpy (intersection, intersect, ++ sizeof (*intersection) * priv->child_count); ++} + + int + afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, +@@ -1734,8 +1753,6 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, + afr_read_subvol_args_t *args) + { + afr_private_t *priv = NULL; +- unsigned char *data_readable = NULL; +- unsigned char *metadata_readable = NULL; + unsigned char *readable = NULL; + unsigned char *intersection = NULL; + int subvol = -1; +@@ -1744,17 +1761,11 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, + priv = this->private; + + readable = alloca0 (priv->child_count); +- data_readable = alloca0 (priv->child_count); +- metadata_readable = alloca0 (priv->child_count); + intersection = alloca0 (priv->child_count); + + afr_inode_read_subvol_type_get (inode, this, readable, &event, type); + +- afr_inode_read_subvol_get 
(inode, this, data_readable, metadata_readable, +- &event); +- +- AFR_INTERSECT (intersection, data_readable, metadata_readable, +- priv->child_count); ++ afr_readables_intersect_get (inode, this, &event, intersection); + + if (AFR_COUNT (intersection, priv->child_count) > 0) + subvol = afr_read_subvol_select_by_policy (inode, this, +@@ -2188,18 +2199,28 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent, + + int + afr_read_subvol_decide (inode_t *inode, xlator_t *this, +- afr_read_subvol_args_t *args) ++ afr_read_subvol_args_t *args, unsigned char *readable) + { +- int data_subvol = -1; +- int mdata_subvol = -1; ++ int event = 0; ++ afr_private_t *priv = NULL; ++ unsigned char *intersection = NULL; ++ ++ priv = this->private; ++ intersection = alloca0 (priv->child_count); ++ ++ afr_readables_intersect_get (inode, this, &event, intersection); + +- data_subvol = afr_data_subvol_get (inode, this, NULL, NULL, NULL, args); +- mdata_subvol = afr_metadata_subvol_get (inode, this, +- NULL, NULL, NULL, args); +- if (data_subvol == -1 || mdata_subvol == -1) ++ if (AFR_COUNT (intersection, priv->child_count) <= 0) { ++ /* TODO: If we have one brick with valid data_readable and ++ * another with metadata_readable, try to send an iatt with ++ * valid bits from both.*/ + return -1; ++ } + +- return data_subvol; ++ memcpy (readable, intersection, sizeof (*readable) * priv->child_count); ++ ++ return afr_read_subvol_select_by_policy (inode, this, intersection, ++ args); + } + + static inline int +@@ -2216,7 +2237,49 @@ afr_first_up_child (call_frame_t *frame, xlator_t *this) + if (local->replies[i].valid && + local->replies[i].op_ret == 0) + return i; +- return 0; ++ return -1; ++} ++ ++static void ++afr_attempt_readsubvol_set (call_frame_t *frame, xlator_t *this, ++ unsigned char *success_replies, ++ unsigned char *data_readable, int *read_subvol) ++{ ++ afr_private_t *priv = NULL; ++ afr_local_t *local = NULL; ++ int spb_choice = -1; ++ int child_count = -1; ++ ++ if (*read_subvol != -1) ++ return; ++ ++ priv = this->private; ++ local = frame->local; ++ child_count = priv->child_count; ++ ++ afr_inode_split_brain_choice_get (local->inode, this, ++ &spb_choice); ++ if ((spb_choice >= 0) && ++ (AFR_COUNT(success_replies, child_count) == child_count)) { ++ *read_subvol = spb_choice; ++ } else if (!priv->quorum_count) { ++ *read_subvol = afr_first_up_child (frame, this); ++ } else if (priv->quorum_count && ++ afr_has_quorum (data_readable, this)) { ++ /* read_subvol is guaranteed to be valid if we hit this path. */ ++ *read_subvol = afr_first_up_child (frame, this); ++ } else { ++ /* If quorum is enabled and we do not have a ++ readable yet, it means all good copies are down. 
++ */ ++ local->op_ret = -1; ++ local->op_errno = ENOTCONN; ++ gf_msg (this->name, GF_LOG_WARNING, 0, ++ AFR_MSG_READ_SUBVOL_ERROR, "no read " ++ "subvols for %s", local->loc.path); ++ } ++ if (*read_subvol >= 0) ++ dict_del (local->replies[*read_subvol].xdata, GF_CONTENT_KEY); + } + + static void +@@ -2230,13 +2293,13 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) + int par_read_subvol = 0; + int ret = -1; + unsigned char *readable = NULL; ++ unsigned char *success_replies = NULL; + int event = 0; + struct afr_reply *replies = NULL; + uuid_t read_gfid = {0, }; + gf_boolean_t locked_entry = _gf_false; + gf_boolean_t can_interpret = _gf_true; + inode_t *parent = NULL; +- int spb_choice = -1; + ia_type_t ia_type = IA_INVAL; + afr_read_subvol_args_t args = {0,}; + char *gfid_heal_msg = NULL; +@@ -2250,11 +2313,12 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) + this); + + readable = alloca0 (priv->child_count); ++ success_replies = alloca0 (priv->child_count); + + afr_inode_read_subvol_get (parent, this, readable, NULL, &event); ++ par_read_subvol = afr_get_parent_read_subvol (this, parent, replies, ++ readable); + +- afr_inode_split_brain_choice_get (local->inode, this, +- &spb_choice); + /* First, check if we have a gfid-change from somewhere, + If so, propagate that so that a fresh lookup can be + issued +@@ -2262,13 +2326,17 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) + if (local->cont.lookup.needs_fresh_lookup) { + local->op_ret = -1; + local->op_errno = ESTALE; +- goto unwind; ++ goto error; + } + + op_errno = afr_final_errno (frame->local, this->private); + local->op_errno = op_errno; + + read_subvol = -1; ++ for (i = 0; i < priv->child_count; i++) ++ if (replies[i].valid && replies[i].op_ret == 0) ++ success_replies[i] = 1; ++ + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; +@@ -2277,9 +2345,9 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) + replies[i].op_errno == ENOENT) { + /* Second, check entry is still + "underway" in creation */ +- local->op_ret = -1; +- local->op_errno = ENOENT; +- goto unwind; ++ local->op_ret = -1; ++ local->op_errno = ENOENT; ++ goto error; + } + + if (replies[i].op_ret == -1) +@@ -2293,8 +2361,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) + } + } + +- if (read_subvol == -1) +- goto unwind; ++ if (read_subvol == -1) ++ goto error; + /* We now have a read_subvol, which is readable[] (if there + were any). Next we look for GFID mismatches. We don't + consider a GFID mismatch as an error if read_subvol is +@@ -2318,58 +2386,61 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) + if (readable[read_subvol] && !readable[i]) + continue; + ++ /* If we were called from glfsheal and there is still a gfid ++ * mismatch, succeed the lookup and let glfsheal print the ++ * response via gfid-heal-msg.*/ ++ if (!dict_get_str (local->xattr_req, "gfid-heal-msg", ++ &gfid_heal_msg)) ++ goto cant_interpret; ++ + /* LOG ERROR */ + local->op_ret = -1; + local->op_errno = EIO; +- goto unwind; ++ goto error; + } + + /* Forth, for the finalized GFID, pick the best subvolume + to return stats from. 
+ */ ++ read_subvol = -1; ++ memset (readable, 0, sizeof (*readable) * priv->child_count); + if (can_interpret) { ++ if (!afr_has_quorum (success_replies, this)) ++ goto cant_interpret; + /* It is safe to call afr_replies_interpret() because we have + a response from all the UP subvolumes and all of them resolved + to the same GFID + */ + gf_uuid_copy (args.gfid, read_gfid); + args.ia_type = ia_type; +- if (afr_replies_interpret (frame, this, local->inode, NULL)) { +- read_subvol = afr_read_subvol_decide (local->inode, +- this, &args); ++ ret = afr_replies_interpret (frame, this, local->inode, NULL); ++ read_subvol = afr_read_subvol_decide (local->inode, this, &args, ++ readable); ++ if (read_subvol == -1) ++ goto cant_interpret; ++ if (ret) { + afr_inode_event_gen_reset (local->inode, this); +- goto cant_interpret; +- } else { +- read_subvol = afr_data_subvol_get (local->inode, this, +- NULL, NULL, NULL, &args); +- } ++ dict_del (local->replies[read_subvol].xdata, ++ GF_CONTENT_KEY); ++ } + } else { + cant_interpret: ++ afr_attempt_readsubvol_set (frame, this, success_replies, ++ readable, &read_subvol); + if (read_subvol == -1) { +- if (spb_choice >= 0) +- read_subvol = spb_choice; +- else +- read_subvol = afr_first_up_child (frame, this); ++ goto error; + } +- dict_del (replies[read_subvol].xdata, GF_CONTENT_KEY); + } + + afr_handle_quota_size (frame, this); + +-unwind: + afr_set_need_heal (this, local); +- if (read_subvol == -1) { +- if (spb_choice >= 0) +- read_subvol = spb_choice; +- else +- read_subvol = afr_first_up_child (frame, this); +- +- } +- par_read_subvol = afr_get_parent_read_subvol (this, parent, replies, +- readable); + if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) { +- local->op_ret = -1; +- local->op_errno = ENOTCONN; ++ local->op_ret = -1; ++ local->op_errno = ENOTCONN; ++ gf_msg_debug(this->name, 0, "Arbiter cannot be a read subvol " ++ "for %s", local->loc.path); ++ goto error; + } + + ret = dict_get_str (local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); +@@ -2389,6 +2460,11 @@ unwind: + local->inode, &local->replies[read_subvol].poststat, + local->replies[read_subvol].xdata, + &local->replies[par_read_subvol].postparent); ++ return; ++ ++error: ++ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, NULL, ++ NULL, NULL, NULL); + } + + /* +@@ -2904,55 +2980,54 @@ afr_discover_done (call_frame_t *frame, xlator_t *this) + afr_local_t *local = NULL; + int i = -1; + int op_errno = 0; +- int spb_choice = -1; + int read_subvol = -1; ++ unsigned char *data_readable = NULL; ++ unsigned char *success_replies = NULL; + + priv = this->private; + local = frame->local; +- +- afr_inode_split_brain_choice_get (local->inode, this, +- &spb_choice); ++ data_readable = alloca0 (priv->child_count); ++ success_replies = alloca0 (priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; +- if (local->replies[i].op_ret == 0) ++ if (local->replies[i].op_ret == 0) { ++ success_replies[i] = 1; + local->op_ret = 0; ++ } + } + + op_errno = afr_final_errno (frame->local, this->private); + + if (local->op_ret < 0) { +- local->op_errno = op_errno; +- local->op_ret = -1; +- goto unwind; ++ AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, ++ NULL, NULL); ++ return; + } + +- afr_replies_interpret (frame, this, local->inode, NULL); ++ if (!afr_has_quorum (success_replies, this)) ++ goto unwind; + +- read_subvol = afr_read_subvol_decide (local->inode, this, NULL); +- if (read_subvol == -1) { +- gf_msg 
(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_READ_SUBVOL_ERROR, "no read subvols for %s", +- local->loc.path); ++ afr_replies_interpret (frame, this, local->inode, NULL); + +- if (spb_choice >= 0) { +- read_subvol = spb_choice; +- } else { +- read_subvol = afr_first_up_child (frame, this); +- } +- } ++ read_subvol = afr_read_subvol_decide (local->inode, this, NULL, ++ data_readable); + + unwind: ++ afr_attempt_readsubvol_set (frame, this, success_replies, data_readable, ++ &read_subvol); + if (read_subvol == -1) { +- if (spb_choice >= 0) +- read_subvol = spb_choice; +- else +- read_subvol = afr_first_up_child (frame, this); ++ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, ++ NULL, NULL, NULL, NULL); ++ return; + } ++ + if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) { +- local->op_ret = -1; +- local->op_errno = ENOTCONN; ++ local->op_ret = -1; ++ local->op_errno = ENOTCONN; ++ gf_msg_debug (this->name, 0, "Arbiter cannot be a read subvol " ++ "for %s", local->loc.path); + } + + AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, +@@ -4646,7 +4721,6 @@ afr_priv_dump (xlator_t *this) + gf_proc_dump_write("read_child", "%d", priv->read_child); + gf_proc_dump_write("favorite_child", "%d", priv->favorite_child); + gf_proc_dump_write("wait_count", "%u", priv->wait_count); +- gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads); + gf_proc_dump_write("heal-wait-queue-length", "%d", + priv->heal_wait_qlen); + gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters); +diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c +index 50e8040..f6c491b 100644 +--- a/xlators/cluster/afr/src/afr-read-txn.c ++++ b/xlators/cluster/afr/src/afr-read-txn.c +@@ -193,8 +193,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, + local->inode = inode_ref (inode); + local->is_read_txn = _gf_true; + +- if (priv->quorum_reads && +- priv->quorum_count && !afr_has_quorum (priv->child_up, this)) { ++ if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) { + local->op_ret = -1; + local->op_errno = ENOTCONN; + read_subvol = -1; +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 0122b7f..1b738c0 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -267,8 +267,6 @@ reconfigure (xlator_t *this, dict_t *options) + GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options, + int32, out); + +- GF_OPTION_RECONF ("quorum-reads", priv->quorum_reads, options, +- bool, out); + GF_OPTION_RECONF ("consistent-metadata", priv->consistent_metadata, + options, bool, out); + +@@ -531,7 +529,6 @@ init (xlator_t *this) + GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out); + GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out); + +- GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out); + GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool, + out); + GF_OPTION_INIT ("consistent-io", priv->consistent_io, bool, out); +@@ -965,8 +962,8 @@ struct volume_options options[] = { + { .key = {"quorum-reads"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", +- .description = "If quorum-reads is \"true\" only allow reads if " +- "quorum is met when quorum is enabled.", ++ .description = "This option has been removed. 
Reads are not allowed " ++ "if quorum is not met.", + }, + { .key = {"node-uuid"}, + .type = GF_OPTION_TYPE_STR, +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index af9dbc8..7010e9b 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -131,7 +131,6 @@ typedef struct _afr_private { + gf_boolean_t pre_op_compat; /* on/off */ + uint32_t post_op_delay_secs; + unsigned int quorum_count; +- gf_boolean_t quorum_reads; + + char vol_uuid[UUID_SIZE + 1]; + int32_t *last_event; +-- +1.8.3.1 + diff --git a/SOURCES/0477-glusterd-perform-rcu_read_lock-unlock-under-cleanup_.patch b/SOURCES/0477-glusterd-perform-rcu_read_lock-unlock-under-cleanup_.patch new file mode 100644 index 0000000..a8c39ec --- /dev/null +++ b/SOURCES/0477-glusterd-perform-rcu_read_lock-unlock-under-cleanup_.patch @@ -0,0 +1,1808 @@ +From c0d88596bda4eb5c7e942e621a7d38c7ae6d737a Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 28 Nov 2018 16:13:58 +0530 +Subject: [PATCH 477/493] glusterd: perform rcu_read_lock/unlock() under + cleanup_lock mutex + +Problem: glusterd should not try to acquire locks on any resources, +when it already received a SIGTERM and cleanup is started. Otherwise +we might hit segfault, since the thread which is going through +cleanup path will be freeing up the resouces and some other thread +might be trying to acquire locks on freed resources. + +Solution: perform rcu_read_lock/unlock() under cleanup_lock mutex. + +> fixes: bz#1654270 +> Change-Id: I87a97cfe4f272f74f246d688660934638911ce54 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21743/ + +Change-Id: I87a97cfe4f272f74f246d688660934638911ce54 +BUG: 1654161 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158647 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 8 +-- + xlators/mgmt/glusterd/src/glusterd-handler.c | 75 +++++++++++----------- + xlators/mgmt/glusterd/src/glusterd-handshake.c | 32 ++++----- + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 28 ++++---- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 30 ++++----- + xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 40 ++++++------ + xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-reset-brick.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 48 +++++++------- + xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-sm.c | 64 +++++++++--------- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 6 +- + xlators/mgmt/glusterd/src/glusterd-store.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 40 ++++++------ + xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +-- + xlators/mgmt/glusterd/src/glusterd.h | 20 +++++- + 16 files changed, 215 insertions(+), 200 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index 416412e..5ad8ab8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2097,7 +2097,7 @@ check: + continue; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find_by_uuid + (brickinfo->uuid); + if (!peerinfo) { +@@ -2105,7 +2105,7 @@ check: + "brick %s is not in cluster", brick); + *errstr = gf_strdup (msg); + ret = -1; +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + if (!peerinfo->connected) { +@@ -2113,10 
+2113,10 @@ check: + "brick %s is down", brick); + *errstr = gf_strdup (msg); + ret = -1; +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + } + + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index c71bf3c..d40de89 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -105,7 +105,7 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, + + ret = glusterd_remote_hostname_get (req, rhost, sizeof (rhost)); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (uuid, rhost); + +@@ -179,7 +179,7 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, + ret = GLUSTERD_CONNECTION_AWAITED; + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret && (ret != GLUSTERD_CONNECTION_AWAITED)) { + if (ctx && ctx->hostname) +@@ -198,7 +198,6 @@ out: + GF_FREE (event); + } + +- + return ret; + } + +@@ -214,7 +213,7 @@ glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid, + if (!port) + port = GF_DEFAULT_BASE_PORT; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (uuid, hostname); + +@@ -269,7 +268,7 @@ glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid, + ret = 0; + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 != ret) { + if (ctx && ctx->hostname) +@@ -902,9 +901,9 @@ __glusterd_handle_cluster_lock (rpcsvc_request_t *req) + gf_msg_debug (this->name, 0, "Received LOCK from uuid: %s", + uuid_utoa (lock_req.uuid)); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find_by_uuid (lock_req.uuid) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_PEER_NOT_FOUND, "%s doesn't " +@@ -1060,9 +1059,9 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) + gf_msg_debug (this->name, 0, "transaction ID = %s", + uuid_utoa (*txn_id)); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_PEER_NOT_FOUND, "%s doesn't " +@@ -1144,9 +1143,9 @@ __glusterd_handle_commit_op (rpcsvc_request_t *req) + goto out; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_PEER_NOT_FOUND, "%s doesn't " +@@ -1270,12 +1269,12 @@ __glusterd_handle_cli_probe (rpcsvc_request_t *req) + goto out; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_hostname (hostname); + ret = (peerinfo && gd_peer_has_address (peerinfo, hostname)); + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret) { + gf_msg_debug ("glusterd", 0, "Probe host %s port %d " +@@ -2329,7 +2328,7 @@ __glusterd_handle_fsm_log (rpcsvc_request_t *req) + conf = this->private; + ret = glusterd_sm_tr_log_add_to_dict (dict, &conf->op_sm_log); + } else { +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_hostname (cli_req.name); + if (!peerinfo) { +@@ -2341,7 +2340,7 @@ __glusterd_handle_fsm_log (rpcsvc_request_t *req) + (dict, &peerinfo->sm_log); + } + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + } + + out: +@@ -2482,9 +2481,9 @@ __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) + gf_msg_debug (this->name, 0, + "Received 
UNLOCK from uuid: %s", uuid_utoa (unlock_req.uuid)); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find_by_uuid (unlock_req.uuid) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_PEER_NOT_FOUND, "%s doesn't " +@@ -2786,11 +2785,11 @@ __glusterd_handle_friend_update (rpcsvc_request_t *req) + } + + ret = 0; +- rcu_read_lock (); ++ RCU_READ_LOCK; + if (glusterd_peerinfo_find (friend_req.uuid, NULL) == NULL) { + ret = -1; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + GD_MSG_REQ_FROM_UNKNOWN_PEER, +@@ -2856,7 +2855,7 @@ __glusterd_handle_friend_update (rpcsvc_request_t *req) + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "friend%d", i); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (uuid, NULL); + if (peerinfo == NULL) { + /* Create a new peer and add it to the list as there is +@@ -2903,7 +2902,7 @@ __glusterd_handle_friend_update (rpcsvc_request_t *req) + } + } + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) + break; + +@@ -3002,7 +3001,7 @@ __glusterd_handle_probe_query (rpcsvc_request_t *req) + goto out; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (probe_req.uuid, remote_hostname); + if ((peerinfo == NULL) && (!cds_list_empty (&conf->peers))) { + rsp.op_ret = -1; +@@ -3024,7 +3023,7 @@ __glusterd_handle_probe_query (rpcsvc_request_t *req) + rsp.op_errno = GF_PROBE_ADD_FAILED; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + respond: + gf_uuid_copy (rsp.uuid, MY_UUID); +@@ -3370,11 +3369,11 @@ glusterd_friend_remove (uuid_t uuid, char *hostname) + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (uuid, hostname); + if (peerinfo == NULL) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + +@@ -3382,7 +3381,7 @@ glusterd_friend_remove (uuid_t uuid, char *hostname) + if (ret) + gf_msg (THIS->name, GF_LOG_WARNING, 0, + GD_MSG_VOL_CLEANUP_FAIL, "Volumes cleanup failed"); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + /* Giving up the critical section here as glusterd_peerinfo_cleanup must + * be called from outside a critical section + */ +@@ -3715,7 +3714,7 @@ glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, + + GF_ASSERT (hoststr); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (NULL, hoststr); + + if (peerinfo == NULL) { +@@ -3763,7 +3762,7 @@ glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, + } + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + gf_msg_debug ("glusterd", 0, "returning %d", ret); + return ret; + } +@@ -3780,7 +3779,7 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, + GF_ASSERT (hoststr); + GF_ASSERT (req); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (uuid, hoststr); + if (peerinfo == NULL) { +@@ -3840,7 +3839,7 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, + peerinfo->detaching = _gf_true; + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + return ret; + } + +@@ -4162,7 +4161,7 @@ glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags) + + /* Reset ret to 0, needed to prevent failure incase no peers exist */ + ret = 0; +- rcu_read_lock (); ++ RCU_READ_LOCK; + if (!cds_list_empty (&priv->peers)) { + cds_list_for_each_entry_rcu 
(entry, &priv->peers, uuid_list) { + count++; +@@ -4173,7 +4172,7 @@ glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags) + } + } + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) + goto out; + +@@ -5592,7 +5591,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict) + if (priv->opts) + dict_foreach (priv->opts, glusterd_print_global_options, fp); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + fprintf (fp, "\n[Peers]\n"); + + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { +@@ -5621,7 +5620,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict) + count_bkp = 0; + fprintf (fp, "\n"); + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + count = 0; + fprintf (fp, "\n[Volumes]\n"); +@@ -6259,7 +6258,7 @@ glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx, int32_t op_errno) + + GF_ASSERT (peerctx); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); + if (!peerinfo) { + gf_msg_debug (THIS->name, 0, "Could not find peer %s(%s). " +@@ -6300,7 +6299,7 @@ glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx, int32_t op_errno) + } + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + return ret; + } + +@@ -6340,7 +6339,7 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + peerctx->peername); + return 0; + } +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); + if (!peerinfo) { +@@ -6466,7 +6465,7 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + } + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + glusterd_friend_sm (); + glusterd_op_sm (); +diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c +index b2a9b20..d18a7a3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c +@@ -1140,9 +1140,9 @@ gd_validate_mgmt_hndsk_req (rpcsvc_request_t *req, dict_t *dict) + */ + if (!ret) { + gf_uuid_parse (uuid_str, peer_uuid); +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find (peer_uuid, NULL) != NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) + return _gf_true; + } +@@ -1158,7 +1158,7 @@ gd_validate_mgmt_hndsk_req (rpcsvc_request_t *req, dict_t *dict) + * is available in the peerinfo list but the uuid has changed of the + * node due to a reinstall, in that case the validation should fail! 
+ */ +- rcu_read_lock (); ++ RCU_READ_LOCK; + if (!uuid_str) { + ret = (glusterd_peerinfo_find (NULL, hostname) == NULL); + } else { +@@ -1177,7 +1177,7 @@ gd_validate_mgmt_hndsk_req (rpcsvc_request_t *req, dict_t *dict) + ret = -1; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_HANDSHAKE_REQ_REJECTED, "Rejecting management " +@@ -1728,7 +1728,7 @@ glusterd_event_connected_inject (glusterd_peerctx_t *peerctx) + goto out; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); + if (!peerinfo) { +@@ -1754,7 +1754,7 @@ glusterd_event_connected_inject (glusterd_peerctx_t *peerctx) + GD_MSG_EVENT_INJECT_FAIL, "Unable to inject " + "EVENT_CONNECTED ret = %d", ret); + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + out: + gf_msg_debug ("glusterd", 0, "returning %d", ret); +@@ -1824,7 +1824,7 @@ __glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, + frame = myframe; + peerctx = frame->local; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); + if (!peerinfo) { + gf_msg_debug (this->name, 0, "Could not find peer %s(%s)", +@@ -1887,7 +1887,7 @@ out: + if (ret != 0 && peerinfo) + rpc_transport_disconnect (peerinfo->rpc->conn.trans, _gf_false); + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + frame->local = NULL; + STACK_DESTROY (frame->root); +@@ -1930,7 +1930,7 @@ __glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, + frame = myframe; + peerctx = frame->local; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); + if (!peerinfo) { +@@ -2014,7 +2014,7 @@ out: + _gf_false); + } + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (rsp.hndsk.hndsk_val) + free (rsp.hndsk.hndsk_val); +@@ -2070,7 +2070,7 @@ glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx) + GF_PROTOCOL_DICT_SERIALIZE (this, req_dict, (&req.hndsk.hndsk_val), + req.hndsk.hndsk_len, ret, out); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); + if (!peerinfo) { +@@ -2086,7 +2086,7 @@ glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx) + (xdrproc_t)xdr_gf_mgmt_hndsk_req); + ret = 0; + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + out: + if (ret && frame) + STACK_DESTROY (frame->root); +@@ -2202,7 +2202,7 @@ __glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, + frame = myframe; + peerctx = frame->local; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); + if (!peerinfo) { +@@ -2282,7 +2282,7 @@ out: + if (ret != 0 && peerinfo) + rpc_transport_disconnect (peerinfo->rpc->conn.trans, _gf_false); + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + glusterd_friend_sm (); + glusterd_op_sm (); +@@ -2330,7 +2330,7 @@ glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc, + if (!peerctx) + goto out; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation (peerctx->peerinfo_gen); + if (!peerinfo) { +@@ -2347,7 +2347,7 @@ glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc, + glusterd_peer_dump_version_cbk, + (xdrproc_t)xdr_gf_dump_req); + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + out: + if (ret && frame) + STACK_DESTROY (frame->root); +diff --git 
a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index 751d6e4..d98c6bc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -52,14 +52,14 @@ gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + args->op_ret = op_ret; + args->op_errno = op_errno; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (peerid, NULL); + if (peerinfo) + peer_str = gf_strdup (peerinfo->hostname); + else + peer_str = gf_strdup (uuid_utoa (uuid)); + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + is_operrstr_blk = (op_errstr && strcmp (op_errstr, "")); + err_string = (is_operrstr_blk) ? op_errstr : err_str; +@@ -761,7 +761,7 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict, + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -779,7 +779,7 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict, + MY_UUID, peer_uuid); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +@@ -1126,7 +1126,7 @@ glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict, + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -1144,7 +1144,7 @@ glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict, + MY_UUID, peer_uuid); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +@@ -1401,7 +1401,7 @@ glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *rsp_dict, dict_t *req_dict, + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -1419,7 +1419,7 @@ glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *rsp_dict, dict_t *req_dict, + MY_UUID, peer_uuid); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +@@ -1667,7 +1667,7 @@ glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -1702,7 +1702,7 @@ glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + MY_UUID, peer_uuid); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +@@ -1912,7 +1912,7 @@ glusterd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *dict, + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -1930,7 +1930,7 @@ glusterd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *dict, + &args, MY_UUID, peer_uuid); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +@@ -2094,7 
+2094,7 @@ glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op, dict_t *dict, + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -2112,7 +2112,7 @@ glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op, dict_t *dict, + MY_UUID, peer_uuid); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 9f76ab3..6414a4e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -1825,7 +1825,7 @@ glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr) + ret = 0; + } + } else { +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (NULL, hostname); + if (peerinfo == NULL) { +@@ -1841,7 +1841,7 @@ glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr) + ret = -1; + } + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + } + + out: +@@ -3964,7 +3964,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) + priv = this->private; + GF_ASSERT (priv); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -3985,7 +3985,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_LOCK_REQ_SEND_FAIL, + "Failed to send lock request " +@@ -4009,7 +4009,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set peerinfo"); +@@ -4019,7 +4019,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) + + ret = proc->fn (NULL, this, dict); + if (ret) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + gf_msg (this->name, GF_LOG_WARNING, 0, + GD_MSG_MGMTV3_LOCK_REQ_SEND_FAIL, + "Failed to send mgmt_v3 lock " +@@ -4036,7 +4036,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) + } + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + opinfo.pending_count = pending_count; + +@@ -4074,7 +4074,7 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) + priv = this->private; + GF_ASSERT (priv); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -4152,7 +4152,7 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) + } + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + opinfo.pending_count = pending_count; + +@@ -4762,7 +4762,7 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) + goto out; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -4781,7 +4781,7 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) + if (proc->fn) { + ret = 
dict_set_static_ptr (dict, "peerinfo", peerinfo); + if (ret) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, "failed to " + "set peerinfo"); +@@ -4800,7 +4800,7 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) + pending_count++; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + opinfo.pending_count = pending_count; + out: +@@ -5413,7 +5413,7 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) + goto out; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -5432,7 +5432,7 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); + if (ret) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to set peerinfo"); +@@ -5451,7 +5451,7 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) + pending_count++; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + opinfo.pending_count = pending_count; + gf_msg_debug (this->name, 0, "Sent commit op req for 'Volume %s' " +diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +index 592aa16..6ed5831 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +@@ -190,7 +190,7 @@ glusterd_peerinfo_find_by_uuid (uuid_t uuid) + if (gf_uuid_is_null (uuid)) + return NULL; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (entry, &priv->peers, uuid_list) { + if (!gf_uuid_compare (entry->uuid, uuid)) { + +@@ -201,7 +201,7 @@ glusterd_peerinfo_find_by_uuid (uuid_t uuid) + break; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (!found) + gf_msg_debug (this->name, 0, +@@ -330,7 +330,7 @@ glusterd_chk_peers_connected_befriended (uuid_t skip_uuid) + priv= THIS->private; + GF_ASSERT (priv); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + + if (!gf_uuid_is_null (skip_uuid) && !gf_uuid_compare (skip_uuid, +@@ -343,7 +343,7 @@ glusterd_chk_peers_connected_befriended (uuid_t skip_uuid) + break; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + gf_msg_debug (THIS->name, 0, "Returning %s", + (ret?"TRUE":"FALSE")); +@@ -366,7 +366,7 @@ glusterd_uuid_to_hostname (uuid_t uuid) + if (!gf_uuid_compare (MY_UUID, uuid)) { + hostname = gf_strdup ("localhost"); + } +- rcu_read_lock (); ++ RCU_READ_LOCK; + if (!cds_list_empty (&priv->peers)) { + cds_list_for_each_entry_rcu (entry, &priv->peers, uuid_list) { + if (!gf_uuid_compare (entry->uuid, uuid)) { +@@ -375,7 +375,7 @@ glusterd_uuid_to_hostname (uuid_t uuid) + } + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + return hostname; + } +@@ -406,14 +406,14 @@ glusterd_are_all_peers_up () + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + if (!peerinfo->connected) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + peers_up = _gf_true; + +@@ -434,7 +434,7 @@ glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + +- 
rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, peers, uuid_list) { + if (gf_uuid_compare (peerinfo->uuid, brickinfo->uuid)) + continue; +@@ -447,11 +447,11 @@ glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + *down_peerstr = gf_strdup (peerinfo->hostname); + gf_msg_debug (THIS->name, 0, "Peer %s is down. ", + peerinfo->hostname); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + } + + ret = _gf_true; +@@ -664,7 +664,7 @@ gd_peerinfo_find_from_hostname (const char *hoststr) + + GF_VALIDATE_OR_GOTO (this->name, (hoststr != NULL), out); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peer, &priv->peers, uuid_list) { + cds_list_for_each_entry_rcu (tmphost, &peer->hostnames, + hostname_list) { +@@ -679,7 +679,7 @@ gd_peerinfo_find_from_hostname (const char *hoststr) + } + } + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + out: + return found; + } +@@ -713,7 +713,7 @@ gd_peerinfo_find_from_addrinfo (const struct addrinfo *addr) + + GF_VALIDATE_OR_GOTO (this->name, (addr != NULL), out); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) { + cds_list_for_each_entry_rcu (address, &peer->hostnames, + hostname_list) { +@@ -747,7 +747,7 @@ gd_peerinfo_find_from_addrinfo (const struct addrinfo *addr) + } + } + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + out: + return found; + } +@@ -1014,7 +1014,7 @@ glusterd_peerinfo_find_by_generation (uint32_t generation) { + + GF_ASSERT (priv); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (entry, &priv->peers, uuid_list) { + if (entry->generation == generation) { + +@@ -1025,7 +1025,7 @@ glusterd_peerinfo_find_by_generation (uint32_t generation) { + break; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (!found) + gf_msg_debug (this->name, 0, +@@ -1047,10 +1047,10 @@ glusterd_get_peers_count () { + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) + count++; +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + out: + return count; +diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +index 5fc3669..f9ad524 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c ++++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +@@ -278,7 +278,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, + } + + if (!gf_is_local_addr (host)) { +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (NULL, host); + if (peerinfo == NULL) { +@@ -300,7 +300,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, + *op_errstr = gf_strdup (msg); + ret = -1; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret) + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c +index c1de043..60c5716 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c ++++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c +@@ -165,7 +165,7 @@ glusterd_reset_brick_prevalidate (dict_t *dict, char **op_errstr, + if (ret) + goto out; + } else { +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (NULL, host); + if (peerinfo == NULL) { +@@ -190,7 +190,7 @@ glusterd_reset_brick_prevalidate (dict_t *dict, char **op_errstr, + *op_errstr = gf_strdup (msg); 
+ ret = -1; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret) + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +index 86e1256..c669240 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +@@ -280,7 +280,7 @@ __glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, + goto out; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (rsp.uuid, rsp.hostname); + if (peerinfo == NULL) { + ret = -1; +@@ -422,7 +422,7 @@ cont: + GD_MSG_PROBE_REQ_RESP_RCVD, "Received resp to probe req"); + + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + out: + free (rsp.hostname);//malloced by xdr +@@ -485,7 +485,7 @@ __glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov, + "Received %s from uuid: %s, host: %s, port: %d", + (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (rsp.uuid, rsp.hostname); + if (peerinfo == NULL) { +@@ -527,7 +527,7 @@ __glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov, + ret = glusterd_friend_sm_inject_event (event); + + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + out: + ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; +@@ -605,7 +605,7 @@ __glusterd_friend_remove_cbk (struct rpc_req * req, struct iovec *iov, + (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port); + + inject: +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (rsp.uuid, ctx->hostname); + if (peerinfo == NULL) { +@@ -640,7 +640,7 @@ inject: + op_ret = 0; + + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + respond: + ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, NULL, +@@ -769,9 +769,9 @@ __glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, + uuid_utoa (rsp.uuid)); + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, +@@ -889,9 +889,9 @@ glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, + uuid_utoa (rsp.uuid)); + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, +@@ -1000,9 +1000,9 @@ glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, + uuid_utoa (rsp.uuid)); + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, +@@ -1109,9 +1109,9 @@ __glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, + uuid_utoa (rsp.uuid)); + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, +@@ -1239,7 +1239,7 @@ out: + uuid_utoa (rsp.uuid)); + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL); + if (peerinfo == NULL) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, +@@ -1268,7 +1268,7 @@ out: + event_type = GD_OP_EVENT_RCVD_ACC; + } + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + + ret = glusterd_set_txn_opinfo (txn_id, 
&opinfo); +@@ -1399,7 +1399,7 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + "for txn_id = %s", uuid_utoa (*txn_id)); + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL); + if (peerinfo == NULL) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, +@@ -1450,7 +1450,7 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + } + } + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + out: + +@@ -1554,11 +1554,11 @@ glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, + + GF_ASSERT (priv); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)", +@@ -1570,7 +1570,7 @@ glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, + req.hostname = gf_strdup (peerinfo->hostname); + req.port = peerinfo->port; + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + ret = glusterd_add_volumes_to_export_dict (&peer_data); + if (ret) { +@@ -1653,11 +1653,11 @@ glusterd_rpc_friend_remove (call_frame_t *frame, xlator_t *this, + + GF_ASSERT (priv); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)", +@@ -1674,7 +1674,7 @@ glusterd_rpc_friend_remove (call_frame_t *frame, xlator_t *this, + this, glusterd_friend_remove_cbk, + (xdrproc_t)xdr_gd1_mgmt_friend_req); + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + out: + GF_FREE (req.hostname); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c +index b01bfaa..ef97bfd 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c ++++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c +@@ -224,14 +224,14 @@ glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, + if (active_count) + *active_count = 1; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + if (_is_contributing_to_quorum (peerinfo->quorum_contrib)) + inquorum_count = inquorum_count + 1; + if (active_count && (peerinfo->quorum_contrib == QUORUM_UP)) + *active_count = *active_count + 1; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val); + if (ret == 0) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 6c56837..a2ef9f7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -157,7 +157,7 @@ glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid) + if (ret) + goto out; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + if (!peerinfo->connected || !peerinfo->peer) + continue; +@@ -180,7 +180,7 @@ glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid) + } + } + unlock: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + gf_msg_debug ("glusterd", 0, "Returning with %d", ret); + +@@ -224,7 +224,7 @@ glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx) + GF_ASSERT (event); + GF_ASSERT (ctx); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find 
(event->peerid, event->peername); + if (!peerinfo) { +@@ -271,7 +271,7 @@ glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx) + } + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret) { + if (new_event) +@@ -302,7 +302,7 @@ glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx) + + GF_ASSERT (conf); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { +@@ -326,7 +326,7 @@ glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx) + } + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (ret && frame) + STACK_DESTROY (frame->root); +@@ -359,7 +359,7 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) + + GF_ASSERT (conf); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (NULL, probe_ctx->hostname); + if (peerinfo == NULL) { + //We should not reach this state ideally +@@ -406,7 +406,7 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) + } + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (dict) + dict_unref (dict); +@@ -439,7 +439,7 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, + + GF_ASSERT (conf); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { +@@ -489,7 +489,7 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, + } + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + gf_msg_debug ("glusterd", 0, "Returning with %d", ret); + +@@ -533,7 +533,7 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx) + + GF_ASSERT (priv); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + cur_peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!cur_peerinfo) { +@@ -596,7 +596,7 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx) + gf_msg_debug ("glusterd", 0, "Returning with %d", ret); + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (friends) + dict_unref (friends); +@@ -628,7 +628,7 @@ glusterd_ac_update_friend (glusterd_friend_sm_event_t *event, void *ctx) + + GF_ASSERT (priv); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + cur_peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!cur_peerinfo) { +@@ -690,7 +690,7 @@ glusterd_ac_update_friend (glusterd_friend_sm_event_t *event, void *ctx) + gf_msg_debug (this->name, 0, "Returning with %d", ret); + + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (friends) + dict_unref (friends); +@@ -790,13 +790,13 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, + ret = glusterd_xfer_friend_remove_resp (ev_ctx->req, ev_ctx->hostname, + ev_ctx->port); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + + ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND, + &new_event); + if (ret) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + +@@ -805,13 +805,13 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, + + ret = glusterd_friend_sm_inject_event (new_event); + if (ret) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + + new_event = NULL; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + glusterd_peer_detach_cleanup (priv); + out: +@@ -831,7 +831,7 @@ glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx) + + GF_ASSERT (event); + +- 
rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { +@@ -839,7 +839,7 @@ glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx) + GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + ret = glusterd_friend_remove_cleanup_vols (peerinfo->uuid); +@@ -847,7 +847,7 @@ glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx) + gf_msg (THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL, + "Volumes cleanup failed"); + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + /* Exiting read critical section as glusterd_peerinfo_cleanup calls + * synchronize_rcu before freeing the peerinfo + */ +@@ -896,14 +896,14 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) + ev_ctx = ctx; + gf_uuid_copy (uuid, ev_ctx->uuid); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_PEER_NOT_FOUND, "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + ret = -1; +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + +@@ -913,7 +913,7 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) + */ + gf_uuid_copy (peerinfo->uuid, ev_ctx->uuid); + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + conf = this->private; + GF_ASSERT (conf); +@@ -1032,7 +1032,7 @@ glusterd_friend_sm_transition_state (uuid_t peerid, char *peername, + GF_ASSERT (state); + GF_ASSERT (peername); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (peerid, peername); + if (!peerinfo) { + goto out; +@@ -1047,7 +1047,7 @@ glusterd_friend_sm_transition_state (uuid_t peerid, char *peername, + + ret = 0; + out: +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + return ret; + } + +@@ -1357,7 +1357,7 @@ glusterd_friend_sm () + cds_list_del_init (&event->list); + event_type = event->event; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find (event->peerid, + event->peername); +@@ -1368,7 +1368,7 @@ glusterd_friend_sm () + glusterd_friend_sm_event_name_get (event_type)); + + GF_FREE (event); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + continue; + } + gf_msg_debug ("glusterd", 0, "Dequeued event of type: '%s'", +@@ -1377,7 +1377,7 @@ glusterd_friend_sm () + + old_state = peerinfo->state.state; + +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + /* Giving up read-critical section here as we only need + * the current state to call the handler. + * +@@ -1435,11 +1435,11 @@ glusterd_friend_sm () + /* We need to obtain peerinfo reference once again as we + * had exited the read critical section above. + */ +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (event->peerid, + event->peername); + if (!peerinfo) { +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + /* A peer can only be deleted as a effect of + * this state machine, and two such state + * machines can never run at the same time. 
+@@ -1463,7 +1463,7 @@ glusterd_friend_sm () + } + + ret = glusterd_store_peerinfo (peerinfo); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + glusterd_destroy_friend_event_context (event); + GF_FREE (event); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index 830a67f..3c362e1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -185,7 +185,7 @@ glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + continue; + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, peers, uuid_list) { + if (gf_uuid_compare (peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesnt belong to this peer */ +@@ -210,12 +210,12 @@ glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + goto out; + } + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + brick_count++; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index b3c4d9a..1db2c7c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -4593,13 +4593,13 @@ glusterd_store_retrieve_peers (xlator_t *this) + + args.mode = GD_MODE_ON; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + ret = glusterd_friend_rpc_create (this, peerinfo, &args); + if (ret) + break; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + peerinfo = NULL; + + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 5aaa7f8..9a67d1c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -52,13 +52,13 @@ gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, + args->op_ret = op_ret; + args->op_errno = op_errno; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (peerid, NULL); + if (peerinfo) + peer_str = gf_strdup (peerinfo->hostname); + else + peer_str = gf_strdup (uuid_utoa (uuid)); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, +@@ -571,7 +571,7 @@ _gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, + + gf_uuid_copy (args->uuid, rsp.uuid); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (*peerid, NULL); + if (peerinfo) { + /* Set peer as locked, so we unlock only the locked peers */ +@@ -584,7 +584,7 @@ _gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, + "Could not find peer with " + "ID %s", uuid_utoa (*peerid)); + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +@@ -670,7 +670,7 @@ _gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov, + + gf_uuid_copy (args->uuid, rsp.uuid); + +- rcu_read_lock (); ++ RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find (*peerid, NULL); + if (peerinfo) { + peerinfo->locked = _gf_false; +@@ -680,7 +680,7 @@ _gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov, + GD_MSG_PEER_NOT_FOUND, "Could not find peer with " + "ID %s", uuid_utoa (*peerid)); + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +@@ -780,9 +780,9 @@ _gd_syncop_stage_op_cbk (struct 
rpc_req *req, struct iovec *iov, + } + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) { + ret = -1; + gf_msg (this->name, GF_LOG_CRITICAL, 0, +@@ -1110,9 +1110,9 @@ _gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + } + } + +- rcu_read_lock (); ++ RCU_READ_LOCK; + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == 0); +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + if (ret) { + ret = -1; + gf_msg (this->name, GF_LOG_CRITICAL, 0, +@@ -1227,7 +1227,7 @@ gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -1252,7 +1252,7 @@ gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + MY_UUID, peer_uuid, txn_id); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +@@ -1360,7 +1360,7 @@ stage_done: + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -1379,7 +1379,7 @@ stage_done: + op, req_dict, op_ctx); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +@@ -1491,7 +1491,7 @@ commit_done: + synctask_barrier_init((&args)); + peer_cnt = 0; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started +@@ -1510,7 +1510,7 @@ commit_done: + op, req_dict, op_ctx); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; +@@ -1568,7 +1568,7 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, + peer_cnt = 0; + + if (cluster_lock) { +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, + uuid_list) { + /* Only send requests to peers who were available before +@@ -1590,7 +1590,7 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, + peer_cnt++; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + } else { + + ret = dict_get_int32 (op_ctx, "hold_global_locks", &global); +@@ -1599,7 +1599,7 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, + else + type = "vol"; + if (volname || global) { +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, + uuid_list) { + /* Only send requests to peers who were +@@ -1620,7 +1620,7 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, + tmp_uuid, txn_id); + peer_cnt++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index d789c53..2290343 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -10934,7 +10934,7 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) + node_uuid_str = gf_strdup (node_uuid); + + /* Finding the index of the node-uuid in the peer-list */ +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, 
&conf->peers, + uuid_list) { + peer_uuid_str = gd_peer_uuid_str (peerinfo); +@@ -10943,7 +10943,7 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) + + current_index++; + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + /* Setting the largest index value as the total count. */ + ret = dict_get_int32 (ctx_dict, "count", &count); +@@ -13716,7 +13716,7 @@ glusterd_count_connected_peers (int32_t *count) + + *count = 1; + +- rcu_read_lock (); ++ RCU_READ_LOCK; + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Find peer who is connected and is a friend */ + if ((peerinfo->connected) && +@@ -13724,7 +13724,7 @@ glusterd_count_connected_peers (int32_t *count) + (*count)++; + } + } +- rcu_read_unlock (); ++ RCU_READ_UNLOCK; + + ret = 0; + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index cbdca52..42c8821 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -804,6 +804,22 @@ do { \ + *snap_volname_ptr = '\0'; \ + } while (0) + ++#define RCU_READ_LOCK do { \ ++ pthread_mutex_lock(&(THIS->ctx)->cleanup_lock); \ ++ { \ ++ rcu_read_lock(); \ ++ } \ ++ pthread_mutex_unlock(&(THIS->ctx)->cleanup_lock); \ ++ } while (0) ++ ++#define RCU_READ_UNLOCK do { \ ++ pthread_mutex_lock(&(THIS->ctx)->cleanup_lock); \ ++ { \ ++ rcu_read_unlock(); \ ++ } \ ++ pthread_mutex_unlock(&(THIS->ctx)->cleanup_lock); \ ++ } while (0) ++ + #define GLUSTERD_DUMP_PEERS(head, member, xpeers) do { \ + glusterd_peerinfo_t *_peerinfo = NULL; \ + int index = 1; \ +@@ -815,7 +831,7 @@ do { \ + snprintf (key, sizeof (key), \ + "glusterd.xaction_peer"); \ + \ +- rcu_read_lock (); \ ++ RCU_READ_LOCK; \ + cds_list_for_each_entry_rcu (_peerinfo, head, member) { \ + glusterd_dump_peer (_peerinfo, key, index, xpeers); \ + if (!xpeers) \ +@@ -823,7 +839,7 @@ do { \ + index); \ + index++; \ + } \ +- rcu_read_unlock (); \ ++ RCU_READ_UNLOCK; \ + \ + } while (0) + +-- +1.8.3.1 + diff --git a/SOURCES/0478-libglusterfs-fix-memory-corruption-caused-by-per-thr.patch b/SOURCES/0478-libglusterfs-fix-memory-corruption-caused-by-per-thr.patch new file mode 100644 index 0000000..d3a3376 --- /dev/null +++ b/SOURCES/0478-libglusterfs-fix-memory-corruption-caused-by-per-thr.patch @@ -0,0 +1,289 @@ +From a5471a84069631ab0d0605cf7b68f16285f5079f Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 14 Dec 2018 11:26:36 +0100 +Subject: [PATCH 478/493] libglusterfs: fix memory corruption caused by + per-thread mem pools + +There was a race in the per-thread memory pool management that could lead +to memory corruption. The race appeared when the following sequence of +events happened: + +1. Thread T1 allocated a memory object O1 from its own private pool P1 +2. T1 terminates and P1 is marked to be destroyed +3. The mem-sweeper thread is woken up and scans all private pools +4. It detects that P1 needs to be destroyed and starts releasing the + objects from hot and cold lists. +5. Thread T2 releases O1 +6. O1 is added to the hot list of P1 + +The problem happens because steps 4 and 6 are protected by diferent locks, +so they can run concurrently. This means that both T1 and T2 are modifying +the same list at the same time, potentially causing corruption. + +This patch fixes the problem using the following approach: + +1. When an object is released, it's only returned to the hot list of the + corresponding memory pool if it's not marked to be destroyed. Otherwise + the memory is released to the system. +2. 
Object release and mem-sweeper thread synchronize access to the deletion + mark of the memory pool to prevent simultaneous access to the list. + +Some other minor adjustments are made to reduce the lengths of the locked +regions. + +This patch is not 100% identical to upstream version because changes +coming from https://github.com/gluster/glusterfs/issues/307 are not +backported. + +Upstream patch: https://review.gluster.org/c/glusterfs/+/21583 +> Fixes: bz#1651165 +> Change-Id: I63be3893f92096e57f54a6150e0461340084ddde +> Signed-off-by: Xavi Hernandez + +Upstream patch: https://review.gluster.org/c/glusterfs/+/21727 +> Change-Id: Idbf23bda7f9228d60c644a1bea4b6c2cfc582090 +> updates: bz#1193929 +> Signed-off-by: Xavi Hernandez + +Change-Id: I63be3893f92096e57f54a6150e0461340084ddde +BUG: 1647499 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/158658 +Tested-by: RHGS Build Bot +Reviewed-by: Pranith Kumar Karampuri +--- + libglusterfs/src/mem-pool.c | 137 ++++++++++++++++++++++++++------------------ + 1 file changed, 81 insertions(+), 56 deletions(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index ba29137..8ff261c 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -411,37 +411,34 @@ static unsigned int init_count = 0; + static pthread_t sweeper_tid; + + +-void ++gf_boolean_t + collect_garbage (sweep_state_t *state, per_thread_pool_list_t *pool_list) + { + unsigned int i; + per_thread_pool_t *pt_pool; +- +- if (pool_list->poison) { +- list_del (&pool_list->thr_list); +- list_add (&pool_list->thr_list, &state->death_row); +- return; +- } +- +- if (state->n_cold_lists >= N_COLD_LISTS) { +- return; +- } ++ gf_boolean_t poisoned; + + (void) pthread_spin_lock (&pool_list->lock); +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- if (pt_pool->cold_list) { +- state->cold_lists[state->n_cold_lists++] +- = pt_pool->cold_list; +- } +- pt_pool->cold_list = pt_pool->hot_list; +- pt_pool->hot_list = NULL; +- if (state->n_cold_lists >= N_COLD_LISTS) { +- /* We'll just catch up on a future pass. */ +- break; ++ ++ poisoned = pool_list->poison != 0; ++ if (!poisoned) { ++ for (i = 0; i < NPOOLS; ++i) { ++ pt_pool = &pool_list->pools[i]; ++ if (pt_pool->cold_list) { ++ if (state->n_cold_lists >= N_COLD_LISTS) { ++ break; ++ } ++ state->cold_lists[state->n_cold_lists++] ++ = pt_pool->cold_list; ++ } ++ pt_pool->cold_list = pt_pool->hot_list; ++ pt_pool->hot_list = NULL; + } + } ++ + (void) pthread_spin_unlock (&pool_list->lock); ++ ++ return poisoned; + } + + +@@ -469,6 +466,7 @@ pool_sweeper (void *arg) + struct timeval begin_time; + struct timeval end_time; + struct timeval elapsed; ++ gf_boolean_t poisoned; + + /* + * This is all a bit inelegant, but the point is to avoid doing +@@ -488,7 +486,13 @@ pool_sweeper (void *arg) + (void) pthread_mutex_lock (&pool_lock); + list_for_each_entry_safe (pool_list, next_pl, + &pool_threads, thr_list) { +- collect_garbage (&state, pool_list); ++ (void) pthread_mutex_unlock (&pool_lock); ++ poisoned = collect_garbage (&state, pool_list); ++ (void) pthread_mutex_lock (&pool_lock); ++ if (poisoned) { ++ list_move(&pool_list->thr_list, ++ &state.death_row); ++ } + } + (void) pthread_mutex_unlock (&pool_lock); + (void) gettimeofday (&end_time, NULL); +@@ -525,7 +529,15 @@ pool_destructor (void *arg) + { + per_thread_pool_list_t *pool_list = arg; + +- /* The pool-sweeper thread will take it from here. 
*/ ++ /* The pool-sweeper thread will take it from here. ++ * ++ * We can change 'poison' here without taking locks because the change ++ * itself doesn't interact with other parts of the code and a simple ++ * write is already atomic from the point of view of the processor. ++ * ++ * This change can modify what mem_put() does, but both possibilities ++ * are fine until the sweeper thread kicks in. The real synchronization ++ * must be between mem_put() and the sweeper thread. */ + pool_list->poison = 1; + } + +@@ -736,7 +748,7 @@ mem_get_pool_list (void) + (void) pthread_mutex_unlock (&pool_free_lock); + + if (!pool_list) { +- pool_list = CALLOC (pool_list_size, 1); ++ pool_list = MALLOC (pool_list_size); + if (!pool_list) { + return NULL; + } +@@ -761,26 +773,47 @@ mem_get_pool_list (void) + } + + pooled_obj_hdr_t * +-mem_get_from_pool (per_thread_pool_t *pt_pool) ++mem_get_from_pool (struct mem_pool *mem_pool) + { ++ per_thread_pool_list_t *pool_list; ++ per_thread_pool_t *pt_pool; + pooled_obj_hdr_t *retval; + ++ pool_list = mem_get_pool_list (); ++ if (!pool_list || pool_list->poison) { ++ return NULL; ++ } ++ ++ pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST]; ++ ++ (void) pthread_spin_lock (&pool_list->lock); ++ + retval = pt_pool->hot_list; + if (retval) { +- GF_ATOMIC_INC (pt_pool->parent->allocs_hot); + pt_pool->hot_list = retval->next; +- return retval; ++ (void) pthread_spin_unlock (&pool_list->lock); ++ GF_ATOMIC_INC (pt_pool->parent->allocs_hot); ++ } else { ++ retval = pt_pool->cold_list; ++ if (retval) { ++ pt_pool->cold_list = retval->next; ++ (void) pthread_spin_unlock (&pool_list->lock); ++ GF_ATOMIC_INC (pt_pool->parent->allocs_cold); ++ } else { ++ (void) pthread_spin_unlock (&pool_list->lock); ++ GF_ATOMIC_INC (pt_pool->parent->allocs_stdc); ++ retval = malloc (1 << mem_pool->power_of_two); ++ } + } + +- retval = pt_pool->cold_list; + if (retval) { +- GF_ATOMIC_INC (pt_pool->parent->allocs_cold); +- pt_pool->cold_list = retval->next; +- return retval; ++ retval->magic = GF_MEM_HEADER_MAGIC; ++ retval->next = NULL; ++ retval->pool_list = pool_list; ++ retval->power_of_two = mem_pool->power_of_two; + } + +- GF_ATOMIC_INC (pt_pool->parent->allocs_stdc); +- return malloc (1 << pt_pool->parent->power_of_two); ++ return retval; + } + + +@@ -791,8 +824,6 @@ mem_get (struct mem_pool *mem_pool) + return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->power_of_two), + gf_common_mt_mem_pool); + #else +- per_thread_pool_list_t *pool_list; +- per_thread_pool_t *pt_pool; + pooled_obj_hdr_t *retval; + + if (!mem_pool) { +@@ -801,25 +832,11 @@ mem_get (struct mem_pool *mem_pool) + return NULL; + } + +- pool_list = mem_get_pool_list (); +- if (!pool_list || pool_list->poison) { +- return NULL; +- } +- +- (void) pthread_spin_lock (&pool_list->lock); +- pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST]; +- retval = mem_get_from_pool (pt_pool); +- (void) pthread_spin_unlock (&pool_list->lock); +- ++ retval = mem_get_from_pool (mem_pool); + if (!retval) { + return NULL; + } + +- retval->magic = GF_MEM_HEADER_MAGIC; +- retval->next = NULL; +- retval->pool_list = pool_list;; +- retval->power_of_two = mem_pool->power_of_two; +- + return retval + 1; + #endif /* GF_DISABLE_MEMPOOL */ + } +@@ -849,12 +866,20 @@ mem_put (void *ptr) + pool_list = hdr->pool_list; + pt_pool = &pool_list->pools[hdr->power_of_two-POOL_SMALLEST]; + +- (void) pthread_spin_lock (&pool_list->lock); + hdr->magic = GF_MEM_INVALID_MAGIC; +- hdr->next = pt_pool->hot_list; +- pt_pool->hot_list = hdr; 
+- GF_ATOMIC_INC (pt_pool->parent->frees_to_list); +- (void) pthread_spin_unlock (&pool_list->lock); ++ ++ (void) pthread_spin_lock (&pool_list->lock); ++ if (!pool_list->poison) { ++ hdr->next = pt_pool->hot_list; ++ pt_pool->hot_list = hdr; ++ (void) pthread_spin_unlock (&pool_list->lock); ++ GF_ATOMIC_INC (pt_pool->parent->frees_to_list); ++ } else { ++ (void) pthread_spin_unlock (&pool_list->lock); ++ /* If the owner thread of this element has terminated, we ++ * simply release its memory. */ ++ free(hdr); ++ } + #endif /* GF_DISABLE_MEMPOOL */ + } + +-- +1.8.3.1 + diff --git a/SOURCES/0479-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch b/SOURCES/0479-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch new file mode 100644 index 0000000..a541716 --- /dev/null +++ b/SOURCES/0479-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch @@ -0,0 +1,51 @@ +From e42fcda7ca4becd4e14b36c6318ed6c3a3068783 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" +Date: Tue, 11 Dec 2018 10:09:42 -0500 +Subject: [PATCH 479/493] ganesha-ha: ensure pacemaker is enabled after setup + +There appears to be a race between `pcs cluster setup ...` early +in the setup and the `systemctl enable pacemaker` at the end. The +`pcs cluster setup ...` disables pacemaker and corosync. (Now, in +pacemaker-1.1.18. Was it always the case?) + +I am not able to reproduce this on my devel system. I speculate that +on a busy system that the `pcs cluster setup ...` disable may, under +the right conditions, not run until after the setup script enables +it. It must require the right alignment of the Sun, Moon, and all +the planets. + +Regardless, we'll use the --enable option to `pcs cluster setup ...` +to ensure that the cluster (re)starts pacemaker. + +Label: DOWNSTREAM ONLY + +Change-Id: I771ff62c37426438b80e61651a8b4ecaf2d549c3 +BUG: 1637564 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/158294 +Tested-by: RHGS Build Bot +Reviewed-by: Soumya Koduri +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/scripts/ganesha-ha.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 5cdafad..5a7f5ae 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -195,9 +195,9 @@ setup_cluster() + + pcs cluster auth ${servers} + # pcs cluster setup --name ${name} ${servers} +- pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers} ++ pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} + if [ $? 
-ne 0 ]; then +- logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" ++ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} failed" + #set up failed stop all ganesha process and clean up symlinks in cluster + stop_ganesha_all "${servers}" + exit 1; +-- +1.8.3.1 + diff --git a/SOURCES/0480-geo-rep-Make-slave-volume-read-only-by-default.patch b/SOURCES/0480-geo-rep-Make-slave-volume-read-only-by-default.patch new file mode 100644 index 0000000..017b1bd --- /dev/null +++ b/SOURCES/0480-geo-rep-Make-slave-volume-read-only-by-default.patch @@ -0,0 +1,101 @@ +From ecb5c63d41daf98c3bca73d2ab24aa77e1b34886 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Mon, 17 Dec 2018 12:56:33 +0530 +Subject: [PATCH 480/493] geo-rep: Make slave volume read-only (by default) + +Added a command to set "features.read-only" option +to a default value "on" for slave volume. +Changes are made in: +$SRC//extras/hook-scripts/S56glusterd-geo-rep-create-post.sh +for root geo-rep and +$SRC/geo-replication/src/set_geo_rep_pem_keys.sh +for non-root geo-rep. + +Upstream patch : https://review.gluster.org/#/c/glusterfs/+/21739 +>Fixes: bz#1654187 +>Signed-off-by: Harpreet Kaur + +Change-Id: I15beeae3506f3f6b1dcba0a5c50b6344fd468c7c +BUG: 1643370 +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/158790 +Tested-by: RHGS Build Bot +Reviewed-by: Kotresh Hiremath Ravishankar +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/hook-scripts/S56glusterd-geo-rep-create-post.sh | 1 + + geo-replication/src/set_geo_rep_pem_keys.sh | 1 + + tests/geo-rep.rc | 7 +++++++ + tests/geo-rep/georep-basic-dr-rsync.t | 3 +++ + tests/geo-rep/georep-basic-dr-tarssh.t | 3 +++ + 5 files changed, 15 insertions(+) + +diff --git a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh +index a5e472e..589c263 100755 +--- a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh ++++ b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh +@@ -90,5 +90,6 @@ if [ -f $pub_file ]; then + ssh -p ${SSH_PORT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}" + ssh -p ${SSH_PORT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null" + ssh -p ${SSH_PORT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null" ++ ssh -p ${SSH_PORT} $slave_ip "gluster vol set ${slavevol} features.read-only on" + fi + fi +diff --git a/geo-replication/src/set_geo_rep_pem_keys.sh b/geo-replication/src/set_geo_rep_pem_keys.sh +index ae23f4f..8a43fa3 100755 +--- a/geo-replication/src/set_geo_rep_pem_keys.sh ++++ b/geo-replication/src/set_geo_rep_pem_keys.sh +@@ -47,6 +47,7 @@ function main() + cp $home_dir/${COMMON_SECRET_PEM_PUB} ${GLUSTERD_WORKDIR}/geo-replication/ + gluster system:: copy file /geo-replication/${COMMON_SECRET_PEM_PUB} + gluster system:: execute add_secret_pub $user geo-replication/${master_vol}_${slave_vol}_common_secret.pem.pub ++ gluster vol set ${slave_vol} features.read-only on + else + echo "$home_dir/common_secret.pem.pub not present. 
Please run geo-replication command on master with push-pem option to generate the file" + exit 1; +diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc +index 1a44b4a..f9ab3fc 100644 +--- a/tests/geo-rep.rc ++++ b/tests/geo-rep.rc +@@ -230,3 +230,10 @@ function hardlink_rename_ok() + fi + return 0 + } ++ ++function check_slave_read_only() ++{ ++ volum=$1 ++ gluster volume info $1 | grep 'features.read-only: on' ++ echo $? ++} +diff --git a/tests/geo-rep/georep-basic-dr-rsync.t b/tests/geo-rep/georep-basic-dr-rsync.t +index 39da524..fd0c4aa 100644 +--- a/tests/geo-rep/georep-basic-dr-rsync.t ++++ b/tests/geo-rep/georep-basic-dr-rsync.t +@@ -57,6 +57,9 @@ EXPECT 4 check_status_num_rows "Created" #15 + #Enable_metavolume + TEST $GEOREP_CLI $master $slave config use_meta_volume true + ++#Verify "features.read-only" Option ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0 ++ + #Start_georep + TEST $GEOREP_CLI $master $slave start + +diff --git a/tests/geo-rep/georep-basic-dr-tarssh.t b/tests/geo-rep/georep-basic-dr-tarssh.t +index 5f879db..5331df9 100644 +--- a/tests/geo-rep/georep-basic-dr-tarssh.t ++++ b/tests/geo-rep/georep-basic-dr-tarssh.t +@@ -62,6 +62,9 @@ TEST $GEOREP_CLI $master $slave config use_meta_volume true + #Config tarssh as sync-engine + TEST $GEOREP_CLI $master $slave config use_tarssh true + ++#Verify "features.read-only" Option ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0 ++ + #Start_georep + TEST $GEOREP_CLI $master $slave start + +-- +1.8.3.1 + diff --git a/SOURCES/0481-extras-hooks-Do-not-blindly-remove-volume-share-from.patch b/SOURCES/0481-extras-hooks-Do-not-blindly-remove-volume-share-from.patch new file mode 100644 index 0000000..f055981 --- /dev/null +++ b/SOURCES/0481-extras-hooks-Do-not-blindly-remove-volume-share-from.patch @@ -0,0 +1,102 @@ +From 644bcd954000b77fc8f49e3a7941de23ca869427 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Wed, 21 Mar 2018 13:09:01 +0530 +Subject: [PATCH 481/493] extras/hooks: Do not blindly remove volume share from + smb.conf + +When Gluster volumes are shared via Samba, any extra smb.conf parameter +setting done by administrator to those shares are lost during restart +of the volume. Instead of removing the whole share completely from +smb.conf(via hook scripts during volume stop) it is better to make it +temporarily unavailable to end-users till the volume is started again. +Therefore we make use of a smb.conf parameter named 'available'[1] to +achieve the above intend. 
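As a rough illustration of the intended effect (the volume name and smb.conf location are assumed here, not taken from the patch): a stopped volume's share section now stays in smb.conf but is flagged unavailable, and starting the volume clears the flag again.

    gluster volume stop myvol
    grep -A2 '^\[gluster-myvol\]' /etc/samba/smb.conf   # share section is still present ...
    grep 'available = no' /etc/samba/smb.conf           # ... but marked unavailable to clients
    gluster volume start myvol                          # start hook removes 'available = no'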
+ +[1] https://www.samba.org/samba/docs/current/man-html/smb.conf.5.html + +upstream ref: https://review.gluster.org/c/glusterfs/+/19750 + +Change-Id: I68a9055b50791f6ffd3b95a3c13d858a75fa6530 +BUG: 1164778 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/158495 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/hook-scripts/set/post/S30samba-set.sh | 11 ++++++----- + extras/hook-scripts/start/post/S30samba-start.sh | 4 +++- + extras/hook-scripts/stop/pre/S30samba-stop.sh | 6 +++--- + 3 files changed, 12 insertions(+), 9 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh +index b93415b..c596db0 100755 +--- a/extras/hook-scripts/set/post/S30samba-set.sh ++++ b/extras/hook-scripts/set/post/S30samba-set.sh +@@ -103,9 +103,9 @@ function sighup_samba () { + fi + } + +-function del_samba_share () { ++function deactivate_samba_share () { + volname=$1 +- sed -i "/\[gluster-$volname\]/,/^$/d" ${CONFIGFILE} ++ sed -i -e '/^\[gluster-'"$volname"'\]/{ :a' -e 'n; /available = no/H; /^$/!{$!ba;}; x; /./!{ s/^/available = no/; $!{G;x}; $H; }; s/.*//; x; };' ${CONFIGFILE} + } + + function is_volume_started () { +@@ -140,12 +140,13 @@ if [ "$USERCIFS_SET" = "YES" ] || [ "$USERSMB_SET" = "YES" ]; then + find_config_info + + if [ "$(get_smb "$VOL")" = "disable" ]; then +- del_samba_share $VOL +- sighup_samba ++ deactivate_samba_share $VOL + else + if ! grep --quiet "\[gluster-$VOL\]" ${CONFIGFILE} ; then + add_samba_share $VOL +- sighup_samba ++ else ++ sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' ${CONFIGFILE} + fi + fi ++ sighup_samba + fi +diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh +index 92ddaf4..5d586ee 100755 +--- a/extras/hook-scripts/start/post/S30samba-start.sh ++++ b/extras/hook-scripts/start/post/S30samba-start.sh +@@ -127,5 +127,7 @@ find_config_info + + if ! 
grep --quiet "\[gluster-$VOL\]" ${CONFIGFILE} ; then + add_samba_share $VOL +- sighup_samba ++else ++ sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' ${CONFIGFILE} + fi ++sighup_samba +diff --git a/extras/hook-scripts/stop/pre/S30samba-stop.sh b/extras/hook-scripts/stop/pre/S30samba-stop.sh +index 5e87845..ea79938 100755 +--- a/extras/hook-scripts/stop/pre/S30samba-stop.sh ++++ b/extras/hook-scripts/stop/pre/S30samba-stop.sh +@@ -56,9 +56,9 @@ function find_config_info () { + PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'` + } + +-function del_samba_share () { ++function deactivate_samba_share () { + volname=$1 +- sed -i "/\[gluster-$volname\]/,/^$/d" ${CONFIGFILE} ++ sed -i -e '/^\[gluster-'"$volname"'\]/{ :a' -e 'n; /available = no/H; /^$/!{$!ba;}; x; /./!{ s/^/available = no/; $!{G;x}; $H; }; s/.*//; x; };' ${CONFIGFILE} + } + + function sighup_samba () { +@@ -73,5 +73,5 @@ function sighup_samba () { + + parse_args "$@" + find_config_info +-del_samba_share $VOL ++deactivate_samba_share $VOL + sighup_samba +-- +1.8.3.1 + diff --git a/SOURCES/0482-extras-hooks-General-improvements-to-S30samba-start..patch b/SOURCES/0482-extras-hooks-General-improvements-to-S30samba-start..patch new file mode 100644 index 0000000..65e2cf6 --- /dev/null +++ b/SOURCES/0482-extras-hooks-General-improvements-to-S30samba-start..patch @@ -0,0 +1,91 @@ +From 121180edc218432a782a153e94b9f884d4c56a7c Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Sun, 9 Sep 2018 11:39:47 +0530 +Subject: [PATCH 482/493] extras/hooks: General improvements to + S30samba-start.sh + +Based on https://review.gluster.org/c/glusterfs/+/19204 from +Milan Zink + +upstream ref: https://review.gluster.org/c/glusterfs/+/21124 + +Change-Id: I61f62407a62475a74a6cc046c24748a31c66e6cd +BUG: 1541568 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/158496 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/hook-scripts/start/post/S30samba-start.sh | 26 ++++++++++++------------ + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh +index 5d586ee..dfd9c1b 100755 +--- a/extras/hook-scripts/start/post/S30samba-start.sh ++++ b/extras/hook-scripts/start/post/S30samba-start.sh +@@ -68,14 +68,14 @@ function parse_args () { + } + + function find_config_info () { +- cmdout=`smbd -b | grep smb.conf` +- if [ $? 
-ne 0 ];then ++ cmdout=$(smbd -b 2> /dev/null) ++ CONFIGFILE=$(echo "$cmdout" | grep CONFIGFILE | awk '{print $2}') ++ if [ -z "$CONFIGFILE" ]; then + echo "Samba is not installed" + exit 1 + fi +- CONFIGFILE=`echo $cmdout | awk '{print $2}'` +- PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'` +- LOGFILEBASE=`smbd -b | grep 'LOGFILEBASE' | awk '{print $2}'` ++ PIDDIR=$(echo "$cmdout" | grep PIDDIR | awk '{print $2}') ++ LOGFILEBASE=$(echo "$cmdout" | grep 'LOGFILEBASE' | awk '{print $2}') + } + + function add_samba_share () { +@@ -89,11 +89,11 @@ function add_samba_share () { + STRING+="path = /\n" + STRING+="read only = no\n" + STRING+="guest ok = yes\n" +- printf "$STRING" >> ${CONFIGFILE} ++ printf "$STRING" >> "${CONFIGFILE}" + } + + function sighup_samba () { +- pid=`cat ${PIDDIR}/smbd.pid` ++ pid=$(cat "${PIDDIR}/smbd.pid" 2> /dev/null) + if [ "x$pid" != "x" ] + then + kill -HUP "$pid"; +@@ -106,12 +106,12 @@ function get_smb () { + volname=$1 + uservalue= + +- usercifsvalue=$(grep user.cifs $GLUSTERD_WORKDIR/vols/"$volname"/info |\ ++ usercifsvalue=$(grep user.cifs "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\ + cut -d"=" -f2) +- usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\ ++ usersmbvalue=$(grep user.smb "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\ + cut -d"=" -f2) + +- if [ $usercifsvalue = "disable" ] || [ $usersmbvalue = "disable" ]; then ++ if [ "$usercifsvalue" = "disable" ] || [ "$usersmbvalue" = "disable" ]; then + uservalue="disable" + fi + echo "$uservalue" +@@ -125,9 +125,9 @@ fi + #Find smb.conf, smbd pid directory and smbd logfile path + find_config_info + +-if ! grep --quiet "\[gluster-$VOL\]" ${CONFIGFILE} ; then +- add_samba_share $VOL ++if ! grep --quiet "\[gluster-$VOL\]" "${CONFIGFILE}" ; then ++ add_samba_share "$VOL" + else +- sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' ${CONFIGFILE} ++ sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' "${CONFIGFILE}" + fi + sighup_samba +-- +1.8.3.1 + diff --git a/SOURCES/0483-Do-not-blindly-add-volume-share-section-to-smb.conf.patch b/SOURCES/0483-Do-not-blindly-add-volume-share-section-to-smb.conf.patch new file mode 100644 index 0000000..60f5d1a --- /dev/null +++ b/SOURCES/0483-Do-not-blindly-add-volume-share-section-to-smb.conf.patch @@ -0,0 +1,86 @@ +From c807ba5a11364d8eb83b86b0e4262a32b6834267 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Thu, 13 Dec 2018 12:05:37 +0530 +Subject: [PATCH 483/493] Do not blindly add volume share section to smb.conf + +With this change, by default GlusterFS volume share section will +no longer be added to smb.conf for client access unless user.cifs +or user.smb volume set options are enabled. This also fixes the +hook script to check for presence of all configuration possibilities +for those volume set options like 'enable' or 'on'. 
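A minimal usage sketch of the opt-in behaviour described above (volume name assumed): the share is only added to smb.conf once one of these options is switched on, and disabling it marks the share unavailable again.

    gluster volume set myvol user.smb enable    # 'on' is accepted as well
    gluster volume set myvol user.cifs on       # either option is honoured
    gluster volume set myvol user.smb disable   # deactivates the share again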
+ +upstream ref: https://review.gluster.org/c/glusterfs/+/19204 + +Change-Id: Ibecf7fffb4507d7255d963c3b1482afb0d0db984 +BUG: 1541568 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/158497 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/hook-scripts/set/post/S30samba-set.sh | 13 +++++++++++-- + extras/hook-scripts/start/post/S30samba-start.sh | 18 +++++++++++++++--- + 2 files changed, 26 insertions(+), 5 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh +index c596db0..d2a62d3 100755 +--- a/extras/hook-scripts/set/post/S30samba-set.sh ++++ b/extras/hook-scripts/set/post/S30samba-set.sh +@@ -123,9 +123,18 @@ function get_smb () { + usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\ + cut -d"=" -f2) + +- if [ $usercifsvalue = "disable" ] || [ $usersmbvalue = "disable" ]; then +- uservalue="disable" ++ if [ -n "$usercifsvalue" ]; then ++ if [ "$usercifsvalue" = "disable" ] || [ "$usercifsvalue" = "off" ]; then ++ uservalue="disable" ++ fi + fi ++ ++ if [ -n "$usersmbvalue" ]; then ++ if [ "$usersmbvalue" = "disable" ] || [ "$usersmbvalue" = "off" ]; then ++ uservalue="disable" ++ fi ++ fi ++ + echo "$uservalue" + } + +diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh +index dfd9c1b..2854bdd 100755 +--- a/extras/hook-scripts/start/post/S30samba-start.sh ++++ b/extras/hook-scripts/start/post/S30samba-start.sh +@@ -111,14 +111,26 @@ function get_smb () { + usersmbvalue=$(grep user.smb "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\ + cut -d"=" -f2) + +- if [ "$usercifsvalue" = "disable" ] || [ "$usersmbvalue" = "disable" ]; then +- uservalue="disable" ++ if [ -n "$usercifsvalue" ]; then ++ if [ "$usercifsvalue" = "enable" ] || [ "$usercifsvalue" = "on" ]; then ++ uservalue="enable" ++ fi + fi ++ ++ if [ -n "$usersmbvalue" ]; then ++ if [ "$usersmbvalue" = "enable" ] || [ "$usersmbvalue" = "on" ]; then ++ uservalue="enable" ++ fi ++ fi ++ + echo "$uservalue" + } + + parse_args "$@" +-if [ "$(get_smb "$VOL")" = "disable" ]; then ++ ++value=$(get_smb "$VOL") ++ ++if [ -z "$value" ] || [ "$value" != "enable" ]; then + exit 0 + fi + +-- +1.8.3.1 + diff --git a/SOURCES/0484-extras-New-group-volume-set-command-for-Samba-integr.patch b/SOURCES/0484-extras-New-group-volume-set-command-for-Samba-integr.patch new file mode 100644 index 0000000..215d0e2 --- /dev/null +++ b/SOURCES/0484-extras-New-group-volume-set-command-for-Samba-integr.patch @@ -0,0 +1,84 @@ +From bf8ca8c73df0a114b9728150934d6b7ecd3cbd6f Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Thu, 6 Dec 2018 15:05:20 +0530 +Subject: [PATCH 484/493] extras: New group volume set command for Samba + integration + + # gluster volume set group samba + +List of volume set options from group-samba are aimed at improving the below +workloads which consumes time and network hops in SMB world: + +* Listing of large directories +* Negative lookups during creation of files + +Caching the necessary metadata required for these workloads saves us time and +network hops. On the other side we have to ensure correctness(avoiding stale +cache) in caching(via md-cache) with the help of cache invalidation in an +environment where multiple client access is expected. 
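For reference, the group is applied with the existing group-profile syntax (the volume name below is hypothetical), and individual settings can be checked afterwards with `volume get`:

    gluster volume set myvol group samba
    gluster volume get myvol performance.nl-cache   # verify one of the group's options took effect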
+ +upstream ref: https://review.gluster.org/c/glusterfs/+/21814 + +Change-Id: Icdd2d8e5eb290e12bc509105418c668f432f4eae +BUG: 1655385 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/158723 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/Makefile.am | 4 +++- + extras/group-samba | 10 ++++++++++ + glusterfs.spec.in | 1 + + 3 files changed, 14 insertions(+), 1 deletion(-) + create mode 100644 extras/group-samba + +diff --git a/extras/Makefile.am b/extras/Makefile.am +index e0e05b5..f898245 100644 +--- a/extras/Makefile.am ++++ b/extras/Makefile.am +@@ -14,7 +14,7 @@ confdir = $(sysconfdir)/glusterfs + conf_DATA = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \ + logger.conf.example glusterfs-georep-logrotate group-virt.example \ + group-metadata-cache group-gluster-block group-nl-cache group-db-workload \ +- group-distributed-virt ++ group-distributed-virt group-samba + + voldir = $(sysconfdir)/glusterfs + vol_DATA = glusterd.vol +@@ -53,3 +53,5 @@ install-data-local: + $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/db-workload + $(INSTALL_DATA) $(top_srcdir)/extras/group-distributed-virt \ + $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/distributed-virt ++ $(INSTALL_DATA) $(top_srcdir)/extras/group-samba \ ++ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/samba +diff --git a/extras/group-samba b/extras/group-samba +new file mode 100644 +index 0000000..ee39202 +--- /dev/null ++++ b/extras/group-samba +@@ -0,0 +1,10 @@ ++features.cache-invalidation=on ++features.cache-invalidation-timeout=600 ++performance.cache-samba-metadata=on ++performance.stat-prefetch=on ++performance.cache-invalidation=on ++performance.md-cache-timeout=600 ++network.inode-lru-limit=200000 ++performance.nl-cache=on ++performance.nl-cache-timeout=600 ++performance.parallel-readdir=on +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index a4accd9..16dc5d7 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1524,6 +1524,7 @@ exit 0 + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/db-workload + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/distributed-virt + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/nl-cache ++ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/samba + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind/.keys + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd +-- +1.8.3.1 + diff --git a/SOURCES/0485-cluster-ec-Prevent-volume-create-without-redundant-b.patch b/SOURCES/0485-cluster-ec-Prevent-volume-create-without-redundant-b.patch new file mode 100644 index 0000000..72e202e --- /dev/null +++ b/SOURCES/0485-cluster-ec-Prevent-volume-create-without-redundant-b.patch @@ -0,0 +1,60 @@ +From f3982cf5bf3250ee29da3ea4477591c298780043 Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya +Date: Mon, 17 Dec 2018 16:37:21 +0530 +Subject: [PATCH 485/493] cluster/ec : Prevent volume create without redundant + brick + +Problem: +EC volumes can be created without any redundant brick. + +Solution: +Updated the conditional check to avoid volume create +without redundant brick. 
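Concretely, for a disperse volume of N bricks with K data bricks, the redundancy M = N - K must now satisfy 1 <= M <= (N-1)/2. Two illustrative create commands (hosts and brick paths assumed):

    gluster volume create demo disperse 4 disperse-data 4 host{1..4}:/bricks/demo   # M = 0, now rejected
    gluster volume create demo disperse 6 redundancy 2 host{1..6}:/bricks/demo      # 1 <= 2 <= (6-1)/2, accepted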
+ +>fixes: bz#1642448 +>Change-Id: I0cb334b1b9378d67fcb8abf793dbe312c3179c0b +>Signed-off-by: Sunil Kumar Acharya +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/21478/ + +BUG: 1597252 +Change-Id: I0cb334b1b9378d67fcb8abf793dbe312c3179c0b +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/158910 +Tested-by: RHGS Build Bot +--- + cli/src/cli-cmd-parser.c | 6 +++--- + tests/basic/glusterd/disperse-create.t | 1 + + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 3745fb4..e790d79 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -275,9 +275,9 @@ cli_cmd_create_disperse_check (struct cli_state *state, int *disperse, + tmp = *disperse - *redundancy; + } + +- if (*redundancy > (*disperse - 1) / 2) { +- cli_err ("redundancy must be less than %d for a " +- "disperse %d volume", ++ if ((*redundancy < 1) || (*redundancy > (*disperse - 1) / 2)) { ++ cli_err ("redundancy must be greater than or equal to 1 and" ++ " less than %d for a disperse %d volume", + (*disperse + 1) / 2, *disperse); + + return -1; +diff --git a/tests/basic/glusterd/disperse-create.t b/tests/basic/glusterd/disperse-create.t +index e5ce74c..5b3ed13 100644 +--- a/tests/basic/glusterd/disperse-create.t ++++ b/tests/basic/glusterd/disperse-create.t +@@ -48,6 +48,7 @@ TEST ! $CLI volume create $V0 redundancy 1 redundancy 1 $H0:$B0/b20 $H0:$B0/b21 + #Minimum counts test + TEST ! $CLI volume create $V0 disperse 2 $H0:$B0/b20 $H0:$B0/b22 + TEST ! $CLI volume create $V0 disperse-data 1 redundancy 0 $H0:$B0/b20 $H0:$B0/b22 ++TEST ! $CLI volume create $V0 disperse 4 disperse-data 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b23 $H0:$B0/b24 + TEST ! $CLI volume create $V0 redundancy 0 $H0:$B0/b20 $H0:$B0/b22 + + #Wrong count n != k+m +-- +1.8.3.1 + diff --git a/SOURCES/0486-performance-rda-Fixed-dict_t-memory-leak.patch b/SOURCES/0486-performance-rda-Fixed-dict_t-memory-leak.patch new file mode 100644 index 0000000..2c29a2a --- /dev/null +++ b/SOURCES/0486-performance-rda-Fixed-dict_t-memory-leak.patch @@ -0,0 +1,51 @@ +From 64cfb0126d88bd6b841777fefb63e413b587a7b1 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Tue, 18 Dec 2018 14:08:04 +0530 +Subject: [PATCH 486/493] performance/rda: Fixed dict_t memory leak + +Removed all references to dict_t xdata_from_req which is +allocated but not used anywhere. It is also not cleaned up +and hence causes a memory leak. 
+ +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21859/ + +> Change-Id: I2edb857696191e872ad12a12efc36999626bacc7 +> fixes: bz#1659432 +> Signed-off-by: N Balachandran + +Change-Id: Ic07ecbefef3140aeb24a2afaf97c80ee65768e7e +BUG: 1659439 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/158915 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +--- + xlators/performance/readdir-ahead/src/readdir-ahead.c | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c +index 6501a6b..3e0951c 100644 +--- a/xlators/performance/readdir-ahead/src/readdir-ahead.c ++++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c +@@ -505,18 +505,10 @@ rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + { + int op_errno = 0; + struct rda_local *local = NULL; +- dict_t *xdata_from_req = NULL; + + if (xdata) { +- xdata_from_req = dict_new(); +- if (!xdata_from_req) { +- op_errno = ENOMEM; +- goto unwind; +- } +- + local = mem_get0(this->local_pool); + if (!local) { +- dict_unref(xdata_from_req); + op_errno = ENOMEM; + goto unwind; + } +-- +1.8.3.1 + diff --git a/SOURCES/0487-mem-pool-add-tracking-of-mem_pool-that-requested-the.patch b/SOURCES/0487-mem-pool-add-tracking-of-mem_pool-that-requested-the.patch new file mode 100644 index 0000000..baa6715 --- /dev/null +++ b/SOURCES/0487-mem-pool-add-tracking-of-mem_pool-that-requested-the.patch @@ -0,0 +1,270 @@ +From 3cc901acea41632df0c342639c4292c10bd90964 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 18 Dec 2018 15:39:14 +0530 +Subject: [PATCH 487/493] mem-pool: add tracking of mem_pool that requested the + allocation + +This renames the current 'struct mem_pool' to 'struct mem_pool_shared'. +The mem_pool_shared is globally allocated and not specific for +particular objects. + +A new 'struct mem_pool' gets allocated when mem_pool_new() is called. It +points to the mem_pool_shared that handles the actual allocation +requests. The 'struct mem_pool' is only used for accounting of the +objects that the caller requested and free'd. + +All of these changes will be used to collect all the memory pools a +glusterfs_ctx_t is consuming, so that statedumps can be collected per +context. 
+ +> Updates: #307 +> Change-Id: I6355d3f0251c928e0bbfc71be3431307c6f3a3da +> Signed-off-by: Niels de Vos +> Reviewed-on: https://review.gluster.org/18073 +> Smoke: Gluster Build System +> CentOS-regression: Gluster Build System +> Reviewed-by: Amar Tumballi +> Reviewed-by: Jeff Darcy +> Cherry picked from commit 2645e730b79b44fc035170657e43bb52f3e855c5 + +Change-Id: I6cce6284e4553c6ca59a90ad124c23c950db3148 +BUG: 1648893 +Signed-off-by: Mohit Agrawal + +Signed-off-by: Mohit Agrawal +Change-Id: I363d71152b1dd17eca53d9c327fcdf2f26c0fb61 +Reviewed-on: https://code.engineering.redhat.com/gerrit/158930 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/mem-pool.c | 69 +++++++++++++++++++++++++++----------------- + libglusterfs/src/mem-pool.h | 20 +++++++++++-- + libglusterfs/src/mem-types.h | 2 -- + 3 files changed, 60 insertions(+), 31 deletions(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 8ff261c..a8a9347 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -14,15 +14,6 @@ + #include + #include + +-#define GF_MEM_POOL_LIST_BOUNDARY (sizeof(struct list_head)) +-#define GF_MEM_POOL_PTR (sizeof(struct mem_pool*)) +-#define GF_MEM_POOL_PAD_BOUNDARY (GF_MEM_POOL_LIST_BOUNDARY + GF_MEM_POOL_PTR + sizeof(int)) +-#define mem_pool_chunkhead2ptr(head) ((head) + GF_MEM_POOL_PAD_BOUNDARY) +-#define mem_pool_ptr2chunkhead(ptr) ((ptr) - GF_MEM_POOL_PAD_BOUNDARY) +-#define is_mem_chunk_in_use(ptr) (*ptr == 1) +-#define mem_pool_from_ptr(ptr) ((ptr) + GF_MEM_POOL_LIST_BOUNDARY) +- +-#define GLUSTERFS_ENV_MEM_ACCT_STR "GLUSTERFS_DISABLE_MEM_ACCT" + + #include "unittest/unittest.h" + #include "libglusterfs-messages.h" +@@ -380,7 +371,7 @@ static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER; + static struct list_head pool_threads; + static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER; + static struct list_head pool_free_threads; +-static struct mem_pool pools[NPOOLS]; ++static struct mem_pool_shared pools[NPOOLS]; + static size_t pool_list_size; + + #if !defined(GF_DISABLE_MEMPOOL) +@@ -689,6 +680,8 @@ mem_pool_new_fn (unsigned long sizeof_type, + unsigned long count, char *name) + { + unsigned int i; ++ struct mem_pool *new = NULL; ++ struct mem_pool_shared *pool = NULL; + + if (!sizeof_type) { + gf_msg_callingfn ("mem-pool", GF_LOG_ERROR, EINVAL, +@@ -698,13 +691,27 @@ mem_pool_new_fn (unsigned long sizeof_type, + + for (i = 0; i < NPOOLS; ++i) { + if (sizeof_type <= AVAILABLE_SIZE(pools[i].power_of_two)) { +- return &pools[i]; ++ pool = &pools[i]; ++ break; + } + } + +- gf_msg_callingfn ("mem-pool", GF_LOG_ERROR, EINVAL, +- LG_MSG_INVALID_ARG, "invalid argument"); +- return NULL; ++ if (!pool) { ++ gf_msg_callingfn ("mem-pool", GF_LOG_ERROR, EINVAL, ++ LG_MSG_INVALID_ARG, "invalid argument"); ++ return NULL; ++ } ++ ++ new = GF_CALLOC (sizeof (struct mem_pool), 1, gf_common_mt_mem_pool); ++ if (!new) ++ return NULL; ++ ++ new->sizeof_type = sizeof_type; ++ new->count = count; ++ new->name = name; ++ new->pool = pool; ++ ++ return new; + } + + void* +@@ -721,7 +728,7 @@ mem_get0 (struct mem_pool *mem_pool) + ptr = mem_get(mem_pool); + + if (ptr) { +- memset (ptr, 0, AVAILABLE_SIZE(mem_pool->power_of_two)); ++ memset (ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two)); + } + + return ptr; +@@ -784,7 +791,7 @@ mem_get_from_pool (struct mem_pool *mem_pool) + return NULL; + } + +- pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST]; ++ pt_pool = 
&pool_list->pools[mem_pool->pool->power_of_two-POOL_SMALLEST]; + + (void) pthread_spin_lock (&pool_list->lock); + +@@ -802,7 +809,7 @@ mem_get_from_pool (struct mem_pool *mem_pool) + } else { + (void) pthread_spin_unlock (&pool_list->lock); + GF_ATOMIC_INC (pt_pool->parent->allocs_stdc); +- retval = malloc (1 << mem_pool->power_of_two); ++ retval = malloc (1 << mem_pool->pool->power_of_two); + } + } + +@@ -810,7 +817,7 @@ mem_get_from_pool (struct mem_pool *mem_pool) + retval->magic = GF_MEM_HEADER_MAGIC; + retval->next = NULL; + retval->pool_list = pool_list; +- retval->power_of_two = mem_pool->power_of_two; ++ retval->power_of_two = mem_pool->pool->power_of_two; + } + + return retval; +@@ -821,9 +828,10 @@ void * + mem_get (struct mem_pool *mem_pool) + { + #if defined(GF_DISABLE_MEMPOOL) +- return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->power_of_two), ++ return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->pool->power_of_two), + gf_common_mt_mem_pool); + #else ++ per_thread_pool_list_t *pool_list; + pooled_obj_hdr_t *retval; + + if (!mem_pool) { +@@ -832,11 +840,22 @@ mem_get (struct mem_pool *mem_pool) + return NULL; + } + ++ pool_list = mem_get_pool_list (); ++ if (!pool_list || pool_list->poison) { ++ return NULL; ++ } ++ + retval = mem_get_from_pool (mem_pool); ++ + if (!retval) { + return NULL; + } + ++ retval->magic = GF_MEM_HEADER_MAGIC; ++ retval->pool = mem_pool; ++ retval->pool_list = pool_list; ++ retval->power_of_two = mem_pool->pool->power_of_two; ++ + return retval + 1; + #endif /* GF_DISABLE_MEMPOOL */ + } +@@ -886,14 +905,12 @@ mem_put (void *ptr) + void + mem_pool_destroy (struct mem_pool *pool) + { +- if (!pool) +- return; ++ GF_FREE (pool); + + /* +- * Pools are now permanent, so this does nothing. Yes, this means we +- * can keep allocating from a pool after calling mem_destroy on it, but +- * that's kind of OK. All of the objects *in* the pool will eventually +- * be freed via the pool-sweeper thread, and this way we don't have to +- * add a lot of reference-counting complexity. ++ * Pools are now permanent, so the mem_pool->pool is kept around. All ++ * of the objects *in* the pool will eventually be freed via the ++ * pool-sweeper thread, and this way we don't have to add a lot of ++ * reference-counting complexity. + */ + } +diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/mem-pool.h +index dfe1f9a..057d957 100644 +--- a/libglusterfs/src/mem-pool.h ++++ b/libglusterfs/src/mem-pool.h +@@ -204,18 +204,31 @@ out: + return dup_mem; + } + ++/* kind of 'header' for the actual mem_pool_shared structure, this might make ++ * it possible to dump some more details in a statedump */ ++struct mem_pool { ++ unsigned long sizeof_type; ++ unsigned long count; ++ char *name; ++ ++ struct mem_pool_shared *pool; ++}; ++ + typedef struct pooled_obj_hdr { + unsigned long magic; + struct pooled_obj_hdr *next; + struct per_thread_pool_list *pool_list; + unsigned int power_of_two; ++ ++ /* track the pool that was used to request this object */ ++ struct mem_pool *pool; + } pooled_obj_hdr_t; + + #define AVAILABLE_SIZE(p2) ((1 << (p2)) - sizeof(pooled_obj_hdr_t)) + + typedef struct per_thread_pool { +- /* This never changes, so doesn't need a lock. */ +- struct mem_pool *parent; ++ /* the pool that was used to request this allocation */ ++ struct mem_pool_shared *parent; + /* Everything else is protected by our own lock. 
*/ + pooled_obj_hdr_t *hot_list; + pooled_obj_hdr_t *cold_list; +@@ -243,7 +256,8 @@ typedef struct per_thread_pool_list { + per_thread_pool_t pools[1]; + } per_thread_pool_list_t; + +-struct mem_pool { ++/* actual pool structure, shared between different mem_pools */ ++struct mem_pool_shared { + unsigned int power_of_two; + /* + * Updates to these are *not* protected by a global lock, so races +diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h +index 85cb5d2..64d0e90 100644 +--- a/libglusterfs/src/mem-types.h ++++ b/libglusterfs/src/mem-types.h +@@ -61,9 +61,7 @@ enum gf_common_mem_types_ { + gf_common_mt_char, + gf_common_mt_rbthash_table_t, + gf_common_mt_rbthash_bucket, +-#if defined(GF_DISABLE_MEMPOOL) + gf_common_mt_mem_pool, +-#endif + gf_common_mt_long, + gf_common_mt_rpcsvc_auth_list, + gf_common_mt_rpcsvc_t, +-- +1.8.3.1 + diff --git a/SOURCES/0488-cluster-afr-Allow-lookup-on-root-if-it-is-from-ADD_R.patch b/SOURCES/0488-cluster-afr-Allow-lookup-on-root-if-it-is-from-ADD_R.patch new file mode 100644 index 0000000..9e8c6a3 --- /dev/null +++ b/SOURCES/0488-cluster-afr-Allow-lookup-on-root-if-it-is-from-ADD_R.patch @@ -0,0 +1,459 @@ +From f398b6b9705f1b75d17d965ffcd72157d5be3daf Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Tue, 18 Dec 2018 16:04:42 +0530 +Subject: [PATCH 488/493] cluster/afr: Allow lookup on root if it is from + ADD_REPLICA_MOUNT + +Problem: When trying to convert a plain distribute volume to replica-3 +or arbiter type it is failing with ENOTCONN error as the lookup on +the root will fail as there is no quorum. + +Fix: Allow lookup on root if it is coming from the ADD_REPLICA_MOUNT +which is used while adding bricks to a volume. It will try to set the +pending xattrs for the newly added bricks to allow the heal to happen +in the right direction and avoid data loss scenarios. + +Note: This fix will solve the problem of type conversion only in the +case where the volume was mounted at least once. The conversion of +non mounted volumes will still fail since the dht selfheal tries to +set the directory layout will fail as they do that with the PID +GF_CLIENT_PID_NO_ROOT_SQUASH set in the frame->root. 
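The conversions this is meant to unblock are the plain add-brick forms exercised by the test added below; the volume name, hosts and brick paths here are assumed:

    # 2-brick plain distribute volume -> 2 x 3 replica
    gluster volume add-brick myvol replica 3 host{1..4}:/bricks/newbrick
    # or -> 2 x (2 + 1) with arbiter bricks
    gluster volume add-brick myvol replica 3 arbiter 1 host{1..4}:/bricks/newbrick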
+ +Backport of: https://review.gluster.org/#/c/glusterfs/+/21791/ + +Change-Id: Ie31d429dfebbfb0f60610c9c5739595c54b19c46 +BUG: 1645480 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/158932 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/common-utils.h | 3 +- + ...g-1655854-support-dist-to-rep3-arb-conversion.t | 95 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 80 +++++++++++++----- + xlators/cluster/afr/src/afr-inode-write.c | 2 +- + xlators/cluster/afr/src/afr-read-txn.c | 3 +- + xlators/cluster/afr/src/afr-transaction.c | 11 ++- + xlators/cluster/afr/src/afr-transaction.h | 3 +- + xlators/cluster/afr/src/afr.c | 2 +- + xlators/cluster/afr/src/afr.h | 4 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +- + 10 files changed, 175 insertions(+), 30 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t + +diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h +index c804ed5..50c1f9a 100644 +--- a/libglusterfs/src/common-utils.h ++++ b/libglusterfs/src/common-utils.h +@@ -162,7 +162,8 @@ enum _gf_special_pid + GF_CLIENT_PID_BITD = -8, + GF_CLIENT_PID_SCRUB = -9, + GF_CLIENT_PID_TIER_DEFRAG = -10, +- GF_SERVER_PID_TRASH = -11 ++ GF_SERVER_PID_TRASH = -11, ++ GF_CLIENT_PID_ADD_REPLICA_MOUNT = -12 + }; + + enum _gf_xlator_ipc_targets { +diff --git a/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t +new file mode 100644 +index 0000000..783016d +--- /dev/null ++++ b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t +@@ -0,0 +1,95 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++ ++# Conversion from 2x1 to 2x3 ++ ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} ++EXPECT 'Created' volinfo_field $V0 'Status'; ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST mkdir $M0/dir ++TEST dd if=/dev/urandom of=$M0/dir/file bs=100K count=5 ++file_md5sum=$(md5sum $M0/dir/file | awk '{print $1}') ++ ++TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{2..5} ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5 ++ ++# Trigger heal and wait for for it to complete ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Check whether the directory & file are healed to the newly added bricks ++TEST ls $B0/${V0}2/dir ++TEST ls $B0/${V0}3/dir ++TEST ls $B0/${V0}4/dir ++TEST ls $B0/${V0}5/dir ++ ++TEST [ $file_md5sum == $(md5sum $B0/${V0}4/dir/file | awk '{print $1}') ] ++TEST [ $file_md5sum == $(md5sum $B0/${V0}5/dir/file | awk '{print $1}') ] ++ ++ ++# Conversion from 2x1 to 2x(2+1) ++ ++TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1} ++EXPECT 'Created' volinfo_field $V1 'Status'; ++TEST $CLI volume start $V1 ++EXPECT 'Started' volinfo_field $V1 'Status'; ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1 ++ ++TEST $GFS --volfile-id=$V1 --volfile-server=$H0 $M1; ++TEST mkdir $M1/dir ++TEST dd if=/dev/urandom of=$M1/dir/file bs=100K count=5 ++file_md5sum=$(md5sum $M1/dir/file | awk '{print $1}') ++ ++TEST $CLI volume add-brick $V1 replica 3 arbiter 1 $H0:$B0/${V1}{2..5} ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}5 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 5 ++ ++# Trigger heal and wait for for it to complete ++TEST $CLI volume heal $V1 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V1 ++ ++# 
Check whether the directory & file are healed to the newly added bricks ++TEST ls $B0/${V1}2/dir ++TEST ls $B0/${V1}3/dir ++TEST ls $B0/${V1}4/dir ++TEST ls $B0/${V1}5/dir ++ ++EXPECT "0" stat -c %s $B0/${V1}5/dir/file ++TEST [ $file_md5sum == $(md5sum $B0/${V1}4/dir/file | awk '{print $1}') ] ++ ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 231de9d..322dfbe 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2265,7 +2265,7 @@ afr_attempt_readsubvol_set (call_frame_t *frame, xlator_t *this, + } else if (!priv->quorum_count) { + *read_subvol = afr_first_up_child (frame, this); + } else if (priv->quorum_count && +- afr_has_quorum (data_readable, this)) { ++ afr_has_quorum (data_readable, this, NULL)) { + /* read_subvol is guaranteed to be valid if we hit this path. */ + *read_subvol = afr_first_up_child (frame, this); + } else { +@@ -2405,7 +2405,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) + read_subvol = -1; + memset (readable, 0, sizeof (*readable) * priv->child_count); + if (can_interpret) { +- if (!afr_has_quorum (success_replies, this)) ++ if (!afr_has_quorum (success_replies, this, NULL)) + goto cant_interpret; + /* It is safe to call afr_replies_interpret() because we have + a response from all the UP subvolumes and all of them resolved +@@ -2887,7 +2887,7 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this) + if (name_state_mismatch) { + if (!priv->quorum_count) + goto name_heal; +- if (!afr_has_quorum (success, this)) ++ if (!afr_has_quorum (success, this, NULL)) + goto name_heal; + if (op_errno) + goto name_heal; +@@ -2979,7 +2979,6 @@ afr_discover_done (call_frame_t *frame, xlator_t *this) + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = -1; +- int op_errno = 0; + int read_subvol = -1; + unsigned char *data_readable = NULL; + unsigned char *success_replies = NULL; +@@ -2998,15 +2997,13 @@ afr_discover_done (call_frame_t *frame, xlator_t *this) + } + } + +- op_errno = afr_final_errno (frame->local, this->private); +- + if (local->op_ret < 0) { +- AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, +- NULL, NULL); +- return; +- } ++ local->op_ret = -1; ++ local->op_errno = afr_final_errno (frame->local, this->private); ++ goto error; ++ } + +- if (!afr_has_quorum (success_replies, this)) ++ if (!afr_has_quorum (success_replies, this, frame)) + goto unwind; + + afr_replies_interpret (frame, this, local->inode, NULL); +@@ -3017,11 +3014,8 @@ afr_discover_done (call_frame_t *frame, xlator_t *this) + unwind: + afr_attempt_readsubvol_set (frame, this, success_replies, data_readable, + &read_subvol); +- if (read_subvol == -1) { +- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, +- NULL, NULL, NULL, NULL); +- return; +- } ++ if (read_subvol == -1) ++ goto error; + + if (AFR_IS_ARBITER_BRICK (priv, read_subvol) && local->op_ret == 0) { + local->op_ret = -1; +@@ -3034,6 +3028,11 @@ unwind: + local->inode, &local->replies[read_subvol].poststat, + local->replies[read_subvol].xdata, + &local->replies[read_subvol].postparent); ++ return; ++ ++error: ++ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, NULL, ++ NULL, NULL, NULL); + } + + +@@ -3977,7 +3976,8 @@ afr_fop_lock_done (call_frame_t *frame, xlator_t *this) + + if (afr_is_conflicting_lock_present (local->op_ret, local->op_errno)) { + afr_unlock_locks_and_proceed (frame, this, lock_count); +- } else if (priv->quorum_count && !afr_has_quorum 
(success, this)) { ++ } else if (priv->quorum_count && ++ !afr_has_quorum (success, this, NULL)) { + local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED; + local->op_ret = -1; + local->op_errno = afr_final_errno (local, priv); +@@ -4485,7 +4485,7 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + &local->cont.lk.user_flock, + local->xdata_req); + } else if (priv->quorum_count && +- !afr_has_quorum (local->cont.lk.locked_nodes, this)) { ++ !afr_has_quorum (local->cont.lk.locked_nodes, this, NULL)) { + local->op_ret = -1; + local->op_errno = afr_final_errno (local, priv); + +@@ -5199,7 +5199,7 @@ afr_notify (xlator_t *this, int32_t event, + } + + had_quorum = priv->quorum_count && afr_has_quorum (priv->child_up, +- this); ++ this, NULL); + if (priv->halo_enabled) { + halo_max_latency_msec = afr_get_halo_latency (this); + +@@ -5328,7 +5328,7 @@ afr_notify (xlator_t *this, int32_t event, + UNLOCK (&priv->lock); + + if (priv->quorum_count) { +- has_quorum = afr_has_quorum (priv->child_up, this); ++ has_quorum = afr_has_quorum (priv->child_up, this, NULL); + if (!had_quorum && has_quorum) { + gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET, + "Client-quorum is met"); +@@ -6543,3 +6543,43 @@ afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode) + } + return ret; + } ++ ++gf_boolean_t ++afr_is_add_replica_mount_lookup_on_root (call_frame_t *frame) ++{ ++ afr_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) ++ return _gf_false; ++ ++ if (local->op != GF_FOP_LOOKUP) ++ /* TODO:If the replica count is being increased on a plain ++ * distribute volume that was never mounted, we need to allow ++ * setxattr on '/' with GF_CLIENT_PID_NO_ROOT_SQUASH to ++ * accomodate for DHT layout setting */ ++ return _gf_false; ++ ++ if (local->inode == NULL) ++ return _gf_false; ++ ++ if (!__is_root_gfid (local->inode->gfid)) ++ return _gf_false; ++ ++ return _gf_true; ++} ++ ++gf_boolean_t ++afr_lookup_has_quorum (call_frame_t *frame, xlator_t *this, ++ unsigned char *subvols) ++{ ++ afr_private_t *priv = this->private; ++ ++ if (frame && afr_is_add_replica_mount_lookup_on_root (frame)) { ++ if (AFR_COUNT (subvols, priv->child_count) > 0) ++ return _gf_true; ++ } ++ ++ return _gf_false; ++} +diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c +index 8b1dcfd..ea4755a 100644 +--- a/xlators/cluster/afr/src/afr-inode-write.c ++++ b/xlators/cluster/afr/src/afr-inode-write.c +@@ -1539,7 +1539,7 @@ afr_handle_empty_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, + if (ret && ab_ret) + goto out; + +- if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { ++ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) { + gf_msg (this->name, GF_LOG_ERROR, EPERM, + afr_get_msg_id (op_type), + "'%s' is an internal extended attribute.", +diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c +index f6c491b..ec322ae 100644 +--- a/xlators/cluster/afr/src/afr-read-txn.c ++++ b/xlators/cluster/afr/src/afr-read-txn.c +@@ -193,7 +193,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, + local->inode = inode_ref (inode); + local->is_read_txn = _gf_true; + +- if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) { ++ if (priv->quorum_count && ++ !afr_has_quorum (local->child_up, this, NULL)) { + local->op_ret = -1; + local->op_errno = ENOTCONN; + read_subvol = -1; +diff --git 
a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 0a67a83..0e58e02 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -160,7 +160,7 @@ afr_changelog_has_quorum (afr_local_t *local, xlator_t *this) + } + } + +- if (afr_has_quorum (success_children, this)) { ++ if (afr_has_quorum (success_children, this, NULL)) { + return _gf_true; + } + +@@ -690,7 +690,7 @@ afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this) + } + + gf_boolean_t +-afr_has_quorum (unsigned char *subvols, xlator_t *this) ++afr_has_quorum (unsigned char *subvols, xlator_t *this, call_frame_t *frame) + { + unsigned int quorum_count = 0; + afr_private_t *priv = NULL; +@@ -699,6 +699,9 @@ afr_has_quorum (unsigned char *subvols, xlator_t *this) + priv = this->private; + up_children_count = AFR_COUNT (subvols, priv->child_count); + ++ if (afr_lookup_has_quorum (frame, this, subvols)) ++ return _gf_true; ++ + if (priv->quorum_count == AFR_QUORUM_AUTO) { + /* + * Special case for auto-quorum with an even number of nodes. +@@ -753,7 +756,7 @@ afr_has_fop_quorum (call_frame_t *frame) + + locked_nodes = afr_locked_nodes_get (local->transaction.type, + &local->internal_lock); +- return afr_has_quorum (locked_nodes, this); ++ return afr_has_quorum (locked_nodes, this, NULL); + } + + static gf_boolean_t +@@ -771,7 +774,7 @@ afr_has_fop_cbk_quorum (call_frame_t *frame) + success[i] = 1; + } + +- return afr_has_quorum (success, this); ++ return afr_has_quorum (success, this, NULL); + } + + gf_boolean_t +diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h +index a27e9a3..1c42c66 100644 +--- a/xlators/cluster/afr/src/afr-transaction.h ++++ b/xlators/cluster/afr/src/afr-transaction.h +@@ -38,7 +38,8 @@ int afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, + int afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol); + + call_frame_t *afr_transaction_detach_fop_frame (call_frame_t *frame); +-gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this); ++gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this, ++ call_frame_t *frame); + gf_boolean_t afr_needs_changelog_update (afr_local_t *local); + void afr_zero_fill_stat (afr_local_t *local); + +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 1b738c0..d5347e9 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -244,7 +244,7 @@ reconfigure (xlator_t *this, dict_t *options) + GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options, + uint32, out); + fix_quorum_options (this, priv, qtype, options); +- if (priv->quorum_count && !afr_has_quorum (priv->child_up, this)) ++ if (priv->quorum_count && !afr_has_quorum (priv->child_up, this, NULL)) + gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL, + "Client-quorum is not met"); + +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 7010e9b..febc509 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -1227,4 +1227,8 @@ afr_is_symmetric_error (call_frame_t *frame, xlator_t *this); + + gf_boolean_t + afr_is_pending_set (xlator_t *this, dict_t *xdata, int type); ++ ++gf_boolean_t ++afr_lookup_has_quorum (call_frame_t *frame, xlator_t *this, ++ unsigned char *subvols); + #endif /* __AFR_H__ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 
2290343..6468ecb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13788,7 +13788,7 @@ glusterd_handle_replicate_brick_ops (glusterd_volinfo_t *volinfo, + goto out; + } + +- ret = gf_asprintf (&pid, "%d", GF_CLIENT_PID_SELF_HEALD); ++ ret = gf_asprintf (&pid, "%d", GF_CLIENT_PID_ADD_REPLICA_MOUNT); + if (ret < 0) + goto out; + +-- +1.8.3.1 + diff --git a/SOURCES/0489-cluster-afr-Do-not-update-read_subvol-in-inode_ctx-a.patch b/SOURCES/0489-cluster-afr-Do-not-update-read_subvol-in-inode_ctx-a.patch new file mode 100644 index 0000000..b2b7cb6 --- /dev/null +++ b/SOURCES/0489-cluster-afr-Do-not-update-read_subvol-in-inode_ctx-a.patch @@ -0,0 +1,91 @@ +From adeeec1fbe7241d18903ae3830a1fabb1061be21 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 17 Dec 2018 14:28:07 +0530 +Subject: [PATCH 489/493] cluster/afr: Do not update read_subvol in inode_ctx + after rename/link fop + +Since rename/link fops on a file will not change any data in it, it should +not update the read_subvol values in the inode_ctx, which interprets the +data & metadata readable subvols for that file. The old read_subvol values +should be retained even after the rename/link operations. + +Backport of: https://review.gluster.org/#/c/glusterfs/+/21841 + +Change-Id: I22787938b3f8295a48162ab4f498e4010d66a1ab +BUG: 1645480 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/158803 +Tested-by: RHGS Build Bot +Tested-by: Ravishankar Narayanankutty +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + ...7783-do-not-update-read-subvol-on-rename-link.t | 40 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-dir-write.c | 4 ++- + 2 files changed, 43 insertions(+), 1 deletion(-) + create mode 100644 tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t + +diff --git a/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t b/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t +new file mode 100644 +index 0000000..b180f0e +--- /dev/null ++++ b/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t +@@ -0,0 +1,40 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 performance.write-behind off ++ ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST mkdir $M0/dir ++TEST "echo abc > $M0/file1" ++TEST "echo uvw > $M0/file2" ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST "echo def > $M0/file1" ++TEST "echo xyz > $M0/file2" ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++ ++# Rename file1 and read it. Read should be served from the 3rd brick ++TEST mv $M0/file1 $M0/file3 ++EXPECT "def" cat $M0/file3 ++ ++# Create a link to file2 and read it. 
Read should be served from the 3rd brick ++TEST ln $M0/file2 $M0/dir/file4 ++EXPECT "xyz" cat $M0/dir/file4 ++EXPECT "xyz" cat $M0/file2 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c +index 75889de..c508034 100644 +--- a/xlators/cluster/afr/src/afr-dir-write.c ++++ b/xlators/cluster/afr/src/afr-dir-write.c +@@ -99,7 +99,9 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) + } + + if (local->inode) { +- afr_replies_interpret (frame, this, local->inode, NULL); ++ if (local->op != GF_FOP_RENAME && local->op != GF_FOP_LINK) ++ afr_replies_interpret (frame, this, local->inode, NULL); ++ + inode_read_subvol = afr_data_subvol_get (local->inode, this, + NULL, NULL, NULL, &args); + } +-- +1.8.3.1 + diff --git a/SOURCES/0490-glusterd-migrating-rebalance-commands-to-mgmt_v3-fra.patch b/SOURCES/0490-glusterd-migrating-rebalance-commands-to-mgmt_v3-fra.patch new file mode 100644 index 0000000..1ce1b73 --- /dev/null +++ b/SOURCES/0490-glusterd-migrating-rebalance-commands-to-mgmt_v3-fra.patch @@ -0,0 +1,921 @@ +From 783c36e573a9c937422e63af038bb35648483b9e Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Fri, 30 Nov 2018 16:16:55 +0530 +Subject: [PATCH 490/493] glusterd: migrating rebalance commands to mgmt_v3 + framework + +Current rebalance commands use the op_state machine framework. +Porting it to use the mgmt_v3 framework. + +> Change-Id: I6faf4a6335c2e2f3d54bbde79908a7749e4613e7 +> fixes: bz#1655827 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21762/ + +Change-Id: I6faf4a6335c2e2f3d54bbde79908a7749e4613e7 +BUG: 1652466 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/158917 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + libglusterfs/src/globals.h | 2 + + xlators/mgmt/glusterd/src/glusterd-handler.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 128 ++++++- + xlators/mgmt/glusterd/src/glusterd-mgmt.h | 5 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.h | 3 + + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 495 ++++++++++++++++++++++++- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 9 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 4 +- + xlators/mgmt/glusterd/src/glusterd.h | 9 + + 9 files changed, 637 insertions(+), 22 deletions(-) + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 343263c..5e3b180 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -111,6 +111,8 @@ + + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + ++#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */ ++ + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/ +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index d40de89..d8e3335 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3120,7 +3120,9 @@ __glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) + glusterd_friend_sm(); + glusterd_op_sm(); + } else { +- ret = glusterd_mgmt_v3_initiate_profile_phases(req, cli_op, dict); ++ ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(req, ++ cli_op, ++ dict); + } + + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index 
d98c6bc..ef8a2d9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -224,6 +224,16 @@ gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + } + break; + ++ case GD_OP_REBALANCE: ++ case GD_OP_DEFRAG_BRICK_VOLUME: ++ ret = glusterd_mgmt_v3_op_stage_rebalance(dict, op_errstr); ++ if (ret) { ++ gf_log(this->name, GF_LOG_WARNING, ++ "Rebalance Prevalidate Failed"); ++ goto out; ++ } ++ break; ++ + case GD_OP_MAX_OPVERSION: + ret = 0; + break; +@@ -264,6 +274,8 @@ gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + break; + } + case GD_OP_PROFILE_VOLUME: ++ case GD_OP_REBALANCE: ++ case GD_OP_DEFRAG_BRICK_VOLUME: + { + ret = gd_brick_op_phase(op, rsp_dict, dict, op_errstr); + if (ret) { +@@ -438,6 +450,19 @@ gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + } + break; + } ++ case GD_OP_REBALANCE: ++ case GD_OP_DEFRAG_BRICK_VOLUME: ++ { ++ ret = glusterd_mgmt_v3_op_rebalance(dict, op_errstr, ++ rsp_dict); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_COMMIT_OP_FAIL, ++ "Rebalance Commit Failed"); ++ goto out; ++ } ++ break; ++ } + + default: + break; +@@ -880,6 +905,8 @@ glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op, + case GD_OP_TIER_START_STOP: + case GD_OP_REMOVE_TIER_BRICK: + case GD_OP_PROFILE_VOLUME: ++ case GD_OP_DEFRAG_BRICK_VOLUME: ++ case GD_OP_REBALANCE: + break; + case GD_OP_MAX_OPVERSION: + break; +@@ -1197,6 +1224,7 @@ glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict, + break; + case GD_OP_START_VOLUME: + case GD_OP_ADD_BRICK: ++ case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_REPLACE_BRICK: + case GD_OP_RESET_BRICK: + case GD_OP_ADD_TIER_BRICK: +@@ -1221,6 +1249,30 @@ glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict, + dict_copy (dict, req_dict); + } + break; ++ ++ case GD_OP_REBALANCE: { ++ if (gd_set_commit_hash(dict) != 0) { ++ ret = -1; ++ goto out; ++ } ++ ret = dict_get_str (dict, "volname", &volname); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_CRITICAL, errno, ++ GD_MSG_DICT_GET_FAILED, ++ "volname is not present in " ++ "operation ctx"); ++ goto out; ++ } ++ ++ if (strcasecmp(volname, "all")) { ++ ret = glusterd_dict_set_volid(dict, volname, op_errstr); ++ if (ret) ++ goto out; ++ } ++ dict_copy(dict, req_dict); ++ } ++ break; ++ + case GD_OP_TIER_START_STOP: + case GD_OP_REMOVE_TIER_BRICK: + case GD_OP_DETACH_TIER_STATUS: +@@ -1247,6 +1299,7 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; ++ dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + +@@ -1278,10 +1331,45 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, + if (ret < 0) + goto out; + ++ if (rsp.dict.dict_len) { ++ /* Unserialize the dictionary */ ++ rsp_dict = dict_new (); ++ ++ ret = dict_unserialize (rsp.dict.dict_val, ++ rsp.dict.dict_len, ++ &rsp_dict); ++ if (ret < 0) { ++ free (rsp.dict.dict_val); ++ goto out; ++ } else { ++ rsp_dict->extra_stdfree = rsp.dict.dict_val; ++ } ++ } ++ + gf_uuid_copy (args->uuid, rsp.uuid); ++ pthread_mutex_lock (&args->lock_dict); ++ { ++ if (rsp.op == GD_OP_DEFRAG_BRICK_VOLUME) ++ ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict, ++ rsp_dict); ++ } ++ pthread_mutex_unlock (&args->lock_dict); + +- op_ret = rsp.op_ret; +- op_errno = rsp.op_errno; ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_RESP_AGGR_FAIL, "%s", ++ "Failed to aggregate response from " ++ " 
node/brick"); ++ if (!rsp.op_ret) ++ op_ret = ret; ++ else { ++ op_ret = rsp.op_ret; ++ op_errno = rsp.op_errno; ++ } ++ } else { ++ op_ret = rsp.op_ret; ++ op_errno = rsp.op_errno; ++ } + + out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, rsp.op_errstr, +@@ -1353,11 +1441,12 @@ out: + } + + int +-glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *rsp_dict, dict_t *req_dict, ++glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, uint32_t txn_generation) + { + int32_t ret = -1; + int32_t peer_cnt = 0; ++ dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; +@@ -1372,6 +1461,13 @@ glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *rsp_dict, dict_t *req_dict, + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + ++ rsp_dict = dict_new(); ++ if (!rsp_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, ++ "Failed to create response dictionary"); ++ goto out; ++ } ++ + /* Perform brick op on local node */ + ret = gd_mgmt_v3_brick_op_fn (op, req_dict, op_errstr, + rsp_dict); +@@ -1395,9 +1491,21 @@ glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *rsp_dict, dict_t *req_dict, + } + goto out; + } ++ if (op == GD_OP_DEFRAG_BRICK_VOLUME || op == GD_OP_PROFILE_VOLUME) { ++ ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict); ++ if (ret) { ++ gf_log(this->name, GF_LOG_ERROR, "%s", ++ "Failed to aggregate response from " ++ " node/brick"); ++ goto out; ++ } ++ } ++ ++ dict_unref(rsp_dict); ++ rsp_dict = NULL; + + /* Sending brick op req to other nodes in the cluster */ +- gd_syncargs_init (&args, rsp_dict); ++ gd_syncargs_init (&args, op_ctx); + synctask_barrier_init((&args)); + peer_cnt = 0; + +@@ -1616,6 +1724,13 @@ glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + GF_ASSERT (op_errstr); + GF_VALIDATE_OR_GOTO (this->name, op_errno, out); + ++ if (op == GD_OP_REBALANCE || op == GD_OP_DEFRAG_BRICK_VOLUME) { ++ ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx); ++ if (ret) { ++ gf_log(this->name, GF_LOG_WARNING, ++ "Failed to set rebalance id in dict."); ++ } ++ } + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_msg (this->name, GF_LOG_ERROR, 0, +@@ -2140,8 +2255,9 @@ out: + } + + int32_t +-glusterd_mgmt_v3_initiate_profile_phases (rpcsvc_request_t *req, +- glusterd_op_t op, dict_t *dict) ++glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase (rpcsvc_request_t *req, ++ glusterd_op_t op, ++ dict_t *dict) + { + int32_t ret = -1; + int32_t op_ret = -1; +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h +index eff070d..ef0fe10 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.h ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h +@@ -37,8 +37,9 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + + int32_t +-glusterd_mgmt_v3_initiate_profile_phases(rpcsvc_request_t *req, +- glusterd_op_t op, dict_t *dict); ++glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(rpcsvc_request_t *req, ++ glusterd_op_t op, ++ dict_t *dict); + + int32_t + glusterd_mgmt_v3_initiate_snap_phases(rpcsvc_request_t *req, glusterd_op_t op, +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h +index e64d368..cf1e61c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h +@@ -318,4 +318,7 @@ glusterd_op_stats_volume (dict_t *dict, char 
**op_errstr, dict_t *rsp_dict); + + int + glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr); ++ ++int ++gd_set_commit_hash(dict_t *dict); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index 5ab828c..7ba5f65 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -23,6 +23,7 @@ + #include "glusterd.h" + #include "glusterd-sm.h" + #include "glusterd-op-sm.h" ++#include "glusterd-mgmt.h" + #include "glusterd-utils.h" + #include "glusterd-messages.h" + #include "glusterd-store.h" +@@ -501,6 +502,7 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req) + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + glusterd_conf_t *priv = NULL; ++ int32_t op = GD_OP_NONE; + dict_t *dict = NULL; + char *volname = NULL; + gf_cli_defrag_type cmd = 0; +@@ -563,17 +565,25 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req) + (cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER) || + (cmd == GF_DEFRAG_CMD_STOP) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) { +- ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, +- dict, msg, sizeof (msg)); ++ op = GD_OP_DEFRAG_BRICK_VOLUME; + } else +- ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict, +- msg, sizeof (msg)); +- ++ op = GD_OP_REBALANCE; ++ ++ if (priv->op_version < GD_OP_VERSION_6_0) { ++ gf_msg_debug(this->name, 0, ++ "The cluster is operating at " ++ "version less than %d. Falling back " ++ "to op-sm framework.", ++ GD_OP_VERSION_6_0); ++ ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg)); ++ glusterd_friend_sm(); ++ glusterd_op_sm(); ++ } else { ++ ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(req, ++ op, ++ dict); ++ } + out: +- +- glusterd_friend_sm (); +- glusterd_op_sm (); +- + if (ret) { + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); +@@ -583,8 +593,8 @@ out: + } + + free (cli_req.dict.dict_val);//malloced by xdr +- +- return 0; ++ gf_msg_debug(this->name, 0, "Returning %d", ret); ++ return ret; + } + + int +@@ -628,6 +638,469 @@ glusterd_brick_validation (dict_t *dict, char *key, data_t *value, + } + + int ++glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict) ++{ ++ int ret = -1; ++ int32_t cmd = 0; ++ char *volname = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ char msg[2048] = {0}; ++ char *task_id_str = NULL; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ GF_ASSERT(rsp_dict); ++ GF_ASSERT(req_dict); ++ ++ ret = dict_get_str(rsp_dict, "volname", &volname); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "volname not found"); ++ goto out; ++ } ++ ++ ret = dict_get_int32(rsp_dict, "rebalance-command", &cmd); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "cmd not found"); ++ goto out; ++ } ++ ++ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, ++ sizeof(msg)); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "failed to validate"); ++ goto out; ++ } ++ ++ /* reblance id is generted in glusterd_mgmt_v3_op_stage_rebalance(), but ++ * rsp_dict is unavailable there. So copying it to rsp_dict from req_dict ++ * here. 
So that cli can display the rebalance id.*/ ++ if ((cmd == GF_DEFRAG_CMD_START) || ++ (cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX) || ++ (cmd == GF_DEFRAG_CMD_START_FORCE) || ++ (cmd == GF_DEFRAG_CMD_START_TIER)) { ++ if (is_origin_glusterd(rsp_dict)) { ++ ret = dict_get_str(req_dict, GF_REBALANCE_TID_KEY, ++ &task_id_str); ++ if (ret) { ++ snprintf(msg, sizeof(msg), "Missing rebalance-id"); ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ GD_MSG_REBALANCE_ID_MISSING, "%s", msg); ++ ret = 0; ++ } else { ++ gf_uuid_parse(task_id_str, ++ volinfo->rebal.rebalance_id); ++ ret = glusterd_copy_uuid_to_dict( ++ volinfo->rebal.rebalance_id, rsp_dict, ++ GF_REBALANCE_TID_KEY); ++ if (ret) { ++ snprintf(msg, sizeof(msg), ++ "Failed to set rebalance id for volume %s", ++ volname); ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ GD_MSG_DICT_SET_FAILED, "%s", ++ msg); ++ } ++ } ++ } ++ } ++ ++ /* Set task-id, if available, in rsp_dict for operations other than ++ * start. This is needed when we want rebalance id in xml output ++ */ ++ if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP || ++ cmd == GF_DEFRAG_CMD_STATUS_TIER) { ++ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) { ++ if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) ++ ret = glusterd_copy_uuid_to_dict( ++ volinfo->rebal.rebalance_id, rsp_dict, ++ GF_REMOVE_BRICK_TID_KEY); ++ else ++ ret = glusterd_copy_uuid_to_dict( ++ volinfo->rebal.rebalance_id, ++ rsp_dict, GF_REBALANCE_TID_KEY); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_DICT_SET_FAILED, ++ "Failed to set task-id for volume %s", ++ volname); ++ goto out; ++ } ++ } ++ } ++out: ++ return ret; ++} ++ ++int ++glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr) ++{ ++ char *volname = NULL; ++ char *cmd_str = NULL; ++ int ret = 0; ++ int32_t cmd = 0; ++ char msg[2048] = {0}; ++ glusterd_volinfo_t *volinfo = NULL; ++ char *task_id_str = NULL; ++ xlator_t *this = 0; ++ int32_t is_force = 0; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ ret = dict_get_str (dict, "volname", &volname); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "volname not found"); ++ goto out; ++ } ++ ++ ret = dict_get_int32 (dict, "rebalance-command", &cmd); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "cmd not found"); ++ goto out; ++ } ++ ++ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, ++ sizeof(msg)); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "failed to validate"); ++ goto out; ++ } ++ switch (cmd) { ++ case GF_DEFRAG_CMD_START_TIER: ++ ret = dict_get_int32 (dict, "force", &is_force); ++ if (ret) ++ is_force = 0; ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ gf_asprintf(op_errstr, ++ "volume %s is not a tier " ++ "volume.", ++ volinfo->volname); ++ ret = -1; ++ goto out; ++ } ++ if ((!is_force) && glusterd_is_tier_daemon_running(volinfo)) { ++ ret = gf_asprintf(op_errstr, ++ "A Tier daemon is " ++ "already running on volume %s", ++ volname); ++ ret = -1; ++ goto out; ++ } ++ /* Fall through */ ++ case GF_DEFRAG_CMD_START: ++ case GF_DEFRAG_CMD_START_LAYOUT_FIX: ++ /* Check if the connected clients are all of version ++ * glusterfs-3.6 and higher. This is needed to prevent some data ++ * loss issues that could occur when older clients are connected ++ * when rebalance is run. 
This check can be bypassed by using ++ * 'force' ++ */ ++ ret = glusterd_check_client_op_version_support(volname, ++ GD_OP_VERSION_RHS_3_0, ++ NULL); ++ if (ret) { ++ ret = gf_asprintf(op_errstr, ++ "Volume %s has one or " ++ "more connected clients of a version" ++ " lower than GlusterFS-v3.6.0. " ++ "Starting rebalance in this state " ++ "could lead to data loss.\nPlease " ++ "disconnect those clients before " ++ "attempting this command again.", ++ volname); ++ goto out; ++ } ++ /* Fall through */ ++ case GF_DEFRAG_CMD_START_FORCE: ++ if (is_origin_glusterd(dict)) { ++ ret = glusterd_generate_and_set_task_id(dict, ++ GF_REBALANCE_TID_KEY); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, ++ "Failed to generate task-id"); ++ goto out; ++ } ++ } else { ++ ret = dict_get_str(dict, GF_REBALANCE_TID_KEY, &task_id_str); ++ if (ret) { ++ snprintf(msg, sizeof(msg), "Missing rebalance-id"); ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ GD_MSG_REBALANCE_ID_MISSING, "%s", msg); ++ ret = 0; ++ } ++ } ++ ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg), ++ GD_OP_REBALANCE); ++ if (ret) { ++ gf_msg_debug(this->name, 0, ++ "defrag start validate " ++ "failed for volume %s.", ++ volinfo->volname); ++ goto out; ++ } ++ break; ++ case GF_DEFRAG_CMD_STATUS_TIER: ++ case GF_DEFRAG_CMD_STATUS: ++ case GF_DEFRAG_CMD_STOP: ++ ++ ret = dict_get_str(dict, "cmd-str", &cmd_str); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Failed to get " ++ "command string"); ++ ret = -1; ++ goto out; ++ } ++ if ((strstr(cmd_str, "rebalance") != NULL) && ++ (volinfo->rebal.op != GD_OP_REBALANCE)) { ++ snprintf(msg, sizeof(msg), ++ "Rebalance not started " ++ "for volume %s.", volinfo->volname); ++ ret = -1; ++ goto out; ++ } ++ ++ if (strstr(cmd_str, "remove-brick") != NULL) { ++ if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) { ++ snprintf(msg, sizeof(msg), ++ "remove-brick not " ++ "started for volume %s.", ++ volinfo->volname); ++ ret = -1; ++ goto out; ++ } ++ ++ /* For remove-brick status/stop command check whether ++ * given input brick is part of volume or not.*/ ++ ++ ret = dict_foreach_fnmatch(dict, "brick*", ++ glusterd_brick_validation, volinfo); ++ if (ret == -1) { ++ snprintf(msg, sizeof(msg), ++ "Incorrect brick for volume %s", ++ volinfo->volname); ++ goto out; ++ } ++ } ++ if (cmd == GF_DEFRAG_CMD_STATUS_TIER) { ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ snprintf(msg, sizeof(msg), ++ "volume %s is not " ++ "a tier volume.", ++ volinfo->volname); ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ break; ++ ++ case GF_DEFRAG_CMD_STOP_DETACH_TIER: ++ case GF_DEFRAG_CMD_DETACH_STATUS: ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ snprintf(msg, sizeof(msg), ++ "volume %s is not " ++ "a tier volume.", ++ volinfo->volname); ++ ret = -1; ++ goto out; ++ } ++ ++ if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) { ++ snprintf(msg, sizeof(msg), ++ "Detach-tier " ++ "not started"); ++ ret = -1; ++ goto out; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ ret = 0; ++out: ++ if (ret && op_errstr && msg[0]) ++ *op_errstr = gf_strdup(msg); ++ ++ return ret; ++} ++ ++int ++glusterd_mgmt_v3_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict) ++{ ++ char *volname = NULL; ++ int ret = 0; ++ int32_t cmd = 0; ++ char msg[2048] = {0}; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_brickinfo_t *brickinfo = NULL; ++ glusterd_brickinfo_t *tmp = NULL; ++ gf_boolean_t volfile_update = _gf_false; ++ char *task_id_str = NULL; ++ xlator_t *this = NULL; ++ 
uint32_t commit_hash; ++ int32_t is_force = 0; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ ret = dict_get_str(dict, "volname", &volname); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "volname not given"); ++ goto out; ++ } ++ ++ ret = dict_get_int32(dict, "rebalance-command", &cmd); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "command not given"); ++ goto out; ++ } ++ ++ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, ++ sizeof(msg)); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "cmd validate failed"); ++ goto out; ++ } ++ ++ switch (cmd) { ++ case GF_DEFRAG_CMD_START: ++ case GF_DEFRAG_CMD_START_LAYOUT_FIX: ++ case GF_DEFRAG_CMD_START_FORCE: ++ case GF_DEFRAG_CMD_START_TIER: ++ ++ ret = dict_get_int32(dict, "force", &is_force); ++ if (ret) ++ is_force = 0; ++ if (!is_force) { ++ /* Reset defrag status to 'NOT STARTED' whenever a ++ * remove-brick/rebalance command is issued to remove ++ * stale information from previous run. ++ */ ++ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; ++ ++ ret = dict_get_str(dict, GF_REBALANCE_TID_KEY, &task_id_str); ++ if (ret) { ++ gf_msg_debug(this->name, 0, ++ "Missing rebalance id"); ++ ret = 0; ++ } else { ++ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); ++ volinfo->rebal.op = GD_OP_REBALANCE; ++ } ++ if (!gd_should_i_start_rebalance(volinfo)) { ++ /* Store the rebalance-id and rebalance command ++ * even if the peer isn't starting a rebalance ++ * process. On peers where a rebalance process ++ * is started, glusterd_handle_defrag_start ++ * performs the storing. ++ * Storing this is needed for having ++ * 'volume status' work correctly. ++ */ ++ glusterd_store_perform_node_state_store(volinfo); ++ break; ++ } ++ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { ++ volinfo->rebal.commit_hash = commit_hash; ++ } ++ ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg), ++ cmd, NULL, GD_OP_REBALANCE); ++ break; ++ } else { ++ /* Reset defrag status to 'STARTED' so that the ++ * pid is checked and restarted accordingly. ++ * If the pid is not running it executes the ++ * "NOT_STARTED" case and restarts the process ++ */ ++ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; ++ volinfo->rebal.defrag_cmd = cmd; ++ volinfo->rebal.op = GD_OP_REBALANCE; ++ ++ ret = dict_get_str(dict, GF_REBALANCE_TID_KEY, &task_id_str); ++ if (ret) { ++ gf_msg_debug(this->name, 0, ++ "Missing rebalance id"); ++ ret = 0; ++ } else { ++ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); ++ volinfo->rebal.op = GD_OP_REBALANCE; ++ } ++ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { ++ volinfo->rebal.commit_hash = commit_hash; ++ } ++ ret = glusterd_restart_rebalance_for_volume(volinfo); ++ break; ++ } ++ case GF_DEFRAG_CMD_STOP: ++ case GF_DEFRAG_CMD_STOP_DETACH_TIER: ++ /* Clear task-id only on explicitly stopping rebalance. 
++ * Also clear the stored operation, so it doesn't cause trouble ++ * with future rebalance/remove-brick starts ++ */ ++ gf_uuid_clear(volinfo->rebal.rebalance_id); ++ volinfo->rebal.op = GD_OP_NONE; ++ ++ /* Fall back to the old volume file in case of decommission*/ ++ cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks, ++ brick_list) ++ { ++ if (!brickinfo->decommissioned) ++ continue; ++ brickinfo->decommissioned = 0; ++ volfile_update = _gf_true; ++ } ++ ++ if (volfile_update == _gf_false) { ++ ret = 0; ++ break; ++ } ++ ++ ret = glusterd_create_volfiles_and_notify_services(volinfo); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles"); ++ goto out; ++ } ++ ++ ret = glusterd_store_volinfo(volinfo, ++ GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL, ++ "failed to store volinfo"); ++ goto out; ++ } ++ ++ if (volinfo->type == GF_CLUSTER_TYPE_TIER && ++ cmd == GF_OP_CMD_STOP_DETACH_TIER) { ++ glusterd_defrag_info_set(volinfo, dict, ++ GF_DEFRAG_CMD_START_TIER, ++ GF_DEFRAG_CMD_START, GD_OP_REBALANCE); ++ glusterd_restart_rebalance_for_volume(volinfo); ++ } ++ ++ ret = 0; ++ break; ++ ++ case GF_DEFRAG_CMD_START_DETACH_TIER: ++ case GF_DEFRAG_CMD_STATUS: ++ case GF_DEFRAG_CMD_STATUS_TIER: ++ break; ++ default: ++ break; ++ } ++ ++out: ++ if (ret && op_errstr && msg[0]) ++ *op_errstr = gf_strdup(msg); ++ ++ return ret; ++} ++ ++int + glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) + { + char *volname = NULL; +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 9a67d1c..7baef64 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -317,6 +317,15 @@ glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) + ret = glusterd_max_opversion_use_rsp_dict (aggr, rsp); + break; + ++ case GD_OP_PROFILE_VOLUME: ++ ret = glusterd_profile_volume_use_rsp_dict(aggr, rsp); ++ break; ++ ++ case GD_OP_REBALANCE: ++ case GD_OP_DEFRAG_BRICK_VOLUME: ++ ret = glusterd_volume_rebalance_use_rsp_dict(aggr, rsp); ++ break; ++ + case GD_OP_TIER_STATUS: + case GD_OP_DETACH_TIER_STATUS: + case GD_OP_REMOVE_TIER_BRICK: +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 6468ecb..0fe56eb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -10884,7 +10884,7 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) + int ret = 0; + int32_t index = 0; + int32_t count = 0; +- int32_t current_index = 2; ++ int32_t current_index = 1; + int32_t value32 = 0; + uint64_t value = 0; + char *peer_uuid_str = NULL; +@@ -10925,7 +10925,7 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) + if (ret) + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_DICT_GET_FAILED, +- "failed to get index"); ++ "failed to get index from rsp dict"); + + memset (key, 0, 256); + snprintf (key, 256, "node-uuid-%d", index); +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 42c8821..f1e41be 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -1223,6 +1223,15 @@ int glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr, + dict_t *rsp_dict); + int glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr); + ++int 
++glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict); ++ ++int ++glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr); ++ ++int ++glusterd_mgmt_v3_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict); ++ + int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr); + int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +-- +1.8.3.1 + diff --git a/SOURCES/0491-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch b/SOURCES/0491-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch new file mode 100644 index 0000000..669a2e0 --- /dev/null +++ b/SOURCES/0491-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch @@ -0,0 +1,56 @@ +From d12c2c34484f92e968dea4a52538205a69d5484f Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 18 Dec 2018 17:57:25 +0530 +Subject: [PATCH 491/493] glusterd: tag rebalance mgmt_v3 command to op-version + 31305 + +In upstream migrating rebalance command is tagged to op-version 60000 +but in downstream the latest new op-version is 31305. + +Label: DOWNSTREAM ONLY + +Change-Id: I30bbad3efca29bf42b9a750581eb1aebc8a30ff9 +BUG: 1652466 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/158943 +Tested-by: RHGS Build Bot +--- + libglusterfs/src/globals.h | 2 -- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 ++-- + 2 files changed, 2 insertions(+), 4 deletions(-) + +diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h +index 5e3b180..343263c 100644 +--- a/libglusterfs/src/globals.h ++++ b/libglusterfs/src/globals.h +@@ -111,8 +111,6 @@ + + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + +-#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */ +- + /* Downstream only change */ + #define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */ + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/ +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index 7ba5f65..ba32241 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -569,12 +569,12 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req) + } else + op = GD_OP_REBALANCE; + +- if (priv->op_version < GD_OP_VERSION_6_0) { ++ if (priv->op_version < GD_OP_VERSION_3_13_5) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. Falling back " + "to op-sm framework.", +- GD_OP_VERSION_6_0); ++ GD_OP_VERSION_3_13_5); + ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg)); + glusterd_friend_sm(); + glusterd_op_sm(); +-- +1.8.3.1 + diff --git a/SOURCES/0492-mem-pool-track-glusterfs_ctx_t-in-struct-mem_pool.patch b/SOURCES/0492-mem-pool-track-glusterfs_ctx_t-in-struct-mem_pool.patch new file mode 100644 index 0000000..ff0460b --- /dev/null +++ b/SOURCES/0492-mem-pool-track-glusterfs_ctx_t-in-struct-mem_pool.patch @@ -0,0 +1,282 @@ +From c8e58e3a577e70a64df77fe885847285f682d9fb Mon Sep 17 00:00:00 2001 +From: Niels de Vos +Date: Tue, 29 Aug 2017 00:16:22 +0200 +Subject: [PATCH 492/493] mem-pool: track glusterfs_ctx_t in struct mem_pool + +In order to generate statedumps per glusterfs_ctx_t, it is needed to +place all the memory pools in a structure that the context can reach. +The 'struct mem_pool' has been extended with a 'list_head owner' that is +linked with the glusterfs_ctx_t->mempool_list. 
+ +All callers of mem_pool_new() have been updated to pass the current +glusterfs_ctx_t along. This context is needed to add the new memory pool +to the list and for grabbing the ctx->lock while updating the +glusterfs_ctx_t->mempool_list. + +> Updates: #307 +> Change-Id: Ia9384424d8d1630ef3efc9d5d523bf739c356c6e +> Signed-off-by: Niels de Vos +> Reviewed-on: https://review.gluster.org/18075 +> Smoke: Gluster Build System +> CentOS-regression: Gluster Build System +> Reviewed-by: Jeff Darcy + +Change-Id: Id3b193f366f7c46f91b77bced8729a4eb538837b +BUG: 1648893 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/158937 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/ctx.c | 2 -- + libglusterfs/src/glusterfs.h | 2 -- + libglusterfs/src/mem-pool.c | 18 +++++++++++++++++- + libglusterfs/src/mem-pool.h | 16 +++++++++++++--- + libglusterfs/src/rbthash.c | 5 +++-- + libglusterfs/src/rbthash.h | 2 +- + libglusterfs/src/statedump.c | 16 +++++++++++++++- + xlators/cluster/ec/src/ec-method.c | 2 +- + xlators/nfs/server/src/nfs3.c | 3 ++- + xlators/performance/io-cache/src/io-cache.c | 2 +- + 10 files changed, 53 insertions(+), 15 deletions(-) + +diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c +index 94c56ac..90480d0 100644 +--- a/libglusterfs/src/ctx.c ++++ b/libglusterfs/src/ctx.c +@@ -31,9 +31,7 @@ glusterfs_ctx_new () + ctx->mem_acct_enable = gf_global_mem_acct_enable_get(); + + INIT_LIST_HEAD (&ctx->graphs); +-#if defined(OLD_MEM_POOLS) + INIT_LIST_HEAD (&ctx->mempool_list); +-#endif + INIT_LIST_HEAD (&ctx->volfile_list); + + ctx->daemon_pipe[0] = -1; +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index c12e94e..157437c 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -526,11 +526,9 @@ struct _glusterfs_ctx { + int process_mode; /*mode in which process is runninng*/ + struct syncenv *env; /* The env pointer to the synctasks */ + +-#if defined(OLD_MEM_POOLS) + struct list_head mempool_list; /* used to keep a global list of + mempools, used to log details of + mempool in statedump */ +-#endif + char *statedump_path; + + struct mem_pool *dict_pool; +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index a8a9347..999a83f 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -676,7 +676,7 @@ void mem_pools_fini (void) {} + #endif + + struct mem_pool * +-mem_pool_new_fn (unsigned long sizeof_type, ++mem_pool_new_fn (glusterfs_ctx_t *ctx, unsigned long sizeof_type, + unsigned long count, char *name) + { + unsigned int i; +@@ -706,10 +706,18 @@ mem_pool_new_fn (unsigned long sizeof_type, + if (!new) + return NULL; + ++ new->ctx = ctx; + new->sizeof_type = sizeof_type; + new->count = count; + new->name = name; + new->pool = pool; ++ INIT_LIST_HEAD (&new->owner); ++ ++ LOCK (&ctx->lock); ++ { ++ list_add (&new->owner, &ctx->mempool_list); ++ } ++ UNLOCK (&ctx->lock); + + return new; + } +@@ -905,6 +913,14 @@ mem_put (void *ptr) + void + mem_pool_destroy (struct mem_pool *pool) + { ++ /* remove this pool from the owner (glusterfs_ctx_t) */ ++ LOCK (&pool->ctx->lock); ++ { ++ list_del (&pool->owner); ++ } ++ UNLOCK (&pool->ctx->lock); ++ ++ /* free this pool, but keep the mem_pool_shared */ + GF_FREE (pool); + + /* +diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/mem-pool.h +index 057d957..0ebb63b 100644 +--- a/libglusterfs/src/mem-pool.h ++++ b/libglusterfs/src/mem-pool.h +@@ -16,6 +16,7 
@@ + #include "atomic.h" + #include "logging.h" + #include "mem-types.h" ++#include "glusterfs.h" /* for glusterfs_ctx_t */ + #include + #include + #include +@@ -207,11 +208,15 @@ out: + /* kind of 'header' for the actual mem_pool_shared structure, this might make + * it possible to dump some more details in a statedump */ + struct mem_pool { ++ /* object size, without pooled_obj_hdr_t */ + unsigned long sizeof_type; + unsigned long count; + char *name; + +- struct mem_pool_shared *pool; ++ struct list_head owner; /* glusterfs_ctx_t->mempool_list */ ++ glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */ ++ ++ struct mem_pool_shared *pool; /* the initial pool that was returned */ + }; + + typedef struct pooled_obj_hdr { +@@ -276,9 +281,14 @@ void mem_pools_init_late (void); /* start the pool_sweeper thread */ + void mem_pools_fini (void); /* cleanup memory pools */ + + struct mem_pool * +-mem_pool_new_fn (unsigned long sizeof_type, unsigned long count, char *name); ++mem_pool_new_fn (glusterfs_ctx_t *ctx, unsigned long sizeof_type, unsigned long ++ count, char *name); ++ ++#define mem_pool_new(type, count) (mem_pool_new_fn (THIS->ctx, \ ++ sizeof(type), count, #type)) + +-#define mem_pool_new(type,count) mem_pool_new_fn (sizeof(type), count, #type) ++#define mem_pool_new_ctx(ctx, type, count) (mem_pool_new_fn (ctx, \ ++ sizeof(type), count, #type)) + + void mem_put (void *ptr); + void *mem_get (struct mem_pool *pool); +diff --git a/libglusterfs/src/rbthash.c b/libglusterfs/src/rbthash.c +index 52d8a15..06fc7ee 100644 +--- a/libglusterfs/src/rbthash.c ++++ b/libglusterfs/src/rbthash.c +@@ -83,7 +83,7 @@ err: + */ + + rbthash_table_t * +-rbthash_table_init (int buckets, rbt_hasher_t hfunc, ++rbthash_table_init (glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc, + rbt_data_destroyer_t dfunc, + unsigned long expected_entries, + struct mem_pool *entrypool) +@@ -123,7 +123,8 @@ rbthash_table_init (int buckets, rbt_hasher_t hfunc, + + if (expected_entries) { + newtab->entrypool = +- mem_pool_new (rbthash_entry_t, expected_entries); ++ mem_pool_new_ctx (ctx, rbthash_entry_t, ++ expected_entries); + if (!newtab->entrypool) { + goto free_buckets; + } +diff --git a/libglusterfs/src/rbthash.h b/libglusterfs/src/rbthash.h +index b093ce9..949b88a 100644 +--- a/libglusterfs/src/rbthash.h ++++ b/libglusterfs/src/rbthash.h +@@ -52,7 +52,7 @@ typedef struct rbthash_table { + } rbthash_table_t; + + extern rbthash_table_t * +-rbthash_table_init (int buckets, rbt_hasher_t hfunc, ++rbthash_table_init (glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc, + rbt_data_destroyer_t dfunc, unsigned long expected_entries, + struct mem_pool *entrypool); + +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index a4635f3..4aad014 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -377,11 +377,11 @@ gf_proc_dump_mem_info_to_dict (dict_t *dict) + void + gf_proc_dump_mempool_info (glusterfs_ctx_t *ctx) + { +-#if defined(OLD_MEM_POOLS) + struct mem_pool *pool = NULL; + + gf_proc_dump_add_section ("mempool"); + ++#if defined(OLD_MEM_POOLS) + list_for_each_entry (pool, &ctx->mempool_list, global_list) { + gf_proc_dump_write ("-----", "-----"); + gf_proc_dump_write ("pool-name", "%s", pool->name); +@@ -396,6 +396,20 @@ gf_proc_dump_mempool_info (glusterfs_ctx_t *ctx) + gf_proc_dump_write ("cur-stdalloc", "%d", pool->curr_stdalloc); + gf_proc_dump_write ("max-stdalloc", "%d", pool->max_stdalloc); + } ++#else ++ LOCK (&ctx->lock); ++ { ++ list_for_each_entry 
(pool, &ctx->mempool_list, owner) { ++ gf_proc_dump_write ("-----", "-----"); ++ gf_proc_dump_write ("pool-name", "%s", pool->name); ++ gf_proc_dump_write ("sizeof-type", "%lu", pool->sizeof_type); ++ gf_proc_dump_write ("padded-sizeof", "%d", 1 << pool->pool->power_of_two); ++ gf_proc_dump_write ("shared-pool", "%p", pool->pool); ++ } ++ } ++ UNLOCK (&ctx->lock); ++ ++ /* TODO: details of (struct mem_pool_shared) pool->pool */ + #endif + } + +diff --git a/xlators/cluster/ec/src/ec-method.c b/xlators/cluster/ec/src/ec-method.c +index e0dd8e7..a2dd2bd 100644 +--- a/xlators/cluster/ec/src/ec-method.c ++++ b/xlators/cluster/ec/src/ec-method.c +@@ -310,7 +310,7 @@ ec_method_init(xlator_t *xl, ec_matrix_list_t *list, uint32_t columns, + INIT_LIST_HEAD(&list->lru); + int32_t err; + +- list->pool = mem_pool_new_fn(sizeof(ec_matrix_t) + ++ list->pool = mem_pool_new_fn(xl->ctx, sizeof(ec_matrix_t) + + sizeof(ec_matrix_row_t) * columns + + sizeof(uint32_t) * columns * columns, + 128, "ec_matrix_t"); +diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c +index 040d316..b053eb3 100644 +--- a/xlators/nfs/server/src/nfs3.c ++++ b/xlators/nfs/server/src/nfs3.c +@@ -5786,7 +5786,8 @@ nfs3_init_state (xlator_t *nfsx) + + localpool = nfs->memfactor * GF_NFS_CONCURRENT_OPS_MULT; + gf_msg_trace (GF_NFS3, 0, "local pool: %d", localpool); +- nfs3->localpool = mem_pool_new (nfs3_call_state_t, localpool); ++ nfs3->localpool = mem_pool_new_ctx (nfsx->ctx, nfs3_call_state_t, ++ localpool); + if (!nfs3->localpool) { + gf_msg (GF_NFS3, GF_LOG_ERROR, ENOMEM, NFS_MSG_NO_MEMORY, + "local mempool creation failed"); +diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c +index de44ad2..d7b3b37 100644 +--- a/xlators/performance/io-cache/src/io-cache.c ++++ b/xlators/performance/io-cache/src/io-cache.c +@@ -1146,7 +1146,7 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, + if (!ioc_inode->cache.page_table) { + ioc_inode->cache.page_table + = rbthash_table_init +- (IOC_PAGE_TABLE_BUCKET_COUNT, ++ (this->ctx, IOC_PAGE_TABLE_BUCKET_COUNT, + ioc_hashfn, NULL, 0, + table->mem_pool); + +-- +1.8.3.1 + diff --git a/SOURCES/0493-mem-pool-count-allocations-done-per-user-pool.patch b/SOURCES/0493-mem-pool-count-allocations-done-per-user-pool.patch new file mode 100644 index 0000000..39d4784 --- /dev/null +++ b/SOURCES/0493-mem-pool-count-allocations-done-per-user-pool.patch @@ -0,0 +1,96 @@ +From bb668d69815209bd7f7f4669142191d4b48bcde8 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 18 Dec 2018 19:41:42 +0530 +Subject: [PATCH 493/493] mem-pool: count allocations done per user-pool + +Count the active allocations per 'struct mem_pool'. These are the +objects that the calling component allocated and free'd in the memory +pool for this specific type. Having this count in the statedump will +make it easy to find memory leaks. 
+ +> Updates: #307 +> Change-Id: I797fabab86f104e49338c00e449a7d0b0d270004 +> Signed-off-by: Niels de Vos +> Reviewed-on: https://review.gluster.org/18074 +> Smoke: Gluster Build System +> CentOS-regression: Gluster Build System +> Reviewed-by: Jeff Darcy + +Change-Id: I2e4375fd59f11288f41a36cad131d794bff19fbb +BUG: 1648893 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/158961 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/mem-pool.c | 4 ++++ + libglusterfs/src/mem-pool.h | 3 ++- + libglusterfs/src/statedump.c | 4 ++++ + 3 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 999a83f..d82a371 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -711,6 +711,7 @@ mem_pool_new_fn (glusterfs_ctx_t *ctx, unsigned long sizeof_type, + new->count = count; + new->name = name; + new->pool = pool; ++ GF_ATOMIC_INIT (new->active, 0); + INIT_LIST_HEAD (&new->owner); + + LOCK (&ctx->lock); +@@ -864,6 +865,8 @@ mem_get (struct mem_pool *mem_pool) + retval->pool_list = pool_list; + retval->power_of_two = mem_pool->pool->power_of_two; + ++ GF_ATOMIC_INC (mem_pool->active); ++ + return retval + 1; + #endif /* GF_DISABLE_MEMPOOL */ + } +@@ -894,6 +897,7 @@ mem_put (void *ptr) + pt_pool = &pool_list->pools[hdr->power_of_two-POOL_SMALLEST]; + + hdr->magic = GF_MEM_INVALID_MAGIC; ++ GF_ATOMIC_DEC (hdr->pool->active); + + (void) pthread_spin_lock (&pool_list->lock); + if (!pool_list->poison) { +diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/mem-pool.h +index 0ebb63b..aa1d045 100644 +--- a/libglusterfs/src/mem-pool.h ++++ b/libglusterfs/src/mem-pool.h +@@ -210,8 +210,9 @@ out: + struct mem_pool { + /* object size, without pooled_obj_hdr_t */ + unsigned long sizeof_type; +- unsigned long count; ++ unsigned long count; /* requested pool size (unused) */ + char *name; ++ gf_atomic_t active; /* current allocations */ + + struct list_head owner; /* glusterfs_ctx_t->mempool_list */ + glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */ +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index 4aad014..a04c535 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -400,10 +400,14 @@ gf_proc_dump_mempool_info (glusterfs_ctx_t *ctx) + LOCK (&ctx->lock); + { + list_for_each_entry (pool, &ctx->mempool_list, owner) { ++ int64_t active = GF_ATOMIC_GET (pool->active); ++ + gf_proc_dump_write ("-----", "-----"); + gf_proc_dump_write ("pool-name", "%s", pool->name); ++ gf_proc_dump_write ("active-count", "%"GF_PRI_ATOMIC, active); + gf_proc_dump_write ("sizeof-type", "%lu", pool->sizeof_type); + gf_proc_dump_write ("padded-sizeof", "%d", 1 << pool->pool->power_of_two); ++ gf_proc_dump_write ("size", "%lu", (1 << pool->pool->power_of_two) * active); + gf_proc_dump_write ("shared-pool", "%p", pool->pool); + } + } +-- +1.8.3.1 + diff --git a/SOURCES/0494-mem-pool-Resolve-crash-in-mem_pool_destroy.patch b/SOURCES/0494-mem-pool-Resolve-crash-in-mem_pool_destroy.patch new file mode 100644 index 0000000..74075bb --- /dev/null +++ b/SOURCES/0494-mem-pool-Resolve-crash-in-mem_pool_destroy.patch @@ -0,0 +1,43 @@ +From 10868bfc5ed099a90fbfd2310bc89c299475d94e Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 19 Dec 2018 13:08:01 +0530 +Subject: [PATCH 494/495] mem-pool: Resolve crash in mem_pool_destroy + +Problem: In the commit 
c8e58e3a577e70a64df77fe885847285f682d9fb + change the code in mem_pool_destroy to delete pool->owner + without validate pool + +Solution: To avoid the crash check the pool pointer + +(gdb) f 0 +921 LOCK (&pool->ctx->lock); +(gdb) p pool +$1 = (struct mem_pool *) 0x0 + +Change-Id: I34fdc8974c5c60767a5c26e7d1c9099d398b9722 +BUG: 1648893 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/159029 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/mem-pool.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index d82a371..76daca9 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -917,6 +917,9 @@ mem_put (void *ptr) + void + mem_pool_destroy (struct mem_pool *pool) + { ++ if (!pool) ++ return; ++ + /* remove this pool from the owner (glusterfs_ctx_t) */ + LOCK (&pool->ctx->lock); + { +-- +1.8.3.1 + diff --git a/SOURCES/0495-build-add-conditional-dependency-on-server-for-devel.patch b/SOURCES/0495-build-add-conditional-dependency-on-server-for-devel.patch new file mode 100644 index 0000000..f3d32b1 --- /dev/null +++ b/SOURCES/0495-build-add-conditional-dependency-on-server-for-devel.patch @@ -0,0 +1,48 @@ +From 24b37593679a8a3a3dbba14ff92ec92c6d83dbc6 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Wed, 19 Dec 2018 13:17:42 +0530 +Subject: [PATCH 495/495] build: add conditional dependency on server for devel + +Add conditional depedency on server for glusterfs-devel + +Label: DOWNSTREAM ONLY + +BUG: 1656357 +Change-Id: Icc45df3db137dbc03d240c1ac774b5c8735c5f2f +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/159030 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 16dc5d7..0ad4ffc 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -339,7 +339,9 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-extra-xlators = %{version}-%{release} + %endif + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++%if ( 0%{?_build_server} ) + Requires: %{name}-server%{?_isa} = %{version}-%{release} ++%endif + + %description devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -2171,6 +2173,11 @@ fi + %endif + + %changelog ++* Wed Dec 19 2018 Milind Changire ++- Add explicit package dependencies (#1656357) ++- Remove absolute paths from spec file (#1350745) ++- Do not package crypt.so for FIPS compliance (#1653224) ++ + * Thu Dec 13 2018 Krutika Dhananjay + - Install /var/lib/glusterd/groups/distributed-virt by default (#1653613) + +-- +1.8.3.1 + diff --git a/SOURCES/0496-glusterd-kill-the-process-without-releasing-the-clea.patch b/SOURCES/0496-glusterd-kill-the-process-without-releasing-the-clea.patch new file mode 100644 index 0000000..9f797bc --- /dev/null +++ b/SOURCES/0496-glusterd-kill-the-process-without-releasing-the-clea.patch @@ -0,0 +1,64 @@ +From 61fd5c07791d82e830d7caac008247765437b7ca Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 2 Jan 2019 12:29:53 +0530 +Subject: [PATCH 496/498] glusterd: kill the process without releasing the + cleanup mutex lock + +Problem: +glusterd acquires a cleanup mutex lock before it starts +cleanup process, so that any other thread which tries to acquire +lock on any resource will be blocked on cleanup mutex lock. 
+ +We don't want any thread to try to acquire any resource, once +the cleanup is started. because other threads might try to acquire +lock on resources which are already freed by the thread which is +going though the cleanup phase. + +previously we were releasing the cleanup mutex lock before the +process exit. As we are releasing the cleanup mutex lock, before +the process can exit some other thread which is blocked on +cleanup mutex lock is acquiring the cleanup mutex lock and +trying to acquire some resources which are already freed as a +part of cleanup. This is leading glusterd to crash. + +Solution: We should exit the process without releasing the +cleanup mutex lock. + +> Change-Id: Ibae1c62260f141019017f7a547519a5d38dc2bb6 +> fixes: bz#1654270 +> Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/21974/ + +Change-Id: Ibae1c62260f141019017f7a547519a5d38dc2bb6 +BUG: 1654161 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/159635 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + glusterfsd/src/glusterfsd.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 57effbd..990036c 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1446,11 +1446,10 @@ cleanup_and_exit (int signum) + #endif + + trav = NULL; ++ /* NOTE: Only the least significant 8 bits i.e (signum & 255) ++ will be available to parent process on calling exit() */ ++ exit(abs(signum)); + } +- pthread_mutex_unlock(&ctx->cleanup_lock); +- /* NOTE: Only the least significant 8 bits i.e (signum & 255) +- will be available to parent process on calling exit() */ +- exit(abs(signum)); + } + + +-- +1.8.3.1 + diff --git a/SOURCES/0497-cluster-dht-Use-percentages-for-space-check.patch b/SOURCES/0497-cluster-dht-Use-percentages-for-space-check.patch new file mode 100644 index 0000000..12458db --- /dev/null +++ b/SOURCES/0497-cluster-dht-Use-percentages-for-space-check.patch @@ -0,0 +1,145 @@ +From 2029bf72400a380a4a0f1bf7f1b72816c70f9774 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Mon, 31 Dec 2018 17:42:27 +0530 +Subject: [PATCH 497/498] cluster/dht: Use percentages for space check + +With heterogenous bricks now being supported in DHT +we could run into issues where files are not migrated +even though there is sufficient space in newly added bricks +which just happen to be considerably smaller than older +bricks. Using percentages instead of absolute available +space for space checks can mitigate that to some extent. + +upstream patch:https://review.gluster.org/#/c/glusterfs/+/19101/ +This is not an identical backport as there were some changes +to upstream master that are not available in the downstream code. + +Marking bug-1247563.t bad as that used to depend on the easier +code to prevent a file from migrating. This will be removed +once we find a way to force a file migration failure. 
+ +Change-Id: Ie89bfdd114406a986b3ff4f53b0bb0fae6574c8e +BUG: 1290124 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/159569 +Tested-by: RHGS Build Bot +Reviewed-by: Susant Palai +Reviewed-by: Raghavendra Gowdappa +--- + tests/bugs/distribute/bug-1247563.t | 3 ++ + xlators/cluster/dht/src/dht-rebalance.c | 57 ++++++++++++++++++++++++--------- + 2 files changed, 45 insertions(+), 15 deletions(-) + +diff --git a/tests/bugs/distribute/bug-1247563.t b/tests/bugs/distribute/bug-1247563.t +index f7f9258..12cd080 100644 +--- a/tests/bugs/distribute/bug-1247563.t ++++ b/tests/bugs/distribute/bug-1247563.t +@@ -55,3 +55,6 @@ COUNT=`getfacl $FPATH2 |grep -c "user:root:rwx"` + EXPECT "0" echo $COUNT + + cleanup; ++ ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index d0f49d2..291b557 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -880,8 +880,12 @@ __dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc + dict_t *xdata = NULL; + dht_layout_t *layout = NULL; + uint64_t src_statfs_blocks = 1; ++ uint64_t src_total_blocks = 0; + uint64_t dst_statfs_blocks = 1; +- double post_availspacepercent = 0; ++ uint64_t dst_total_blocks = 0; ++ uint64_t file_blocks = 0; ++ double dst_post_availspacepercent = 0; ++ double src_post_availspacepercent = 0; + + xdata = dict_new (); + if (!xdata) { +@@ -926,8 +930,24 @@ __dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc + } + + gf_msg_debug (this->name, 0, "min_free_disk - %f , block available - %lu ," +- " block size - %lu ", conf->min_free_disk, dst_statfs.f_bavail, +- dst_statfs.f_bsize); ++ " block size - %lu ", conf->min_free_disk, ++ dst_statfs.f_bavail, dst_statfs.f_frsize); ++ ++ dst_statfs_blocks = ((dst_statfs.f_bavail * ++ dst_statfs.f_frsize) / ++ GF_DISK_SECTOR_SIZE); ++ ++ src_statfs_blocks = ((src_statfs.f_bavail * ++ src_statfs.f_frsize) / ++ GF_DISK_SECTOR_SIZE); ++ ++ dst_total_blocks = ((dst_statfs.f_blocks * ++ dst_statfs.f_frsize) / ++ GF_DISK_SECTOR_SIZE); ++ ++ src_total_blocks = ((src_statfs.f_blocks * ++ src_statfs.f_frsize) / ++ GF_DISK_SECTOR_SIZE); + + /* if force option is given, do not check for space @ dst. + * Check only if space is avail for the file */ +@@ -940,17 +960,22 @@ __dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc + subvol gains certain 'blocks' of free space. A valid check is + necessary here to avoid errorneous move to destination where + the space could be scantily available. ++ With heterogenous brick support, an actual space comparison could ++ prevent any files being migrated to newly added bricks if they are ++ smaller then the free space available on the existing bricks. 
+ */ + if (stbuf) { +- dst_statfs_blocks = ((dst_statfs.f_bavail * +- dst_statfs.f_bsize) / +- GF_DISK_SECTOR_SIZE); +- src_statfs_blocks = ((src_statfs.f_bavail * +- src_statfs.f_bsize) / +- GF_DISK_SECTOR_SIZE); +- if ((dst_statfs_blocks) < +- (src_statfs_blocks + stbuf->ia_blocks)) { ++ file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1; ++ file_blocks /= GF_DISK_SECTOR_SIZE; + ++ src_post_availspacepercent = ++ (((src_statfs_blocks + file_blocks) * 100) / ++ src_total_blocks); ++ ++ dst_post_availspacepercent = ((dst_statfs_blocks * 100) / ++ dst_total_blocks); ++ ++ if (dst_post_availspacepercent < src_post_availspacepercent) { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "data movement of file " +@@ -969,16 +994,18 @@ __dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc + } + } + +- + check_avail_space: + + if (conf->disk_unit == 'p' && dst_statfs.f_blocks) { +- post_availspacepercent = (dst_statfs.f_bavail * 100) / dst_statfs.f_blocks; ++ dst_post_availspacepercent = ++ (dst_statfs_blocks) / dst_total_blocks; ++ + gf_msg_debug (this->name, 0, "file : %s, post_availspacepercent : %lf " + "f_bavail : %lu min-free-disk: %lf", loc->path, +- post_availspacepercent, dst_statfs.f_bavail, conf->min_free_disk); ++ dst_post_availspacepercent, dst_statfs.f_bavail, ++ conf->min_free_disk); + +- if (post_availspacepercent < conf->min_free_disk) { ++ if (dst_post_availspacepercent < conf->min_free_disk) { + gf_msg (this->name, GF_LOG_WARNING, 0, 0, + "Write will cross min-free-disk for " + "file - %s on subvol - %s. Looking " +-- +1.8.3.1 + diff --git a/SOURCES/0498-mem-pool-Code-refactor-in-mem_pool.c.patch b/SOURCES/0498-mem-pool-Code-refactor-in-mem_pool.c.patch new file mode 100644 index 0000000..a7afafc --- /dev/null +++ b/SOURCES/0498-mem-pool-Code-refactor-in-mem_pool.c.patch @@ -0,0 +1,53 @@ +From 9080a49b75c2802f7739cb631050c8befa9ae760 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 31 Dec 2018 13:52:27 +0530 +Subject: [PATCH 498/498] mem-pool: Code refactor in mem_pool.c + +Problem: In the last commit 10868bfc5ed099a90fbfd2310bc89c299475d94e + the patch was not complete according to upstream commit. + +Solution: Update some changes to match with an upstream patch. 
+ +BUG: 1648893 +> Signed-off-by: Mohit Agrawal +> Reviewed-on: https://code.engineering.redhat.com/gerrit/159029 +> Tested-by: RHGS Build Bot +> Reviewed-by: Sunil Kumar Heggodu Gopala Acharya + +Change-Id: I924ba4967ce28ece6329dbda3e0309b79784fbe7 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/159628 +Tested-by: RHGS Build Bot +Reviewed-by: Niels de Vos +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/mem-pool.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 76daca9..7b62358 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -825,6 +825,7 @@ mem_get_from_pool (struct mem_pool *mem_pool) + if (retval) { + retval->magic = GF_MEM_HEADER_MAGIC; + retval->next = NULL; ++ retval->pool = mem_pool; + retval->pool_list = pool_list; + retval->power_of_two = mem_pool->pool->power_of_two; + } +@@ -860,11 +861,6 @@ mem_get (struct mem_pool *mem_pool) + return NULL; + } + +- retval->magic = GF_MEM_HEADER_MAGIC; +- retval->pool = mem_pool; +- retval->pool_list = pool_list; +- retval->power_of_two = mem_pool->pool->power_of_two; +- + GF_ATOMIC_INC (mem_pool->active); + + return retval + 1; +-- +1.8.3.1 + diff --git a/SOURCES/0499-cluster-dht-Fix-incorrect-backport.patch b/SOURCES/0499-cluster-dht-Fix-incorrect-backport.patch new file mode 100644 index 0000000..3ee6804 --- /dev/null +++ b/SOURCES/0499-cluster-dht-Fix-incorrect-backport.patch @@ -0,0 +1,37 @@ +From 2e3632750662956ec62ed3dc8c3c4b2deedc01af Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Thu, 3 Jan 2019 13:35:47 +0530 +Subject: [PATCH 499/501] cluster/dht: Fix incorrect backport + +There was an error in the patch +https://code.engineering.redhat.com/gerrit/#/c/159569/ +which is now fixed. 
+ +Change-Id: I187dd8f6e9f1ceb9495ee4f5c6e39ca837c9df15 +BUG: 1290124 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/159719 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +Reviewed-by: Susant Palai +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-rebalance.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 291b557..20ad7ef 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -998,7 +998,7 @@ check_avail_space: + + if (conf->disk_unit == 'p' && dst_statfs.f_blocks) { + dst_post_availspacepercent = +- (dst_statfs_blocks) / dst_total_blocks; ++ (dst_statfs_blocks * 100) / dst_total_blocks; + + gf_msg_debug (this->name, 0, "file : %s, post_availspacepercent : %lf " + "f_bavail : %lu min-free-disk: %lf", loc->path, +-- +1.8.3.1 + diff --git a/SOURCES/0500-extras-Add-readdir-ahead-to-samba-group-command.patch b/SOURCES/0500-extras-Add-readdir-ahead-to-samba-group-command.patch new file mode 100644 index 0000000..8fa2d5f --- /dev/null +++ b/SOURCES/0500-extras-Add-readdir-ahead-to-samba-group-command.patch @@ -0,0 +1,30 @@ +From 9620aeb14c26fcaff7c3f8daf6bac01be150bb40 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Thu, 3 Jan 2019 10:52:22 +0530 +Subject: [PATCH 500/501] extras: Add readdir-ahead to samba group command + +upstream ref: https://review.gluster.org/c/glusterfs/+/21982 + +Change-Id: I310f014a9ab8416d9833f5711ff902da1457f415 +BUG: 1655385 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/159824 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-samba | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/group-samba b/extras/group-samba +index ee39202..eeee6e0 100644 +--- a/extras/group-samba ++++ b/extras/group-samba +@@ -7,4 +7,5 @@ performance.md-cache-timeout=600 + network.inode-lru-limit=200000 + performance.nl-cache=on + performance.nl-cache-timeout=600 ++performance.readdir-ahead=on + performance.parallel-readdir=on +-- +1.8.3.1 + diff --git a/SOURCES/0501-glusterd-aggregate-rsp-from-peers-for-profile-comman.patch b/SOURCES/0501-glusterd-aggregate-rsp-from-peers-for-profile-comman.patch new file mode 100644 index 0000000..5cf14da --- /dev/null +++ b/SOURCES/0501-glusterd-aggregate-rsp-from-peers-for-profile-comman.patch @@ -0,0 +1,48 @@ +From b415160bcf51c682c919776287c4c9a2b5a8221f Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Thu, 3 Jan 2019 19:09:36 +0530 +Subject: [PATCH 501/501] glusterd: aggregate rsp from peers for profile + command + +When we run profile info command, it should display statistics +of all the bricks of the volume. To display information of bricks +which are hosted on peers, we need to aggregate the response from +peers. + +For profile info command, all the statistics will be added into +the dictionary in brick-op phase. To aggregate the information from +peers, we need to call glusterd_syncop_aggr_rsp_dict() in brick-op +call back function. 
+ +>upstream patch : https://review.gluster.org/#/c/glusterfs/+/21988 +>fixes: bz#1663223 +>Change-Id: I5f5890c3d01974747f829128ab74be6071f4aa30 +>Signed-off-by: Sanju Rakonde + +BUG: 1663232 +Change-Id: I5f5890c3d01974747f829128ab74be6071f4aa30 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/159818 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index ef8a2d9..275059c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -1349,7 +1349,8 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, + gf_uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { +- if (rsp.op == GD_OP_DEFRAG_BRICK_VOLUME) ++ if (rsp.op == GD_OP_DEFRAG_BRICK_VOLUME || ++ rsp.op == GD_OP_PROFILE_VOLUME) + ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } +-- +1.8.3.1 + diff --git a/SOURCES/0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch b/SOURCES/0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch new file mode 100644 index 0000000..8c3df64 --- /dev/null +++ b/SOURCES/0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch @@ -0,0 +1,92 @@ +From 11f9ffcd733c95e8a728c150ff2ffc3dbeaddac1 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Fri, 11 Jan 2019 11:57:20 +0530 +Subject: [PATCH 502/506] posix: posix_health_check_thread_proc crash due to + priv is NULL + +Problem: posix_fini sends a cancellation request to health_check + thread and cleanup priv without ensuring health_check thread + is running + +Solution: Make health_check && disk_space thread joinable and call + gf_thread_cleanup_xint to wait unless thread is not finished + +> Change-Id: I4d37b08138766881dab0922a47ed68a2c3411f13 +> fixes: bz#1636570 +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21717/) +> (Cherry pick from commit e82bcc33ed2d5cd54d3f918397f31818089299ad) + +Change-Id: I9edadd5bc445549b5f45bab98e4794d62a185f1c +BUG: 1662828 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/160404 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix-helpers.c | 8 +++----- + xlators/storage/posix/src/posix.c | 15 +++++++++++---- + 2 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index cddf02e63..ed5d3e55e 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1854,8 +1854,6 @@ posix_spawn_health_check_thread (xlator_t *xl) + goto unlock; + } + +- /* run the thread detached, resources will be freed on exit */ +- pthread_detach (priv->health_check); + priv->health_check_active = _gf_true; + } + unlock: +@@ -1958,9 +1956,9 @@ posix_spawn_disk_space_check_thread (xlator_t *xl) + priv->disk_space_check_active = _gf_false; + } + +- ret = gf_thread_create_detached (&priv->disk_space_check, +- posix_disk_space_check_thread_proc, +- xl, "posix_reserve"); ++ ret = gf_thread_create (&priv->disk_space_check, NULL, ++ posix_disk_space_check_thread_proc, ++ xl, "posix_reserve"); + if (ret < 0) { + priv->disk_space_check_active = _gf_false; + gf_msg (xl->name, GF_LOG_ERROR, 
errno, +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 13b4aa6b6..591119ea9 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -7999,18 +7999,25 @@ void + fini (xlator_t *this) + { + struct posix_private *priv = this->private; ++ gf_boolean_t health_check = _gf_false; ++ + if (!priv) + return; + LOCK (&priv->lock); +- if (priv->health_check_active) { ++ { ++ health_check = priv->health_check_active; + priv->health_check_active = _gf_false; +- pthread_cancel (priv->health_check); +- priv->health_check = 0; + } + UNLOCK (&priv->lock); ++ ++ if (health_check) { ++ (void)gf_thread_cleanup_xint(priv->health_check); ++ priv->health_check = 0; ++ } ++ + if (priv->disk_space_check) { + priv->disk_space_check_active = _gf_false; +- pthread_cancel (priv->disk_space_check); ++ (void)gf_thread_cleanup_xint(priv->disk_space_check); + priv->disk_space_check = 0; + } + if (priv->janitor) { +-- +2.20.1 + diff --git a/SOURCES/0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch b/SOURCES/0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch new file mode 100644 index 0000000..a9df84b --- /dev/null +++ b/SOURCES/0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch @@ -0,0 +1,202 @@ +From 5989899b7aa5cc86e589c5ff20560476b959d98b Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Fri, 11 Jan 2019 12:42:20 +0530 +Subject: [PATCH 503/506] core: brick process is crashed at the time of spawn + thread + +Problem: brick is getting crashed at the time of calling + pthread_detach after just call gf_thread_create.If + sufficient resources are not available on the system + pthread_create returns EAGAIN (non-negative) but the + caller function expects negative error code in case of failure + +Solution: Change the condition in caller function to avoid the crash + +> Change-Id: Ifeaa49f809957eb6c33aa9792f5af1b55566756d +> fixes: bz#1662906 +> (Cherry pick from commit 1e28c54c5ec8d84ec8a22493161314010992918e) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21976/) + +Change-Id: I9e5c3de4b98236de22f834d66268ab21001817a1 +BUG: 1662828 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/160409 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix-helpers.c | 15 ++++++----- + xlators/storage/posix/src/posix.c | 31 +++++++++++++++++------ + xlators/storage/posix/src/posix.h | 6 ++--- + 3 files changed, 35 insertions(+), 17 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index ed5d3e55e..1137f1c41 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1322,7 +1322,7 @@ posix_janitor_thread_proc (void *data) + } + + +-void ++int + posix_spawn_janitor_thread (xlator_t *this) + { + struct posix_private *priv = NULL; +@@ -1337,7 +1337,7 @@ posix_spawn_janitor_thread (xlator_t *this) + posix_janitor_thread_proc, + this, "posixjan"); + +- if (ret < 0) { ++ if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_THREAD_FAILED, "spawning janitor " + "thread failed"); +@@ -1349,6 +1349,7 @@ posix_spawn_janitor_thread (xlator_t *this) + } + unlock: + UNLOCK (&priv->lock); ++ return ret; + } + + static int +@@ -1822,7 +1823,7 @@ abort: + return NULL; + } + +-void ++int + posix_spawn_health_check_thread (xlator_t *xl) + { + struct posix_private *priv = NULL; +@@ -1845,7 +1846,7 
@@ posix_spawn_health_check_thread (xlator_t *xl) + ret = gf_thread_create (&priv->health_check, NULL, + posix_health_check_thread_proc, + xl, "posixhc"); +- if (ret < 0) { ++ if (ret) { + priv->health_check_interval = 0; + priv->health_check_active = _gf_false; + gf_msg (xl->name, GF_LOG_ERROR, errno, +@@ -1858,6 +1859,7 @@ posix_spawn_health_check_thread (xlator_t *xl) + } + unlock: + UNLOCK (&priv->lock); ++ return ret; + } + + void +@@ -1940,7 +1942,7 @@ out: + return NULL; + } + +-void ++int + posix_spawn_disk_space_check_thread (xlator_t *xl) + { + struct posix_private *priv = NULL; +@@ -1959,7 +1961,7 @@ posix_spawn_disk_space_check_thread (xlator_t *xl) + ret = gf_thread_create (&priv->disk_space_check, NULL, + posix_disk_space_check_thread_proc, + xl, "posix_reserve"); +- if (ret < 0) { ++ if (ret) { + priv->disk_space_check_active = _gf_false; + gf_msg (xl->name, GF_LOG_ERROR, errno, + P_MSG_DISK_SPACE_CHECK_FAILED, +@@ -1971,6 +1973,7 @@ posix_spawn_disk_space_check_thread (xlator_t *xl) + } + unlock: + UNLOCK (&priv->lock); ++ return ret; + } + + int +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 591119ea9..8a6282d29 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -7317,12 +7317,19 @@ reconfigure (xlator_t *this, dict_t *options) + + GF_OPTION_RECONF ("reserve", priv->disk_reserve, + options, uint32, out); +- if (priv->disk_reserve) +- posix_spawn_disk_space_check_thread (this); ++ if (priv->disk_reserve) { ++ ret = posix_spawn_disk_space_check_thread (this); ++ if (ret) ++ goto out; ++ } + + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); +- posix_spawn_health_check_thread (this); ++ if (priv->health_check_interval) { ++ ret = posix_spawn_health_check_thread (this); ++ if (ret) ++ goto out; ++ } + + GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, + options, int32, out); +@@ -7925,20 +7932,28 @@ init (xlator_t *this) + _private->disk_space_full = 0; + GF_OPTION_INIT ("reserve", + _private->disk_reserve, uint32, out); +- if (_private->disk_reserve) +- posix_spawn_disk_space_check_thread (this); ++ if (_private->disk_reserve) { ++ ret = posix_spawn_disk_space_check_thread (this); ++ if (ret) ++ goto out; ++ } + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); +- if (_private->health_check_interval) +- posix_spawn_health_check_thread (this); ++ if (_private->health_check_interval) { ++ ret = posix_spawn_health_check_thread (this); ++ if (ret) ++ goto out; ++ } + + pthread_mutex_init (&_private->janitor_lock, NULL); + pthread_cond_init (&_private->janitor_cond, NULL); + INIT_LIST_HEAD (&_private->janitor_fds); + +- posix_spawn_janitor_thread (this); ++ ret = posix_spawn_janitor_thread (this); ++ if (ret) ++ goto out; + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index bda41726c..cb8dc8acc 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -305,7 +305,7 @@ int posix_handle_pair (xlator_t *this, const char *real_path, char *key, + data_t *value, int flags, struct iatt *stbuf); + int posix_fhandle_pair (xlator_t *this, int fd, char *key, data_t *value, + int flags, struct iatt *stbuf); +-void posix_spawn_janitor_thread (xlator_t *this); ++int 
posix_spawn_janitor_thread (xlator_t *this); + int posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req); + int posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req); + int posix_entry_create_xattr_set (xlator_t *this, const char *path, +@@ -320,9 +320,9 @@ gf_boolean_t posix_special_xattr (char **pattern, char *key); + void + __posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, + off_t offset, size_t size); +-void posix_spawn_health_check_thread (xlator_t *this); ++int posix_spawn_health_check_thread (xlator_t *this); + +-void posix_spawn_disk_space_check_thread (xlator_t *this); ++int posix_spawn_disk_space_check_thread (xlator_t *this); + + void *posix_fsyncer (void *); + int +-- +2.20.1 + diff --git a/SOURCES/0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch b/SOURCES/0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch new file mode 100644 index 0000000..03cd944 --- /dev/null +++ b/SOURCES/0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch @@ -0,0 +1,39 @@ +From b6feae228a82f34b88b67b8a8f8fa55189dcdb72 Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Fri, 11 Jan 2019 13:00:59 +0530 +Subject: [PATCH 504/506] dht: Add NULL check for stbuf in dht_rmdir_lookup_cbk + +> Change-Id: I2ced288113a369cc6497a77ac1871007df434da4 +> fixes: bz#1664647 +> Signed-off-by: Susant Palai + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22004/ + +Change-Id: Ib53b06f6bbf20e0fbf1663cddf7fd76b37f23186 +BUG: 1664529 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/160438 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index ff0099c1d..767c6a868 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -9842,8 +9842,8 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + gf_msg (this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_FILE_LOOKUP_FAILED, +- "lookup failed for %s on %s (type=0%o)", +- local->loc.path, src->name, stbuf->ia_type); ++ "lookup failed for %s on %s", local->loc.path, ++ src->name); + goto err; + } + +-- +2.20.1 + diff --git a/SOURCES/0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch b/SOURCES/0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch new file mode 100644 index 0000000..b471bac --- /dev/null +++ b/SOURCES/0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch @@ -0,0 +1,378 @@ +From a7ade5267ebaf4bf318ee2aebe48000cee583e3b Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 28 Dec 2018 18:53:15 +0530 +Subject: [PATCH 505/506] features/shard: Fix launch of multiple synctasks for + background deletion + +> Upstream: https://review.gluster.org/21957 +> BUG: 1662368 +> Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e + +PROBLEM: + +When multiple sharded files are deleted in quick succession, multiple +issues were observed: +1. misleading logs corresponding to a sharded file where while one log + message said the shards corresponding to the file were deleted + successfully, this was followed by multiple logs suggesting the very + same operation failed. 
This was because of multiple synctasks + attempting to clean up shards of the same file and only one of them + succeeding (the one that gets ENTRYLK successfully), and the rest of + them logging failure. + +2. multiple synctasks to do background deletion would be launched, one + for each deleted file but all of them could readdir entries from + .remove_me at the same time could potentially contend for ENTRYLK on + .shard for each of the entry names. This is undesirable and wasteful. + +FIX: +Background deletion will now follow a state machine. In the event that +there are multiple attempts to launch synctask for background deletion, +one for each file deleted, only the first task is launched. And if while +this task is doing the cleanup, more attempts are made to delete other +files, the state of the synctask is adjusted so that it restarts the +crawl even after reaching end-of-directory to pick up any files it may +have missed in the previous iteration. + +This patch also fixes uninitialized lk-owner during syncop_entrylk() +which was leading to multiple background deletion synctasks entering +the critical section at the same time and leading to illegal memory access +of base inode in the second syntcask after it was destroyed post shard deletion +by the first synctask. + +Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e +BUG: 1662059 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/160437 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +--- + xlators/features/shard/src/shard.c | 199 +++++++++++++++++++---------- + xlators/features/shard/src/shard.h | 12 +- + 2 files changed, 136 insertions(+), 75 deletions(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 5b72399f5..8aed1a386 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1465,16 +1465,45 @@ int + shard_start_background_deletion (xlator_t *this) + { + int ret = 0; ++ gf_boolean_t i_cleanup = _gf_true; ++ shard_priv_t *priv = NULL; + call_frame_t *cleanup_frame = NULL; + ++ priv = this->private; ++ ++ LOCK(&priv->lock); ++ { ++ switch (priv->bg_del_state) { ++ case SHARD_BG_DELETION_NONE: ++ i_cleanup = _gf_true; ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ break; ++ case SHARD_BG_DELETION_LAUNCHING: ++ i_cleanup = _gf_false; ++ break; ++ case SHARD_BG_DELETION_IN_PROGRESS: ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ i_cleanup = _gf_false; ++ break; ++ default: ++ break; ++ } ++ } ++ UNLOCK(&priv->lock); ++ if (!i_cleanup) ++ return 0; ++ + cleanup_frame = create_frame (this, this->ctx->pool); + if (!cleanup_frame) { + gf_msg (this->name, GF_LOG_WARNING, ENOMEM, + SHARD_MSG_MEMALLOC_FAILED, "Failed to create " + "new frame to delete shards"); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto err; + } + ++ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ + ret = synctask_new (this->ctx->env, shard_delete_shards, + shard_delete_shards_cbk, cleanup_frame, + cleanup_frame); +@@ -1484,7 +1513,16 @@ shard_start_background_deletion (xlator_t *this) + "failed to create task to do background " + "cleanup of shards"); + STACK_DESTROY (cleanup_frame->root); ++ goto err; + } ++ return 0; ++ ++err: ++ LOCK(&priv->lock); ++ { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ } ++ UNLOCK(&priv->lock); + return ret; + } + +@@ -1493,7 +1531,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t 
*inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) + { +- int ret = 0; ++ int ret = -1; + shard_priv_t *priv = NULL; + gf_boolean_t i_start_cleanup = _gf_false; + +@@ -1526,22 +1564,23 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + LOCK (&priv->lock); + { +- if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) { +- priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS; ++ if (priv->first_lookup_done == _gf_false) { ++ priv->first_lookup_done = _gf_true; + i_start_cleanup = _gf_true; + } + } + UNLOCK (&priv->lock); + +- if (i_start_cleanup) { +- ret = shard_start_background_deletion (this); +- if (ret) { +- LOCK (&priv->lock); +- { +- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; +- } +- UNLOCK (&priv->lock); ++ if (!i_start_cleanup) ++ goto unwind; ++ ++ ret = shard_start_background_deletion(this); ++ if (ret < 0) { ++ LOCK(&priv->lock); ++ { ++ priv->first_lookup_done = _gf_false; + } ++ UNLOCK(&priv->lock); + } + unwind: + SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, +@@ -2940,9 +2979,10 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + if (ctx->fsync_needed) { + unref_base_inode++; + list_del_init (&ctx->to_fsync_list); +- if (base_inode) ++ if (base_inode) { + __shard_inode_ctx_get (base_inode, this, &base_ictx); +- base_ictx->fsync_count--; ++ base_ictx->fsync_count--; ++ } + } + } + UNLOCK(&inode->lock); +@@ -3334,10 +3374,15 @@ shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this, + loc.inode = inode_ref (priv->dot_shard_rm_inode); + + ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc, +- entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, +- NULL); +- if (ret) ++ entry->d_name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, ++ NULL, NULL); ++ if (ret < 0) { ++ if (ret == -EAGAIN) { ++ ret = 0; ++ } + goto out; ++ } ++ + { + ret = __shard_delete_shards_of_entry (cleanup_frame, this, + entry, inode); +@@ -3352,20 +3397,6 @@ out: + int + shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data) + { +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- +- this = frame->this; +- priv = this->private; +- +- if (ret < 0) { +- gf_msg (this->name, GF_LOG_WARNING, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Background deletion of shards failed"); +- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; +- } else { +- priv->first_lookup = SHARD_FIRST_LOOKUP_DONE; +- } + SHARD_STACK_DESTROY (frame); + return 0; + } +@@ -3482,6 +3513,7 @@ shard_delete_shards (void *opaque) + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + call_frame_t *cleanup_frame = NULL; ++ gf_boolean_t done = _gf_false; + + this = THIS; + priv = this->private; +@@ -3534,52 +3566,81 @@ shard_delete_shards (void *opaque) + goto err; + } + +- while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072, +- offset, &entries, local->xattr_req, +- NULL))) { +- if (ret > 0) +- ret = 0; +- list_for_each_entry (entry, &entries.list, list) { +- offset = entry->d_off; +- +- if (!strcmp (entry->d_name, ".") || +- !strcmp (entry->d_name, "..")) +- continue; ++ for (;;) { ++ offset = 0; ++ LOCK(&priv->lock); ++ { ++ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { ++ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; ++ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ done = _gf_true; ++ } ++ } ++ UNLOCK(&priv->lock); ++ if (done) ++ break; ++ while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, ++ 131072, offset, &entries, ++ 
local->xattr_req, NULL))) { ++ if (ret > 0) ++ ret = 0; ++ list_for_each_entry (entry, &entries.list, list) { ++ offset = entry->d_off; ++ ++ if (!strcmp (entry->d_name, ".") || ++ !strcmp (entry->d_name, "..")) ++ continue; + +- if (!entry->inode) { +- ret = shard_lookup_marker_entry (this, local, +- entry); +- if (ret < 0) ++ if (!entry->inode) { ++ ret = shard_lookup_marker_entry (this, ++ local, ++ entry); ++ if (ret < 0) ++ continue; ++ } ++ link_inode = inode_link (entry->inode, ++ local->fd->inode, ++ entry->d_name, ++ &entry->d_stat); ++ ++ gf_msg_debug (this->name, 0, "Initiating " ++ "deletion of shards of gfid %s", ++ entry->d_name); ++ ret = shard_delete_shards_of_entry (cleanup_frame, ++ this, ++ entry, ++ link_inode); ++ inode_unlink (link_inode, local->fd->inode, ++ entry->d_name); ++ inode_unref (link_inode); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to clean up shards of " ++ "gfid %s", entry->d_name); + continue; +- } +- link_inode = inode_link (entry->inode, local->fd->inode, +- entry->d_name, &entry->d_stat); +- +- gf_msg_debug (this->name, 0, "Initiating deletion of " +- "shards of gfid %s", entry->d_name); +- ret = shard_delete_shards_of_entry (cleanup_frame, this, +- entry, link_inode); +- inode_unlink (link_inode, local->fd->inode, +- entry->d_name); +- inode_unref (link_inode); +- if (ret) { +- gf_msg (this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to clean up shards of gfid %s", ++ } ++ gf_msg (this->name, GF_LOG_INFO, 0, ++ SHARD_MSG_SHARDS_DELETION_COMPLETED, ++ "Deleted shards of gfid=%s from backend", + entry->d_name); +- continue; + } +- gf_msg (this->name, GF_LOG_INFO, 0, +- SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted " +- "shards of gfid=%s from backend", +- entry->d_name); ++ gf_dirent_free (&entries); ++ if (ret) ++ break; + } +- gf_dirent_free (&entries); +- if (ret) +- break; + } + ret = 0; ++ loc_wipe(&loc); ++ return ret; ++ + err: ++ LOCK(&priv->lock); ++ { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ } ++ UNLOCK(&priv->lock); + loc_wipe (&loc); + return ret; + } +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index ac3813c8c..37934f3a2 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -196,11 +196,10 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); + } while (0) + + typedef enum { +- SHARD_FIRST_LOOKUP_PENDING = 0, +- SHARD_FIRST_LOOKUP_IN_PROGRESS, +- SHARD_FIRST_LOOKUP_DONE, +-} shard_first_lookup_state_t; +- ++ SHARD_BG_DELETION_NONE = 0, ++ SHARD_BG_DELETION_LAUNCHING, ++ SHARD_BG_DELETION_IN_PROGRESS, ++} shard_bg_deletion_state_t; + /* rm = "remove me" */ + + typedef struct shard_priv { +@@ -213,7 +212,8 @@ typedef struct shard_priv { + int inode_count; + struct list_head ilist_head; + uint32_t deletion_rate; +- shard_first_lookup_state_t first_lookup; ++ shard_bg_deletion_state_t bg_del_state; ++ gf_boolean_t first_lookup_done; + uint64_t lru_limit; + } shard_priv_t; + +-- +2.20.1 + diff --git a/SOURCES/0506-features-shard-Assign-fop-id-during-background-delet.patch b/SOURCES/0506-features-shard-Assign-fop-id-during-background-delet.patch new file mode 100644 index 0000000..01cda18 --- /dev/null +++ b/SOURCES/0506-features-shard-Assign-fop-id-during-background-delet.patch @@ -0,0 +1,49 @@ +From 6f7a336da731a5113d8fdf9632f37ef181f04f9c Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 28 Dec 2018 07:27:11 +0530 +Subject: [PATCH 
506/506] features/shard: Assign fop id during background + deletion to prevent excessive logging + +> Upstream: https://review.gluster.org/21946 +> BUG: 1662368 +> Change-Id: I0ca8d3b3bfbcd354b4a555eee520eb0479bcda35 + +... of the kind + +"[2018-12-26 05:22:44.195019] E [MSGID: 133010] +[shard.c:2253:shard_common_lookup_shards_cbk] 0-volume1-shard: Lookup +on shard 785 failed. Base file gfid = cd938e64-bf06-476f-a5d4-d580a0d37416 +[No such file or directory]" + +shard_common_lookup_shards_cbk() has a specific check to ignore ENOENT error without +logging them during specific fops. But because background deletion is done in a new +frame (with local->fop being GF_FOP_NULL), the ENOENT check is skipped and the +absence of shards gets logged everytime. + +To fix this, local->fop is initialized to GF_FOP_UNLINK during background deletion. + +Change-Id: I0ca8d3b3bfbcd354b4a555eee520eb0479bcda35 +BUG: 1662059 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/160436 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +--- + xlators/features/shard/src/shard.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 8aed1a386..19dd3e4ba 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -3530,6 +3530,7 @@ shard_delete_shards (void *opaque) + goto err; + } + cleanup_frame->local = local; ++ local->fop = GF_FOP_UNLINK; + + local->xattr_req = dict_new (); + if (!local->xattr_req) { +-- +2.20.1 + diff --git a/SOURCES/0507-geo-rep-fix-rename-sync-on-hybrid-crawl.patch b/SOURCES/0507-geo-rep-fix-rename-sync-on-hybrid-crawl.patch new file mode 100644 index 0000000..49ad3ee --- /dev/null +++ b/SOURCES/0507-geo-rep-fix-rename-sync-on-hybrid-crawl.patch @@ -0,0 +1,78 @@ +From 83b57d7278f2c6f493042e3ea34f104822823137 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar +Date: Mon, 14 Jan 2019 11:48:55 +0530 +Subject: [PATCH 507/507] geo-rep : fix rename sync on hybrid crawl + +Problem: When geo-rep is configured as hybrid crawl + directory renames are not synced to the slave. + +Solution: Rename sync of directory was failing due to incorrect + destination path calculation. + During check for existence on slave we miscalculated + realpath. . 
+ +Change-Id: I23f1ea60e86a917598fe869d5d24f8da654d8a0a +BUG: 1664235 +>Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22020/ +>fixes: bz#1665826 +>Signed-off-by: Sunny Kumar + +Signed-off-by: Sunny Kumar +Change-Id: I0588e38f803cc44a2f044c04563246fcb6aaebec +Reviewed-on: https://code.engineering.redhat.com/gerrit/160876 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/resource.py | 2 ++ + geo-replication/syncdaemon/syncdutils.py | 22 +++++++++------------- + 2 files changed, 11 insertions(+), 13 deletions(-) + +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index f16066e76..23e509c76 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -656,6 +656,8 @@ class Server(object): + logging.info(lf("Special case: rename on mkdir", + gfid=gfid, entry=repr(entry))) + src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) ++ if src_entry is None: ++ collect_failure(e, ENOENT, uid, gid) + if src_entry is not None and src_entry != entry: + slv_entry_info = {} + slv_entry_info['gfid_mismatch'] = False +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index 32181925d..ec987bdb1 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -695,19 +695,15 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): + gfid[2:4], + gfid], [ENOENT], [ESTALE]) + if dir_path != ENOENT: +- break +- +- if not isinstance(dir_path, int): +- realpath = errno_wrap(os.readlink, [dir_path], +- [ENOENT], [ESTALE]) +- +- if not isinstance(realpath, int): +- realpath_parts = realpath.split('/') +- pargfid = realpath_parts[-2] +- basename = realpath_parts[-1] +- pfx = gauxpfx() +- dir_entry = os.path.join(pfx, pargfid, basename) +- return dir_entry ++ realpath = errno_wrap(os.readlink, [dir_path], ++ [ENOENT], [ESTALE]) ++ if not isinstance(realpath, int): ++ realpath_parts = realpath.split('/') ++ pargfid = realpath_parts[-2] ++ basename = realpath_parts[-1] ++ pfx = gauxpfx() ++ dir_entry = os.path.join(pfx, pargfid, basename) ++ return dir_entry + + return None + +-- +2.20.1 + diff --git a/SOURCES/0508-glusterd-Resolve-multiple-leaks-in-glusterd-code-pat.patch b/SOURCES/0508-glusterd-Resolve-multiple-leaks-in-glusterd-code-pat.patch new file mode 100644 index 0000000..f19e2f8 --- /dev/null +++ b/SOURCES/0508-glusterd-Resolve-multiple-leaks-in-glusterd-code-pat.patch @@ -0,0 +1,164 @@ +From ff98fc7a53b51b14c64c47def854d98cbd1f3ca0 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 21 Jan 2019 20:52:36 +0530 +Subject: [PATCH 508/508] glusterd: Resolve multiple leaks in glusterd code + path + +In gluster get-state volumeoptions command there was some amount of leak +observed. This fix resolves the identified leaks. 
+ +> Change-Id: Ibde5743d1136fa72c531d48bb1b0b5da0c0b82a1 +> fixes: bz#1667779 +> (Cherry picked from commit 5903111ad21cb937258c0fda24ea7dec466347b4) +> (Cherry picked from commit 4d3be307293b63c74398dfa715e9920cf356f083) +> (Cherry picked from commit 2b7b6ff28fa92335613d0b5715acd552cfcfd759) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21823/) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/20888/) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22064/) + +Change-Id: I7bf3ed61770511c73975ad7b0d4fe97619c9864a +BUG: 1667169 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/161128 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 13 ++++++++++--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 11 +++++++++++ + 2 files changed, 21 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index d8e333510..81b1c029d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -1808,6 +1808,8 @@ out: + glusterd_to_cli (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); + ++ GF_FREE(rsp.dict.dict_val); ++ + return 0; + } + int +@@ -1871,6 +1873,7 @@ out: + if (dict) + dict_unref (dict); + ++ GF_FREE(rsp.dict.dict_val); + glusterd_friend_sm (); + glusterd_op_sm (); + +@@ -4991,6 +4994,7 @@ out: + + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp); ++ GF_FREE(rsp.dict.dict_val); + return ret; + } + +@@ -5221,7 +5225,7 @@ glusterd_print_snapinfo_by_vol (FILE *fp, glusterd_volinfo_t *volinfo, int volco + fprintf (fp, "Volume%d.snapshot%d.name: %s\n", + volcount, snapcount, snapinfo->snapname); + fprintf (fp, "Volume%d.snapshot%d.id: %s\n", volcount, snapcount, +- gf_strdup (uuid_utoa (snapinfo->snap_id))); ++ uuid_utoa (snapinfo->snap_id)); + fprintf (fp, "Volume%d.snapshot%d.time: %s\n", + volcount, snapcount, timestr); + +@@ -5494,6 +5498,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict) + GD_MSG_DICT_GET_FAILED, "%s", err_str); + } + ++ GF_FREE (odir); + ret = -1; + goto out; + } +@@ -5528,7 +5533,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict) + GF_FREE (odir); + GF_FREE (filename); + +- ret = dict_set_str (dict, "ofilepath", ofilepath); ++ ret = dict_set_dynstr (dict, "ofilepath", ofilepath); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, "Unable to set output path"); +@@ -5568,6 +5573,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict) + GD_MSG_VOL_OPTS_IMPORT_FAIL, "Failed to " + "fetch the value of all volume options " + "for volume %s", volinfo->volname); ++ dict_unref (vol_all_opts); + continue; + } + +@@ -5942,7 +5948,7 @@ out: + &rsp.dict.dict_len); + glusterd_to_cli (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); +- ++ GF_FREE(rsp.dict.dict_val); + return ret; + } + +@@ -5986,6 +5992,7 @@ __glusterd_handle_get_state (rpcsvc_request_t *req) + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode" + " the command"); ++ free (cli_req.dict.dict_val); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 0fe56ebbc..e21ec4e5d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c 
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13125,6 +13125,7 @@ glusterd_get_default_val_for_volopt (dict_t *ctx, gf_boolean_t all_opts, + char *def_val = NULL; + char dict_key[50] = {0,}; + gf_boolean_t key_found = _gf_false; ++ gf_boolean_t get_value_vme = _gf_false; + glusterd_conf_t *priv = NULL; + dict_t *vol_dict = NULL; + +@@ -13149,6 +13150,7 @@ glusterd_get_default_val_for_volopt (dict_t *ctx, gf_boolean_t all_opts, + if (!all_opts && strcmp (vme->key, input_key)) + continue; + key_found = _gf_true; ++ get_value_vme = _gf_false; + /* First look for the key in the priv->opts for global option + * and then into vol_dict, if its not present then look for + * translator default value */ +@@ -13164,6 +13166,7 @@ glusterd_get_default_val_for_volopt (dict_t *ctx, gf_boolean_t all_opts, + } else { + ret = glusterd_get_value_for_vme_entry + (vme, &def_val); ++ get_value_vme = _gf_true; + if (!all_opts && ret) + goto out; + else if (ret == -2) +@@ -13179,6 +13182,8 @@ glusterd_get_default_val_for_volopt (dict_t *ctx, gf_boolean_t all_opts, + GD_MSG_DICT_SET_FAILED, + "Failed to " + "set %s in dictionary", vme->key); ++ if (get_value_vme) ++ GF_FREE (def_val); + goto out; + } + sprintf (dict_key, "value%d", count); +@@ -13189,8 +13194,14 @@ glusterd_get_default_val_for_volopt (dict_t *ctx, gf_boolean_t all_opts, + "Failed to " + "set %s for key %s in dictionary", def_val, + vme->key); ++ if (get_value_vme) ++ GF_FREE (def_val); ++ + goto out; + } ++ if (get_value_vme) ++ GF_FREE (def_val); ++ + def_val = NULL; + if (!all_opts) + break; +-- +2.20.1 + diff --git a/SOURCES/0509-core-heketi-cli-is-throwing-error-target-is-busy.patch b/SOURCES/0509-core-heketi-cli-is-throwing-error-target-is-busy.patch new file mode 100644 index 0000000..f10795e --- /dev/null +++ b/SOURCES/0509-core-heketi-cli-is-throwing-error-target-is-busy.patch @@ -0,0 +1,117 @@ +From f58abec63fb325e0e1c21fe3fe127de2e4a85d7d Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 24 Jan 2019 18:45:54 +0530 +Subject: [PATCH 509/510] core: heketi-cli is throwing error "target is busy" + +Problem: At the time of deleting block hosting volume + through heketi-cli , it is throwing an error "target is busy". 
+ cli is throwing an error because brick is not detached successfully + and brick is not detached due to race condition to cleanp xprt + associated with detached brick + +Solution: To avoid xprt specifc race condition introduce an atomic flag + on rpc_transport + +> Change-Id: Id4ff1fe8375a63be71fb3343f455190a1b8bb6d4 +> fixes: bz#1668190 +> (Cherry pick from commit 04f84756e1baa5eff4560339700f82970eaa5d80) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22073/) + +Change-Id: Ie3786b569ee03569bc3ac970925732dd834a76dc +BUG: 1669020 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/161388 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-lib/src/rpc-transport.c | 1 + + rpc/rpc-lib/src/rpc-transport.h | 1 + + xlators/protocol/server/src/server.c | 18 ++++++++++++++++++ + 3 files changed, 20 insertions(+) + +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index 77abf9617..0c6ab6694 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -371,6 +371,7 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name) + } + + INIT_LIST_HEAD (&trans->list); ++ GF_ATOMIC_INIT(trans->disconnect_progress, 0); + + return_trans = trans; + +diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h +index 23246c564..f5fb6e13b 100644 +--- a/rpc/rpc-lib/src/rpc-transport.h ++++ b/rpc/rpc-lib/src/rpc-transport.h +@@ -217,6 +217,7 @@ struct rpc_transport { + * layer or in client management notification handler functions + */ + gf_boolean_t connect_failed; ++ gf_atomic_t disconnect_progress; + }; + + struct rpc_transport_ops { +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index 104615265..ba3b8316d 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -553,6 +553,11 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + break; + } + ++ /* Set the disconnect_progress flag to 1 to avoid races ++ during brick detach while brick mux is enabled ++ */ ++ GF_ATOMIC_INIT(trans->disconnect_progress, 1); ++ + /* transport has to be removed from the list upon disconnect + * irrespective of whether lock self heal is off or on, since + * new transport will be created upon reconnect. +@@ -1638,6 +1643,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) + glusterfs_ctx_t *ctx = NULL; + gf_boolean_t xprt_found = _gf_false; + uint64_t totxprt = 0; ++ uint64_t totdisconnect = 0; + + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + conf = this->private; +@@ -1715,6 +1721,10 @@ notify (xlator_t *this, int32_t event, void *data, ...) + if (!xprt->xl_private) { + continue; + } ++ ++ if (GF_ATOMIC_GET(xprt->disconnect_progress)) ++ continue; ++ + if (xprt->xl_private->bound_xl == data) { + totxprt++; + } +@@ -1740,15 +1750,23 @@ notify (xlator_t *this, int32_t event, void *data, ...) 
+ if (!xprt->xl_private) { + continue; + } ++ ++ if (GF_ATOMIC_GET(xprt->disconnect_progress)) ++ continue; ++ + if (xprt->xl_private->bound_xl == data) { + gf_log (this->name, GF_LOG_INFO, + "disconnecting %s", + xprt->peerinfo.identifier); + xprt_found = _gf_true; ++ totdisconnect++; + rpc_transport_disconnect (xprt, _gf_false); + } + } + ++ if (totxprt > totdisconnect) ++ GF_ATOMIC_SUB(victim->xprtrefcnt, (totxprt - totdisconnect)); ++ + pthread_mutex_unlock (&conf->mutex); + if (this->ctx->active) { + top = this->ctx->active->first; +-- +2.20.1 + diff --git a/SOURCES/0510-features-shard-Ref-shard-inode-while-adding-to-fsync.patch b/SOURCES/0510-features-shard-Ref-shard-inode-while-adding-to-fsync.patch new file mode 100644 index 0000000..802aa4f --- /dev/null +++ b/SOURCES/0510-features-shard-Ref-shard-inode-while-adding-to-fsync.patch @@ -0,0 +1,183 @@ +From f4d1a1683882a4da81240413dae1f6a390ee2442 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Thu, 24 Jan 2019 14:14:39 +0530 +Subject: [PATCH 510/510] features/shard: Ref shard inode while adding to fsync + list + +> Upstream: https://review.gluster.org/22091 +> BUG: 1669077 +> Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2 + +PROBLEM: + +Lot of the earlier changes in the management of shards in lru, fsync +lists assumed that if a given shard exists in fsync list, it must be +part of lru list as well. This was found to be not true. + +Consider this - a file is FALLOCATE'd to a size which would make the +number of participant shards to be greater than the lru list size. +In this case, some of the resolved shards that are to participate in +this fop will be evicted from lru list to give way to the rest of the +shards. And once FALLOCATE completes, these shards are added to fsync +list but without a ref. After the fop completes, these shard inodes +are unref'd and destroyed while their inode ctxs are still part of +fsync list. Now when an FSYNC is called on the base file and the +fsync-list traversed, the client crashes due to illegal memory access. + +FIX: + +Hold a ref on the shard inode when adding to fsync list as well. +And unref under following conditions: +1. when the shard is evicted from lru list +2. when the base file is fsync'd +3. when the shards are deleted. + +Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2 +BUG: 1668304 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/161397 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +--- + tests/bugs/shard/bug-1669077.t | 29 +++++++++++++++++++++++++++++ + xlators/features/shard/src/shard.c | 29 +++++++++++++++++++++-------- + 2 files changed, 50 insertions(+), 8 deletions(-) + create mode 100644 tests/bugs/shard/bug-1669077.t + +diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t +new file mode 100644 +index 000000000..8d3a67a36 +--- /dev/null ++++ b/tests/bugs/shard/bug-1669077.t +@@ -0,0 +1,29 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++SHARD_COUNT_TIME=5 ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 features.shard-lru-limit 25 ++ ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++# If the bug still exists, client should crash during fallocate below ++TEST fallocate -l 200M $M0/foo ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 19dd3e4ba..cd388500d 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -272,6 +272,7 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this, + * of the to_fsync_list. + */ + inode_ref (base_inode); ++ inode_ref(shard_inode); + + LOCK (&base_inode->lock); + LOCK (&shard_inode->lock); +@@ -285,8 +286,10 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this, + /* Unref the base inode corresponding to the ref above, if the shard is + * found to be already part of the fsync list. + */ +- if (ret != 0) ++ if (ret != 0) { + inode_unref (base_inode); ++ inode_unref(shard_inode); ++ } + return ret; + } + +@@ -735,6 +738,10 @@ after_fsync_check: + block_bname); + inode_forget (lru_inode, 0); + } else { ++ /* The following unref corresponds to the ref ++ * held when the shard was added to fsync list. ++ */ ++ inode_unref(lru_inode); + fsync_inode = lru_inode; + if (lru_base_inode) + inode_unref (lru_base_inode); +@@ -2947,7 +2954,7 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + shard_priv_t *priv = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *base_ictx = NULL; +- gf_boolean_t unlink_unref_forget = _gf_false; ++ int unref_shard_inode = 0; + + this = THIS; + priv = this->private; +@@ -2973,11 +2980,12 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + list_del_init (&ctx->ilist); + priv->inode_count--; + unref_base_inode++; ++ unref_shard_inode++; + GF_ASSERT (priv->inode_count >= 0); +- unlink_unref_forget = _gf_true; + } + if (ctx->fsync_needed) { + unref_base_inode++; ++ unref_shard_inode++; + list_del_init (&ctx->to_fsync_list); + if (base_inode) { + __shard_inode_ctx_get (base_inode, this, &base_ictx); +@@ -2988,11 +2996,11 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) + UNLOCK(&inode->lock); + if (base_inode) + UNLOCK(&base_inode->lock); +- if (unlink_unref_forget) { +- inode_unlink (inode, priv->dot_shard_inode, block_bname); +- inode_unref (inode); +- inode_forget (inode, 0); +- } ++ ++ inode_unlink(inode, priv->dot_shard_inode, block_bname); ++ inode_ref_reduce_by_n(inode, unref_shard_inode); ++ inode_forget(inode, 0); ++ + if (base_inode && unref_base_inode) + inode_ref_reduce_by_n (base_inode, unref_base_inode); + UNLOCK(&priv->lock); +@@ -5824,6 +5832,7 @@ shard_fsync_shards_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *base_ictx = NULL; + inode_t *base_inode = NULL; ++ gf_boolean_t unref_shard_inode = _gf_false; + + local = frame->local; + base_inode = local->fd->inode; +@@ -5858,11 +5867,15 @@ out: + list_add_tail (&ctx->to_fsync_list, + &base_ictx->to_fsync_list); + base_ictx->fsync_count++; ++ } else { ++ 
unref_shard_inode = _gf_true; + } + } + UNLOCK (&anon_fd->inode->lock); + UNLOCK (&base_inode->lock); + } ++ if (unref_shard_inode) ++ inode_unref(anon_fd->inode); + if (anon_fd) + fd_unref (anon_fd); + +-- +2.20.1 + diff --git a/SOURCES/0511-Update-rfc.sh-to-rhgs-3.4.4.patch b/SOURCES/0511-Update-rfc.sh-to-rhgs-3.4.4.patch new file mode 100644 index 0000000..5b21d6a --- /dev/null +++ b/SOURCES/0511-Update-rfc.sh-to-rhgs-3.4.4.patch @@ -0,0 +1,27 @@ +From 4049de578f44e028ebe6beab3b1b13ce4d3de954 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Fri, 1 Feb 2019 19:55:33 +0530 +Subject: [PATCH 511/529] Update rfc.sh to rhgs-3.4.4 + +Change-Id: I826c246fefecf8cf12999e3b4b307d0a29aef668 +Signed-off-by: Milind Changire +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index bd52851..6f5c77c 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -17,7 +17,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.4.3"; ++branch="rhgs-3.4.4"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0512-Test-fixes-for-rhgs-3.4-downstream.patch b/SOURCES/0512-Test-fixes-for-rhgs-3.4-downstream.patch new file mode 100644 index 0000000..62b3ad5 --- /dev/null +++ b/SOURCES/0512-Test-fixes-for-rhgs-3.4-downstream.patch @@ -0,0 +1,533 @@ +From 5f8f80190c154bbb159a3cebbb7d3e12014275ed Mon Sep 17 00:00:00 2001 +From: Nigel Babu +Date: Mon, 30 Apr 2018 11:28:06 +0530 +Subject: [PATCH 512/529] Test fixes for rhgs-3.4 downstream + +This patch includes test fixes and two tests are removed +because they're not supported downstream. + +Label: DOWNSTREAM ONLY +Change-Id: I99072130cea4780654980837522c76eab38e79d3 +Signed-off-by: Krutika Dhananjay +Signed-off-by: Sanju Rakonde +Signed-off-by: Ravishankar N +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/162177 +--- + tests/basic/bd.t | 142 --------------------- + tests/basic/ec/ec-1468261.t | 1 + + tests/bugs/cli/bug-1169302.t | 4 +- + tests/bugs/core/multiplex-limit-issue-151.t | 2 +- + tests/bugs/distribute/bug-882278.t | 73 ----------- + tests/bugs/glusterd/brick-mux-fd-cleanup.t | 3 + + .../glusterd/bug-1245045-remove-brick-validation.t | 16 ++- + .../glusterd/bug-1293414-import-brickinfo-uuid.t | 1 + + .../bug-1483058-replace-brick-quorum-validation.t | 9 +- + tests/bugs/glusterd/bug-1595320.t | 2 +- + .../df-results-post-replace-brick-operations.t | 3 + + tests/bugs/posix/bug-990028.t | 2 +- + tests/bugs/readdir-ahead/bug-1439640.t | 1 + + .../replicate/bug-1591193-assign-gfid-and-heal.t | 5 +- + .../bug-1637802-arbiter-stale-data-heal-lock.t | 1 + + tests/bugs/shard/zero-flag.t | 1 + + tests/cluster.rc | 10 ++ + tests/include.rc | 1 + + 18 files changed, 48 insertions(+), 229 deletions(-) + delete mode 100755 tests/basic/bd.t + delete mode 100755 tests/bugs/distribute/bug-882278.t + +diff --git a/tests/basic/bd.t b/tests/basic/bd.t +deleted file mode 100755 +index 63622ed..0000000 +--- a/tests/basic/bd.t ++++ /dev/null +@@ -1,142 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../include.rc +-. $(dirname $0)/../volume.rc +- +-function execute() +-{ +- cmd=$1 +- shift +- ${cmd} $@ >/dev/null 2>&1 +-} +- +-function bd_cleanup() +-{ +- execute vgremove -f ${V0} +- execute pvremove ${ld} +- execute losetup -d ${ld} +- execute rm ${BD_DISK} +- cleanup +-} +- +-function check() +-{ +- if [ $? 
-ne 0 ]; then +- echo prerequsite $@ failed +- bd_cleanup +- exit +- fi +-} +- +-SIZE=256 #in MB +- +-bd_cleanup; +- +-## Configure environment needed for BD backend volumes +-## Create a file with configured size and +-## set it as a temporary loop device to create +-## physical volume & VG. These are basic things needed +-## for testing BD xlator if anyone of these steps fail, +-## test script exits +-function configure() +-{ +- GLDIR=`$CLI system:: getwd` +- BD_DISK=${GLDIR}/bd_disk +- +- execute truncate -s${SIZE}M ${BD_DISK} +- check ${BD_DISK} creation +- +- execute losetup -f +- check losetup +- ld=`losetup -f` +- +- execute losetup ${ld} ${BD_DISK} +- check losetup ${BD_DISK} +- execute pvcreate -f ${ld} +- check pvcreate ${ld} +- execute vgcreate ${V0} ${ld} +- check vgcreate ${V0} +- execute lvcreate --thin ${V0}/pool --size 128M +-} +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +-function volume_type() +-{ +- getfattr -n volume.type $M0/. --only-values --absolute-names -e text +-} +- +-case $OSTYPE in +-NetBSD) +- echo "Skip test on LVM which is not available on NetBSD" >&2 +- SKIP_TESTS +- exit 0 +- ;; +-*) +- ;; +-esac +- +-TEST glusterd +-TEST pidof glusterd +-configure +- +-TEST $CLI volume create $V0 ${H0}:/$B0/$V0?${V0} +-EXPECT "$V0" volinfo_field $V0 'Volume Name'; +-EXPECT 'Created' volinfo_field $V0 'Status'; +- +-## Start volume and verify +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +-EXPECT '1' volume_type +- +-## Create posix file +-TEST touch $M0/posix +- +-TEST touch $M0/lv +-gfid=`getfattr -n glusterfs.gfid.string $M0/lv --only-values --absolute-names` +-TEST setfattr -n user.glusterfs.bd -v "lv:4MB" $M0/lv +-# Check if LV is created +-TEST stat /dev/$V0/${gfid} +- +-## Create filesystem +-sleep 1 +-TEST mkfs.ext4 -qF $M0/lv +-# Cloning +-TEST touch $M0/lv_clone +-gfid=`getfattr -n glusterfs.gfid.string $M0/lv_clone --only-values --absolute-names` +-TEST setfattr -n clone -v ${gfid} $M0/lv +-TEST stat /dev/$V0/${gfid} +- +-sleep 1 +-## Check mounting +-TEST mount -o loop $M0/lv $M1 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 +- +-# Snapshot +-TEST touch $M0/lv_sn +-gfid=`getfattr -n glusterfs.gfid.string $M0/lv_sn --only-values --absolute-names` +-TEST setfattr -n snapshot -v ${gfid} $M0/lv +-TEST stat /dev/$V0/${gfid} +- +-# Merge +-sleep 1 +-TEST setfattr -n merge -v "$M0/lv_sn" $M0/lv_sn +-TEST ! stat $M0/lv_sn +-TEST ! 
stat /dev/$V0/${gfid} +- +- +-rm $M0/* -f +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +-TEST $CLI volume stop ${V0} +-EXPECT 'Stopped' volinfo_field $V0 'Status'; +-TEST $CLI volume delete ${V0} +- +-bd_cleanup +diff --git a/tests/basic/ec/ec-1468261.t b/tests/basic/ec/ec-1468261.t +index d687d7b..b2d92fc 100644 +--- a/tests/basic/ec/ec-1468261.t ++++ b/tests/basic/ec/ec-1468261.t +@@ -14,6 +14,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} + TEST $CLI volume set $V0 disperse.optimistic-change-log on ++TEST $CLI volume set $V0 disperse.other-eager-lock on + TEST $CLI volume start $V0 + + #Mount the volume +diff --git a/tests/bugs/cli/bug-1169302.t b/tests/bugs/cli/bug-1169302.t +index 24355e5..05c006c 100755 +--- a/tests/bugs/cli/bug-1169302.t ++++ b/tests/bugs/cli/bug-1169302.t +@@ -40,7 +40,9 @@ cleanup_statedump + # hostname or IP-address with the connection from the bug-1169302 executable. + # In our CI it seems not possible to use $H0, 'localhost', $(hostname --fqdn) + # or even "127.0.0.1".... +-TEST $CLI_3 volume statedump $V0 client $H1:$GFAPI_PID ++sleep 2 ++host=`netstat -nap | grep $GFAPI_PID | grep 24007 | awk '{print $4}' | cut -d: -f1` ++TEST $CLI_3 volume statedump $V0 client $host:$GFAPI_PID + EXPECT_WITHIN $STATEDUMP_TIMEOUT "Y" path_exists $statedumpdir/glusterdump.$GFAPI_PID* + + kill $GFAPI_PID +diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/bugs/core/multiplex-limit-issue-151.t +index 9511756..c5bbbda 100644 +--- a/tests/bugs/core/multiplex-limit-issue-151.t ++++ b/tests/bugs/core/multiplex-limit-issue-151.t +@@ -50,7 +50,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks + + TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start +-TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 commit ++TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 force + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids +diff --git a/tests/bugs/distribute/bug-882278.t b/tests/bugs/distribute/bug-882278.t +deleted file mode 100755 +index 8cb5147..0000000 +--- a/tests/bugs/distribute/bug-882278.t ++++ /dev/null +@@ -1,73 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-cleanup +- +-# Is there a good reason to require --fqdn elsewhere? It's worse than useless +-# here. +-H0=$(hostname -s) +- +-function recreate { +- # The rm is necessary so we don't get fooled by leftovers from old runs. +- rm -rf $1 && mkdir -p $1 +-} +- +-function count_lines { +- grep "$1" $2/* | wc -l +-} +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume info; +- +-## Start and create a volume +-TEST recreate ${B0}/${V0}-0 +-TEST recreate ${B0}/${V0}-1 +-TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1} +-TEST $CLI volume set $V0 cluster.nufa on +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +- +-## Verify volume is created +-EXPECT "$V0" volinfo_field $V0 'Volume Name'; +-EXPECT 'Created' volinfo_field $V0 'Status'; +- +-## Start volume and verify +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-## Mount native +-special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1" +-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0 +- +-## Create a bunch of test files. 
+-for i in $(seq 0 99); do +- echo hello > $(printf $M0/file%02d $i) +-done +- +-## Make sure the files went to the right place. There might be link files in +-## the other brick, but they won't have any contents. +-EXPECT "0" count_lines hello ${B0}/${V0}-0 +-EXPECT "100" count_lines hello ${B0}/${V0}-1 +- +-if [ "$EXIT_EARLY" = "1" ]; then +- exit 0; +-fi +- +-## Finish up +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +-TEST $CLI volume stop $V0; +-EXPECT 'Stopped' volinfo_field $V0 'Status'; +- +-TEST $CLI volume delete $V0; +-TEST ! $CLI volume info $V0; +- +-cleanup; +diff --git a/tests/bugs/glusterd/brick-mux-fd-cleanup.t b/tests/bugs/glusterd/brick-mux-fd-cleanup.t +index de11c17..2ac7f9c 100644 +--- a/tests/bugs/glusterd/brick-mux-fd-cleanup.t ++++ b/tests/bugs/glusterd/brick-mux-fd-cleanup.t +@@ -76,3 +76,6 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 + + cleanup ++ ++#delay-gen in not present downstream ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000 +diff --git a/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t +index 597c40c..a931d29 100644 +--- a/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t ++++ b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t +@@ -3,12 +3,16 @@ + . $(dirname $0)/../../include.rc + . $(dirname $0)/../../cluster.rc + ++function peer_count { ++eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ + cleanup + + TEST launch_cluster 3; + TEST $CLI_1 peer probe $H2; + TEST $CLI_1 peer probe $H3; +-EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 + + TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 + TEST $CLI_1 volume start $V0 +@@ -21,7 +25,9 @@ TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start + TEST start_glusterd 2 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0} + +-EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 2 ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3 + + #volume status should work + TEST $CLI_2 volume status +@@ -36,7 +42,7 @@ TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit + TEST start_glusterd 2 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0} + +-EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 + + #volume status should work + TEST $CLI_2 volume status +@@ -44,12 +50,12 @@ TEST $CLI_2 volume status + TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} stop + + kill_glusterd 3 +-EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1 + + TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start + + TEST start_glusterd 3 +-EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 + + TEST $CLI_3 volume status + +diff --git a/tests/bugs/glusterd/bug-1293414-import-brickinfo-uuid.t b/tests/bugs/glusterd/bug-1293414-import-brickinfo-uuid.t +index 9f67e4c..977276e 100755 +--- a/tests/bugs/glusterd/bug-1293414-import-brickinfo-uuid.t ++++ b/tests/bugs/glusterd/bug-1293414-import-brickinfo-uuid.t +@@ -24,6 +24,7 @@ EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count + TEST ! $CLI_3 peer detach $H1 + TEST ! 
$CLI_3 peer detach $H2 + ++EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count + + # peer not hosting bricks should be detachable + TEST $CLI_3 peer detach $H4 +diff --git a/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t b/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t +index 3dbe28a..2d9e528 100644 +--- a/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t ++++ b/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t +@@ -45,7 +45,14 @@ TEST start_glusterd 2 + + EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + ++# checking peer_count is not enough to call that quorum is regained as ++# peer_count is based on peerinfo->connected where as quorum is calculated based ++# on peerinfo->quorum_contrib. To avoid this spurious race of replace brick ++# commit force to execute and fail before the quorum is regained run the command ++# in EXPECT_WITHIN to ensure that with multiple attempts the command goes ++# through once the quorum is regained. ++ + # Now quorum is met. replace-brick will execute successfuly +-TEST $CLI_1 volume replace-brick $V0 $H2:$B2/${V0}1 $H1:$B1/${V0}1_new commit force ++EXPECT_WITHIN $PEER_SYNC_TIMEOUT 0 attempt_replace_brick 1 $V0 $H2:$B2/${V0}1 $H1:$B1/${V0}1_new + + #cleanup; +diff --git a/tests/bugs/glusterd/bug-1595320.t b/tests/bugs/glusterd/bug-1595320.t +index f41df9d..3a289f3 100644 +--- a/tests/bugs/glusterd/bug-1595320.t ++++ b/tests/bugs/glusterd/bug-1595320.t +@@ -25,7 +25,7 @@ TEST pidof glusterd + + # Create volume and enable brick multiplexing + TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3 +-gluster v set all cluster.brick-multiplex on ++TEST $CLI v set all cluster.brick-multiplex on + + # Start the volume + TEST $CLI volume start $V0 +diff --git a/tests/bugs/glusterd/df-results-post-replace-brick-operations.t b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t +index 443911c..04f7588 100644 +--- a/tests/bugs/glusterd/df-results-post-replace-brick-operations.t ++++ b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t +@@ -53,6 +53,9 @@ total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') + TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1/brick1 $H0:$B0/${V0}4/brick1 commit force + TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}2/brick1 $H0:$B0/${V0}5/brick1 commit force + ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++ + # check for the size at mount point, it should be same as previous + total_space_new=$(df -P $M0 | tail -1 | awk '{ print $2}') + TEST [ $total_space -eq $total_space_new ] +diff --git a/tests/bugs/posix/bug-990028.t b/tests/bugs/posix/bug-990028.t +index c864214..bef36a8 100755 +--- a/tests/bugs/posix/bug-990028.t ++++ b/tests/bugs/posix/bug-990028.t +@@ -78,7 +78,7 @@ function links_across_directories() + TEST [ $LINES = 2 ] + + for i in $(seq 1 2); do +- HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir$i/file$i 2>&1 | grep "trusted.pgfid" | cut -d$'\n' -f$i | cut -d'=' -f2` ++ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir$i/file$i 2>&1 | grep "trusted.pgfid" | awk -v n=$i 'NR==n' | cut -d'=' -f2` + TEST_IN_LOOP [ $HL = "0x00000001" ] + done + +diff --git a/tests/bugs/readdir-ahead/bug-1439640.t b/tests/bugs/readdir-ahead/bug-1439640.t +index cc6c829..dcd5407 100755 +--- a/tests/bugs/readdir-ahead/bug-1439640.t ++++ b/tests/bugs/readdir-ahead/bug-1439640.t +@@ -8,6 +8,7 @@ cleanup; + TEST glusterd + + TEST $CLI volume create $V0 $H0:$B{0..1}/$V0 ++TEST $CLI 
volume set $V0 readdir-ahead on + TEST $CLI volume start $V0 + + TEST ! $CLI volume set $V0 parallel-readdir sdf +diff --git a/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t +index d3b5f9a..a2abaf6 100644 +--- a/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t ++++ b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t +@@ -108,10 +108,7 @@ TEST stat $M0/file2 + + # Though file is created on all 3 bricks, lookup will fail as arbiter blames the + # other 2 bricks and ariter is not 'readable'. +-# TEST ! stat $M0/file3 +-# But the checks for failing lookups when quorum is not met is not yet there in +-# rhgs-3.4.0, so stat will succeed. +-TEST stat $M0/file3 ++TEST ! stat $M0/file3 + + # Launch index heal to complete any pending data/metadata heals. + TEST $CLI volume heal $V0 +diff --git a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t +index 91ed39b..d7d1f28 100644 +--- a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t ++++ b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t +@@ -32,6 +32,7 @@ EXPECT 2 get_pending_heal_count $V0 + # Bring it back up and let heal complete. + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +diff --git a/tests/bugs/shard/zero-flag.t b/tests/bugs/shard/zero-flag.t +index 84cb963..1f39787 100644 +--- a/tests/bugs/shard/zero-flag.t ++++ b/tests/bugs/shard/zero-flag.t +@@ -14,6 +14,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3} + TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB + TEST $CLI volume start $V0 + + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 +diff --git a/tests/cluster.rc b/tests/cluster.rc +index c1ff8ab..e258b58 100644 +--- a/tests/cluster.rc ++++ b/tests/cluster.rc +@@ -142,6 +142,16 @@ function peer_count() { + $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l + } + ++function attempt_replace_brick { ++ local cli_no=$1 ++ local vol=$2; ++ local src_brick=$3; ++ local dst_brick=$4; ++ ++ eval \$CLI_$cli_no volume replace-brick $vol $src_brick $dst_brick commit force; ++ echo $? ++} ++ + function cluster_rebalance_status_field { + #The rebalance status can be up to 3 words, (e.g.:'fix-layout in progress'), hence the awk-print $7 thru $9. + #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(e.g.:'completed 3.00'). 
+diff --git a/tests/include.rc b/tests/include.rc +index aca4c4a..81146f4 100644 +--- a/tests/include.rc ++++ b/tests/include.rc +@@ -74,6 +74,7 @@ PROCESS_UP_TIMEOUT=30 + NFS_EXPORT_TIMEOUT=20 + CHILD_UP_TIMEOUT=20 + PROBE_TIMEOUT=60 ++PEER_SYNC_TIMEOUT=20 + REBALANCE_TIMEOUT=360 + REOPEN_TIMEOUT=20 + HEAL_TIMEOUT=80 +-- +1.8.3.1 + diff --git a/SOURCES/0513-core-heketi-cli-is-throwing-error-target-is-busy.patch b/SOURCES/0513-core-heketi-cli-is-throwing-error-target-is-busy.patch new file mode 100644 index 0000000..aa15a69 --- /dev/null +++ b/SOURCES/0513-core-heketi-cli-is-throwing-error-target-is-busy.patch @@ -0,0 +1,114 @@ +From 61d178c25468adfac4cbdfcef43a0d002c635466 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 5 Feb 2019 12:49:10 +0530 +Subject: [PATCH 513/529] core: heketi-cli is throwing error "target is busy" + +Problem: When rpc-transport-disconnect happens, server_connection_cleanup_flush_cbk() + is supposed to call rpc_transport_unref() after open-files on + that transport are flushed per transport.But open-fd-count is + maintained in bound_xl->fd_count, which can be incremented/decremented + cumulatively in server_connection_cleanup() by all transport + disconnect paths. So instead of rpc_transport_unref() happening + per transport, it ends up doing it only once after all the files + on all the transports for the brick are flushed leading to + rpc-leaks. + +Solution: To avoid races maintain fd_cnt at client instead of maintaining + on brick + +Credits: Pranith Kumar Karampuri +> Change-Id: I6e8ea37a61f82d9aefb227c5b3ab57a7a36850e6 +> fixes: bz#1668190 +> (Cherry pick from commit b41cdeb638f9f9ec2fef13ec95c216faf52a9df9) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22108/) + +Change-Id: Ic810095ea1ce418836d240d411168df8be0e4a41 +BUG: 1669020 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/162251 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/client_t.c | 1 + + libglusterfs/src/client_t.h | 2 ++ + libglusterfs/src/xlator.c | 1 - + libglusterfs/src/xlator.h | 3 --- + xlators/protocol/server/src/server-helpers.c | 4 ++-- + 5 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c +index a9ae62c..7d92e0d 100644 +--- a/libglusterfs/src/client_t.c ++++ b/libglusterfs/src/client_t.c +@@ -232,6 +232,7 @@ gf_client_get (xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid, + + GF_ATOMIC_INIT (client->bind, 1); + GF_ATOMIC_INIT (client->count, 1); ++ GF_ATOMIC_INIT(client->fd_cnt, 0); + + client->auth.flavour = cred->flavour; + if (cred->flavour != AUTH_NONE) { +diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/client_t.h +index 088508e..403c488 100644 +--- a/libglusterfs/src/client_t.h ++++ b/libglusterfs/src/client_t.h +@@ -46,6 +46,8 @@ typedef struct _client { + inode_t *subdir_inode; + uuid_t subdir_gfid; + int32_t opversion; ++ /* Variable to save fd_count for detach brick */ ++ gf_atomic_t fd_cnt; + } client_t; + + #define GF_CLIENTCTX_INITIAL_SIZE 8 +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 340d83d..7d90baa 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -490,7 +490,6 @@ xlator_init (xlator_t *xl) + + xl->instance_name = NULL; + GF_ATOMIC_INIT(xl->xprtrefcnt, 0); +- GF_ATOMIC_INIT(xl->fd_cnt, 0); + if (!xl->init) { + gf_msg (xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED, + "No init() found"); +diff 
--git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h +index f8f2630..202bdca 100644 +--- a/libglusterfs/src/xlator.h ++++ b/libglusterfs/src/xlator.h +@@ -965,9 +965,6 @@ struct _xlator { + /* flag to avoid recall of xlator_mem_cleanup for xame xlator */ + uint32_t call_cleanup; + +- /* Variable to save fd_count for detach brick */ +- gf_atomic_t fd_cnt; +- + /* Variable to save xprt associated for detach brick */ + gf_atomic_t xprtrefcnt; + +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index 99256bf..30045ef 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -266,7 +266,7 @@ server_connection_cleanup_flush_cbk (call_frame_t *frame, void *cookie, + victim = client->bound_xl; + + if (victim) { +- fd_cnt = GF_ATOMIC_DEC(victim->fd_cnt); ++ fd_cnt = GF_ATOMIC_DEC(client->fd_cnt); + if (!fd_cnt && conf && detach) { + pthread_mutex_lock(&conf->mutex); + { +@@ -413,7 +413,7 @@ server_connection_cleanup (xlator_t *this, client_t *client, + if (fd_cnt) { + if (fd_exist) + (*fd_exist) = _gf_true; +- GF_ATOMIC_ADD(bound_xl->fd_cnt, fd_cnt); ++ GF_ATOMIC_ADD(client->fd_cnt, fd_cnt); + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/0514-glusterd-display-gluster-volume-status-when-quorum-t.patch b/SOURCES/0514-glusterd-display-gluster-volume-status-when-quorum-t.patch new file mode 100644 index 0000000..17d7c3c --- /dev/null +++ b/SOURCES/0514-glusterd-display-gluster-volume-status-when-quorum-t.patch @@ -0,0 +1,49 @@ +From 4413ccd6818a8680c74bc072b784319ce8d8429f Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Thu, 9 Nov 2017 13:15:51 +0530 +Subject: [PATCH 514/529] glusterd: display gluster volume status, when quorum + type is server + +Problem: when server-quorum-type is server, after restarting glusterd +in the node which is up, gluster volume status is giving incorrect +information. + +Fix: check whether server is blank, before adding other keys into the +dictionary. + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/18703/ + +>Change-Id: I926ebdffab330ccef844f23f6d6556e137914047 +>BUG: 1511339 +>Signed-off-by: Sanju Rakonde + +Change-Id: I926ebdffab330ccef844f23f6d6556e137914047 +BUG: 1574490 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/162399 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index e21ec4e..bafc3af 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5204,6 +5204,12 @@ glusterd_add_node_to_dict (char *server, dict_t *dict, int count, + * the brick as hostname+path, so this will make more sense + * when output. 
+ */ ++ ++ if (!strcmp(server, "")) { ++ ret = 0; ++ goto out; ++ } ++ + snprintf (key, sizeof (key), "brick%d.hostname", count); + if (!strcmp (server, priv->nfs_svc.name)) + ret = dict_set_str (dict, key, "NFS Server"); +-- +1.8.3.1 + diff --git a/SOURCES/0515-cli-change-the-warning-message.patch b/SOURCES/0515-cli-change-the-warning-message.patch new file mode 100644 index 0000000..7414d11 --- /dev/null +++ b/SOURCES/0515-cli-change-the-warning-message.patch @@ -0,0 +1,38 @@ +From d7d4504b6f3e7fb659dd20640b53922812d50445 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 6 Feb 2019 19:06:45 +0530 +Subject: [PATCH 515/529] cli: change the warning message + +This patch changes the warning message user gets, when enabling brick +multiplexing to reflect OCS instead of CNS/CRS. + +Label: DOWNSTREAM ONLY + +Change-Id: Id5fd87955d5a692f8e57560245f8b0cf9882e1da +BUG: 1661393 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/162405 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + cli/src/cli-cmd-parser.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index e790d79..dcce9d3 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1742,8 +1742,8 @@ cli_cmd_volume_set_parse (struct cli_state *state, const char **words, + + if ((strcmp (key, "cluster.brick-multiplex") == 0)) { + question = "Brick-multiplexing is supported only for " +- "container workloads (CNS/CRS). Also it is " +- "advised to make sure that either all " ++ "OCS converged or independent mode. Also it" ++ " is advised to make sure that either all " + "volumes are in stopped state or no bricks " + "are running before this option is modified." + "Do you still want to continue?"; +-- +1.8.3.1 + diff --git a/SOURCES/0516-geo-rep-Fix-permissions-with-non-root-setup.patch b/SOURCES/0516-geo-rep-Fix-permissions-with-non-root-setup.patch new file mode 100644 index 0000000..12ab13c --- /dev/null +++ b/SOURCES/0516-geo-rep-Fix-permissions-with-non-root-setup.patch @@ -0,0 +1,84 @@ +From 39bf395e91021dd51d53c312d6e02638267c3a6b Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 20 Nov 2018 12:36:55 +0530 +Subject: [PATCH 516/529] geo-rep: Fix permissions with non-root setup + +Problem: +In non-root fail-over/fail-back(FO/FB), when slave is +promoted as master, the session goes to 'Faulty' + +Cause: +The command 'gluster-mountbroker ' +is run as a pre-requisite on slave in non-root setup. +It modifies the permission and group of following required +directories and files recursively + + [1] /var/lib/glusterd/geo-replication + [2] /var/log/glusterfs/geo-replication-slaves + +In a normal setup, this is executed on slave node and hence +doing it recursively is not an issue on [1]. But when original +master becomes slave in non-root during FO/FB, it contains +ssh public keys and modifying permissions on them causes +geo-rep to fail with incorrect permissions. + +Fix: +Don't do permission change recursively. Fix permissions for +required files. 
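
In shell terms, the change amounts to roughly the following (a simplified
sketch of the python execute() calls in the hunk below; the chgrp/group
handling is omitted here for brevity):

    # before: recursive, so ssh public keys under the geo-rep dir get rewritten too
    #   chmod -R 770 /var/lib/glusterd/geo-replication
    #   chmod -R 770 /var/log/glusterfs/geo-replication-slaves
    # after: fix only the directory itself, and per-type permissions for the log tree
    chmod 770 /var/lib/glusterd/geo-replication
    find /var/log/glusterfs/geo-replication-slaves -type d -exec chmod 770 {} +
    find /var/log/glusterfs/geo-replication-slaves -type f -exec chmod 660 {} +
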
+ +Backport of: + > Patch: https://review.gluster.org/#/c/glusterfs/+/21689/ + > BUG: bz#1651498 + > Change-Id: I68a744644842e3b00abc26c95c06f123aa78361d + > Signed-off-by: Kotresh HR + +BUG: 1510752 +Change-Id: I68a744644842e3b00abc26c95c06f123aa78361d +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/162463 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/src/peer_mountbroker.py.in | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in +index be182c5..5be16a2 100644 +--- a/geo-replication/src/peer_mountbroker.py.in ++++ b/geo-replication/src/peer_mountbroker.py.in +@@ -8,6 +8,7 @@ from gluster.cliutils import (execute, Cmd, node_output_ok, + from prettytable import PrettyTable + + LOG_DIR = "@localstatedir@/log/glusterfs/geo-replication-slaves" ++CLI_LOG = "@localstatedir@/log/glusterfs/cli.log" + GEOREP_DIR = "@GLUSTERD_WORKDIR@/geo-replication" + GLUSTERD_VOLFILE = "@GLUSTERD_VOLFILE@" + +@@ -142,7 +143,7 @@ class NodeSetup(Cmd): + # chgrp -R /var/log/glusterfs/geo-replication-slaves + # chgrp -R /var/lib/glusterd/geo-replication + # chmod -R 770 /var/log/glusterfs/geo-replication-slaves +- # chmod -R 770 /var/lib/glusterd/geo-replication ++ # chmod 770 /var/lib/glusterd/geo-replication + # mkdir -p + # chmod 0711 + # If selinux, +@@ -192,8 +193,13 @@ class NodeSetup(Cmd): + + execute(["chgrp", "-R", args.group, GEOREP_DIR]) + execute(["chgrp", "-R", args.group, LOG_DIR]) +- execute(["chmod", "-R", "770", GEOREP_DIR]) +- execute(["chmod", "-R", "770", args.group, LOG_DIR]) ++ execute(["chgrp", args.group, CLI_LOG]) ++ execute(["chmod", "770", args.group, GEOREP_DIR]) ++ execute(["find", LOG_DIR, "-type", "d", "-exec", "chmod", "770", "{}", ++ "+"]) ++ execute(["find", LOG_DIR, "-type", "f", "-exec", "chmod", "660", "{}", ++ "+"]) ++ execute(["chmod", "660", CLI_LOG]) + + m.set_mount_root_and_group(args.mount_root, args.group) + m.save() +-- +1.8.3.1 + diff --git a/SOURCES/0517-geo-rep-validate-the-config-checkpoint-date-format.patch b/SOURCES/0517-geo-rep-validate-the-config-checkpoint-date-format.patch new file mode 100644 index 0000000..f2ae211 --- /dev/null +++ b/SOURCES/0517-geo-rep-validate-the-config-checkpoint-date-format.patch @@ -0,0 +1,39 @@ +From ba30dc0db99f0cd6e83ba5085be43607e4710711 Mon Sep 17 00:00:00 2001 +From: Shwetha Acharya +Date: Wed, 21 Nov 2018 12:24:00 +0530 +Subject: [PATCH 517/529] geo-rep: validate the config checkpoint date format + +Added a strlen check to ensure that the format is (Y-m-d H:M:S). 
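
In the hunk below the validation relies on the pointer strptime() returns: on a
full parse it points at the terminating '\0', so any trailing text after a
"Y-m-d H:M:S" label leaves *ret_chkpt non-NUL and the label is rejected. A
checkpoint is then only accepted in the documented form, for example (volume
and slave names are placeholders):

    gluster volume geo-replication <mastervol> <slavehost>::<slavevol> \
        config checkpoint "2016-10-25 15:30:45"
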
+ +>Change-Id: I8844aaa33418d43ffe2320c4a05eb1eddd306903 +>updates: bz#1651584 +>Signed-off-by: Shwetha Acharya + +backport of https://review.gluster.org/#/c/glusterfs/+/21692/ + +BUG: 1429190 +Change-Id: I70d56925abfffb02d2d4b7d6f570b2c063a8d9c2 +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/162467 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-parser.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index dcce9d3..a450797 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -2774,7 +2774,7 @@ config_parse (const char **words, int wordcount, dict_t *dict, + ret_chkpt = strptime(append_str, "%Y-%m-%d %H:%M:%S", + &checkpoint_time); + +- if (ret_chkpt == NULL) { ++ if (ret_chkpt == NULL || *ret_chkpt != '\0') { + ret = -1; + cli_err ("Invalid Checkpoint label. Use format " + "\"Y-m-d H:M:S\", Example: 2016-10-25 15:30:45"); +-- +1.8.3.1 + diff --git a/SOURCES/0518-logging-create-parent-dir-if-not-available.patch b/SOURCES/0518-logging-create-parent-dir-if-not-available.patch new file mode 100644 index 0000000..d1db729 --- /dev/null +++ b/SOURCES/0518-logging-create-parent-dir-if-not-available.patch @@ -0,0 +1,57 @@ +From 1e1bda15377a133e9a91e6f99d13e02bf4469269 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Thu, 7 Feb 2019 13:57:21 +0530 +Subject: [PATCH 518/529] logging: create parent dir if not available + +As glusterfs logging uses different directory than /var/log +(ie, /var/log/glusterfs), there is a chance it may not be +present when starting glusterfs. Create parent dir if it +doesn't exist. + +Upstream fix: +>> URL: https://review.gluster.org/21536 + +BUG: 1570958 +Change-Id: I6efaffd1e7e8aee350afcf2ca354b27747ff5e50 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/162470 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/logging.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c +index 0f238d0..631bc98 100644 +--- a/libglusterfs/src/logging.c ++++ b/libglusterfs/src/logging.c +@@ -758,6 +758,26 @@ gf_log_init (void *data, const char *file, const char *ident) + goto out; + } + ++ /* Also create parent dir */ ++ char *logdir = gf_strdup(file); ++ if (!logdir) { ++ return -1; ++ } ++ char *tmp_index = rindex(logdir, '/'); ++ if (tmp_index) { ++ tmp_index[0] = '\0'; ++ } ++ if (mkdir_p(logdir, 0755, _gf_true)) { ++ /* EEXIST is handled in mkdir_p() itself */ ++ gf_msg("logging", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR, ++ "failed to create metrics dir %s (%s)", logdir, ++ strerror(errno)); ++ GF_FREE(logdir); ++ return -1; ++ } ++ /* no need of this variable */ ++ GF_FREE(logdir); ++ + ctx->log.filename = gf_strdup (file); + if (!ctx->log.filename) { + fprintf (stderr, "ERROR: updating log-filename failed: %s\n", +-- +1.8.3.1 + diff --git a/SOURCES/0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch b/SOURCES/0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch new file mode 100644 index 0000000..7c31115 --- /dev/null +++ b/SOURCES/0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch @@ -0,0 +1,118 @@ +From a902a17263648180bba8a0167a221e549ba5186a Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Wed, 6 Feb 2019 10:26:42 +0530 +Subject: [PATCH 519/529] cluster/dht: Delete invalid linkto files in 
rmdir + +rm -rf fails on dirs which contain linkto files +that point to themselves because dht incorrectly thought +that they were cached files after looking them up. +The fix now treats them as invalid linkto files +and deletes them. + +upstream master: https://review.gluster.org/#/c/glusterfs/+/22066/ + +> Change-Id: I376c72a5309714ee339c74485e02cfb4e29be643 +> fixes: bz#1667804 +> Signed-off-by: N Balachandran + +Change-Id: Ib759907131f791e5853b2e0cb38a68d94a3efd81 +BUG: 1668989 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/162342 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/distribute/bug-1667804.t | 63 ++++++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 6 ++-- + 2 files changed, 67 insertions(+), 2 deletions(-) + create mode 100644 tests/bugs/distribute/bug-1667804.t + +diff --git a/tests/bugs/distribute/bug-1667804.t b/tests/bugs/distribute/bug-1667804.t +new file mode 100644 +index 0000000..3f7c431 +--- /dev/null ++++ b/tests/bugs/distribute/bug-1667804.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../dht.rc ++ ++function confirm_all_linkto_files () ++{ ++ inpath=$1 ++ for infile in $inpath/* ++ do ++ echo $infile ++ ret1=$(is_dht_linkfile $infile) ++ if [ "$ret1" -eq 0 ]; then ++ echo "$infile is not a linkto file" ++ echo 0 ++ return ++ fi ++ done ++ echo 1 ++} ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++#Create a distributed volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}; ++TEST $CLI volume start $V0 ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++#Create files and rename them in order to create linkto files ++TEST mkdir -p $M0/dir0/dir1 ++TEST touch $M0/dir0/dir1/file-{1..50} ++ ++for i in {1..50}; do ++ mv $M0/dir0/dir1/file-$i $M0/dir0/dir1/nfile-$i; ++done ++ ++#Remove the second brick to force the creation of linkto files ++#on the removed brick ++ ++TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}2" ++TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop ++ ++EXPECT "1" confirm_all_linkto_files $B0/${V0}2/dir0/dir1 ++ ++#Modify the xattrs of the linkto files on the removed brick to point to itself. ++ ++target=$(cat $M0/.meta/graphs/active/$V0-dht/subvolumes/1/name) ++ ++setfattr -n trusted.glusterfs.dht.linkto -v "$target\0" $B0/${V0}2/dir0/dir1/nfile* ++ ++ ++TEST rm -rf $M0/dir0 ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 767c6a8..1311a8d 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -10062,8 +10062,10 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, + + subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat, + trav->dict); +- if (!subvol) { +- ++ if (!subvol || (subvol == src)) { ++ /* we need to delete the linkto file if it does not ++ * have a valid subvol or it points to itself. ++ */ + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_INVALID_LINKFILE, + "Linkfile does not have link subvolume. 
" +-- +1.8.3.1 + diff --git a/SOURCES/0520-spec-avoid-creation-of-temp-file-in-lua-script.patch b/SOURCES/0520-spec-avoid-creation-of-temp-file-in-lua-script.patch new file mode 100644 index 0000000..63a4700 --- /dev/null +++ b/SOURCES/0520-spec-avoid-creation-of-temp-file-in-lua-script.patch @@ -0,0 +1,205 @@ +From f807b5dd999808a8e56061690da01420d3cb4cc5 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Sat, 9 Feb 2019 14:01:28 +0530 +Subject: [PATCH 520/529] spec: avoid creation of temp file in lua script + +Avoiding creation of temporary file to execute bash shell script from a +lua scriptlet increases install time security. + +Label: DOWNSTREAM ONLY + +BUG: 1410145 +Change-Id: Ie5b9035f292402b18dea768aca8bc82a1e7fa615 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/162621 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 87 ++++++++++--------------------------------------------- + 1 file changed, 15 insertions(+), 72 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 0ad4ffc..2680bec 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1643,12 +1643,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1685,12 +1680,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1727,12 +1717,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1769,12 +1754,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. 
"\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1811,12 +1791,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1853,12 +1828,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1895,12 +1865,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1938,12 +1903,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1981,12 +1941,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2024,12 +1979,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2068,12 +2018,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. 
+-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2111,12 +2056,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2173,6 +2113,9 @@ fi + %endif + + %changelog ++* Sat Feb 09 2019 Milind Changire ++- Avoid creation of temporary file in lua script during install (#1410145) ++ + * Wed Dec 19 2018 Milind Changire + - Add explicit package dependencies (#1656357) + - Remove absolute paths from spec file (#1350745) +-- +1.8.3.1 + diff --git a/SOURCES/0521-rpc-use-address-family-option-from-vol-file.patch b/SOURCES/0521-rpc-use-address-family-option-from-vol-file.patch new file mode 100644 index 0000000..a355469 --- /dev/null +++ b/SOURCES/0521-rpc-use-address-family-option-from-vol-file.patch @@ -0,0 +1,276 @@ +From ae01f8acacf8e51b6c3486e3349497bb4e982866 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Sat, 9 Feb 2019 13:38:40 +0530 +Subject: [PATCH 521/529] rpc: use address-family option from vol file + +This patch helps enable IPv6 connections in the cluster. +The default address-family is IPv4 without using this option explicitly. + +When address-family is set to "inet6" in the /etc/glusterfs/glusterd.vol +file, the mount command-line also needs to have +-o xlator-option="transport.address-family=inet6" added to it. + +This option also gets added to the brick command-line. +Snapshot and gfapi use-cases should also use this option to pass in the +inet6 address-family. + +mainline: +> Change-Id: I97db91021af27bacb6d7578e33ea4817f66d7270 +> fixes: bz#1635863 +> Signed-off-by: Milind Changire +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/21948 + +Change-Id: I97db91021af27bacb6d7578e33ea4817f66d7270 +BUG: 1618669 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/162620 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-mgmt.c | 12 +++++++++--- + glusterfsd/src/glusterfsd-mgmt.c | 6 +++++- + libglusterfs/src/common-utils.c | 18 ++++++++++++++++-- + libglusterfs/src/common-utils.h | 3 +++ + rpc/rpc-lib/src/rpc-transport.c | 6 +++--- + rpc/rpc-lib/src/rpc-transport.h | 2 +- + .../snapview-server/src/snapview-server-mgmt.c | 5 ++++- + xlators/mgmt/glusterd/src/glusterd-handler.c | 11 ++++++++--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 7 +++++++ + 9 files changed, 56 insertions(+), 14 deletions(-) + +diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c +index b70dc35..f1281bb 100644 +--- a/api/src/glfs-mgmt.c ++++ b/api/src/glfs-mgmt.c +@@ -646,8 +646,10 @@ glfs_mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count, + * volfile if topology hasn't changed. 
+ * glusterfs_volfile_reconfigure returns 3 possible return states + * return 0 =======> reconfiguration of options has succeeded +- * return 1 =======> the graph has to be reconstructed and all the xlators should be inited +- * return -1(or -ve) =======> Some Internal Error occurred during the operation ++ * return 1 =======> the graph has to be reconstructed and all the ++ * xlators should be inited ++ * return -1(or -ve) =======> Some Internal Error occurred during the ++ * operation + */ + + ret = gf_volfile_reconfigure (fs->oldvollen, tmpfp, fs->ctx, +@@ -988,7 +990,11 @@ glfs_mgmt_init (struct glfs *fs) + !strcmp (cmd_args->volfile_server_transport, "unix")) { + ret = rpc_transport_unix_options_build (&options, host, 0); + } else { +- ret = rpc_transport_inet_options_build (&options, host, port); ++ xlator_cmdline_option_t *opt = ++ find_xlator_option_in_cmd_args_t("address-family", ++ cmd_args); ++ ret = rpc_transport_inet_options_build(&options, host, port, ++ (opt ? opt->value : NULL)); + } + + if (ret) +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index b952526..e38ad64 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -2552,6 +2552,7 @@ glusterfs_mgmt_init (glusterfs_ctx_t *ctx) + int ret = -1; + int port = GF_DEFAULT_BASE_PORT; + char *host = NULL; ++ xlator_cmdline_option_t *opt = NULL; + + cmd_args = &ctx->cmd_args; + GF_VALIDATE_OR_GOTO (THIS->name, cmd_args->volfile_server, out); +@@ -2570,7 +2571,10 @@ glusterfs_mgmt_init (glusterfs_ctx_t *ctx) + !strcmp (cmd_args->volfile_server_transport, "unix")) { + ret = rpc_transport_unix_options_build (&options, host, 0); + } else { +- ret = rpc_transport_inet_options_build (&options, host, port); ++ opt = find_xlator_option_in_cmd_args_t("address-family", ++ cmd_args); ++ ret = rpc_transport_inet_options_build(&options, host, port, ++ (opt ? 
opt->value : NULL)); + } + if (ret) + goto out; +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index 1243754..e3f3989 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -477,8 +477,9 @@ gf_resolve_ip6 (const char *hostname, + } + if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) != 0) { + gf_msg ("resolver", GF_LOG_ERROR, 0, +- LG_MSG_GETADDRINFO_FAILED, "getaddrinfo failed" +- " (%s)", gai_strerror (ret)); ++ LG_MSG_GETADDRINFO_FAILED, ++ "getaddrinfo failed (family:%d) (%s)", family, ++ gai_strerror (ret)); + + GF_FREE (*dnscache); + *dnscache = NULL; +@@ -5136,3 +5137,16 @@ out: + return NULL; + } + ++xlator_cmdline_option_t * ++find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args) ++{ ++ xlator_cmdline_option_t *pos = NULL; ++ xlator_cmdline_option_t *tmp = NULL; ++ ++ list_for_each_entry_safe(pos, tmp, &args->xlator_options, cmd_args) ++ { ++ if (strcmp(pos->key, option_name) == 0) ++ return pos; ++ } ++ return NULL; ++} +diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h +index 50c1f9a..15a31a3 100644 +--- a/libglusterfs/src/common-utils.h ++++ b/libglusterfs/src/common-utils.h +@@ -945,4 +945,7 @@ glusterfs_compute_sha256 (const unsigned char *content, size_t size, + char* + get_struct_variable (int mem_num, gf_gsync_status_t *sts_val); + ++xlator_cmdline_option_t * ++find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args); ++ + #endif /* _COMMON_UTILS_H */ +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index 0c6ab66..b737ff2 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -666,7 +666,7 @@ out: + + int + rpc_transport_inet_options_build (dict_t **options, const char *hostname, +- int port) ++ int port, char *af) + { + dict_t *dict = NULL; + char *host = NULL; +@@ -702,10 +702,10 @@ rpc_transport_inet_options_build (dict_t **options, const char *hostname, + goto out; + } + +- ret = dict_set_str (dict, "address-family", addr_family); ++ ret = dict_set_str (dict, "address-family", (af != NULL ? af : addr_family)); + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, +- "failed to set address-family to %s", addr_family); ++ "failed to set address-family to %s", (af != NULL ? 
af : addr_family)); + goto out; + } + +diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h +index f5fb6e1..c97f98d 100644 +--- a/rpc/rpc-lib/src/rpc-transport.h ++++ b/rpc/rpc-lib/src/rpc-transport.h +@@ -316,7 +316,7 @@ rpc_transport_unix_options_build (dict_t **options, char *filepath, + int frame_timeout); + + int +-rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port); ++rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port, char *af); + + void + rpc_transport_cleanup(rpc_transport_t *); +diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c +index 18c902d..f82c8a0 100644 +--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c ++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c +@@ -84,6 +84,7 @@ svs_mgmt_init (xlator_t *this) + char *host = NULL; + cmd_args_t *cmd_args = NULL; + glusterfs_ctx_t *ctx = NULL; ++ xlator_cmdline_option_t *opt = NULL; + + GF_VALIDATE_OR_GOTO ("snapview-server", this, out); + GF_VALIDATE_OR_GOTO (this->name, this->private, out); +@@ -98,7 +99,9 @@ svs_mgmt_init (xlator_t *this) + if (cmd_args->volfile_server) + host = cmd_args->volfile_server; + +- ret = rpc_transport_inet_options_build (&options, host, port); ++ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args); ++ ret = rpc_transport_inet_options_build(&options, host, port, ++ (opt != NULL ? opt->value : NULL)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to build the " + "transport options"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 81b1c02..e92cb5f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3440,7 +3440,7 @@ out: + + int + glusterd_transport_inet_options_build (dict_t **options, const char *hostname, +- int port) ++ int port, char *af) + { + xlator_t *this = NULL; + dict_t *dict = NULL; +@@ -3458,7 +3458,7 @@ glusterd_transport_inet_options_build (dict_t **options, const char *hostname, + port = GLUSTERD_DEFAULT_PORT; + + /* Build default transport options */ +- ret = rpc_transport_inet_options_build (&dict, hostname, port); ++ ret = rpc_transport_inet_options_build (&dict, hostname, port, af); + if (ret) + goto out; + +@@ -3518,6 +3518,7 @@ glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + int ret = -1; + glusterd_peerctx_t *peerctx = NULL; + data_t *data = NULL; ++ char *af = NULL; + + peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); + if (!peerctx) +@@ -3532,10 +3533,14 @@ glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + number can be used to + uniquely identify a + peerinfo */ ++ ret = dict_get_str(this->options, "transport.address-family", &af); ++ if (ret) ++ gf_log(this->name, GF_LOG_TRACE, ++ "option transport.address-family is not set in xlator options"); + + ret = glusterd_transport_inet_options_build (&options, + peerinfo->hostname, +- peerinfo->port); ++ peerinfo->port, af); + if (ret) + goto out; + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index bafc3af..50758ca 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1975,6 +1975,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, + rpc_clnt_connection_t *conn = NULL; + int pid = 
-1; + glusterd_brick_proc_t *brick_proc = NULL; ++ char *inet_family = NULL; + + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); +@@ -2140,6 +2141,12 @@ retry: + runner_argprintf (&runner, + "--volfile-server-transport=socket,rdma"); + ++ ret = dict_get_str(this->options, "transport.address-family", &inet_family); ++ if (!ret) { ++ runner_add_arg(&runner, "--xlator-option"); ++ runner_argprintf(&runner, "transport.address-family=%s", inet_family); ++ } ++ + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + +-- +1.8.3.1 + diff --git a/SOURCES/0522-fuse-add-lru-limit-option.patch b/SOURCES/0522-fuse-add-lru-limit-option.patch new file mode 100644 index 0000000..a625d91 --- /dev/null +++ b/SOURCES/0522-fuse-add-lru-limit-option.patch @@ -0,0 +1,1028 @@ +From 55e67fb41ae3b4388839723ac929cd239280a0fc Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Thu, 7 Feb 2019 18:06:43 +0530 +Subject: [PATCH 522/529] fuse: add --lru-limit option + +The inode LRU mechanism is moot in fuse xlator (ie. there is no +limit for the LRU list), as fuse inodes are referenced from +kernel context, and thus they can only be dropped on request of +the kernel. This might results in a high number of passive +inodes which are useless for the glusterfs client, causing a +significant memory overhead. + +This change tries to remedy this by extending the LRU semantics +and allowing to set a finite limit on the fuse inode LRU. + +A brief history of problem: + +When gluster's inode table was designed, fuse didn't have any +'invalidate' method, which means, userspace application could +never ask kernel to send a 'forget()' fop, instead had to wait +for kernel to send it based on kernel's parameters. Inode table +remembers the number of times kernel has cached the inode based +on the 'nlookup' parameter. And 'nlookup' field is not used by +no other entry points (like server-protocol, gfapi etc). + +Hence the inode_table of fuse module always has to have lru-limit +as '0', which means no limit. GlusterFS always had to keep all +inodes in memory as kernel would have had a reference to it. +Again, the reason for this is, kernel's glusterfs inode reference +was pointer of 'inode_t' structure in glusterfs. As it is a +pointer, we could never free it (to prevent segfault, or memory +corruption). + +Solution: + +In the inode table, handle the prune case of inodes with 'nlookup' +differently, and call a 'invalidator' method, which in this case is +fuse_invalidate(), and it sends the request to kernel for getting +the forget request. + +When the kernel sends the forget, it means, it has dropped all +the reference to the inode, and it will send the forget with the +'nlookup' parameter too. We just need to make sure to reduce the +'nlookup' value we have when we get forget. That automatically +cause the relevant prune to happen. 
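
Once the limit is in place it can be passed either directly to the glusterfs
client, as the test added by this patch does, or as a mount option (the
mount.glusterfs.8 hunk below documents lru-limit=N); server, volume and
mount-point names here are placeholders:

    glusterfs -s server1 --volfile-id testvol --lru-limit 65536 /mnt/testvol
    # or, via mount(8):
    mount -t glusterfs -o lru-limit=65536 server1:/testvol /mnt/testvol
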
+ +Credits: Csaba Henk, Xavier Hernandez, Raghavendra Gowdappa, Nithya B + +Upstream: +> URL: https://review.gluster.org/19778 + +BUG: 1511779 +Change-Id: Iabe22a62e0f819b7eb67d4ecb850dd559b0c937f +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/162494 +Reviewed-by: Nithya Balachandran +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/mount.glusterfs.8 | 4 + + glusterfsd/src/glusterfsd.c | 24 +++ + glusterfsd/src/glusterfsd.h | 1 + + libglusterfs/src/glusterfs.h | 1 + + libglusterfs/src/inode.c | 256 ++++++++++++++++++++++++---- + libglusterfs/src/inode.h | 17 +- + tests/features/fuse-lru-limit.t | 42 +++++ + xlators/mount/fuse/src/fuse-bridge.c | 121 ++++++++----- + xlators/mount/fuse/src/fuse-bridge.h | 3 + + xlators/mount/fuse/utils/mount.glusterfs.in | 7 + + 10 files changed, 393 insertions(+), 83 deletions(-) + create mode 100644 tests/features/fuse-lru-limit.t + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index 95aad02..ed6b410 100644 +--- a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -119,6 +119,10 @@ Provide list of backup volfile servers in the following format [default: None] + \fBDeprecated\fR option - placed here for backward compatibility [default: 1] + .TP + .TP ++\fBlru-limit=\fRN ++Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] ++.TP ++.TP + \fBbackground-qlen=\fRN + Set fuse module's background queue length to N [default: 64] + .TP +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 990036c..2e2cd77 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -203,6 +203,9 @@ static struct argp_option gf_options[] = { + "[default: 300]"}, + {"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0, + "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"}, ++ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, ++ "Set fuse module's limit for number of inodes kept in LRU list to N " ++ "[default: 131072]"}, + {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0, + "Set fuse module's background queue length to N " + "[default: 64]"}, +@@ -462,6 +465,15 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) + } + } + ++ if (cmd_args->lru_limit >= 0) { ++ ret = dict_set_int32(options, "lru-limit", cmd_args->lru_limit); ++ if (ret < 0) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, ++ "lru-limit"); ++ goto err; ++ } ++ } ++ + if (cmd_args->background_qlen) { + ret = dict_set_int32 (options, "background-qlen", + cmd_args->background_qlen); +@@ -1169,6 +1181,13 @@ parse_opts (int key, char *arg, struct argp_state *state) + cmd_args->resolve_gids = 1; + break; + ++ case ARGP_FUSE_LRU_LIMIT_KEY: ++ if (!gf_string2int32(arg, &cmd_args->lru_limit)) ++ break; ++ ++ argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); ++ break; ++ + case ARGP_FUSE_BACKGROUND_QLEN_KEY: + if (!gf_string2int (arg, &cmd_args->background_qlen)) + break; +@@ -1937,6 +1956,11 @@ parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx) + ctx->ssl_cert_depth = glusterfs_read_secure_access_file (); + } + ++ /* Need to set lru_limit to below 0 to indicate there was nothing ++ specified. This is needed as 0 is a valid option, and may not be ++ default value. 
*/ ++ cmd_args->lru_limit = -1; ++ + argp_parse (&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args); + if (cmd_args->print_netgroups) { + /* When this option is set we don't want to do anything else +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index 75cb1d8..1550a30 100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -100,6 +100,7 @@ enum argp_option_keys { + ARGP_SUBDIR_MOUNT_KEY = 178, + ARGP_FUSE_EVENT_HISTORY_KEY = 179, + ARGP_READER_THREAD_COUNT_KEY = 180, ++ ARGP_FUSE_LRU_LIMIT_KEY = 190, + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index 157437c..2690306 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -413,6 +413,7 @@ struct _cmd_args { + pid_t client_pid; + int client_pid_set; + unsigned uid_map_root; ++ int32_t lru_limit; + int background_qlen; + int congestion_threshold; + char *fuse_mountopts; +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 29d3c8f..f57020a 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -24,6 +24,100 @@ + move latest accessed dentry to list_head of inode + */ + ++/* clang-format off */ ++/* ++ ++Details as per Xavi: ++ ++ I think we should have 3 lists: active, lru and invalidate. ++ ++We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of ++refs, invalidate_sent flag and moving from one list to another must be done ++atomically. ++ ++With this information, these are the states that cause a transition: ++ ++ refs nlookups inv_sent op ++ 1 0 0 unref -> refs = 0, active--->destroy ++ 1 1 0 unref -> refs = 0, active--->lru ++ 1 1 0 forget -> nlookups = 0, active--->active ++ *0 1 0 forget -> nlookups = 0, lru--->destroy ++ *0 1 1 forget -> nlookups = 0, invalidate--->destroy ++ 0 1 0 ref -> refs = 1, lru--->active ++ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active ++ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate ++ 1 1 1 unref -> refs = 0, invalidate--->invalidate ++ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active ++ ++(*) technically these combinations cannot happen because a forget sent by the ++kernel first calls ref() and then unref(). However it's equivalent. ++ ++overflow means that lru list has grown beyond the limit and the inode needs to ++be invalidated. All other combinations do not cause a change in state or are not ++possible. ++ ++Based on this, the code could be similar to this: ++ ++ ref(inode, inv) ++ { ++ if (refs == 0) { ++ if (inv_sent) { ++ invalidate_count--; ++ inv_sent = 0; ++ } else { ++ lru_count--; ++ } ++ if (inv) { ++ inv_sent = 1; ++ invalidate_count++; ++ list_move(inode, invalidate); ++ } else { ++ active_count++; ++ list_move(inode, active); ++ } ++ } ++ refs++; ++ } ++ ++ unref(inode, clear) ++ { ++ if (clear && inv_sent) { ++ // there is a case of fuse itself sending forget, without ++ // invalidate, after entry delete, like unlink(), rmdir(). 
++ inv_sent = 0; ++ invalidate_count--; ++ active_count++; ++ list_move(inode, active); ++ } ++ refs--; ++ if ((refs == 0) && !inv_sent) { ++ active_count--; ++ if (nlookups == 0) { ++ destroy(inode); ++ } else { ++ lru_count++; ++ list_move(inode, lru); ++ } ++ } ++ } ++ ++ forget(inode) ++ { ++ ref(inode, false); ++ nlookups--; ++ unref(inode, true); ++ } ++ ++ overflow(inode) ++ { ++ ref(inode, true); ++ invalidator(inode); ++ unref(inode, false); ++ } ++ ++*/ ++/* clang-format on */ ++ + #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \ + { \ + int i = 1; \ +@@ -37,7 +131,7 @@ + } + + static inode_t * +-__inode_unref (inode_t *inode); ++__inode_unref (inode_t *inode, gf_boolean_t clear); + + static int + inode_table_prune (inode_table_t *table); +@@ -138,7 +232,7 @@ __dentry_unset (dentry_t *dentry) + dentry->name = NULL; + + if (dentry->parent) { +- __inode_unref (dentry->parent); ++ __inode_unref (dentry->parent, _gf_false); + dentry->parent = NULL; + } + +@@ -465,7 +559,7 @@ out: + + + static inode_t * +-__inode_unref (inode_t *inode) ++__inode_unref (inode_t *inode, gf_boolean_t clear) + { + int index = 0; + xlator_t *this = NULL; +@@ -473,8 +567,6 @@ __inode_unref (inode_t *inode) + if (!inode) + return NULL; + +- this = THIS; +- + /* + * Root inode should always be in active list of inode table. So unrefs + * on root inode are no-ops. +@@ -482,6 +574,14 @@ __inode_unref (inode_t *inode) + if (__is_root_gfid(inode->gfid)) + return inode; + ++ this = THIS; ++ ++ if (clear && inode->invalidate_sent) { ++ inode->invalidate_sent = _gf_false; ++ inode->table->invalidate_size--; ++ __inode_activate(inode); ++ } ++ + GF_ASSERT (inode->ref); + + --inode->ref; +@@ -492,7 +592,7 @@ __inode_unref (inode_t *inode) + inode->_ctx[index].ref--; + } + +- if (!inode->ref) { ++ if (!inode->ref && !inode->invalidate_sent) { + inode->table->active_size--; + + if (inode->nlookup) +@@ -506,7 +606,7 @@ __inode_unref (inode_t *inode) + + + static inode_t * +-__inode_ref (inode_t *inode) ++__inode_ref (inode_t *inode, gf_boolean_t is_invalidate) + { + int index = 0; + xlator_t *this = NULL; +@@ -516,11 +616,6 @@ __inode_ref (inode_t *inode) + + this = THIS; + +- if (!inode->ref) { +- inode->table->lru_size--; +- __inode_activate (inode); +- } +- + /* + * Root inode should always be in active list of inode table. So unrefs + * on root inode are no-ops. 
If we do not allow unrefs but allow refs, +@@ -532,6 +627,22 @@ __inode_ref (inode_t *inode) + if (__is_root_gfid(inode->gfid) && inode->ref) + return inode; + ++ if (!inode->ref) { ++ if (inode->invalidate_sent) { ++ inode->invalidate_sent = _gf_false; ++ inode->table->invalidate_size--; ++ } else { ++ inode->table->lru_size--; ++ } ++ if (is_invalidate) { ++ inode->invalidate_sent = _gf_true; ++ inode->table->invalidate_size++; ++ list_move_tail(&inode->list, &inode->table->invalidate); ++ } else { ++ __inode_activate(inode); ++ } ++ } ++ + inode->ref++; + + index = __inode_get_xl_index (inode, this); +@@ -556,7 +667,7 @@ inode_unref (inode_t *inode) + + pthread_mutex_lock (&table->lock); + { +- inode = __inode_unref (inode); ++ inode = __inode_unref (inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -578,7 +689,7 @@ inode_ref (inode_t *inode) + + pthread_mutex_lock (&table->lock); + { +- inode = __inode_ref (inode); ++ inode = __inode_ref (inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -614,7 +725,7 @@ __dentry_create (inode_t *inode, inode_t *parent, const char *name) + } + + if (parent) +- newd->parent = __inode_ref (parent); ++ newd->parent = __inode_ref (parent, _gf_false); + + list_add (&newd->inode_list, &inode->dentry_list); + newd->inode = inode; +@@ -685,7 +796,7 @@ inode_new (inode_table_t *table) + { + inode = __inode_create (table); + if (inode != NULL) { +- __inode_ref (inode); ++ __inode_ref (inode, _gf_false); + } + } + pthread_mutex_unlock (&table->lock); +@@ -802,7 +913,7 @@ inode_grep (inode_table_t *table, inode_t *parent, const char *name) + inode = dentry->inode; + + if (inode) +- __inode_ref (inode); ++ __inode_ref (inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -947,7 +1058,7 @@ inode_find (inode_table_t *table, uuid_t gfid) + { + inode = __inode_find (table, gfid); + if (inode) +- __inode_ref (inode); ++ __inode_ref (inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -1096,7 +1207,7 @@ inode_link (inode_t *inode, inode_t *parent, const char *name, + linked_inode = __inode_link (inode, parent, name, iatt); + + if (linked_inode) +- __inode_ref (linked_inode); ++ __inode_ref (linked_inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -1178,6 +1289,31 @@ inode_forget (inode_t *inode, uint64_t nlookup) + return 0; + } + ++int ++inode_forget_with_unref(inode_t *inode, uint64_t nlookup) ++{ ++ inode_table_t *table = NULL; ++ ++ if (!inode) { ++ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, ++ "inode not found"); ++ return -1; ++ } ++ ++ table = inode->table; ++ ++ pthread_mutex_lock(&table->lock); ++ { ++ __inode_forget(inode, nlookup); ++ __inode_unref(inode, _gf_true); ++ } ++ pthread_mutex_unlock(&table->lock); ++ ++ inode_table_prune(table); ++ ++ return 0; ++} ++ + /* + * Invalidate an inode. This is invoked when a translator decides that an inode's + * cache is no longer valid. 
Any translator interested in taking action in this +@@ -1356,7 +1492,7 @@ inode_parent (inode_t *inode, uuid_t pargfid, const char *name) + parent = dentry->parent; + + if (parent) +- __inode_ref (parent); ++ __inode_ref (parent, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -1540,6 +1676,7 @@ inode_table_prune (inode_table_t *table) + inode_t *del = NULL; + inode_t *tmp = NULL; + inode_t *entry = NULL; ++ int64_t lru_size = 0; + + if (!table) + return -1; +@@ -1548,8 +1685,11 @@ inode_table_prune (inode_table_t *table) + + pthread_mutex_lock (&table->lock); + { +- while (table->lru_limit +- && table->lru_size > (table->lru_limit)) { ++ if (!table->lru_limit) ++ goto purge_list; ++ ++ lru_size = table->lru_size; ++ while (lru_size > (table->lru_limit)) { + if (list_empty (&table->lru)) { + gf_msg_callingfn (THIS->name, GF_LOG_WARNING, 0, + LG_MSG_INVALID_INODE_LIST, +@@ -1559,7 +1699,18 @@ inode_table_prune (inode_table_t *table) + break; + } + ++ lru_size--; + entry = list_entry (table->lru.next, inode_t, list); ++ /* The logic of invalidation is required only if invalidator_fn ++ is present */ ++ if (table->invalidator_fn) { ++ /* check for valid inode with 'nlookup' */ ++ if (entry->nlookup) { ++ __inode_ref(entry, _gf_true); ++ tmp = entry; ++ break; ++ } ++ } + + table->lru_size--; + __inode_retire (entry); +@@ -1567,17 +1718,25 @@ inode_table_prune (inode_table_t *table) + ret++; + } + ++ purge_list: + list_splice_init (&table->purge, &purge); + table->purge_size = 0; + } + pthread_mutex_unlock (&table->lock); + +- { +- list_for_each_entry_safe (del, tmp, &purge, list) { +- list_del_init (&del->list); +- __inode_forget (del, 0); +- __inode_destroy (del); +- } ++ /* Pick 1 inode for invalidation */ ++ if (tmp) { ++ xlator_t *old_THIS = THIS; ++ THIS = table->invalidator_xl; ++ table->invalidator_fn(table->invalidator_xl, tmp); ++ THIS = old_THIS; ++ inode_unref(tmp); ++ } ++ ++ list_for_each_entry_safe (del, tmp, &purge, list) { ++ list_del_init (&del->list); ++ __inode_forget (del, 0); ++ __inode_destroy (del); + } + + return ret; +@@ -1605,9 +1764,12 @@ __inode_table_init_root (inode_table_t *table) + + + inode_table_t * +-inode_table_new (size_t lru_limit, xlator_t *xl) ++inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, ++ int32_t (*invalidator_fn)(xlator_t *, inode_t *), ++ xlator_t *invalidator_xl) + { + inode_table_t *new = NULL; ++ uint32_t mem_pool_size = lru_limit; + int ret = -1; + int i = 0; + +@@ -1619,20 +1781,19 @@ inode_table_new (size_t lru_limit, xlator_t *xl) + new->ctxcount = xl->graph->xl_count + 1; + + new->lru_limit = lru_limit; ++ new->invalidator_fn = invalidator_fn; ++ new->invalidator_xl = invalidator_xl; + + new->hashsize = 14057; /* TODO: Random Number?? */ + +- /* In case FUSE is initing the inode table. 
*/ +- if (lru_limit == 0) +- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; +- +- new->inode_pool = mem_pool_new (inode_t, lru_limit); ++ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) ++ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; + ++ new->inode_pool = mem_pool_new(inode_t, mem_pool_size); + if (!new->inode_pool) + goto out; + +- new->dentry_pool = mem_pool_new (dentry_t, lru_limit); +- ++ new->dentry_pool = mem_pool_new (dentry_t, mem_pool_size); + if (!new->dentry_pool) + goto out; + +@@ -1667,6 +1828,7 @@ inode_table_new (size_t lru_limit, xlator_t *xl) + INIT_LIST_HEAD (&new->active); + INIT_LIST_HEAD (&new->lru); + INIT_LIST_HEAD (&new->purge); ++ INIT_LIST_HEAD(&new->invalidate); + + ret = gf_asprintf (&new->name, "%s/inode", xl->name); + if (-1 == ret) { +@@ -1696,6 +1858,14 @@ out: + return new; + } + ++inode_table_t * ++inode_table_new(uint32_t lru_limit, xlator_t *xl) ++{ ++ /* Only fuse for now requires the inode table with invalidator */ ++ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); ++} ++ ++ + int + inode_table_ctx_free (inode_table_t *table) + { +@@ -1830,6 +2000,15 @@ inode_table_destroy (inode_table_t *inode_table) { + inode_table->lru_size--; + } + ++ /* Same logic for invalidate list */ ++ while (!list_empty(&inode_table->invalidate)) { ++ trav = list_first_entry(&inode_table->invalidate, ++ inode_t, list); ++ __inode_forget(trav, 0); ++ __inode_retire(trav); ++ inode_table->invalidate_size--; ++ } ++ + while (!list_empty (&inode_table->active)) { + trav = list_first_entry (&inode_table->active, + inode_t, list); +@@ -2347,6 +2526,8 @@ inode_dump (inode_t *inode, char *prefix) + gf_proc_dump_write("active-fd-count", "%u", + inode->active_fd_count); + gf_proc_dump_write("ref", "%u", inode->ref); ++ gf_proc_dump_write("invalidate-sent", "%d", ++ inode->invalidate_sent); + gf_proc_dump_write("ia_type", "%d", inode->ia_type); + if (inode->_ctx) { + inode_ctx = GF_CALLOC (inode->table->ctxcount, +@@ -2427,10 +2608,13 @@ inode_table_dump (inode_table_t *itable, char *prefix) + gf_proc_dump_write(key, "%d", itable->lru_size); + gf_proc_dump_build_key(key, prefix, "purge_size"); + gf_proc_dump_write(key, "%d", itable->purge_size); ++ gf_proc_dump_build_key(key, prefix, "invalidate_size"); ++ gf_proc_dump_write(key, "%d", itable->invalidate_size); + + INODE_DUMP_LIST(&itable->active, key, prefix, "active"); + INODE_DUMP_LIST(&itable->lru, key, prefix, "lru"); + INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); ++ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate"); + + pthread_mutex_unlock(&itable->lock); + } +diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h +index 7a87748..6a96447 100644 +--- a/libglusterfs/src/inode.h ++++ b/libglusterfs/src/inode.h +@@ -55,6 +55,13 @@ struct _inode_table { + struct mem_pool *dentry_pool; /* memory pool for dentrys */ + struct mem_pool *fd_mem_pool; /* memory pool for fd_t */ + int ctxcount; /* number of slots in inode->ctx */ ++ ++ /* This is required for 'invalidation' when 'nlookup' would be used, ++ specially in case of fuse-bridge */ ++ int32_t (*invalidator_fn)(xlator_t *, inode_t *); ++ xlator_t *invalidator_xl; ++ struct list_head invalidate; /* inodes which are in invalidation queue */ ++ uint32_t invalidate_size; /* count of inodes in invalidation list */ + }; + + +@@ -102,6 +109,7 @@ struct _inode { + struct list_head list; /* active/lru/purge */ + + struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ ++ gf_boolean_t invalidate_sent; /* Set 
it if invalidator_fn is called for inode */ + }; + + +@@ -110,7 +118,14 @@ struct _inode { + #define GFID_STR_PFX_LEN (sizeof (GFID_STR_PFX) - 1) + + inode_table_t * +-inode_table_new (size_t lru_limit, xlator_t *xl); ++inode_table_new(uint32_t lru_limit, xlator_t *xl); ++ ++inode_table_t * ++inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, ++ int32_t (*invalidator_fn)(xlator_t *, inode_t *), ++ xlator_t *invalidator_xl); ++int ++inode_forget_with_unref(inode_t *inode, uint64_t nlookup); + + void + inode_table_destroy_all (glusterfs_ctx_t *ctx); +diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t +new file mode 100644 +index 0000000..9f12116 +--- /dev/null ++++ b/tests/features/fuse-lru-limit.t +@@ -0,0 +1,42 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} ++TEST $CLI volume start $V0 ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++EXPECT "1" get_mount_active_size_value $V0 $M0 ++EXPECT "0" get_mount_lru_size_value $V0 $M0 ++ ++mkdir ${M0}/dir-{1..9} ++for i in {1..9}; do ++ for j in {1..1000}; do ++ echo "Test file" > ${M0}/dir-$i/file-$j; ++ done; ++done ++lc=$(get_mount_lru_size_value $V0 ${M0}) ++# ideally it should be 9000+ ++TEST [ $lc -ge 9000 ] ++ ++TEST umount $M0 ++ ++TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0 ++ ++TEST find $M0 ++lc=$(get_mount_lru_size_value $V0 ${M0}) ++# ideally it should be <1000 ++# Not sure if there are any possibilities of buffer need. ++TEST [ $lc -le 1000 ] ++ ++TEST rm -rf $M0/* ++ ++EXPECT "1" get_mount_active_size_value $V0 $M0 ++EXPECT "0" get_mount_lru_size_value $V0 $M0 ++ ++cleanup +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 8d1e3a0..f3188d6 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -279,29 +279,31 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) + send_fuse_data (this, finh, obj, sizeof (*(obj))) + + +-#if FUSE_KERNEL_MINOR_VERSION >= 11 + static void + fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) + { ++#if FUSE_KERNEL_MINOR_VERSION >= 11 + struct fuse_out_header *fouh = NULL; + struct fuse_notify_inval_entry_out *fnieo = NULL; + fuse_private_t *priv = NULL; + dentry_t *dentry = NULL; ++ dentry_t *tmp = NULL; + inode_t *inode = NULL; + size_t nlen = 0; + fuse_invalidate_node_t *node = NULL; ++ char gfid_str[UUID_CANONICAL_FORM_LEN + 1]; + + priv = this->private; + + if (!priv->reverse_fuse_thread_started) + return; + +- inode = fuse_ino_to_inode(fuse_ino, this); ++ inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) { + return; + } + +- list_for_each_entry (dentry, &inode->dentry_list, inode_list) { ++ list_for_each_entry_safe (dentry, tmp, &inode->dentry_list, inode_list) { + node = GF_CALLOC (1, sizeof (*node), + gf_fuse_mt_invalidate_node_t); + if (node == NULL) +@@ -315,14 +317,31 @@ fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_ENTRY; + +- nlen = strlen (dentry->name); +- fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1; +- fnieo->parent = inode_to_fuse_nodeid (dentry->parent); ++ if (dentry->name) { ++ nlen = strlen (dentry->name); ++ fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1; ++ fnieo->parent = inode_to_fuse_nodeid (dentry->parent); ++ ++ fnieo->namelen = nlen; ++ strcpy (node->inval_buf + 
sizeof (*fouh) + sizeof (*fnieo), ++ dentry->name); ++ } + +- fnieo->namelen = nlen; +- strcpy (node->inval_buf + sizeof (*fouh) + sizeof (*fnieo), +- dentry->name); ++ gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: " ++ "%"PRIu64"/%s (gfid:%s)", fnieo->parent, dentry->name, ++ uuid_utoa(inode->gfid)); + ++ if (dentry->parent) { ++ fuse_log_eh (this, "Invalidated entry %s (parent: %s)" ++ "(gfid: %s)", dentry->name, ++ uuid_utoa (dentry->parent->gfid), ++ uuid_utoa_r(inode->gfid, gfid_str)); ++ } else { ++ fuse_log_eh (this, "Invalidated entry %s(nodeid: %" ++ PRIu64 ") gfid: %s", ++ dentry->name, fnieo->parent, ++ uuid_utoa (inode->gfid)); ++ } + pthread_mutex_lock (&priv->invalidate_mutex); + { + list_add_tail (&node->next, &priv->invalidate_list); +@@ -330,23 +349,10 @@ fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) + } + pthread_mutex_unlock (&priv->invalidate_mutex); + +- gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: " +- "%"PRIu64"/%s", fnieo->parent, dentry->name); +- +- if (dentry->parent) { +- fuse_log_eh (this, "Invalidated entry %s (parent: %s)", +- dentry->name, +- uuid_utoa (dentry->parent->gfid)); +- } else { +- fuse_log_eh (this, "Invalidated entry %s(nodeid: %" PRIu64 ")", +- dentry->name, fnieo->parent); +- } + } +- +- if (inode) +- inode_unref (inode); ++#endif /* KERNEL_VERSION */ ++ return; + } +-#endif + + /* + * Send an inval inode notification to fuse. This causes an invalidation of the +@@ -367,6 +373,10 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + if (!priv->reverse_fuse_thread_started) + return; + ++ inode = (inode_t *)(unsigned long)fuse_ino; ++ if (inode == NULL) ++ return; ++ + node = GF_CALLOC (1, sizeof (*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) + return; +@@ -386,7 +396,11 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + fniio->off = 0; + fniio->len = -1; + +- inode = fuse_ino_to_inode (fuse_ino, this); ++ fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, ++ uuid_utoa(inode->gfid)); ++ gf_log("glusterfs-fuse", GF_LOG_TRACE, ++ "INVALIDATE inode: %" PRIu64 "(gfid:%s)", fuse_ino, ++ uuid_utoa(inode->gfid)); + + pthread_mutex_lock (&priv->invalidate_mutex); + { +@@ -395,24 +409,23 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + } + pthread_mutex_unlock (&priv->invalidate_mutex); + +- gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %" PRIu64, +- fuse_ino); +- +- if (inode) { +- fuse_log_eh (this, "Invalidated inode %" PRIu64 " (gfid: %s)", +- fuse_ino, uuid_utoa (inode->gfid)); +- } else { +- fuse_log_eh (this, "Invalidated inode %" PRIu64, fuse_ino); +- } +- +- if (inode) +- inode_unref (inode); + #else + gf_log ("glusterfs-fuse", GF_LOG_WARNING, +- "fuse_invalidate_inode not implemented on OS X due to missing FUSE notification"); ++ "fuse_invalidate_inode not implemented on this system"); + #endif ++ return; + } + ++#if FUSE_KERNEL_MINOR_VERSION >= 11 ++/* Need this function for the signature (inode_t *, instead of uint64_t) */ ++static int32_t ++fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) ++{ ++ fuse_invalidate_entry(this, (uint64_t)inode); ++ return 0; ++} ++#endif ++ + + int + send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) +@@ -686,11 +699,14 @@ do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup) + { + inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this); + ++ gf_log("fuse", GF_LOG_TRACE, ++ "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", unique, ++ nodeid, 
nlookup, uuid_utoa(fuse_inode->gfid)); ++ + fuse_log_eh(this, "%"PRIu64": FORGET %"PRIu64"/%"PRIu64" gfid: (%s)", + unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); + +- inode_forget(fuse_inode, nlookup); +- inode_unref(fuse_inode); ++ inode_forget_with_unref(fuse_inode, nlookup); + } + + static void +@@ -705,10 +721,6 @@ fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg, + return; + } + +- gf_log ("glusterfs-fuse", GF_LOG_TRACE, +- "%"PRIu64": FORGET %"PRIu64"/%"PRIu64, +- finh->unique, finh->nodeid, ffi->nlookup); +- + do_forget(this, finh->unique, finh->nodeid, ffi->nlookup); + + GF_FREE (finh); +@@ -4940,7 +4952,9 @@ fuse_thread_proc (void *data) + fuse_in_header_t *finh = NULL; + struct iovec iov_in[2]; + void *msg = NULL; +- const size_t msg0_size = sizeof (*finh) + 128; ++ /* we need 512 extra buffer size for BATCH_FORGET fop. By tests, it is ++ found to be reduces 'REALLOC()' in the loop */ ++ const size_t msg0_size = sizeof (*finh) + 512; + fuse_handler_t **fuse_ops = NULL; + struct pollfd pfd[2] = {{0,}}; + +@@ -5283,7 +5297,12 @@ fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph) + goto unlock; + } + +- itable = inode_table_new (0, graph->top); ++#if FUSE_KERNEL_MINOR_VERSION >= 11 ++ itable = inode_table_with_invalidator(priv->lru_limit, graph->top, ++ fuse_inode_invalidate_fn, this); ++#else ++ itable = inode_table_new(0, graph->top); ++#endif + if (!itable) { + ret = -1; + goto unlock; +@@ -5740,6 +5759,8 @@ init (xlator_t *this_xl) + } + } + ++ GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); ++ + GF_OPTION_INIT("event-history", priv->event_history, bool, + cleanup_exit); + +@@ -6061,5 +6082,13 @@ struct volume_options options[] = { + .max = 64, + .description = "Sets fuse reader thread count.", + }, ++ { ++ .key = {"lru-limit"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "131072", ++ .min = 0, ++ .description = "makes glusterfs invalidate kernel inodes after " ++ "reaching this limit (0 means 'unlimited')", ++ }, + { .key = {NULL} }, + }; +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 4ca76e9..4e32a7f 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -144,6 +144,9 @@ struct fuse_private { + gf_boolean_t mount_finished; + gf_boolean_t handle_graph_switch; + pthread_cond_t migrate_cond; ++ ++ /* LRU Limit, if not set, default is 128k for now */ ++ uint32_t lru_limit; + }; + typedef struct fuse_private fuse_private_t; + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 817619e..9a0404f 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -245,6 +245,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); + fi + ++ if [ -n "$lru_limit" ]; then ++ cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); ++ fi ++ + if [ -n "$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi +@@ -467,6 +471,9 @@ with_options() + "gid-timeout") + gid_timeout=$value + ;; ++ "lru-limit") ++ lru_limit=$value ++ ;; + "background-qlen") + bg_qlen=$value + ;; +-- +1.8.3.1 + diff --git a/SOURCES/0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch b/SOURCES/0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch new file mode 100644 index 0000000..5070d8a --- /dev/null +++ b/SOURCES/0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch @@ -0,0 +1,225 @@ 
+From 60bd1dcd283b2d464968092711b658c12a900918 Mon Sep 17 00:00:00 2001 +From: Raghavendra Gowdappa +Date: Mon, 11 Feb 2019 16:42:48 +0530 +Subject: [PATCH 523/529] libglusterfs: rename macros roof and floor to not + conflict with math.h + +mainline: +> Change-Id: I666eeb63ebd000711b3f793b948d4e0c04b1a242 +> Signed-off-by: Raghavendra Gowdappa +> Updates: bz#1644629 +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/21703 + +Change-Id: I666eeb63ebd000711b3f793b948d4e0c04b1a242 +BUG: 1390151 +Signed-off-by: Raghavendra Gowdappa +Reviewed-on: https://code.engineering.redhat.com/gerrit/162678 +Tested-by: RHGS Build Bot +--- + libglusterfs/src/common-utils.h | 4 ++-- + rpc/rpc-transport/socket/src/socket.c | 2 +- + xlators/cluster/stripe/src/stripe.c | 18 +++++++++--------- + xlators/performance/io-cache/src/io-cache.c | 4 ++-- + xlators/performance/io-cache/src/page.c | 4 ++-- + xlators/performance/read-ahead/src/page.c | 4 ++-- + xlators/performance/read-ahead/src/read-ahead.c | 8 ++++---- + xlators/protocol/server/src/server-rpc-fops.c | 2 +- + 8 files changed, 23 insertions(+), 23 deletions(-) + +diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h +index 15a31a3..af2e0fd 100644 +--- a/libglusterfs/src/common-utils.h ++++ b/libglusterfs/src/common-utils.h +@@ -53,8 +53,8 @@ void trap (void); + + #define min(a,b) ((a)<(b)?(a):(b)) + #define max(a,b) ((a)>(b)?(a):(b)) +-#define roof(a,b) ((((a)+(b)-1)/((b)?(b):1))*(b)) +-#define floor(a,b) (((a)/((b)?(b):1))*(b)) ++#define gf_roof(a, b) ((((a) + (b) - 1)/((b) ? (b) : 1)) * (b)) ++#define gf_floor(a, b) (((a) / ((b) ? (b) : 1)) * (b)) + + #define IPv4_ADDR_SIZE 32 + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 34a937f..e28c5cd 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -1646,7 +1646,7 @@ __socket_read_accepted_successful_reply (rpc_transport_t *this) + + /* need to round off to proper roof (%4), as XDR packing pads + the end of opaque object with '0' */ +- size = roof (read_rsp.xdata.xdata_len, 4); ++ size = gf_roof (read_rsp.xdata.xdata_len, 4); + + if (!size) { + frag->call_body.reply.accepted_success_state +diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c +index 6b32f7f..fc809a0 100644 +--- a/xlators/cluster/stripe/src/stripe.c ++++ b/xlators/cluster/stripe/src/stripe.c +@@ -698,10 +698,10 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + * to the size of the previous stripe. + */ + if (i < eof_idx) +- tmp_offset = roof(offset, fctx->stripe_size * ++ tmp_offset = gf_roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) +- tmp_offset = floor(offset, fctx->stripe_size * ++ tmp_offset = gf_floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; +@@ -3067,10 +3067,10 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d + + if (fctx->stripe_coalesce) { + if (i < eof_idx) +- tmp_offset = roof(offset, fctx->stripe_size * ++ tmp_offset = gf_roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) +- tmp_offset = floor(offset, fctx->stripe_size * ++ tmp_offset = gf_floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; +@@ -3476,8 +3476,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, + * the file is in which child node. 
Always '0-' part of + * the file resides in the first child. + */ +- rounded_start = floor (offset, stripe_size); +- rounded_end = roof (offset+size, stripe_size); ++ rounded_start = gf_floor (offset, stripe_size); ++ rounded_end = gf_roof (offset+size, stripe_size); + num_stripe = (rounded_end- rounded_start)/stripe_size; + + local = mem_get0 (this->local_pool); +@@ -3510,7 +3510,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, + goto err; + } + +- frame_size = min (roof (frame_offset+1, stripe_size), ++ frame_size = min (gf_roof (frame_offset+1, stripe_size), + (offset + size)) - frame_offset; + + rlocal->node_index = index - off_index; +@@ -3693,8 +3693,8 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + goto err; + } + +- rounded_start = floor(offset, stripe_size); +- rounded_end = roof(offset + total_size, stripe_size); ++ rounded_start = gf_floor(offset, stripe_size); ++ rounded_end = gf_roof(offset + total_size, stripe_size); + total_chunks = (rounded_end - rounded_start) / stripe_size; + local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies), + gf_stripe_mt_stripe_replies); +diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c +index d7b3b37..5ef77b0 100644 +--- a/xlators/performance/io-cache/src/io-cache.c ++++ b/xlators/performance/io-cache/src/io-cache.c +@@ -953,8 +953,8 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, + local = frame->local; + table = ioc_inode->table; + +- rounded_offset = floor (offset, table->page_size); +- rounded_end = roof (offset + size, table->page_size); ++ rounded_offset = gf_floor (offset, table->page_size); ++ rounded_end = gf_roof (offset + size, table->page_size); + trav_offset = rounded_offset; + + /* once a frame does read, it should be waiting on something */ +diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c +index 50f5e19..832c4ee 100644 +--- a/xlators/performance/io-cache/src/page.c ++++ b/xlators/performance/io-cache/src/page.c +@@ -43,7 +43,7 @@ __ioc_page_get (ioc_inode_t *ioc_inode, off_t offset) + table = ioc_inode->table; + GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out); + +- rounded_offset = floor (offset, table->page_size); ++ rounded_offset = gf_floor (offset, table->page_size); + + page = rbthash_get (ioc_inode->cache.page_table, &rounded_offset, + sizeof (rounded_offset)); +@@ -256,7 +256,7 @@ __ioc_page_create (ioc_inode_t *ioc_inode, off_t offset) + table = ioc_inode->table; + GF_VALIDATE_OR_GOTO ("io-cache", table, out); + +- rounded_offset = floor (offset, table->page_size); ++ rounded_offset = gf_floor (offset, table->page_size); + + newpage = GF_CALLOC (1, sizeof (*newpage), gf_ioc_mt_ioc_newpage_t); + if (newpage == NULL) { +diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c +index 17e346e..8a5ce58 100644 +--- a/xlators/performance/read-ahead/src/page.c ++++ b/xlators/performance/read-ahead/src/page.c +@@ -25,7 +25,7 @@ ra_page_get (ra_file_t *file, off_t offset) + GF_VALIDATE_OR_GOTO ("read-ahead", file, out); + + page = file->pages.next; +- rounded_offset = floor (offset, file->page_size); ++ rounded_offset = gf_floor (offset, file->page_size); + + while (page != &file->pages && page->offset < rounded_offset) + page = page->next; +@@ -48,7 +48,7 @@ ra_page_create (ra_file_t *file, off_t offset) + GF_VALIDATE_OR_GOTO ("read-ahead", file, out); + + page = file->pages.next; +- rounded_offset = floor 
(offset, file->page_size); ++ rounded_offset = gf_floor (offset, file->page_size); + + while (page != &file->pages && page->offset < rounded_offset) + page = page->next; +diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c +index e02ca9f..8adbd53 100644 +--- a/xlators/performance/read-ahead/src/read-ahead.c ++++ b/xlators/performance/read-ahead/src/read-ahead.c +@@ -283,7 +283,7 @@ read_ahead (call_frame_t *frame, ra_file_t *file) + } + + ra_size = file->page_size * file->page_count; +- ra_offset = floor (file->offset, file->page_size); ++ ra_offset = gf_floor (file->offset, file->page_size); + cap = file->size ? file->size : file->offset + ra_size; + + while (ra_offset < min (file->offset + ra_size, cap)) { +@@ -372,8 +372,8 @@ dispatch_requests (call_frame_t *frame, ra_file_t *file) + local = frame->local; + conf = file->conf; + +- rounded_offset = floor (local->offset, file->page_size); +- rounded_end = roof (local->offset + local->size, file->page_size); ++ rounded_offset = gf_floor (local->offset, file->page_size); ++ rounded_end = gf_roof (local->offset + local->size, file->page_size); + + trav_offset = rounded_offset; + +@@ -532,7 +532,7 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + + dispatch_requests (frame, file); + +- flush_region (frame, file, 0, floor (offset, file->page_size), 0); ++ flush_region (frame, file, 0, gf_floor (offset, file->page_size), 0); + + read_ahead (frame, file); + +diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c +index 35d0887..b7bb26a 100644 +--- a/xlators/protocol/server/src/server-rpc-fops.c ++++ b/xlators/protocol/server/src/server-rpc-fops.c +@@ -4123,7 +4123,7 @@ server3_3_writev_vecsizer (int state, ssize_t *readsize, char *base_addr, + + /* need to round off to proper roof (%4), as XDR packing pads + the end of opaque object with '0' */ +- size = roof (write_req.xdata.xdata_len, 4); ++ size = gf_roof (write_req.xdata.xdata_len, 4); + + *readsize = size; + +-- +1.8.3.1 + diff --git a/SOURCES/0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch b/SOURCES/0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch new file mode 100644 index 0000000..46de12e --- /dev/null +++ b/SOURCES/0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch @@ -0,0 +1,266 @@ +From 3de9cc04cdf5a65825cc86c8239734a284775470 Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Wed, 6 Feb 2019 17:30:30 +0530 +Subject: [PATCH 524/529] program/GF-DUMP: Shield ping processing from traffic + to Glusterfs Program + +Since poller thread bears the brunt of execution till the request is +handed over to io-threads, poller thread experiencies lock +contention(s) in the control flow till io-threads, which slows it +down. This delay invariably affects reading ping requests from network +and responding to them, resulting in increased ping latencies, which +sometimes results in a ping-timer-expiry on client leading to +disconnect of transport. So, this patch aims to free up poller thread +from executing code of Glusterfs Program. We do this by making + +* Glusterfs Program registering itself asking rpcsvc to execute its + actors in its own threads. +* GF-DUMP Program registering itself asking rpcsvc to _NOT_ execute + its actors in its own threads. Otherwise program's ownthreads become + bottleneck in processing ping traffic. 
This means that poller thread + reads a ping packet, invokes its actor and hands the response msg to + transport queue. + +Change-Id: I526268c10bdd5ef93f322a4f95385137550a6a49 +Signed-off-by: Raghavendra G +BUG: 1390151 +Reviewed-on: https://review.gluster.org/17105 +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Smoke: Gluster Build System +Reviewed-by: Amar Tumballi +Reviewed-by: Jeff Darcy +(cherry picked from commit 2e72b24707f1886833db0b09e48b3f48b8d68d37) +Reviewed-on: https://code.engineering.redhat.com/gerrit/162426 +Tested-by: RHGS Build Bot +--- + rpc/rpc-lib/src/rpcsvc.c | 90 ++++++++++++++++++++++++++- + rpc/rpc-lib/src/rpcsvc.h | 18 +++++- + xlators/protocol/server/src/server-helpers.c | 4 -- + xlators/protocol/server/src/server-rpc-fops.c | 1 + + 4 files changed, 106 insertions(+), 7 deletions(-) + +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 695e9fb..faa1956 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -304,6 +304,7 @@ rpcsvc_program_actor (rpcsvc_request_t *req) + goto err; + } + ++ req->ownthread = program->ownthread; + req->synctask = program->synctask; + + err = SUCCESS; +@@ -411,6 +412,7 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans, + req->progver = rpc_call_progver (callmsg); + req->procnum = rpc_call_progproc (callmsg); + req->trans = rpc_transport_ref (trans); ++ gf_client_ref (req->trans->xl_private); + req->count = msg->count; + req->msg[0] = progmsg; + req->iobref = iobref_ref (msg->iobref); +@@ -426,6 +428,7 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans, + req->trans_private = msg->private; + + INIT_LIST_HEAD (&req->txlist); ++ INIT_LIST_HEAD (&req->request_list); + req->payloadsize = 0; + + /* By this time, the data bytes for the auth scheme would have already +@@ -576,7 +579,7 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + rpcsvc_request_t *req = NULL; + int ret = -1; + uint16_t port = 0; +- gf_boolean_t is_unix = _gf_false; ++ gf_boolean_t is_unix = _gf_false, empty = _gf_false; + gf_boolean_t unprivileged = _gf_false; + drc_cached_op_t *reply = NULL; + rpcsvc_drc_globals_t *drc = NULL; +@@ -692,6 +695,20 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + (synctask_fn_t) actor_fn, + rpcsvc_check_and_reply_error, NULL, + req); ++ } else if (req->ownthread) { ++ pthread_mutex_lock (&req->prog->queue_lock); ++ { ++ empty = list_empty (&req->prog->request_queue); ++ ++ list_add_tail (&req->request_list, ++ &req->prog->request_queue); ++ ++ if (empty) ++ pthread_cond_signal (&req->prog->queue_cond); ++ } ++ pthread_mutex_unlock (&req->prog->queue_lock); ++ ++ ret = 0; + } else { + ret = actor_fn (req); + } +@@ -1572,6 +1589,12 @@ rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program) + " Ver: %d, Port: %d", prog->progname, prog->prognum, + prog->progver, prog->progport); + ++ if (prog->ownthread) { ++ prog->alive = _gf_false; ++ ret = 0; ++ goto out; ++ } ++ + pthread_mutex_lock (&svc->rpclock); + { + list_del_init (&prog->program); +@@ -1838,6 +1861,56 @@ out: + return ret; + } + ++void * ++rpcsvc_request_handler (void *arg) ++{ ++ rpcsvc_program_t *program = arg; ++ rpcsvc_request_t *req = NULL; ++ rpcsvc_actor_t *actor = NULL; ++ gf_boolean_t done = _gf_false; ++ int ret = 0; ++ ++ if (!program) ++ return NULL; ++ ++ while (1) { ++ pthread_mutex_lock (&program->queue_lock); ++ { ++ if (!program->alive ++ && list_empty (&program->request_queue)) { ++ done = 1; ++ goto unlock; ++ } ++ ++ 
while (list_empty (&program->request_queue)) ++ pthread_cond_wait (&program->queue_cond, ++ &program->queue_lock); ++ ++ req = list_entry (program->request_queue.next, ++ typeof (*req), request_list); ++ ++ list_del_init (&req->request_list); ++ } ++ unlock: ++ pthread_mutex_unlock (&program->queue_lock); ++ ++ if (done) ++ break; ++ ++ THIS = req->svc->xl; ++ ++ actor = rpcsvc_program_actor (req); ++ ++ ret = actor->actor (req); ++ ++ if (ret != 0) { ++ rpcsvc_check_and_reply_error (ret, NULL, req); ++ } ++ } ++ ++ return NULL; ++} ++ + int + rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + { +@@ -1878,6 +1951,21 @@ rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + memcpy (newprog, program, sizeof (*program)); + + INIT_LIST_HEAD (&newprog->program); ++ INIT_LIST_HEAD (&newprog->request_queue); ++ pthread_mutex_init (&newprog->queue_lock, NULL); ++ pthread_cond_init (&newprog->queue_cond, NULL); ++ ++ newprog->alive = _gf_true; ++ ++ /* make sure synctask gets priority over ownthread */ ++ if (newprog->synctask) ++ newprog->ownthread = _gf_false; ++ ++ if (newprog->ownthread) { ++ gf_thread_create (&newprog->thread, NULL, ++ rpcsvc_request_handler, ++ newprog, "reqhnd"); ++ } + + pthread_mutex_lock (&svc->rpclock); + { +diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h +index d3aafac..58c0055 100644 +--- a/rpc/rpc-lib/src/rpcsvc.h ++++ b/rpc/rpc-lib/src/rpcsvc.h +@@ -233,7 +233,9 @@ struct rpcsvc_request { + */ + rpcsvc_auth_data_t verf; + +- /* Execute this request's actor function as a synctask?*/ ++ /* Execute this request's actor function in ownthread of program?*/ ++ gf_boolean_t ownthread; ++ + gf_boolean_t synctask; + /* Container for a RPC program wanting to store a temp + * request-specific item. +@@ -245,6 +247,10 @@ struct rpcsvc_request { + + /* pointer to cached reply for use in DRC */ + drc_cached_op_t *reply; ++ ++ /* request queue in rpcsvc */ ++ struct list_head request_list; ++ + }; + + #define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog)) +@@ -395,10 +401,18 @@ struct rpcsvc_program { + */ + int min_auth; + +- /* Execute actor function as a synctask? */ ++ /* Execute actor function in program's own thread? 
*/ ++ /* This will reduce the workload on poller threads */ ++ gf_boolean_t ownthread; ++ gf_boolean_t alive; ++ + gf_boolean_t synctask; + /* list member to link to list of registered services with rpcsvc */ + struct list_head program; ++ struct list_head request_queue; ++ pthread_mutex_t queue_lock; ++ pthread_cond_t queue_cond; ++ pthread_t thread; + }; + + typedef struct rpcsvc_cbk_program { +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index 30045ef..7cc3d15 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -557,10 +557,6 @@ get_frame_from_request (rpcsvc_request_t *req) + } + } + +- /* Add a ref for this fop */ +- if (client) +- gf_client_ref (client); +- + frame->root->uid = req->uid; + frame->root->gid = req->gid; + frame->root->pid = req->pid; +diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c +index b7bb26a..db4242d 100644 +--- a/xlators/protocol/server/src/server-rpc-fops.c ++++ b/xlators/protocol/server/src/server-rpc-fops.c +@@ -6143,4 +6143,5 @@ struct rpcsvc_program glusterfs3_3_fop_prog = { + .progver = GLUSTER_FOP_VERSION, + .numactors = GLUSTER_FOP_PROCCNT, + .actors = glusterfs3_3_fop_actors, ++ .ownthread = _gf_true, + }; +-- +1.8.3.1 + diff --git a/SOURCES/0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch b/SOURCES/0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch new file mode 100644 index 0000000..a0dc399 --- /dev/null +++ b/SOURCES/0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch @@ -0,0 +1,1211 @@ +From 667e92a8dd0a21902cef39a59bc6c6b77d1f3c26 Mon Sep 17 00:00:00 2001 +From: Raghavendra Gowdappa +Date: Mon, 11 Feb 2019 12:32:52 +0530 +Subject: [PATCH 525/529] rpcsvc: provide each request handler thread its own + queue + +A single global per program queue is contended by all request handler +threads and event threads. This can lead to high contention. So, +reduce the contention by providing each request handler thread its own +private queue. + +Thanks to "Manoj Pillai" for the idea of pairing a +single queue with a fixed request-handler-thread and event-thread, +which brought down the performance regression due to overhead of +queuing significantly. + +Thanks to "Xavi Hernandez" for discussion on +how to communicate the event-thread death to request-handler-thread. + +Thanks to "Karan Sandha" for voluntarily running +the perf benchmarks to qualify that performance regression introduced +by ping-timer-fixes is fixed with this patch and patiently running +many iterations of regression tests while RCAing the issue. + +Thanks to "Milind Changire" for patiently running +the many iterations of perf benchmarking tests while RCAing the +regression caused by ping-timer-expiry fixes. 
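
The queue-per-thread pairing described above can be hard to picture from the diff alone. The following stand-alone sketch (illustrative names only, not the rpcsvc code this patch adds) shows the core idea: each producer thread latches onto one private queue through a pthread TLS key, so enqueueing contends only on that queue's lock instead of on a single global lock shared by every event thread.

/* Illustrative sketch only: one private queue per handler thread,
 * selected once per producer thread via a pthread TLS key. */
#include <pthread.h>
#include <stdio.h>

#define NUM_QUEUES 4

struct work_queue {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        int             pending;   /* stand-in for a request list */
};

static struct work_queue queues[NUM_QUEUES];
static pthread_key_t queue_key;   /* remembers this thread's queue */
static int next_queue;            /* naive round-robin assignment */

static struct work_queue *
my_queue (void)
{
        void *val = pthread_getspecific (queue_key);

        if (!val) {
                int idx = __sync_fetch_and_add (&next_queue, 1) % NUM_QUEUES;
                val = &queues[idx];
                pthread_setspecific (queue_key, val);
        }
        return val;
}

static void
enqueue_request (void)
{
        struct work_queue *q = my_queue ();

        pthread_mutex_lock (&q->lock);
        q->pending++;
        pthread_cond_signal (&q->cond);   /* wake only this queue's worker */
        pthread_mutex_unlock (&q->lock);
}

int
main (void)
{
        int i;

        pthread_key_create (&queue_key, NULL);
        for (i = 0; i < NUM_QUEUES; i++) {
                pthread_mutex_init (&queues[i].lock, NULL);
                pthread_cond_init (&queues[i].cond, NULL);
        }

        enqueue_request ();
        printf ("queued one request on this thread's private queue\n");
        return 0;
}

In the actual patch the queue index is additionally paired with the event thread that read the request, so that a dying event thread can notify exactly one request-handler thread.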
+ +Change-Id: I578c3fc67713f4234bd3abbec5d3fbba19059ea5 +BUG: 1390151 +Signed-off-by: Raghavendra Gowdappa +(cherry picked from commit 95e380eca19b9f0d03a53429535f15556e5724ad) +Reviewed-on: https://code.engineering.redhat.com/gerrit/162427 +Tested-by: RHGS Build Bot +--- + cli/src/cli-rl.c | 4 +- + libglusterfs/src/event-epoll.c | 156 +++++++++--- + libglusterfs/src/event-poll.c | 14 +- + libglusterfs/src/event.c | 11 +- + libglusterfs/src/event.h | 19 +- + rpc/rpc-lib/src/rpc-clnt.c | 6 + + rpc/rpc-lib/src/rpc-transport.c | 4 + + rpc/rpc-lib/src/rpc-transport.h | 3 + + rpc/rpc-lib/src/rpcsvc.c | 339 +++++++++++++++++++++++---- + rpc/rpc-lib/src/rpcsvc.h | 32 ++- + rpc/rpc-transport/socket/src/socket.c | 29 ++- + xlators/protocol/server/src/server-helpers.c | 4 + + xlators/protocol/server/src/server.c | 3 + + 13 files changed, 530 insertions(+), 94 deletions(-) + +diff --git a/cli/src/cli-rl.c b/cli/src/cli-rl.c +index 4745cf4..cffd0a8 100644 +--- a/cli/src/cli-rl.c ++++ b/cli/src/cli-rl.c +@@ -109,7 +109,7 @@ cli_rl_process_line (char *line) + + int + cli_rl_stdin (int fd, int idx, int gen, void *data, +- int poll_out, int poll_in, int poll_err) ++ int poll_out, int poll_in, int poll_err, char event_thread_died) + { + struct cli_state *state = NULL; + +@@ -394,7 +394,7 @@ cli_rl_enable (struct cli_state *state) + } + + ret = event_register (state->ctx->event_pool, 0, cli_rl_stdin, state, +- 1, 0); ++ 1, 0, 0); + if (ret == -1) + goto out; + +diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 7fc53ff..310bce3 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -32,6 +32,7 @@ struct event_slot_epoll { + int fd; + int events; + int gen; ++ int idx; + int ref; + int do_close; + int in_handler; +@@ -39,6 +40,7 @@ struct event_slot_epoll { + void *data; + event_handler_t handler; + gf_lock_t lock; ++ struct list_head poller_death; + }; + + struct event_thread_data { +@@ -60,6 +62,7 @@ __event_newtable (struct event_pool *event_pool, int table_idx) + for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { + table[i].fd = -1; + LOCK_INIT (&table[i].lock); ++ INIT_LIST_HEAD(&table[i].poller_death); + } + + event_pool->ereg[table_idx] = table; +@@ -70,7 +73,8 @@ __event_newtable (struct event_pool *event_pool, int table_idx) + + + static int +-__event_slot_alloc (struct event_pool *event_pool, int fd) ++__event_slot_alloc (struct event_pool *event_pool, int fd, ++ char notify_poller_death) + { + int i = 0; + int table_idx = -1; +@@ -105,34 +109,42 @@ __event_slot_alloc (struct event_pool *event_pool, int fd) + + table_idx = i; + +- for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { +- if (table[i].fd == -1) { +- /* wipe everything except bump the generation */ +- gen = table[i].gen; +- memset (&table[i], 0, sizeof (table[i])); +- table[i].gen = gen + 1; +- +- LOCK_INIT (&table[i].lock); ++ for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { ++ if (table[i].fd == -1) { ++ /* wipe everything except bump the generation */ ++ gen = table[i].gen; ++ memset (&table[i], 0, sizeof (table[i])); ++ table[i].gen = gen + 1; ++ ++ LOCK_INIT (&table[i].lock); ++ INIT_LIST_HEAD(&table[i].poller_death); ++ ++ table[i].fd = fd; ++ if (notify_poller_death) { ++ table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i; ++ list_add_tail(&table[i].poller_death, ++ &event_pool->poller_death); ++ } + +- table[i].fd = fd; +- event_pool->slots_used[table_idx]++; ++ event_pool->slots_used[table_idx]++; + +- break; +- } +- } ++ break; ++ } ++ } + + return table_idx * EVENT_EPOLL_SLOTS + i; + } + + 
+ static int +-event_slot_alloc (struct event_pool *event_pool, int fd) ++event_slot_alloc (struct event_pool *event_pool, int fd, ++ char notify_poller_death) + { + int idx = -1; + + pthread_mutex_lock (&event_pool->mutex); + { +- idx = __event_slot_alloc (event_pool, fd); ++ idx = __event_slot_alloc (event_pool, fd, notify_poller_death); + } + pthread_mutex_unlock (&event_pool->mutex); + +@@ -162,6 +174,7 @@ __event_slot_dealloc (struct event_pool *event_pool, int idx) + slot->fd = -1; + slot->handled_error = 0; + slot->in_handler = 0; ++ list_del_init(&slot->poller_death); + event_pool->slots_used[table_idx]--; + + return; +@@ -180,6 +193,23 @@ event_slot_dealloc (struct event_pool *event_pool, int idx) + return; + } + ++static int ++event_slot_ref(struct event_slot_epoll *slot) ++{ ++ int ref; ++ ++ if (!slot) ++ return -1; ++ ++ LOCK (&slot->lock); ++ { ++ slot->ref++; ++ ref = slot->ref; ++ } ++ UNLOCK (&slot->lock); ++ ++ return ref; ++} + + static struct event_slot_epoll * + event_slot_get (struct event_pool *event_pool, int idx) +@@ -198,15 +228,44 @@ event_slot_get (struct event_pool *event_pool, int idx) + + slot = &table[offset]; + ++ event_slot_ref (slot); ++ return slot; ++} ++ ++static void ++__event_slot_unref(struct event_pool *event_pool, struct event_slot_epoll *slot, ++ int idx) ++{ ++ int ref = -1; ++ int fd = -1; ++ int do_close = 0; ++ + LOCK (&slot->lock); + { +- slot->ref++; ++ --(slot->ref); ++ ref = slot->ref; + } + UNLOCK (&slot->lock); + +- return slot; +-} ++ if (ref) ++ /* slot still alive */ ++ goto done; ++ ++ LOCK(&slot->lock); ++ { ++ fd = slot->fd; ++ do_close = slot->do_close; ++ slot->do_close = 0; ++ } ++ UNLOCK(&slot->lock); ++ ++ __event_slot_dealloc(event_pool, idx); + ++ if (do_close) ++ sys_close(fd); ++done: ++ return; ++} + + static void + event_slot_unref (struct event_pool *event_pool, struct event_slot_epoll *slot, +@@ -264,7 +323,7 @@ event_pool_new_epoll (int count, int eventthreadcount) + event_pool->fd = epfd; + + event_pool->count = count; +- ++ INIT_LIST_HEAD(&event_pool->poller_death); + event_pool->eventthreadcount = eventthreadcount; + event_pool->auto_thread_count = 0; + +@@ -315,7 +374,8 @@ __slot_update_events (struct event_slot_epoll *slot, int poll_in, int poll_out) + int + event_register_epoll (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out) ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death) + { + int idx = -1; + int ret = -1; +@@ -345,7 +405,7 @@ event_register_epoll (struct event_pool *event_pool, int fd, + if (destroy == 1) + goto out; + +- idx = event_slot_alloc (event_pool, fd); ++ idx = event_slot_alloc (event_pool, fd, notify_poller_death); + if (idx == -1) { + gf_msg ("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, + "could not find slot for fd=%d", fd); +@@ -583,7 +643,7 @@ pre_unlock: + ret = handler (fd, idx, gen, data, + (event->events & (EPOLLIN|EPOLLPRI)), + (event->events & (EPOLLOUT)), +- (event->events & (EPOLLERR|EPOLLHUP))); ++ (event->events & (EPOLLERR|EPOLLHUP)), 0); + } + out: + event_slot_unref (event_pool, slot, idx); +@@ -600,7 +660,10 @@ event_dispatch_epoll_worker (void *data) + struct event_thread_data *ev_data = data; + struct event_pool *event_pool; + int myindex = -1; +- int timetodie = 0; ++ int timetodie = 0, gen = 0; ++ struct list_head poller_death_notify; ++ struct event_slot_epoll *slot = NULL, *tmp = NULL; ++ + + GF_VALIDATE_OR_GOTO ("event", ev_data, out); + +@@ -610,7 +673,7 @@ event_dispatch_epoll_worker 
(void *data) + GF_VALIDATE_OR_GOTO ("event", event_pool, out); + + gf_msg ("epoll", GF_LOG_INFO, 0, LG_MSG_STARTED_EPOLL_THREAD, "Started" +- " thread with index %d", myindex); ++ " thread with index %d", myindex - 1); + + pthread_mutex_lock (&event_pool->mutex); + { +@@ -627,21 +690,58 @@ event_dispatch_epoll_worker (void *data) + * reconfigured always */ + pthread_mutex_lock (&event_pool->mutex); + { +- if (event_pool->eventthreadcount < +- myindex) { ++ if (event_pool->eventthreadcount < myindex) { ++ while (event_pool->poller_death_sliced) { ++ pthread_cond_wait( ++ &event_pool->cond, ++ &event_pool->mutex); ++ } ++ ++ INIT_LIST_HEAD(&poller_death_notify); ++ + /* if found true in critical section, + * die */ + event_pool->pollers[myindex - 1] = 0; + event_pool->activethreadcount--; + timetodie = 1; ++ gen = ++event_pool->poller_gen; ++ list_for_each_entry(slot, &event_pool->poller_death, ++ poller_death) ++ { ++ event_slot_ref(slot); ++ } ++ ++ list_splice_init(&event_pool->poller_death, ++ &poller_death_notify); ++ event_pool->poller_death_sliced = 1; ++ + pthread_cond_broadcast (&event_pool->cond); + } + } + pthread_mutex_unlock (&event_pool->mutex); + if (timetodie) { ++ list_for_each_entry(slot, &poller_death_notify, poller_death) ++ { ++ slot->handler(slot->fd, 0, gen, slot->data, 0, 0, 0, 1); ++ } ++ ++ pthread_mutex_lock(&event_pool->mutex); ++ { ++ list_for_each_entry_safe(slot, tmp, &poller_death_notify, poller_death) ++ { ++ __event_slot_unref(event_pool, slot, slot->idx); ++ } ++ ++ list_splice(&poller_death_notify, ++ &event_pool->poller_death); ++ event_pool->poller_death_sliced = 0; ++ pthread_cond_broadcast(&event_pool->cond); ++ } ++ pthread_mutex_unlock(&event_pool->mutex); ++ + gf_msg ("epoll", GF_LOG_INFO, 0, + LG_MSG_EXITED_EPOLL_THREAD, "Exited " +- "thread with index %d", myindex); ++ "thread with index %d", myindex - 1); + goto out; + } + } +diff --git a/libglusterfs/src/event-poll.c b/libglusterfs/src/event-poll.c +index 3bffc47..ca00071 100644 +--- a/libglusterfs/src/event-poll.c ++++ b/libglusterfs/src/event-poll.c +@@ -36,12 +36,14 @@ struct event_slot_poll { + static int + event_register_poll (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out); ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death); + + + static int + __flush_fd (int fd, int idx, int gen, void *data, +- int poll_in, int poll_out, int poll_err) ++ int poll_in, int poll_out, int poll_err, ++ char notify_poller_death) + { + char buf[64]; + int ret = -1; +@@ -153,7 +155,7 @@ event_pool_new_poll (int count, int eventthreadcount) + } + + ret = event_register_poll (event_pool, event_pool->breaker[0], +- __flush_fd, NULL, 1, 0); ++ __flush_fd, NULL, 1, 0, 0); + if (ret == -1) { + gf_msg ("poll", GF_LOG_ERROR, 0, LG_MSG_REGISTER_PIPE_FAILED, + "could not register pipe fd with poll event loop"); +@@ -180,7 +182,8 @@ event_pool_new_poll (int count, int eventthreadcount) + static int + event_register_poll (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out) ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death) + { + int idx = -1; + +@@ -389,7 +392,8 @@ unlock: + ret = handler (ufds[i].fd, idx, 0, data, + (ufds[i].revents & (POLLIN|POLLPRI)), + (ufds[i].revents & (POLLOUT)), +- (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL))); ++ (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL)), ++ 0); + + return ret; + } +diff --git a/libglusterfs/src/event.c 
b/libglusterfs/src/event.c +index bba6f84..8463c19 100644 +--- a/libglusterfs/src/event.c ++++ b/libglusterfs/src/event.c +@@ -58,14 +58,16 @@ event_pool_new (int count, int eventthreadcount) + int + event_register (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out) ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death) + { + int ret = -1; + + GF_VALIDATE_OR_GOTO ("event", event_pool, out); + + ret = event_pool->ops->event_register (event_pool, fd, handler, data, +- poll_in, poll_out); ++ poll_in, poll_out, ++ notify_poller_death); + out: + return ret; + } +@@ -170,7 +172,8 @@ out: + + int + poller_destroy_handler (int fd, int idx, int gen, void *data, +- int poll_out, int poll_in, int poll_err) ++ int poll_out, int poll_in, int poll_err, ++ char event_thread_exit) + { + struct event_destroy_data *destroy = NULL; + int readfd = -1, ret = -1; +@@ -239,7 +242,7 @@ event_dispatch_destroy (struct event_pool *event_pool) + /* From the main thread register an event on the pipe fd[0], + */ + idx = event_register (event_pool, fd[0], poller_destroy_handler, +- &data, 1, 0); ++ &data, 1, 0, 0); + if (idx < 0) + goto out; + +diff --git a/libglusterfs/src/event.h b/libglusterfs/src/event.h +index c60b14a..875cd7d 100644 +--- a/libglusterfs/src/event.h ++++ b/libglusterfs/src/event.h +@@ -12,6 +12,7 @@ + #define _EVENT_H_ + + #include ++#include "list.h" + + struct event_pool; + struct event_ops; +@@ -24,7 +25,8 @@ struct event_data { + + + typedef int (*event_handler_t) (int fd, int idx, int gen, void *data, +- int poll_in, int poll_out, int poll_err); ++ int poll_in, int poll_out, int poll_err, ++ char event_thread_exit); + + #define EVENT_EPOLL_TABLES 1024 + #define EVENT_EPOLL_SLOTS 1024 +@@ -41,6 +43,13 @@ struct event_pool { + struct event_slot_epoll *ereg[EVENT_EPOLL_TABLES]; + int slots_used[EVENT_EPOLL_TABLES]; + ++ struct list_head poller_death; ++ int poller_death_sliced; /* track whether the list of fds interested ++ * poller_death is sliced. If yes, new thread ++ * death notification has to wait till the ++ * list is added back ++ */ ++ int poller_gen; + int used; + int changed; + +@@ -54,7 +63,7 @@ struct event_pool { + * epoll. */ + int eventthreadcount; /* number of event threads to execute. 
*/ + pthread_t pollers[EVENT_MAX_THREADS]; /* poller thread_id store, +- * and live status */ ++ * and live status */ + int destroy; + int activethreadcount; + +@@ -83,7 +92,8 @@ struct event_ops { + + int (*event_register) (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out); ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death); + + int (*event_select_on) (struct event_pool *event_pool, int fd, int idx, + int poll_in, int poll_out); +@@ -107,7 +117,8 @@ int event_select_on (struct event_pool *event_pool, int fd, int idx, + int poll_in, int poll_out); + int event_register (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out); ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death); + int event_unregister (struct event_pool *event_pool, int fd, int idx); + int event_unregister_close (struct event_pool *event_pool, int fd, int idx); + int event_dispatch (struct event_pool *event_pool); +diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c +index fd7e3ec..fe5e3fd 100644 +--- a/rpc/rpc-lib/src/rpc-clnt.c ++++ b/rpc/rpc-lib/src/rpc-clnt.c +@@ -1013,6 +1013,12 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, + */ + ret = 0; + break; ++ ++ case RPC_TRANSPORT_EVENT_THREAD_DIED: ++ /* only meaningful on a server, no need of handling this event on a ++ * client */ ++ ret = 0; ++ break; + } + + out: +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index b737ff2..db02338 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -294,6 +294,10 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name) + goto fail; + } + ++ if (dict_get(options, "notify-poller-death")) { ++ trans->notify_poller_death = 1; ++ } ++ + gf_log ("rpc-transport", GF_LOG_DEBUG, + "attempt to load file %s", name); + +diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h +index c97f98d..cf77c9d 100644 +--- a/rpc/rpc-lib/src/rpc-transport.h ++++ b/rpc/rpc-lib/src/rpc-transport.h +@@ -99,6 +99,7 @@ typedef enum { + RPC_TRANSPORT_MSG_RECEIVED, /* Complete rpc msg has been read */ + RPC_TRANSPORT_CONNECT, /* client is connected to server */ + RPC_TRANSPORT_MSG_SENT, ++ RPC_TRANSPORT_EVENT_THREAD_DIED /* event-thread has died */ + } rpc_transport_event_t; + + struct rpc_transport_msg { +@@ -218,6 +219,8 @@ struct rpc_transport { + */ + gf_boolean_t connect_failed; + gf_atomic_t disconnect_progress; ++ char notify_poller_death; ++ char poller_death_accept; + }; + + struct rpc_transport_ops { +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index faa1956..c769463 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -8,6 +8,7 @@ + cases as published by the Free Software Foundation. 
+ */ + ++#include + #include "rpcsvc.h" + #include "rpc-transport.h" + #include "dict.h" +@@ -56,9 +57,76 @@ int + rpcsvc_notify (rpc_transport_t *trans, void *mydata, + rpc_transport_event_t event, void *data, ...); + ++void * ++rpcsvc_request_handler(void *arg); ++ + static int + rpcsvc_match_subnet_v4 (const char *addrtok, const char *ipaddr); + ++void ++rpcsvc_toggle_queue_status(rpcsvc_program_t *prog, ++ rpcsvc_request_queue_t *queue, char status[]) ++{ ++ int queue_index = 0, status_index = 0, set_bit = 0; ++ ++ if (queue != &prog->request_queue[0]) { ++ queue_index = (queue - &prog->request_queue[0]); ++ } ++ ++ status_index = queue_index / 8; ++ set_bit = queue_index % 8; ++ ++ status[status_index] ^= (1 << set_bit); ++ ++ return; ++} ++ ++static int ++get_rightmost_set_bit(int n) ++{ ++ return log2(n & -n); ++} ++ ++int ++rpcsvc_get_free_queue_index(rpcsvc_program_t *prog) ++{ ++ int queue_index = 0, max_index = 0, i = 0; ++ unsigned int right_most_unset_bit = 0; ++ ++ right_most_unset_bit = 8; ++ ++ max_index = gf_roof(EVENT_MAX_THREADS, 8) / 8; ++ for (i = 0; i < max_index; i++) { ++ if (prog->request_queue_status[i] == 0) { ++ right_most_unset_bit = 0; ++ break; ++ } else { ++ right_most_unset_bit = get_rightmost_set_bit( ++ ~prog->request_queue_status[i]); ++ if (right_most_unset_bit < 8) { ++ break; ++ } ++ } ++ } ++ ++ if (right_most_unset_bit > 7) { ++ queue_index = -1; ++ } else { ++ queue_index = i * 8; ++ queue_index += right_most_unset_bit; ++ ++ if (queue_index > EVENT_MAX_THREADS) { ++ queue_index = -1; ++ } ++ } ++ ++ if (queue_index != -1) { ++ prog->request_queue_status[i] |= (0x1 << right_most_unset_bit); ++ } ++ ++ return queue_index; ++} ++ + rpcsvc_notify_wrapper_t * + rpcsvc_notify_wrapper_alloc (void) + { +@@ -412,7 +480,6 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans, + req->progver = rpc_call_progver (callmsg); + req->procnum = rpc_call_progproc (callmsg); + req->trans = rpc_transport_ref (trans); +- gf_client_ref (req->trans->xl_private); + req->count = msg->count; + req->msg[0] = progmsg; + req->iobref = iobref_ref (msg->iobref); +@@ -570,6 +637,73 @@ rpcsvc_check_and_reply_error (int ret, call_frame_t *frame, void *opaque) + return 0; + } + ++void ++rpcsvc_queue_event_thread_death(rpcsvc_t *svc, rpcsvc_program_t *prog, int gen) ++{ ++ rpcsvc_request_queue_t *queue = NULL; ++ int num = 0; ++ void *value = NULL; ++ rpcsvc_request_t *req = NULL; ++ char empty = 0; ++ ++ value = pthread_getspecific(prog->req_queue_key); ++ if (value == NULL) { ++ return; ++ } ++ ++ num = ((unsigned long)value) - 1; ++ ++ queue = &prog->request_queue[num]; ++ ++ if (queue->gen == gen) { ++ /* duplicate event */ ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "not queuing duplicate event thread death. 
" ++ "queue %d program %s", ++ num, prog->progname); ++ return; ++ } ++ ++ rpcsvc_alloc_request(svc, req); ++ req->prognum = RPCSVC_INFRA_PROGRAM; ++ req->procnum = RPCSVC_PROC_EVENT_THREAD_DEATH; ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "queuing event thread death request to queue %d of program %s", num, ++ prog->progname); ++ ++ pthread_mutex_lock(&queue->queue_lock); ++ { ++ empty = list_empty(&queue->request_queue); ++ ++ list_add_tail(&req->request_list, &queue->request_queue); ++ queue->gen = gen; ++ ++ if (empty && queue->waiting) ++ pthread_cond_signal(&queue->queue_cond); ++ } ++ pthread_mutex_unlock(&queue->queue_lock); ++ ++ return; ++} ++ ++int ++rpcsvc_handle_event_thread_death(rpcsvc_t *svc, rpc_transport_t *trans, int gen) ++{ ++ rpcsvc_program_t *prog = NULL; ++ ++ pthread_mutex_lock (&svc->rpclock); ++ { ++ list_for_each_entry(prog, &svc->programs, program) ++ { ++ if (prog->ownthread) ++ rpcsvc_queue_event_thread_death(svc, prog, gen); ++ } ++ } ++ pthread_mutex_unlock (&svc->rpclock); ++ ++ return 0; ++} ++ + int + rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + rpc_transport_pollin_t *msg) +@@ -581,8 +715,12 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + uint16_t port = 0; + gf_boolean_t is_unix = _gf_false, empty = _gf_false; + gf_boolean_t unprivileged = _gf_false; ++ gf_boolean_t spawn_request_handler = _gf_false; + drc_cached_op_t *reply = NULL; + rpcsvc_drc_globals_t *drc = NULL; ++ rpcsvc_request_queue_t *queue = NULL; ++ long num = 0; ++ void *value = NULL; + + if (!trans || !svc) + return -1; +@@ -696,20 +834,83 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + rpcsvc_check_and_reply_error, NULL, + req); + } else if (req->ownthread) { +- pthread_mutex_lock (&req->prog->queue_lock); ++ value = pthread_getspecific(req->prog->req_queue_key); ++ if (value == NULL) { ++ pthread_mutex_lock(&req->prog->thr_lock); ++ { ++ num = rpcsvc_get_free_queue_index(req->prog); ++ if (num != -1) { ++ num++; ++ value = (void *)num; ++ ret = pthread_setspecific(req->prog->req_queue_key, ++ value); ++ if (ret < 0) { ++ gf_log(GF_RPCSVC, GF_LOG_WARNING, ++ "setting request queue in TLS failed"); ++ rpcsvc_toggle_queue_status( ++ req->prog, &req->prog->request_queue[num - 1], ++ req->prog->request_queue_status); ++ num = -1; ++ } else { ++ spawn_request_handler = 1; ++ } ++ } ++ } ++ pthread_mutex_unlock(&req->prog->thr_lock); ++ } ++ ++ if (num == -1) ++ goto noqueue; ++ ++ num = ((unsigned long)value) - 1; ++ ++ queue = &req->prog->request_queue[num]; ++ ++ if (spawn_request_handler) { ++ ret = gf_thread_create(&queue->thread, NULL, ++ rpcsvc_request_handler, queue, ++ "rpcrqhnd"); ++ if (!ret) { ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "spawned a request handler " ++ "thread for queue %d", ++ (int)num); ++ ++ req->prog->threadcount++; ++ } else { ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "spawning a request handler " ++ "thread for queue %d failed", ++ (int)num); ++ ret = pthread_setspecific(req->prog->req_queue_key, 0); ++ if (ret < 0) { ++ gf_log(GF_RPCSVC, GF_LOG_WARNING, ++ "resetting request " ++ "queue in TLS failed"); ++ } ++ ++ rpcsvc_toggle_queue_status( ++ req->prog, &req->prog->request_queue[num - 1], ++ req->prog->request_queue_status); ++ ++ goto noqueue; ++ } ++ } ++ ++ pthread_mutex_lock(&queue->queue_lock); + { +- empty = list_empty (&req->prog->request_queue); ++ empty = list_empty(&queue->request_queue); + +- list_add_tail (&req->request_list, +- &req->prog->request_queue); ++ list_add_tail(&req->request_list, 
&queue->request_queue); + +- if (empty) +- pthread_cond_signal (&req->prog->queue_cond); ++ if (empty && queue->waiting) ++ pthread_cond_signal(&queue->queue_cond); + } +- pthread_mutex_unlock (&req->prog->queue_lock); ++ pthread_mutex_unlock(&queue->queue_lock); + + ret = 0; + } else { ++noqueue: + ret = actor_fn (req); + } + } +@@ -838,6 +1039,12 @@ rpcsvc_notify (rpc_transport_t *trans, void *mydata, + "got MAP_XID event, which should have not come"); + ret = 0; + break; ++ ++ case RPC_TRANSPORT_EVENT_THREAD_DIED: ++ rpcsvc_handle_event_thread_death(svc, trans, ++ (int)(unsigned long)data); ++ ret = 0; ++ break; + } + + out: +@@ -1779,6 +1986,7 @@ rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name) + goto out; + } + ++ dict_del(options, "notify-poller-death"); + GF_FREE (transport_name); + transport_name = NULL; + count++; +@@ -1864,50 +2072,87 @@ out: + void * + rpcsvc_request_handler (void *arg) + { +- rpcsvc_program_t *program = arg; +- rpcsvc_request_t *req = NULL; ++ rpcsvc_request_queue_t *queue = NULL; ++ rpcsvc_program_t *program = NULL; ++ rpcsvc_request_t *req = NULL, *tmp_req = NULL; + rpcsvc_actor_t *actor = NULL; + gf_boolean_t done = _gf_false; + int ret = 0; ++ struct list_head tmp_list = { ++ 0, ++ }; ++ ++ queue = arg; ++ program = queue->program; ++ ++ INIT_LIST_HEAD(&tmp_list); + + if (!program) + return NULL; + + while (1) { +- pthread_mutex_lock (&program->queue_lock); ++ pthread_mutex_lock(&queue->queue_lock); + { +- if (!program->alive +- && list_empty (&program->request_queue)) { ++ if (!program->alive && list_empty(&queue->request_queue)) { + done = 1; + goto unlock; + } +- +- while (list_empty (&program->request_queue)) +- pthread_cond_wait (&program->queue_cond, +- &program->queue_lock); +- +- req = list_entry (program->request_queue.next, +- typeof (*req), request_list); +- +- list_del_init (&req->request_list); ++ while (list_empty(&queue->request_queue)) { ++ queue->waiting = _gf_true; ++ pthread_cond_wait(&queue->queue_cond, &queue->queue_lock); ++ } ++ queue->waiting = _gf_false; ++ if (!list_empty(&queue->request_queue)) { ++ INIT_LIST_HEAD(&tmp_list); ++ list_splice_init(&queue->request_queue, &tmp_list); ++ } ++ } ++unlock: ++ pthread_mutex_unlock(&queue->queue_lock); ++ list_for_each_entry_safe(req, tmp_req, &tmp_list, request_list) ++ { ++ list_del_init(&req->request_list); ++ if (req) { ++ if (req->prognum == RPCSVC_INFRA_PROGRAM) { ++ switch (req->procnum) { ++ case RPCSVC_PROC_EVENT_THREAD_DEATH: ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "event thread died, exiting request handler " ++ "thread for queue %d of program %s", ++ (int)(queue - &program->request_queue[0]), ++ program->progname); ++ done = 1; ++ ++ pthread_mutex_lock(&program->thr_lock); ++ { ++ rpcsvc_toggle_queue_status( ++ program, queue, ++ program->request_queue_status); ++ program->threadcount--; ++ } ++ pthread_mutex_unlock(&program->thr_lock); ++ rpcsvc_request_destroy(req); ++ break; ++ ++ default: ++ break; ++ } ++ } else { ++ THIS = req->svc->xl; ++ actor = rpcsvc_program_actor(req); ++ ret = actor->actor(req); ++ ++ if (ret != 0) { ++ rpcsvc_check_and_reply_error(ret, NULL, req); ++ } ++ ++ req = NULL; ++ } ++ } + } +- unlock: +- pthread_mutex_unlock (&program->queue_lock); +- + if (done) + break; +- +- THIS = req->svc->xl; +- +- actor = rpcsvc_program_actor (req); +- +- ret = actor->actor (req); +- +- if (ret != 0) { +- rpcsvc_check_and_reply_error (ret, NULL, req); +- } + } +- + return NULL; + } + +@@ -1917,6 +2162,7 @@ rpcsvc_program_register (rpcsvc_t *svc, 
rpcsvc_program_t *program) + int ret = -1; + rpcsvc_program_t *newprog = NULL; + char already_registered = 0; ++ int i = 0; + + if (!svc) { + goto out; +@@ -1951,9 +2197,16 @@ rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + memcpy (newprog, program, sizeof (*program)); + + INIT_LIST_HEAD (&newprog->program); +- INIT_LIST_HEAD (&newprog->request_queue); +- pthread_mutex_init (&newprog->queue_lock, NULL); +- pthread_cond_init (&newprog->queue_cond, NULL); ++ ++ for (i = 0; i < EVENT_MAX_THREADS; i++) { ++ INIT_LIST_HEAD(&newprog->request_queue[i].request_queue); ++ pthread_mutex_init(&newprog->request_queue[i].queue_lock, NULL); ++ pthread_cond_init(&newprog->request_queue[i].queue_cond, NULL); ++ newprog->request_queue[i].program = newprog; ++ } ++ ++ pthread_mutex_init(&newprog->thr_lock, NULL); ++ pthread_cond_init(&newprog->thr_cond, NULL); + + newprog->alive = _gf_true; + +@@ -1962,9 +2215,11 @@ rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + newprog->ownthread = _gf_false; + + if (newprog->ownthread) { +- gf_thread_create (&newprog->thread, NULL, +- rpcsvc_request_handler, +- newprog, "reqhnd"); ++ struct event_pool *ep = svc->ctx->event_pool; ++ newprog->eventthreadcount = ep->eventthreadcount; ++ ++ pthread_key_create(&newprog->req_queue_key, NULL); ++ newprog->thr_queue = 1; + } + + pthread_mutex_lock (&svc->rpclock); +diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h +index 58c0055..f500bab 100644 +--- a/rpc/rpc-lib/src/rpcsvc.h ++++ b/rpc/rpc-lib/src/rpcsvc.h +@@ -33,6 +33,16 @@ + #define MAX_IOVEC 16 + #endif + ++/* TODO: we should store prognums at a centralized location to avoid conflict ++ or use a robust random number generator to avoid conflicts ++*/ ++ ++#define RPCSVC_INFRA_PROGRAM 7712846 /* random number */ ++ ++typedef enum { ++ RPCSVC_PROC_EVENT_THREAD_DEATH = 0, ++} rpcsvc_infra_procnum_t; ++ + #define RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT 64 /* Default for protocol/server */ + #define RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT 16 /* Default for nfs/server */ + #define RPCSVC_MAX_OUTSTANDING_RPC_LIMIT 65536 +@@ -349,6 +359,16 @@ typedef struct rpcsvc_actor_desc { + drc_op_type_t op_type; + } rpcsvc_actor_t; + ++typedef struct rpcsvc_request_queue { ++ int gen; ++ struct list_head request_queue; ++ pthread_mutex_t queue_lock; ++ pthread_cond_t queue_cond; ++ pthread_t thread; ++ struct rpcsvc_program *program; ++ gf_boolean_t waiting; ++} rpcsvc_request_queue_t; ++ + /* Describes a program and its version along with the function pointers + * required to handle the procedures/actors of each program/version. + * Never changed ever by any thread so no need for a lock. 
+@@ -409,10 +429,14 @@ struct rpcsvc_program { + gf_boolean_t synctask; + /* list member to link to list of registered services with rpcsvc */ + struct list_head program; +- struct list_head request_queue; +- pthread_mutex_t queue_lock; +- pthread_cond_t queue_cond; +- pthread_t thread; ++ rpcsvc_request_queue_t request_queue[EVENT_MAX_THREADS]; ++ char request_queue_status[EVENT_MAX_THREADS / 8 + 1]; ++ pthread_mutex_t thr_lock; ++ pthread_cond_t thr_cond; ++ int thr_queue; ++ pthread_key_t req_queue_key; ++ int threadcount; ++ int eventthreadcount; + }; + + typedef struct rpcsvc_cbk_program { +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index e28c5cd..df984f8 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -2419,7 +2419,8 @@ static int socket_disconnect (rpc_transport_t *this, gf_boolean_t wait); + /* reads rpc_requests during pollin */ + static int + socket_event_handler (int fd, int idx, int gen, void *data, +- int poll_in, int poll_out, int poll_err) ++ int poll_in, int poll_out, int poll_err, ++ char event_thread_died) + { + rpc_transport_t *this = NULL; + socket_private_t *priv = NULL; +@@ -2429,6 +2430,13 @@ socket_event_handler (int fd, int idx, int gen, void *data, + + this = data; + ++ if (event_thread_died) { ++ /* to avoid duplicate notifications, ++ * notify only for listener sockets ++ */ ++ return 0; ++ } ++ + GF_VALIDATE_OR_GOTO ("socket", this, out); + GF_VALIDATE_OR_GOTO ("socket", this->private, out); + GF_VALIDATE_OR_GOTO ("socket", this->xl, out); +@@ -2720,7 +2728,8 @@ socket_spawn (rpc_transport_t *this) + + static int + socket_server_event_handler (int fd, int idx, int gen, void *data, +- int poll_in, int poll_out, int poll_err) ++ int poll_in, int poll_out, int poll_err, ++ char event_thread_died) + { + rpc_transport_t *this = NULL; + socket_private_t *priv = NULL; +@@ -2742,6 +2751,12 @@ socket_server_event_handler (int fd, int idx, int gen, void *data, + priv = this->private; + ctx = this->ctx; + ++ if (event_thread_died) { ++ rpc_transport_notify(this, RPC_TRANSPORT_EVENT_THREAD_DIED, ++ (void *)(unsigned long)gen); ++ return 0; ++ } ++ + /* NOTE: + * We have done away with the critical section in this function. since + * there's little that it helps with. 
There's no other code that +@@ -2840,6 +2855,7 @@ socket_server_event_handler (int fd, int idx, int gen, void *data, + new_trans->mydata = this->mydata; + new_trans->notify = this->notify; + new_trans->listener = this; ++ new_trans->notify_poller_death = this->poller_death_accept; + new_priv = new_trans->private; + + if (new_sockaddr.ss_family == AF_UNIX) { +@@ -2935,7 +2951,8 @@ socket_server_event_handler (int fd, int idx, int gen, void *data, + new_sock, + socket_event_handler, + new_trans, +- 1, 0); ++ 1, 0, ++ new_trans->notify_poller_death); + if (new_priv->idx == -1) { + ret = -1; + gf_log(this->name, GF_LOG_ERROR, +@@ -3388,7 +3405,8 @@ handler: + else { + priv->idx = event_register (ctx->event_pool, priv->sock, + socket_event_handler, +- this, 1, 1); ++ this, 1, 1, ++ this->notify_poller_death); + if (priv->idx == -1) { + gf_log ("", GF_LOG_WARNING, + "failed to register the event"); +@@ -3560,7 +3578,8 @@ socket_listen (rpc_transport_t *this) + + priv->idx = event_register (ctx->event_pool, priv->sock, + socket_server_event_handler, +- this, 1, 0); ++ this, 1, 0, ++ this->notify_poller_death); + + if (priv->idx == -1) { + gf_log (this->name, GF_LOG_WARNING, +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index 7cc3d15..30045ef 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -557,6 +557,10 @@ get_frame_from_request (rpcsvc_request_t *req) + } + } + ++ /* Add a ref for this fop */ ++ if (client) ++ gf_client_ref (client); ++ + frame->root->uid = req->uid; + frame->root->gid = req->gid; + frame->root->pid = req->pid; +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index ba3b831..d32f5dd 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -1342,6 +1342,9 @@ init (xlator_t *this) + ret = -1; + goto out; + } ++ ++ ret = dict_set_int32(this->options, "notify-poller-death", 1); ++ + ret = rpcsvc_create_listeners (conf->rpc, this->options, + this->name); + if (ret < 1) { +-- +1.8.3.1 + diff --git a/SOURCES/0526-fuse-interrupt-handling-framework.patch b/SOURCES/0526-fuse-interrupt-handling-framework.patch new file mode 100644 index 0000000..1670ca3 --- /dev/null +++ b/SOURCES/0526-fuse-interrupt-handling-framework.patch @@ -0,0 +1,671 @@ +From 77716a11910ca2b88f37ff549776f7778cc17dae Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Thu, 9 Aug 2018 11:46:33 +0200 +Subject: [PATCH 526/529] fuse: interrupt handling framework + +- add sub-framework to send timed responses to kernel +- add interrupt handler queue +- implement INTERRUPT + +fuse_interrupt looks up handlers for interrupted messages +in the queue. If found, it invokes the handler function. +Else responds with EAGAIN with a delay. + +See spec at + +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.17#n148 + +and explanation in comments. 
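For orientation, here is a self-contained toy model of the dispatch rule described above: look the interrupted request up by its unique id, run its handler if one was registered, otherwise fall back to a delayed EAGAIN. All names in it (record_t, on_interrupt, cancel_flush) are invented for illustration and are not the translator's actual symbols.

```
/* Toy model of "look up handler by unique id, else delayed EAGAIN". */
#include <stdint.h>
#include <stdio.h>

#define MAX_RECORDS 8

typedef struct {
    uint64_t unique;             /* id of the interruptible request    */
    void (*handler)(uint64_t);   /* run when an INTERRUPT names it     */
    int in_use;
} record_t;

static record_t table[MAX_RECORDS];

static void cancel_flush(uint64_t unique)
{
    printf("handler triggered for %llu\n", (unsigned long long)unique);
}

static void on_interrupt(uint64_t unique)
{
    for (int i = 0; i < MAX_RECORDS; i++) {
        if (table[i].in_use && table[i].unique == unique) {
            table[i].handler(unique);
            return;
        }
    }
    /* No record: the fop either opted out or has not registered yet,
     * so schedule a delayed EAGAIN and let the kernel retry. */
    printf("no handler for %llu, delayed EAGAIN\n",
           (unsigned long long)unique);
}

int main(void)
{
    table[0] = (record_t){ .unique = 42, .handler = cancel_flush,
                           .in_use = 1 };
    on_interrupt(42);   /* found: handler runs          */
    on_interrupt(43);   /* not found: delayed EAGAIN    */
    return 0;
}
```

The delayed EAGAIN keeps the kernel resending the interrupt until the fop either registers a record or answers, which is the loop the in-code comments below describe.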
+ +Upstream: https://review.gluster.org/20686 +> Change-Id: I1a79d3679b31f36e14b4ac8f60b7f2c1ea2badfb +> updates: #465 +> Signed-off-by: Csaba Henk + +Change-Id: Idff76920aaa9f87b185dabb0b431a31fcd2a2c77 +BUG: 1595246 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/162549 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/timespec.c | 16 ++ + libglusterfs/src/timespec.h | 1 + + xlators/mount/fuse/src/fuse-bridge.c | 464 +++++++++++++++++++++++++++++++- + xlators/mount/fuse/src/fuse-bridge.h | 39 +++ + xlators/mount/fuse/src/fuse-mem-types.h | 2 + + 5 files changed, 521 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c +index 903303d..55f7575 100644 +--- a/libglusterfs/src/timespec.c ++++ b/libglusterfs/src/timespec.c +@@ -72,3 +72,19 @@ void timespec_sub (const struct timespec *begin, const struct timespec *end, + res->tv_nsec = end->tv_nsec - begin->tv_nsec; + } + } ++ ++int ++timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts) ++{ ++ if (lhs_ts->tv_sec < rhs_ts->tv_sec) { ++ return -1; ++ } else if (lhs_ts->tv_sec > rhs_ts->tv_sec) { ++ return 1; ++ } else if (lhs_ts->tv_nsec < rhs_ts->tv_nsec) { ++ return -1; ++ } else if (lhs_ts->tv_nsec > rhs_ts->tv_nsec) { ++ return 1; ++ } ++ ++ return 0; ++} +diff --git a/libglusterfs/src/timespec.h b/libglusterfs/src/timespec.h +index 9c393ee..aa37951 100644 +--- a/libglusterfs/src/timespec.h ++++ b/libglusterfs/src/timespec.h +@@ -23,5 +23,6 @@ void timespec_adjust_delta (struct timespec *ts, struct timespec delta); + void timespec_sub (const struct timespec *begin, + const struct timespec *end, + struct timespec *res); ++int timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts); + + #endif /* __INCLUDE_TIMESPEC_H__ */ +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index f3188d6..0d4b9db 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -15,6 +15,7 @@ + #include "compat-errno.h" + #include "glusterfs-acl.h" + #include "syscall.h" ++#include "timespec.h" + + #ifdef __NetBSD__ + #undef open /* in perfuse.h, pulled from mount-gluster-compat.h */ +@@ -426,6 +427,361 @@ fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) + } + #endif + ++static fuse_timed_message_t * ++fuse_timed_message_new (void) ++{ ++ fuse_timed_message_t *dmsg = NULL; ++ ++ dmsg = GF_MALLOC (sizeof (*dmsg), gf_fuse_mt_timed_message_t); ++ if (!dmsg) { ++ return NULL; ++ } ++ ++ /* should be NULL if not set */ ++ dmsg->fuse_message_body = NULL; ++ INIT_LIST_HEAD (&dmsg->next); ++ ++ return dmsg; ++} ++ ++static void ++fuse_timed_message_free (fuse_timed_message_t *dmsg) ++{ ++ GF_FREE (dmsg->fuse_message_body); ++ GF_FREE (dmsg); ++} ++ ++static void ++send_fuse_timed (xlator_t *this, fuse_timed_message_t *dmsg) ++{ ++ fuse_private_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (!priv->timed_response_fuse_thread_started) { ++ return; ++ } ++ ++ pthread_mutex_lock (&priv->timed_mutex); ++ { ++ list_add_tail (&dmsg->next, &priv->timed_list); ++ pthread_cond_signal (&priv->timed_cond); ++ } ++ pthread_mutex_unlock (&priv->timed_mutex); ++} ++ ++fuse_interrupt_record_t * ++fuse_interrupt_record_new (fuse_in_header_t *finh, ++ fuse_interrupt_handler_t handler) ++{ ++ fuse_interrupt_record_t *fir = NULL; ++ ++ fir = GF_MALLOC (sizeof (*fir), 
gf_fuse_mt_interrupt_record_t); ++ if (!fir) { ++ return NULL; ++ } ++ ++ fir->hit = _gf_false; ++ fir->interrupt_state = INTERRUPT_NONE; ++ fir->data = NULL; ++ ++ fir->interrupt_handler = handler; ++ memcpy (&fir->fuse_in_header, finh, sizeof (*finh)); ++ pthread_cond_init (&fir->handler_cond, NULL); ++ pthread_mutex_init (&fir->handler_mutex, NULL); ++ INIT_LIST_HEAD (&fir->next); ++ ++ return fir; ++} ++ ++static void ++fuse_interrupt_record_free (fuse_interrupt_record_t *fir, void **datap) ++{ ++ /* ++ * If caller wishes, we give back the private data to let them deal with it ++ * however they want; otherwise we take care of freeing it. ++ */ ++ if (datap) { ++ *datap = fir->data; ++ } else { ++ GF_FREE (fir->data); ++ } ++ ++ GF_FREE (fir); ++} ++ ++void ++fuse_interrupt_record_insert (xlator_t *this, fuse_interrupt_record_t *fir) ++{ ++ fuse_private_t *priv = NULL; ++ ++ priv = this->private; ++ pthread_mutex_lock (&priv->interrupt_mutex); ++ { ++ list_add_tail (&fir->next, &priv->interrupt_list); ++ } ++ pthread_mutex_unlock (&priv->interrupt_mutex); ++} ++ ++static fuse_interrupt_record_t * ++fuse_interrupt_record_fetch (xlator_t *this, uint64_t unique, gf_boolean_t reap) ++{ ++ fuse_interrupt_record_t *fir = NULL; ++ gf_boolean_t found = _gf_false; ++ fuse_private_t *priv = NULL; ++ ++ priv = this->private; ++ pthread_mutex_lock (&priv->interrupt_mutex); ++ { ++ list_for_each_entry (fir, &priv->interrupt_list, next) ++ { ++ if (fir->fuse_in_header.unique == unique) { ++ /* ++ * If we are to reap, we do it regardless the ++ * hit flag; otherwise we take the record only ++ * hasn't yet flagged hit. ++ */ ++ if (reap || !fir->hit) { ++ found = _gf_true; ++ } ++ /* ++ * If we are not reaping (coming from handler ++ * context), we set the hit flag. ++ */ ++ if (!reap) { ++ fir->hit = _gf_true; ++ } ++ break; ++ } ++ } ++ if (found && reap) { ++ list_del (&fir->next); ++ } ++ } ++ pthread_mutex_unlock (&priv->interrupt_mutex); ++ ++ if (found) { ++ return fir; ++ } ++ return NULL; ++} ++ ++static fuse_interrupt_record_t * ++fuse_interrupt_record_get (xlator_t *this, uint64_t unique) ++{ ++ return fuse_interrupt_record_fetch (this, unique, _gf_false); ++} ++ ++static fuse_interrupt_record_t * ++fuse_interrupt_record_reap (xlator_t *this, uint64_t unique) ++{ ++ return fuse_interrupt_record_fetch (this, unique, _gf_true); ++} ++ ++static void ++fuse_interrupt (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) ++{ ++ struct fuse_interrupt_in *fii = msg; ++ fuse_interrupt_record_t *fir = NULL; ++ ++ gf_log ("glusterfs-fuse", GF_LOG_TRACE, ++ "unique %" PRIu64 " INTERRUPT for %" PRIu64, finh->unique, ++ fii->unique); ++ ++ fir = fuse_interrupt_record_get (this, fii->unique); ++ if (fir) { ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, ++ "unique %" PRIu64 " INTERRUPT for %" PRIu64 ++ ": handler triggered", ++ finh->unique, fii->unique); ++ ++ fir->interrupt_handler (this, fir); ++ } else { ++ fuse_timed_message_t *dmsg = NULL; ++ ++ /* ++ * No record found for this interrupt request. ++ * ++ * It's either because the handler for the interrupted message ++ * does not want to handle interrupt, or this interrupt ++ * message beat the interrupted which hasn't yet added a record ++ * to the interrupt queue. Either case we reply with error ++ * EAGAIN with some (0.01 sec) delay. That will have this ++ * interrupt request resent, unless the interrupted message ++ * has been already answered. 
++ * ++ * So effectively we are looping in between kernel and ++ * userspace, which will be exited either when the interrupted ++ * message handler has added an interrupt record, or has ++ * replied to kernel. See ++ * ++ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/ ++ * linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148 ++ */ ++ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, ++ "unique %" PRIu64 " INTERRUPT for %" PRIu64 ": no handler found", ++ finh->unique, fii->unique); ++ ++ dmsg = fuse_timed_message_new (); ++ if (!dmsg) { ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "unique %" PRIu64 " INTERRUPT for %" PRIu64 ++ ":" ++ " failed to allocate timed message", ++ finh->unique, fii->unique); ++ ++ return; ++ } ++ ++ dmsg->fuse_out_header.unique = finh->unique; ++ dmsg->fuse_out_header.len = sizeof (dmsg->fuse_out_header); ++ dmsg->fuse_out_header.error = -EAGAIN; ++ timespec_now (&dmsg->scheduled_ts); ++ timespec_adjust_delta (&dmsg->scheduled_ts, ++ (struct timespec){0, 10000000}); ++ ++ send_fuse_timed (this, dmsg); ++ } ++} ++ ++/* ++ * Function to be called in fop cbk context (if the fop engages ++ * with interrupt handling). ++ */ ++gf_boolean_t ++fuse_interrupt_finish_fop (call_frame_t *frame, xlator_t *this, ++ gf_boolean_t sync, void **datap) ++{ ++ fuse_interrupt_record_t *fir = NULL; ++ fuse_state_t *state = frame->root->state; ++ fuse_in_header_t *finh = state->finh; ++ gf_boolean_t hit = _gf_false; ++ gf_boolean_t handled = _gf_false; ++ fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE; ++ ++ fir = fuse_interrupt_record_reap (this, finh->unique); ++ if (!fir) { ++ /* ++ * No interrupt record was inserted (however, caller would usually know ++ * about that and there is no point then in calling this function). ++ */ ++ return _gf_false; ++ } ++ ++ /* ++ * The interrupt handler (if finds the record) modifies fir->hit; however, ++ * that could have occurred only before fuse_interrupt_record_reap (), so ++ * we are safe here with a lock-free access. ++ */ ++ hit = fir->hit; ++ if (hit) { ++ pthread_mutex_lock (&fir->handler_mutex); ++ { ++ intstat_orig = fir->interrupt_state; ++ if (fir->interrupt_state == INTERRUPT_NONE) { ++ fir->interrupt_state = INTERRUPT_SQUELCHED; ++ if (sync) { ++ while (fir->interrupt_state == INTERRUPT_NONE) { ++ pthread_cond_wait (&fir->handler_cond, ++ &fir->handler_mutex); ++ } ++ } ++ } ++ } ++ pthread_mutex_unlock (&fir->handler_mutex); ++ } ++ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig); ++ ++ /* ++ * From this on fir can only be referred under the conditions that imply ++ * we are to free it (otherwise interrupt handler might have already freed ++ * it). ++ */ ++ ++ if (/* there was no interrupt */ ++ !hit || ++ /* lost the race against interrupt handler */ ++ intstat_orig != INTERRUPT_NONE || ++ /* we took cleaning up on us */ ++ sync) { ++ /* cleaning up */ ++ fuse_interrupt_record_free (fir, datap); ++ } else if (datap) { ++ *datap = NULL; ++ } ++ ++ handled = (intstat_orig == INTERRUPT_HANDLED); ++ if (handled) { ++ /* ++ * Fuse request was answered already from interrupt context, we can do ++ * away with the stack. ++ */ ++ free_fuse_state (state); ++ STACK_DESTROY (frame->root); ++ } ++ ++ /* ++ * Let caller know if they have to answer the fuse request. ++ */ ++ return handled; ++} ++ ++/* ++ * Function to be called in interrupt handler context. 
++ */ ++void ++fuse_interrupt_finish_interrupt (xlator_t *this, fuse_interrupt_record_t *fir, ++ fuse_interrupt_state_t intstat, ++ gf_boolean_t sync, void **datap) ++{ ++ fuse_in_header_t finh = { ++ 0, ++ }; ++ fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE; ++ ++ pthread_mutex_lock (&fir->handler_mutex); ++ { ++ intstat_orig = fir->interrupt_state; ++ if (fir->interrupt_state == INTERRUPT_NONE) { ++ fir->interrupt_state = intstat; ++ if (sync) { ++ pthread_cond_signal (&fir->handler_cond); ++ } ++ } ++ finh = fir->fuse_in_header; ++ } ++ pthread_mutex_unlock (&fir->handler_mutex); ++ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig); ++ ++ /* ++ * From this on fir can only be referred under the conditions that imply ++ * we are to free it (otherwise fop handler might have already freed it). ++ */ ++ ++ if (/* we won the race, response is up to us */ ++ intstat_orig == INTERRUPT_NONE && ++ /* interrupt handling was successful, let the kernel know */ ++ intstat == INTERRUPT_HANDLED) { ++ send_fuse_err (this, &finh, EINTR); ++ } ++ ++ if (/* lost the race ... */ ++ intstat_orig != INTERRUPT_NONE && ++ /* ++ * ... and there is no contract with fop handler that it does the ++ * cleanup ... ++ */ ++ !sync) { ++ /* ... so we do! */ ++ fuse_interrupt_record_free (fir, datap); ++ } else if (datap) { ++ *datap = NULL; ++ } ++} + + int + send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) +@@ -4100,6 +4456,89 @@ notify_kernel_loop (void *data) + } + #endif + ++static void * ++timed_response_loop (void *data) ++{ ++ ssize_t rv = 0; ++ size_t len = 0; ++ xlator_t *this = NULL; ++ fuse_private_t *priv = NULL; ++ fuse_timed_message_t *dmsg = NULL; ++ fuse_timed_message_t *tmp = NULL; ++ struct timespec now = {0,}; ++ struct timespec delta = {0,}; ++ struct iovec iovs[2] = {{0,},}; ++ fuse_in_header_t finh = {0,}; ++ ++ this = data; ++ priv = this->private; ++ ++ for (;;) { ++ pthread_mutex_lock (&priv->timed_mutex); ++ { ++ while (list_empty (&priv->timed_list)) { ++ pthread_cond_wait (&priv->timed_cond, &priv->timed_mutex); ++ } ++ ++ dmsg = list_entry (priv->timed_list.next, fuse_timed_message_t, ++ next); ++ list_for_each_entry (tmp, &priv->timed_list, next) ++ { ++ if (timespec_cmp (&tmp->scheduled_ts, &dmsg->scheduled_ts) < 0) { ++ dmsg = tmp; ++ } ++ } ++ ++ list_del_init (&dmsg->next); ++ } ++ pthread_mutex_unlock (&priv->timed_mutex); ++ ++ timespec_now (&now); ++ if (timespec_cmp (&now, &dmsg->scheduled_ts) < 0) { ++ timespec_sub (&now, &dmsg->scheduled_ts, &delta); ++ nanosleep (&delta, NULL); ++ } ++ ++ gf_log ("glusterfs-fuse", GF_LOG_TRACE, ++ "sending timed message of unique %"PRIu64, ++ dmsg->fuse_out_header.unique); ++ ++ len = dmsg->fuse_out_header.len; ++ iovs[0] = (struct iovec){&dmsg->fuse_out_header, ++ sizeof (struct fuse_out_header)}; ++ iovs[1] = (struct iovec){dmsg->fuse_message_body, ++ len - sizeof (struct fuse_out_header)}; ++ /* ++ * Nasty hack to allow us to use the send_fuse_iov API, ++ * which we resort to, as the API used in original upstream ++ * code used is not available in this codebase. 
++ */ ++ finh.unique = dmsg->fuse_out_header.unique; ++ rv = send_fuse_iov (this, &finh, iovs, 2); ++ ++ fuse_timed_message_free (dmsg); ++ ++ if (rv == EBADF) { ++ break; ++ } ++ } ++ ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, "timed response loop terminated"); ++ ++ pthread_mutex_lock (&priv->timed_mutex); ++ { ++ priv->timed_response_fuse_thread_started = _gf_false; ++ list_for_each_entry_safe (dmsg, tmp, &priv->timed_list, next) ++ { ++ list_del_init (&dmsg->next); ++ fuse_timed_message_free (dmsg); ++ } ++ } ++ pthread_mutex_unlock (&priv->timed_mutex); ++ ++ return NULL; ++} ++ + static void + fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +@@ -4112,6 +4551,7 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, + #if FUSE_KERNEL_MINOR_VERSION >= 9 + pthread_t messenger; + #endif ++ pthread_t delayer; + + priv = this->private; + +@@ -4160,6 +4600,19 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, + fino.flags |= FUSE_BIG_WRITES; + } + ++ /* Start the thread processing timed responses */ ++ ret = gf_thread_create (&delayer, NULL, timed_response_loop, this, ++ "fusedlyd"); ++ if (ret != 0) { ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "failed to start timed response thread (%s)", ++ strerror (errno)); ++ ++ sys_close (priv->fd); ++ goto out; ++ } ++ priv->timed_response_fuse_thread_started = _gf_true; ++ + /* Used for 'reverse invalidation of inode' */ + if (fini->minor >= 12) { + ret = gf_thread_create (&messenger, NULL, notify_kernel_loop, +@@ -5229,6 +5682,8 @@ fuse_priv_dump (xlator_t *this) + (int)private->init_recvd); + gf_proc_dump_write("strict_volfile_check", "%d", + (int)private->strict_volfile_check); ++ gf_proc_dump_write("timed_response_thread_started", "%d", ++ (int)private->timed_response_fuse_thread_started); + gf_proc_dump_write("reverse_thread_started", "%d", + (int)private->reverse_fuse_thread_started); + gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); +@@ -5486,7 +5941,7 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = { + [FUSE_SETLKW] = fuse_setlk, + [FUSE_ACCESS] = fuse_access, + [FUSE_CREATE] = fuse_create, +- /* [FUSE_INTERRUPT] */ ++ [FUSE_INTERRUPT] = fuse_interrupt, + /* [FUSE_BMAP] */ + [FUSE_DESTROY] = fuse_destroy, + /* [FUSE_IOCTL] */ +@@ -5611,6 +6066,13 @@ init (xlator_t *this_xl) + pthread_cond_init (&priv->invalidate_cond, NULL); + pthread_mutex_init (&priv->invalidate_mutex, NULL); + ++ INIT_LIST_HEAD (&priv->timed_list); ++ pthread_cond_init (&priv->timed_cond, NULL); ++ pthread_mutex_init (&priv->timed_mutex, NULL); ++ ++ INIT_LIST_HEAD (&priv->interrupt_list); ++ pthread_mutex_init (&priv->interrupt_mutex, NULL); ++ + /* get options from option dictionary */ + ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string); + if (ret == -1 || value_string == NULL) { +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 4e32a7f..ba3e000 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -147,6 +147,16 @@ struct fuse_private { + + /* LRU Limit, if not set, default is 128k for now */ + uint32_t lru_limit; ++ ++ /* Delayed fuse response */ ++ struct list_head timed_list; ++ pthread_cond_t timed_cond; ++ pthread_mutex_t timed_mutex; ++ gf_boolean_t timed_response_fuse_thread_started; ++ ++ /* Interrupt subscription */ ++ struct list_head interrupt_list; ++ pthread_mutex_t interrupt_mutex; + }; + typedef struct fuse_private fuse_private_t; + +@@ -162,6 +172,35 @@ struct 
fuse_invalidate_node { + }; + typedef struct fuse_invalidate_node fuse_invalidate_node_t; + ++struct fuse_timed_message { ++ struct fuse_out_header fuse_out_header; ++ void *fuse_message_body; ++ struct timespec scheduled_ts; ++ struct list_head next; ++}; ++typedef struct fuse_timed_message fuse_timed_message_t; ++ ++enum fuse_interrupt_state { ++ INTERRUPT_NONE, ++ INTERRUPT_SQUELCHED, ++ INTERRUPT_HANDLED, ++}; ++typedef enum fuse_interrupt_state fuse_interrupt_state_t; ++struct fuse_interrupt_record; ++typedef struct fuse_interrupt_record fuse_interrupt_record_t; ++typedef void (*fuse_interrupt_handler_t) (xlator_t *this, ++ fuse_interrupt_record_t *); ++struct fuse_interrupt_record { ++ struct fuse_in_header fuse_in_header; ++ void *data; ++ gf_boolean_t hit; ++ fuse_interrupt_state_t interrupt_state; ++ fuse_interrupt_handler_t interrupt_handler; ++ pthread_cond_t handler_cond; ++ pthread_mutex_t handler_mutex; ++ struct list_head next; ++}; ++ + struct fuse_graph_switch_args { + xlator_t *this; + xlator_t *old_subvol; +diff --git a/xlators/mount/fuse/src/fuse-mem-types.h b/xlators/mount/fuse/src/fuse-mem-types.h +index 721b9a3..4ded879 100644 +--- a/xlators/mount/fuse/src/fuse-mem-types.h ++++ b/xlators/mount/fuse/src/fuse-mem-types.h +@@ -24,6 +24,8 @@ enum gf_fuse_mem_types_ { + gf_fuse_mt_gids_t, + gf_fuse_mt_invalidate_node_t, + gf_fuse_mt_pthread_t, ++ gf_fuse_mt_timed_message_t, ++ gf_fuse_mt_interrupt_record_t, + gf_fuse_mt_end + }; + #endif +-- +1.8.3.1 + diff --git a/SOURCES/0527-fuse-diagnostic-FLUSH-interrupt.patch b/SOURCES/0527-fuse-diagnostic-FLUSH-interrupt.patch new file mode 100644 index 0000000..c15c9ce --- /dev/null +++ b/SOURCES/0527-fuse-diagnostic-FLUSH-interrupt.patch @@ -0,0 +1,568 @@ +From fc4bebd605b6a579a4d19c6640aca38057397c77 Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Tue, 21 Aug 2018 12:44:54 +0200 +Subject: [PATCH 527/529] fuse: diagnostic FLUSH interrupt + +We add dummy interrupt handling for the FLUSH +fuse message. It can be enabled by the +"--fuse-flush-handle-interrupt" hidden command line +option, or "-ofuse-flush-handle-interrupt=yes" +mount option. + +It serves no other than diagnostic & demonstational +purposes -- to exercise the interrupt handling framework +a bit and to give an usage example. + +Documentation is also provided that showcases interrupt +handling via FLUSH. 
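As a rough illustration of the framework behaviour this FLUSH demo exercises (a toy model with invented names, not the fuse translator's API): the fop callback and the interrupt handler race to move a shared per-request state away from NONE, and only the side that wins that transition answers the kernel.

```
/* Toy model of the "first finisher wins" completion race. */
#include <pthread.h>
#include <stdio.h>

enum state { ST_NONE, ST_SQUELCHED, ST_HANDLED };

static enum state st = ST_NONE;
static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;

/* Returns 1 if this caller won the race and now owns the reply. */
static int try_finish(enum state mine, const char *who)
{
    int won = 0;

    pthread_mutex_lock(&lk);
    if (st == ST_NONE) {
        st = mine;
        won = 1;
    }
    pthread_mutex_unlock(&lk);

    printf("%s %s the race\n", who, won ? "won" : "lost");
    return won;
}

static void *interrupt_side(void *arg)
{
    (void)arg;
    if (try_finish(ST_HANDLED, "interrupt handler"))
        printf("reply EINTR from interrupt context\n");
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, interrupt_side, NULL);
    if (try_finish(ST_SQUELCHED, "fop callback"))
        printf("reply normally from fop context\n");
    pthread_join(&t, NULL);
    return 0;
}
```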
+ +Upstream: https://review.gluster.org/20876 +> Change-Id: I522f1e798501d06b74ac3592a5f73c1ab0590c60 +> updates: #465 +> Signed-off-by: Csaba Henk + +Change-Id: I510aff8895a3fe5858ab313c47514de7087d08c1 +BUG: 1595246 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/162550 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/developer-guide/Developers-Index.md | 5 ++ + doc/developer-guide/fuse-interrupt.md | 130 ++++++++++++++++++++++++++++ + glusterfsd/src/glusterfsd.c | 53 +++++++++++- + glusterfsd/src/glusterfsd.h | 1 + + libglusterfs/src/glusterfs.h | 2 + + tests/features/interrupt.t | 67 ++++++++++++++ + tests/features/open_and_sleep.c | 27 ++++++ + xlators/mount/fuse/src/fuse-bridge.c | 59 +++++++++++++ + xlators/mount/fuse/src/fuse-bridge.h | 4 +- + xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++ + 10 files changed, 353 insertions(+), 2 deletions(-) + create mode 100644 doc/developer-guide/fuse-interrupt.md + create mode 100644 tests/features/interrupt.t + create mode 100644 tests/features/open_and_sleep.c + +diff --git a/doc/developer-guide/Developers-Index.md b/doc/developer-guide/Developers-Index.md +index 4c6346e..6c00a4a 100644 +--- a/doc/developer-guide/Developers-Index.md ++++ b/doc/developer-guide/Developers-Index.md +@@ -59,6 +59,11 @@ Translators + - [Storage/posix Translator](./posix.md) + - [Compression translator](./network_compression.md) + ++Fuse ++---- ++ ++- [Interrupt handling](./fuse-interrupt.md) ++ + Testing/Debugging + ----------------- + +diff --git a/doc/developer-guide/fuse-interrupt.md b/doc/developer-guide/fuse-interrupt.md +new file mode 100644 +index 0000000..f92b553 +--- /dev/null ++++ b/doc/developer-guide/fuse-interrupt.md +@@ -0,0 +1,130 @@ ++# Fuse interrupt handling ++ ++## Conventions followed ++ ++- *FUSE* refers to the "wire protocol" between kernel and userspace and ++ related specifications. ++- *fuse* refers to the kernel subsystem and also to the GlusterFs translator. ++ ++## FUSE interrupt handling spec ++ ++The [Linux kernel FUSE documentation](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148) ++desrcibes how interrupt handling happens in fuse. ++ ++## Interrupt handling in the fuse translator ++ ++### Declarations ++ ++This document describes the internal API in the fuse translator with which ++interrupt can be handled. ++ ++The API being internal (to be used only in fuse-bridge.c; the functions are ++not exported to a header file). ++ ++``` ++enum fuse_interrupt_state { ++ INTERRUPT_NONE, ++ INTERRUPT_SQUELCHED, ++ INTERRUPT_HANDLED, ++}; ++typedef enum fuse_interrupt_state fuse_interrupt_state_t; ++struct fuse_interrupt_record; ++typedef struct fuse_interrupt_record fuse_interrupt_record_t; ++typedef void (*fuse_interrupt_handler_t)(xlator_t *this, ++ fuse_interrupt_record_t *); ++struct fuse_interrupt_record { ++ fuse_in_header_t fuse_in_header; ++ void *data; ++ /* ++ ... 
++ */ ++}; ++ ++fuse_interrupt_record_t * ++fuse_interrupt_record_new(fuse_in_header_t *finh, ++ fuse_interrupt_handler_t handler); ++ ++void ++fuse_interrupt_record_insert(xlator_t *this, fuse_interrupt_record_t *fir); ++ ++gf_boolean_t ++fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this, ++ gf_boolean_t sync, void **datap); ++ ++void ++fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir, ++ fuse_interrupt_state_t intstat, ++ gf_boolean_t sync, void **datap); ++``` ++ ++The code demonstrates the usage of the API through `fuse_flush()`. (It's a ++dummy implementation only for demonstration purposes.) Flush is chosen ++because a `FLUSH` interrupt is easy to trigger (see ++*tests/features/interrupt.t*). Interrupt handling for flush is switched on ++by `--fuse-flush-handle-interrupt` (a hidden glusterfs command line flag). ++The flush interrupt handling code is guarded by the ++`flush_handle_interrupt` Boolean member of `fuse_private_t`. ++ ++### Usage ++ ++A given FUSE fop can be enabled to handle interrupts via the following ++steps: ++ ++- Define a handler function (of type `fuse_interrupt_handler_t`). ++ It should implement the interrupt handling logic and in the end ++ call (directly or as async callback) `fuse_interrupt_finish_interrupt()`. ++ The `intstat` argument to `fuse_interrupt_finish_interrupt` should be ++ either `INTERRUPT_SQUELCHED` or `INTERRUPT_HANDLED`. ++ - `INTERRUPT_SQUELCHED` means that we choose not to handle the interrupt ++ and the fop is going on uninterrupted. ++ - `INTERRUPT_HANDLED` means that the interrupt was actually handled. In ++ this case the fop will be answered from interrupt context with errno ++ `EINTR` (that is, the fop should not send a response to the kernel). ++ ++ We return to the `sync` and `datap` arguments later. ++- In the `fuse_` function create an interrupt record using ++ `fuse_interrupt_record_new()`, passing the incoming `fuse_in_header` and ++ the above handler function to it. ++ - Arbitrary further data can be referred to via the `data` member of the ++ interrupt record that is to be passed on from fop context to ++ interrupt context. ++- When it's set up, pass the interrupt record to ++ `fuse_interrupt_record_insert()`. ++- In `fuse__cbk` call `fuse_interrupt_finish_fop()`. ++ - `fuse_interrupt_finish_fop()` returns a Boolean according to whether the ++ interrupt was handled. If it was, then the fuse request is already ++ answered and the stack gets destroyed in `fuse_interrupt_finish_fop` so ++ `fuse__cbk` can just return (zero). Otherwise follow the standard ++ cbk logic (answer the fuse request and destroy the stack -- these are ++ typically accomplished by `fuse_err_cbk()`). ++- The last two argument of `fuse_interrupt_finish_fop()` and ++ `fuse_interrupt_finish_interrupt()` are `gf_boolean_t sync` and ++ `void **datap`. ++ - `sync` represents the strategy for freeing the interrupt record. The ++ interrupt handler and the fop handler are in race to get at the interrupt ++ record first (interrupt handler for purposes of doing the interrupt ++ handling, fop handler for purposes of deactivating the interrupt record ++ upon completion of the fop handling). ++ - If `sync` is true, then the fop handler will wait for the interrupt ++ handler to finish and it takes care of freeing. ++ - If `sync` is false, the loser of the above race will perform freeing. 
++ ++ Freeing is done within the respective interrupt finish routines, except ++ for the `data` field of the interrupt record; with respect to that, see ++ the discussion of the `datap` parameter below. The strategy has to be ++ consensual, that is, `fuse_interrupt_finish_fop()` and ++ `fuse_interrupt_finish_interrupt()` must pass the same value for `sync`. ++ If dismantling the resources associated with the interrupt record is ++ simple, `sync = _gf_false` is the suggested choice; `sync = _gf_true` can ++ be useful in the opposite case, when dismantling those resources would ++ be inconvenient to implement in two places or to enact in non-fop context. ++ - If `datap` is `NULL`, the `data` member of the interrupt record will be ++ freed within the interrupt finish routine. If it points to a valid ++ `void *` pointer, and if caller is doing the cleanup (see `sync` above), ++ then that pointer will be directed to the `data` member of the interrupt ++ record and it's up to the caller what it's doing with it. ++ - If `sync` is true, interrupt handler can use `datap = NULL`, and ++ fop handler will have `datap` set. ++ - If `sync` is false, and handlers pass a pointer to a pointer for ++ `datap`, they should check if the pointed pointer is NULL before ++ attempting to deal with the data. +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 2e2cd77..9c536cd 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -243,6 +243,9 @@ static struct argp_option gf_options[] = { + OPTION_ARG_OPTIONAL, "disable/enable fuse event-history"}, + {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER", + OPTION_ARG_OPTIONAL, "set fuse reader thread count"}, ++ {"fuse-flush-handle-interrupt", ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY, ++ "BOOL", OPTION_ARG_OPTIONAL | OPTION_HIDDEN, ++ "handle interrupt in fuse FLUSH handler"}, + {0, 0, 0, 0, "Miscellaneous Options:"}, + {0, } + }; +@@ -581,6 +584,38 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) + goto err; + } + } ++ switch (cmd_args->fuse_flush_handle_interrupt) { ++ case GF_OPTION_ENABLE: ++ ret = dict_set_static_ptr (options, ++ "flush-handle-interrupt", ++ "on"); ++ if (ret < 0) { ++ gf_msg ("glusterfsd", GF_LOG_ERROR, 0, ++ glusterfsd_msg_4, ++ "failed to set dict value for key " ++ "flush-handle-interrupt"); ++ goto err; ++ } ++ break; ++ case GF_OPTION_DISABLE: ++ ret = dict_set_static_ptr (options, ++ "flush-handle-interrupt", ++ "off"); ++ if (ret < 0) { ++ gf_msg ("glusterfsd", GF_LOG_ERROR, 0, ++ glusterfsd_msg_4, ++ "failed to set dict value for key " ++ "flush-handle-interrupt"); ++ goto err; ++ } ++ break; ++ case GF_OPTION_DEFERRED: /* default */ ++ default: ++ gf_msg_debug ("glusterfsd", 0, ++ "fuse-flush-handle-interrupt mode %d", ++ cmd_args->fuse_flush_handle_interrupt); ++ break; ++ } + + ret = 0; + err: +@@ -1352,7 +1387,22 @@ no_oom_api: + } + + break; +- } ++ case ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY: ++ if (!arg) ++ arg = "yes"; ++ ++ if (gf_string2boolean(arg, &b) == 0) { ++ cmd_args->fuse_flush_handle_interrupt = b; ++ ++ break; ++ } ++ ++ argp_failure(state, -1, 0, ++ "unknown fuse flush handle interrupt " ++ "setting \"%s\"", ++ arg); ++ break; ++ } + + return 0; + } +@@ -1648,6 +1698,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) + cmd_args->fuse_attribute_timeout = -1; + cmd_args->fuse_entry_timeout = -1; + cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED; ++ cmd_args->fuse_flush_handle_interrupt = GF_OPTION_DEFERRED; + + if (ctx->mem_acct_enable) + 
cmd_args->mem_acct = 1; +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index 1550a30..28b514a 100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -101,6 +101,7 @@ enum argp_option_keys { + ARGP_FUSE_EVENT_HISTORY_KEY = 179, + ARGP_READER_THREAD_COUNT_KEY = 180, + ARGP_FUSE_LRU_LIMIT_KEY = 190, ++ ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 191, + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index 2690306..9fa066e 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -448,6 +448,8 @@ struct _cmd_args { + + char *event_history; + uint32_t reader_thread_count; ++ ++ int fuse_flush_handle_interrupt; + }; + typedef struct _cmd_args cmd_args_t; + +diff --git a/tests/features/interrupt.t b/tests/features/interrupt.t +new file mode 100644 +index 0000000..476d875 +--- /dev/null ++++ b/tests/features/interrupt.t +@@ -0,0 +1,67 @@ ++#!/bin/bash ++ ++##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/) ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++TESTS_EXPECTED_IN_LOOP=4 ++ ++cleanup; ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/open_and_sleep.c ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8}; ++ ++## Verify volume is is created ++EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++## Start volume and verify ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++function log-file-name() ++{ ++ logfilename=$M0".log" ++ echo ${logfilename:1} | tr / - ++} ++ ++log_file=$logdir"/"`log-file-name` ++ ++function test_interrupt { ++ local handlebool="$1" ++ local logpattern="$2" ++ ++ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --fuse-flush-handle-interrupt=$handlebool --log-level=DEBUG $M0 ++ ++ # If the test helper fails (which is considered a setup error, not failure of the test ++ # case itself), kill will be invoked without argument, and that will be the actual ++ # error which is caught. ++ TEST "./$(dirname $0)/open_and_sleep $M0/testfile | { sleep 0.1; xargs -n1 kill -INT; }" ++ ++ TEST "grep -E '$logpattern' $log_file" ++ # Basic sanity check, making sure filesystem has not crashed. ++ TEST test -f $M0/testfile ++} ++ ++# Theoretically FLUSH might finish before INTERRUPT is handled, ++# in which case we'd get the "no handler found" message (but it's unlikely). ++test_interrupt yes 'FLUSH.*interrupt handler triggered|INTERRUPT.*no handler found' ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++test_interrupt no 'INTERRUPT.*no handler found' ++ ++## Finish up ++TEST $CLI volume stop $V0; ++EXPECT 'Stopped' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume delete $V0; ++TEST ! 
$CLI volume info $V0; ++ ++cleanup_tester $(dirname $0)/open_and_sleep; ++cleanup; +diff --git a/tests/features/open_and_sleep.c b/tests/features/open_and_sleep.c +new file mode 100644 +index 0000000..da089e9 +--- /dev/null ++++ b/tests/features/open_and_sleep.c +@@ -0,0 +1,27 @@ ++#include ++#include ++#include ++ ++int ++main (int argc, char **argv) ++{ ++ pid_t pid; ++ int fd; ++ ++ if (argc >= 2) { ++ fd = open (argv[1], O_RDWR | O_CREAT, 0644); ++ if (fd == -1) { ++ fprintf (stderr, "cannot open/create %s\n", argv[1]); ++ return 1; ++ } ++ } ++ ++ pid = getpid (); ++ printf ("%d\n", pid); ++ fflush (stdout); ++ ++ for (;;) ++ sleep (1); ++ ++ return 0; ++} +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 0d4b9db..44c39e4 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -1779,6 +1779,21 @@ fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } + + static int ++fuse_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ fuse_private_t *priv = this->private; ++ ++ if (priv->flush_handle_interrupt) { ++ if (fuse_interrupt_finish_fop (frame, this, _gf_false, NULL)) { ++ return 0; ++ } ++ } ++ ++ return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata); ++} ++ ++static int + fuse_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +@@ -2961,6 +2976,19 @@ fuse_flush_resume (fuse_state_t *state) + { + FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, + flush, state->fd, state->xdata); ++ FUSE_FOP (state, fuse_flush_cbk, GF_FOP_FLUSH, ++ flush, state->fd, state->xdata); ++} ++ ++static void ++fuse_flush_interrupt_handler (xlator_t *this, fuse_interrupt_record_t *fir) ++{ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, ++ "FLUSH unique %" PRIu64 ": interrupt handler triggered", ++ fir->fuse_in_header.unique); ++ ++ fuse_interrupt_finish_interrupt (this, fir, INTERRUPT_HANDLED, ++ _gf_false, NULL); + } + + static void +@@ -2968,6 +2996,7 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) + { + struct fuse_flush_in *ffi = msg; ++ fuse_private_t *priv = NULL; + + fuse_state_t *state = NULL; + fd_t *fd = NULL; +@@ -2976,6 +3005,27 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg, + fd = FH_TO_FD (ffi->fh); + state->fd = fd; + ++ priv = this->private; ++ if (priv->flush_handle_interrupt) { ++ fuse_interrupt_record_t *fir = NULL; ++ ++ fir = fuse_interrupt_record_new (finh, ++ fuse_flush_interrupt_handler); ++ if (!fir) { ++ send_fuse_err (this, finh, ENOMEM); ++ ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "FLUSH unique %" PRIu64 ++ ":" ++ " interrupt record allocation failed", ++ finh->unique); ++ free_fuse_state (state); ++ ++ return; ++ } ++ fuse_interrupt_record_insert (this, fir); ++ } ++ + fuse_resolve_fd_init (state, &state->resolve, fd); + + state->lk_owner = ffi->lock_owner; +@@ -6226,6 +6276,9 @@ init (xlator_t *this_xl) + GF_OPTION_INIT("event-history", priv->event_history, bool, + cleanup_exit); + ++ GF_OPTION_INIT ("flush-handle-interrupt", priv->flush_handle_interrupt, bool, ++ cleanup_exit); ++ + /* user has set only background-qlen, not congestion-threshold, + use the fuse kernel driver formula to set congestion. 
ie, 75% */ + if (dict_get (this_xl->options, "background-qlen") && +@@ -6552,5 +6605,11 @@ struct volume_options options[] = { + .description = "makes glusterfs invalidate kernel inodes after " + "reaching this limit (0 means 'unlimited')", + }, ++ { .key = {"flush-handle-interrupt"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .default_value = "false", ++ .description = "Handle iterrupts in FLUSH handler (for testing " ++ "purposes).", ++ }, + { .key = {NULL} }, + }; +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index ba3e000..e18469d 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -157,6 +157,8 @@ struct fuse_private { + /* Interrupt subscription */ + struct list_head interrupt_list; + pthread_mutex_t interrupt_mutex; ++ ++ gf_boolean_t flush_handle_interrupt; + }; + typedef struct fuse_private fuse_private_t; + +@@ -191,7 +193,7 @@ typedef struct fuse_interrupt_record fuse_interrupt_record_t; + typedef void (*fuse_interrupt_handler_t) (xlator_t *this, + fuse_interrupt_record_t *); + struct fuse_interrupt_record { +- struct fuse_in_header fuse_in_header; ++ fuse_in_header_t fuse_in_header; + void *data; + gf_boolean_t hit; + fuse_interrupt_state_t interrupt_state; +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 9a0404f..a3a9fbd 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -273,6 +273,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --dump-fuse=$dump_fuse"); + fi + ++ if [ -n "$fuse_flush_handle_interrupt" ]; then ++ cmd_line=$(echo "$cmd_line --fuse-flush-handle-interrupt=$fuse_flush_handle_interrupt"); ++ fi ++ + # if trasnport type is specified, we have to append it to + # volume name, so that it fetches the right client vol file + +@@ -524,6 +528,9 @@ with_options() + [ $value = "false" ] ; then + no_root_squash=1; + fi ;; ++ "fuse-flush-handle-interrupt") ++ fuse_flush_handle_interrupt=$value ++ ;; + "context"|"fscontext"|"defcontext"|"rootcontext") + # standard SElinux mount options to pass to the kernel + [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," +-- +1.8.3.1 + diff --git a/SOURCES/0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch b/SOURCES/0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch new file mode 100644 index 0000000..370d8f6 --- /dev/null +++ b/SOURCES/0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch @@ -0,0 +1,250 @@ +From 3c0f27fd697a8c977873d44fbdf3aa63c1065645 Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Thu, 6 Dec 2018 16:13:46 +0100 +Subject: [PATCH 528/529] locks: handle "clear locks" xattr in fgetxattr too + +The lock clearing procedure was kicked in only in +getxattr context. We need it to work the same way +if it's triggered via fgetxattr (as is the case +with interrupt handling). + +Also cleaned up the instrumentation a bit (more logs, +proper management of allocated data). 
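A minimal sketch, with invented names and an assumed literal for the clear-locks xattr prefix, of the refactoring pattern applied in this patch: the command handling is hoisted into one helper that both the path-based and the fd-based xattr entry points call.

```
/* Toy model of sharing one clear-locks helper between getxattr paths. */
#include <stdio.h>
#include <string.h>

#define CLRLK_PREFIX "glusterfs.clrlk"   /* assumed value, for illustration */

static int handle_clear_locks(const char *cmd)
{
    printf("clear-locks command: %s\n", cmd);
    return 0;
}

static int getxattr_like(const char *path, const char *name)
{
    if (strncmp(name, CLRLK_PREFIX, strlen(CLRLK_PREFIX)) == 0)
        return handle_clear_locks(name);
    printf("plain getxattr of %s on %s\n", name, path);
    return 0;
}

static int fgetxattr_like(int fd, const char *name)
{
    if (strncmp(name, CLRLK_PREFIX, strlen(CLRLK_PREFIX)) == 0)
        return handle_clear_locks(name);   /* same helper, fd context */
    printf("plain fgetxattr of %s on fd %d\n", name, fd);
    return 0;
}

int main(void)
{
    getxattr_like("/mnt/vol/file", CLRLK_PREFIX ".tposix.kblocked.0,0-0");
    fgetxattr_like(3, "user.foo");
    return 0;
}
```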
+ +Upstream: https://review.gluster.org/21820 +> updates: #465 +> Change-Id: Icfca26ee181da3b8e15ca3fcf61cd5702e2730c8 +> Signed-off-by: Csaba Henk + +Change-Id: Ia15108fd6d92ea2bdb73cea5fb04126785b19663 +BUG: 1595246 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/162551 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/locks/src/clear.c | 6 +++ + xlators/features/locks/src/clear.h | 2 + + xlators/features/locks/src/posix.c | 107 ++++++++++++++++++++++++------------- + 3 files changed, 77 insertions(+), 38 deletions(-) + +diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c +index 22c03b5..c3d5dd2 100644 +--- a/xlators/features/locks/src/clear.c ++++ b/xlators/features/locks/src/clear.c +@@ -24,6 +24,12 @@ + #include "statedump.h" + #include "clear.h" + ++const char *clrlk_type_names[CLRLK_TYPE_MAX] = { ++ [CLRLK_INODE] = "inode", ++ [CLRLK_ENTRY] = "entry", ++ [CLRLK_POSIX] = "posix", ++}; ++ + int + clrlk_get_kind (char *kind) + { +diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h +index 78fc5ae..1542953 100644 +--- a/xlators/features/locks/src/clear.h ++++ b/xlators/features/locks/src/clear.h +@@ -22,6 +22,8 @@ typedef enum { + CLRLK_TYPE_MAX + } clrlk_type; + ++extern const char *clrlk_type_names[]; ++ + typedef enum { + CLRLK_BLOCKED = 1, + CLRLK_GRANTED, +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 2cc2837..142a5cc 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -1028,41 +1028,35 @@ pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + } + +-int32_t +-pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, +- const char *name, dict_t *xdata) ++static int32_t ++pl_getxattr_clrlk (xlator_t *this, const char *name, inode_t *inode, ++ dict_t **dict, int32_t *op_errno) + { +- int32_t op_errno = EINVAL; +- int op_ret = -1; + int32_t bcount = 0; + int32_t gcount = 0; +- char key[PATH_MAX] = {0, }; ++ char *key = NULL; + char *lk_summary = NULL; + pl_inode_t *pl_inode = NULL; +- dict_t *dict = NULL; + clrlk_args args = {0,}; + char *brickname = NULL; ++ int32_t op_ret = -1; + +- if (!name) +- goto usual; +- +- if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD))) +- goto usual; ++ *op_errno = EINVAL; + + if (clrlk_parse_args (name, &args)) { +- op_errno = EINVAL; ++ *op_errno = EINVAL; + goto out; + } + +- dict = dict_new (); +- if (!dict) { +- op_errno = ENOMEM; ++ *dict = dict_new (); ++ if (!*dict) { ++ *op_errno = ENOMEM; + goto out; + } + +- pl_inode = pl_inode_get (this, loc->inode); ++ pl_inode = pl_inode_get (this, inode); + if (!pl_inode) { +- op_errno = ENOMEM; ++ *op_errno = ENOMEM; + goto out; + } + +@@ -1072,23 +1066,31 @@ pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode, + &args, &bcount, + &gcount, +- &op_errno); +- if (op_ret) +- goto out; ++ op_errno); + break; + case CLRLK_POSIX: + op_ret = clrlk_clear_posixlk (this, pl_inode, &args, + &bcount, &gcount, +- &op_errno); +- if (op_ret) +- goto out; ++ op_errno); + break; +- case CLRLK_TYPE_MAX: +- op_errno = EINVAL; +- goto out; ++ default: ++ op_ret = -1; ++ *op_errno = EINVAL; ++ } ++ if (op_ret) { ++ if (args.type >= CLRLK_TYPE_MAX) { ++ gf_log (this->name, GF_LOG_ERROR, ++ "clear locks: invalid lock type %d", args.type); ++ } else 
{ ++ gf_log (this->name, GF_LOG_ERROR, ++ "clear locks of type %s failed: %s", ++ clrlk_type_names[args.type], strerror (*op_errno)); ++ } ++ ++ goto out; + } + +- op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname); ++ op_ret = fetch_pathinfo (this, inode, op_errno, &brickname); + if (op_ret) { + gf_log (this->name, GF_LOG_WARNING, + "Couldn't get brickname"); +@@ -1105,39 +1107,62 @@ pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + if (!gcount && !bcount) { + if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) { + op_ret = -1; +- op_errno = ENOMEM; ++ *op_errno = ENOMEM; + goto out; + } + } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d " + "granted locks=%d", + (brickname == NULL)? this->name : brickname, +- (args.type == CLRLK_INODE)? "inode": +- (args.type == CLRLK_ENTRY)? "entry": +- (args.type == CLRLK_POSIX)? "posix": " ", ++ clrlk_type_names[args.type], + bcount, gcount) == -1) { + op_ret = -1; +- op_errno = ENOMEM; ++ *op_errno = ENOMEM; + goto out; + } ++ gf_log (this->name, GF_LOG_DEBUG, "%s", lk_summary); + +- if (snprintf(key, sizeof(key), "%s", name) >= sizeof(key)) { ++ key = gf_strdup (name); ++ if (!key) { + op_ret = -1; + goto out; + } +- if (dict_set_dynstr (dict, key, lk_summary)) { ++ if (dict_set_dynstr (*dict, key, lk_summary)) { + op_ret = -1; +- op_errno = ENOMEM; ++ *op_errno = ENOMEM; + goto out; + } + + op_ret = 0; + out: + GF_FREE(brickname); +- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + + GF_FREE (args.opts); +- if (op_ret && lk_summary) ++ if (op_ret) { + GF_FREE (lk_summary); ++ GF_FREE (key); ++ } ++ ++ return op_ret; ++} ++ ++int32_t ++pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, ++ dict_t *xdata) ++{ ++ int32_t op_errno = EINVAL; ++ int32_t op_ret = -1; ++ dict_t *dict = NULL; ++ ++ if (!name) ++ goto usual; ++ ++ if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD))) ++ goto usual; ++ ++ op_ret = pl_getxattr_clrlk (this, name, loc->inode, &dict, &op_errno); ++ ++ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); ++ + if (dict) + dict_unref (dict); + return 0; +@@ -1415,6 +1440,12 @@ pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + } + + goto unwind; ++ } else if (strncmp (name, GF_XATTR_CLRLK_CMD, ++ strlen (GF_XATTR_CLRLK_CMD)) == 0) { ++ op_ret = pl_getxattr_clrlk (this, name, fd->inode, &dict, ++ &op_errno); ++ ++ goto unwind; + } else { + goto usual; + } +-- +1.8.3.1 + diff --git a/SOURCES/0529-fuse-SETLKW-interrupt.patch b/SOURCES/0529-fuse-SETLKW-interrupt.patch new file mode 100644 index 0000000..7af923f --- /dev/null +++ b/SOURCES/0529-fuse-SETLKW-interrupt.patch @@ -0,0 +1,222 @@ +From 5f2e017ce7875de1906eb319339f11c4ef321208 Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Mon, 22 Oct 2018 00:59:05 +0200 +Subject: [PATCH 529/529] fuse: SETLKW interrupt + +Use the (f)getxattr based clearlocks interface to +interrupt a pending lock request. 
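A minimal sketch of the idea behind this patch: interrupt context cancels a blocked SETLKW by issuing an fgetxattr whose key names the blocked posix lock, using the clear-locks grammar visible in the handler further down. The literal "glusterfs.clrlk" prefix is an assumption here; the real code builds the key from GF_XATTR_CLRLK_CMD and the lock's whence/start/len fields.

```
/* Toy model of building the clear-locks key for one blocked posix lock. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    short   whence = 0;              /* SEEK_SET                       */
    int64_t start  = 0, len = 0;     /* len 0 means "until EOF"        */
    char    key[128];

    snprintf(key, sizeof(key),
             "glusterfs.clrlk.tposix.kblocked.%hd,%" PRId64 "-%" PRId64,
             whence, start, len);
    /* An fgetxattr(fd, key, ...) on the locked file would then ask the
     * locks translator to drop the matching blocked lock request. */
    printf("%s\n", key);
    return 0;
}
```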
+ +Upstream: https://review.gluster.org/21472 +> updates: #465 +> Change-Id: I4e91a4d8791fc688fed400a02de4c53487e61be2 +> Signed-off-by: Csaba Henk + +Change-Id: Ib436f1524cda6ade24c6970caee3dbd7d5f452d4 +BUG: 1595246 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/162552 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/flock_interrupt.t | 33 +++++++++ + xlators/mount/fuse/src/fuse-bridge.c | 132 +++++++++++++++++++++++++++++++++++ + 2 files changed, 165 insertions(+) + create mode 100644 tests/features/flock_interrupt.t + +diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t +new file mode 100644 +index 0000000..8603b65 +--- /dev/null ++++ b/tests/features/flock_interrupt.t +@@ -0,0 +1,33 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++cleanup; ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 $H0:$B0/${V0}0; ++ ++## Verify volume is is created ++EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++## Start volume and verify ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST touch $M0/testfile; ++ ++function flock_interrupt { ++ flock $MO/testfile sleep 3 & flock -w 1 $M0/testfile true; ++ echo ok; ++} ++ ++EXPECT_WITHIN 2 ok flock_interrupt; ++ ++## Finish up ++cleanup; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 44c39e4..deaf533 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4346,6 +4346,18 @@ fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + { + uint32_t op = 0; + fuse_state_t *state = NULL; ++ int ret = 0; ++ ++ ret = fuse_interrupt_finish_fop (frame, this, _gf_false, ++ (void **)&state); ++ if (state) { ++ GF_FREE (state->name); ++ dict_unref (state->xdata); ++ GF_FREE (state); ++ } ++ if (ret) { ++ return 0; ++ } + + state = frame->root->state; + op = state->finh->opcode; +@@ -4392,10 +4404,130 @@ fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + ++static int ++fuse_setlk_interrupt_handler_cbk (call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ fuse_interrupt_state_t intstat = INTERRUPT_NONE; ++ fuse_interrupt_record_t *fir; ++ fuse_state_t *state = NULL; ++ int ret = 0; ++ ++ ret = dict_get_bin (xdata, "fuse-interrupt-record", (void **)&fir); ++ if (ret < 0) { ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "interrupt record not found"); ++ ++ goto out; ++ } ++ ++ intstat = op_ret >= 0 ? INTERRUPT_HANDLED : INTERRUPT_SQUELCHED; ++ ++ fuse_interrupt_finish_interrupt (this, fir, intstat, _gf_false, ++ (void **)&state); ++ if (state) { ++ GF_FREE (state->name); ++ dict_unref (state->xdata); ++ GF_FREE (state); ++ } ++ ++out: ++ STACK_DESTROY (frame->root); ++ ++ return 0; ++} ++ ++static void ++fuse_setlk_interrupt_handler (xlator_t *this, fuse_interrupt_record_t *fir) ++{ ++ fuse_state_t *state = NULL; ++ call_frame_t *frame = NULL; ++ char *xattr_name = NULL; ++ int ret = 0; ++ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, ++ "SETLK%s unique %" PRIu64 ": interrupt handler triggered", ++ fir->fuse_in_header.opcode == FUSE_SETLK ? 
"" : "W", ++ fir->fuse_in_header.unique); ++ ++ state = fir->data; ++ ++ ret = gf_asprintf ( ++ &xattr_name, GF_XATTR_CLRLK_CMD ".tposix.kblocked.%hd,%jd-%jd", ++ state->lk_lock.l_whence, state->lk_lock.l_start, ++ state->lk_lock.l_len); ++ if (ret == -1) { ++ xattr_name = NULL; ++ goto err; ++ } ++ ++ frame = get_call_frame_for_req (state); ++ if (!frame) { ++ goto err; ++ } ++ frame->root->state = state; ++ frame->root->op = GF_FOP_GETXATTR; ++ frame->op = GF_FOP_GETXATTR; ++ state->name = xattr_name; ++ ++ STACK_WIND (frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol, ++ state->active_subvol->fops->fgetxattr, state->fd, xattr_name, ++ state->xdata); ++ ++ return; ++ ++err: ++ GF_FREE (xattr_name); ++ fuse_interrupt_finish_interrupt (this, fir, INTERRUPT_SQUELCHED, ++ _gf_false, (void **)&state); ++ if (state) { ++ dict_unref (state->xdata); ++ GF_FREE (state); ++ } ++} + + void + fuse_setlk_resume (fuse_state_t *state) + { ++ fuse_interrupt_record_t *fir = NULL; ++ fuse_state_t *state_clone = NULL; ++ ++ fir = fuse_interrupt_record_new (state->finh, fuse_setlk_interrupt_handler); ++ state_clone = gf_memdup (state, sizeof (*state)); ++ if (state_clone) { ++ /* ++ * Calling this allocator with fir casted to (char *) seems like ++ * an abuse of this API, but in fact the API is stupid to assume ++ * a (char *) argument (in the funcion it's casted to (void *) ++ * anyway). ++ */ ++ state_clone->xdata = dict_for_key_value ( ++ "fuse-interrupt-record", (char *)fir, sizeof (*fir), _gf_true); ++ } ++ if (!fir || !state_clone || !state_clone->xdata) { ++ if (fir) { ++ GF_FREE (fir); ++ } ++ if (state_clone) { ++ GF_FREE (state_clone); ++ } ++ send_fuse_err (state->this, state->finh, ENOMEM); ++ ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "SETLK%s unique %"PRIu64":" ++ " interrupt record allocation failed", ++ state->finh->opcode == FUSE_SETLK ? "" : "W", ++ state->finh->unique); ++ free_fuse_state (state); ++ ++ return; ++ } ++ state_clone->name = NULL; ++ fir->data = state_clone; ++ fuse_interrupt_record_insert (state->this, fir); ++ + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": SETLK%s %p", state->finh->unique, + state->finh->opcode == FUSE_SETLK ? "" : "W", state->fd); +-- +1.8.3.1 + diff --git a/SOURCES/0530-spec-fix-lua-script-execution-during-install.patch b/SOURCES/0530-spec-fix-lua-script-execution-during-install.patch new file mode 100644 index 0000000..ea9549a --- /dev/null +++ b/SOURCES/0530-spec-fix-lua-script-execution-during-install.patch @@ -0,0 +1,181 @@ +From 0cef0696d665b385706d9be7aad48afee2537775 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Tue, 12 Feb 2019 16:25:37 +0530 +Subject: [PATCH 530/530] spec: fix lua script execution during install + +Explicit bash shell invocation in the os.execute() API is not required. +Since the script starts with the #!/bin/sh hash-bang sequence, +os.execute() successfully spawns and runs the script. + +Label: DOWNSTREAM ONLY + +BUG: 1410145 +Change-Id: I7774380adb1c164f50145270e91c1d2c086cd9d4 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/162774 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 63 +++++++++++++------------------------------------------ + 1 file changed, 15 insertions(+), 48 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 2680bec..89cd501 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1640,10 +1640,7 @@ if [ $? 
-eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1677,10 +1674,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1714,10 +1708,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1751,10 +1742,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1788,10 +1776,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1825,10 +1810,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1862,10 +1844,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1900,10 +1879,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1938,10 +1914,7 @@ if [ $? 
-eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1976,10 +1949,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2015,10 +1985,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2053,10 +2020,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2113,6 +2077,9 @@ fi + %endif + + %changelog ++* Tue Feb 09 2019 Milind Changire ++- Fix lua script execution during install (#1410145) ++ + * Sat Feb 09 2019 Milind Changire + - Avoid creation of temporary file in lua script during install (#1410145) + +-- +1.8.3.1 + diff --git a/SOURCES/0531-fuse-remove-the-duplicate-FUSE_FOP-calls.patch b/SOURCES/0531-fuse-remove-the-duplicate-FUSE_FOP-calls.patch new file mode 100644 index 0000000..f952871 --- /dev/null +++ b/SOURCES/0531-fuse-remove-the-duplicate-FUSE_FOP-calls.patch @@ -0,0 +1,33 @@ +From 223aa5e0b89c0d876002036cde660bf5d34c3909 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Wed, 13 Feb 2019 20:39:09 +0530 +Subject: [PATCH 531/531] fuse: remove the duplicate FUSE_FOP calls + +This got introduced as part of resolving conflicts in +https://code.engineering.redhat.com/gerrit/#/c/162550/3 + +BUG: 1676904 +Change-Id: If83c5c4c3ebabb91b997681c261199c53524a6a9 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/162920 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mount/fuse/src/fuse-bridge.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index deaf533..175e1b8 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -2974,8 +2974,6 @@ fuse_lseek (xlator_t *this, fuse_in_header_t *finh, void *msg, + void + fuse_flush_resume (fuse_state_t *state) + { +- FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, +- flush, state->fd, state->xdata); + FUSE_FOP (state, fuse_flush_cbk, GF_FOP_FLUSH, + flush, state->fd, state->xdata); + } +-- +1.8.3.1 + diff --git a/SOURCES/0532-dht-fix-double-extra-unref-of-inode-at-heal-path.patch b/SOURCES/0532-dht-fix-double-extra-unref-of-inode-at-heal-path.patch new 
file mode 100644 index 0000000..838afa8 --- /dev/null +++ b/SOURCES/0532-dht-fix-double-extra-unref-of-inode-at-heal-path.patch @@ -0,0 +1,43 @@ +From 253ad23a2cc9e177555ec692e537f760411a3517 Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Wed, 20 Feb 2019 19:04:00 +0530 +Subject: [PATCH 532/532] dht: fix double extra unref of inode at heal path + +The loc_wipe is done in the _out_ section, inode_unref(loc.parent) here +casues a double extra unref of loc.parent. + +> Change-Id: I2dc809328d3d34bf7b02c7df9a4f97788af511e6 +> updates: bz#1651439 +> Signed-off-by: Kinglong Mee +(cherrypick of https://review.gluster.org/#/c/glusterfs/+/21998/) + +Change-Id: I2e3b548fd283a02fff05d8485874203233affbe8 +BUG: 1678232 +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/163382 +Tested-by: Sunil Kumar Heggodu Gopala Acharya +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-helper.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c +index d35d7e8..6a8b653 100644 +--- a/xlators/cluster/dht/src/dht-helper.c ++++ b/xlators/cluster/dht/src/dht-helper.c +@@ -1980,10 +1980,10 @@ dht_heal_path (xlator_t *this, char *path, inode_table_t *itable) + */ + linked_inode = loc.inode; + bname = strtok_r (NULL, "/", &save_ptr); +- inode_unref (loc.parent); + if (!bname) { + goto out; + } ++ inode_unref (loc.parent); + loc.parent = loc.inode; + gf_uuid_copy (loc.pargfid, loc.inode->gfid); + loc.inode = NULL; +-- +1.8.3.1 + diff --git a/SOURCES/0533-glusterd-glusterd-memory-leak-while-running-gluster-.patch b/SOURCES/0533-glusterd-glusterd-memory-leak-while-running-gluster-.patch new file mode 100644 index 0000000..b72d4a8 --- /dev/null +++ b/SOURCES/0533-glusterd-glusterd-memory-leak-while-running-gluster-.patch @@ -0,0 +1,75 @@ +From 5b57b686984498eee09ce1a5f27eef6a14e4387e Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Wed, 6 Mar 2019 12:55:56 +0530 +Subject: [PATCH 533/534] glusterd: glusterd memory leak while running "gluster + v profile" in a loop + +Problem: glusterd has memory leak while running "gluster v profile" + in a loop + +Solution: Resolve leak code path to avoid leak + +> Change-Id: Id608703ff6d0ad34ed8f921a5d25544e24cfadcd +> fixes: bz#1685414 +> Cherry pick from commit 9374484917466dff4688d96ff7faa0de1c804a6c +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22301/ + +Change-Id: I874a0e9947913c201c67b78aaaa982d1fae78b46 +BUG: 1684648 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/164609 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 5 +++-- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 4 +++- + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index 275059c..e176288 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -1380,8 +1380,9 @@ out: + if (rsp.op_errstr) + free (rsp.op_errstr); + +- if (rsp.dict.dict_val) +- free (rsp.dict.dict_val); ++ if (rsp_dict) ++ dict_unref (rsp_dict); ++ + GF_FREE (peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 7baef64..3b7fa8b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1707,6 +1707,7 @@ gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr) + { + glusterd_pending_node_t *pending_node = NULL; ++ glusterd_pending_node_t *tmp = NULL; + struct cds_list_head selected = {0,}; + xlator_t *this = NULL; + int brick_count = 0; +@@ -1742,7 +1743,7 @@ gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + rsp_dict = NULL; + + brick_count = 0; +- cds_list_for_each_entry (pending_node, &selected, list) { ++ cds_list_for_each_entry_safe (pending_node, tmp, &selected, list) { + rpc = glusterd_pending_node_get_rpc (pending_node); + if (!rpc) { + if (pending_node->type == GD_NODE_REBALANCE) { +@@ -1792,6 +1793,7 @@ gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + + brick_count++; + glusterd_pending_node_put_rpc (pending_node); ++ GF_FREE(pending_node); + } + + pending_node = NULL; +-- +1.8.3.1 + diff --git a/SOURCES/0534-fuse-make-sure-the-send-lookup-on-root-instead-of-ge.patch b/SOURCES/0534-fuse-make-sure-the-send-lookup-on-root-instead-of-ge.patch new file mode 100644 index 0000000..2a4efbe --- /dev/null +++ b/SOURCES/0534-fuse-make-sure-the-send-lookup-on-root-instead-of-ge.patch @@ -0,0 +1,74 @@ +From 1454b4a477bf841d6bea610b3bb0b084730d38ab Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Fri, 13 Apr 2018 10:28:01 +0530 +Subject: [PATCH 534/534] fuse: make sure the send lookup on root instead of + getattr() + +This change was done in https://review.gluster.org/16945. While the +changes added there were required, it was not necessary to remove the +getattr part. As fuse's lookup on root(/) comes as getattr only, this +change is very much required. + +The previous fix for this bug was to add the check for revalidation in +lookup when it was sent on root. But I had removed the part where +getattr is coming on root. The removing was not requried to fix the +issue then. Added back this part of the code, to make sure we have +proper validation of root inode in many places like acl, etc. 
+ +upstream patch: https://review.gluster.org/#/c/glusterfs/+/19867/ + +> updates: bz#1437780 +> Change-Id: I859c4ee1a3f407465cbf19f8934530848424ff50 +> Signed-off-by: Amar Tumballi + +Change-Id: I85af744e7bd1a52367d85d5a5b07c4bdf409a66d +BUG: 1668327 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/164663 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mount/fuse/src/fuse-bridge.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 175e1b8..ffc1013 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -1311,6 +1311,7 @@ fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg, + fuse_private_t *priv = NULL; + #endif + fuse_state_t *state; ++ int ret = -1; + + GET_STATE (this, finh, state); + #if FUSE_KERNEL_MINOR_VERSION >= 9 +@@ -1318,6 +1319,25 @@ fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg, + if (priv->proto_minor >= 9 && fgi->getattr_flags & FUSE_GETATTR_FH) + state->fd = fd_ref ((fd_t *)fgi->fh); + #endif ++ if (finh->nodeid == 1) { ++ state->gfid[15] = 1; ++ ++ ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); ++ if (ret < 0) { ++ gf_log ("glusterfs-fuse", GF_LOG_WARNING, ++ "%"PRIu64": GETATTR on / (fuse_loc_fill() failed)", ++ finh->unique); ++ send_fuse_err (this, finh, ENOENT); ++ free_fuse_state (state); ++ return; ++ } ++ ++ fuse_gfid_set (state); ++ ++ FUSE_FOP (state, fuse_root_lookup_cbk, GF_FOP_LOOKUP, ++ lookup, &state->loc, state->xdata); ++ return; ++ } + + if (state->fd) + fuse_resolve_fd_init (state, &state->resolve, state->fd); +-- +1.8.3.1 + diff --git a/SOURCES/0535-posix-Deletion-of-block-hosting-volume-throwing-erro.patch b/SOURCES/0535-posix-Deletion-of-block-hosting-volume-throwing-erro.patch new file mode 100644 index 0000000..e865641 --- /dev/null +++ b/SOURCES/0535-posix-Deletion-of-block-hosting-volume-throwing-erro.patch @@ -0,0 +1,66 @@ +From 1d9151816d9ef915974081d82fd78b59377b6d1a Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Sat, 9 Mar 2019 08:55:44 +0530 +Subject: [PATCH 535/538] posix: Deletion of block hosting volume throwing + error "target is busy" + +Deletion of block hosting volume with heketi-cli few volumes failed to delete +with the message "target is busy".After analyzing the root cause we found fd +was not closed because janitor thread was killed by posix_fini.To avoid the same before +notifying CHILD_DOWN event to parent all fd's should be closed by janitor_thread. + +Note: The patch is applicable only for downstream release, in upstream release + we are using different approach to handle janitor_thread + +Change-Id: I8c8482924af1868b4810e708962cd2978c2a40ab +BUG: 1669020 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/164908 +Tested-by: RHGS Build Bot +Reviewed-by: Pranith Kumar Karampuri +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 8a6282d..9efa1f1 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -7115,6 +7115,15 @@ notify (xlator_t *this, + ...) 
+ { + xlator_t *victim = data; ++ struct posix_private *priv = NULL; ++ struct timespec sleep_till = {0,}; ++ ++ if (!this) ++ return 0; ++ ++ priv = this->private; ++ if (!priv) ++ return 0; + + switch (event) + { +@@ -7128,6 +7137,17 @@ notify (xlator_t *this, + { + if (!victim->cleanup_starting) + break; ++ pthread_mutex_lock (&priv->janitor_lock); ++ { ++ while (!list_empty (&priv->janitor_fds)) { ++ clock_gettime(CLOCK_REALTIME, &sleep_till); ++ sleep_till.tv_sec += 1; ++ (void)pthread_cond_timedwait(&priv->janitor_cond, &priv->janitor_lock, ++ &sleep_till); ++ } ++ } ++ pthread_mutex_unlock (&priv->janitor_lock); ++ + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); +-- +1.8.3.1 + diff --git a/SOURCES/0536-cluster-dht-Fix-lookup-selfheal-and-rmdir-race.patch b/SOURCES/0536-cluster-dht-Fix-lookup-selfheal-and-rmdir-race.patch new file mode 100644 index 0000000..4b633a6 --- /dev/null +++ b/SOURCES/0536-cluster-dht-Fix-lookup-selfheal-and-rmdir-race.patch @@ -0,0 +1,103 @@ +From f928dfab3d2e94211d9865ce994ae3a503f2b9a0 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Mon, 11 Mar 2019 08:06:15 +0530 +Subject: [PATCH 536/538] cluster/dht: Fix lookup selfheal and rmdir race + +A race between the lookup selfheal and rmdir can cause +directories to be healed only on non-hashed subvols. +This can prevent the directory from being listed from +the mount point and in turn causes rm -rf to fail with +ENOTEMPTY. +Fix: Update the layout information correctly and reduce +the call count only after processing the response. + +upstream : https://review.gluster.org/#/c/glusterfs/+/22195/ + +> Change-Id: I812779aaf3d7bcf24aab1cb158cb6ed50d212451 +> fixes: bz#1676400 +> Signed-off-by: N Balachandran + +Change-Id: Ic76248ca7dca05d926f48ba84cd5bfa2943fab92 +BUG: 1458215 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/164931 +Tested-by: RHGS Build Bot +Reviewed-by: Mohit Agrawal +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-selfheal.c | 36 ++++++++++++++++++++++++---------- + 1 file changed, 26 insertions(+), 10 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index 035a709..045689b 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -1366,6 +1366,7 @@ dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie, + int errst = 0; + int32_t mds_xattr_val[1] = {0}; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; ++ int index = -1; + + VALIDATE_OR_GOTO (this->private, err); + +@@ -1375,32 +1376,47 @@ dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie, + prev = cookie; + conf = this->private; + +- if (local->gfid) ++ if (!gf_uuid_is_null(local->gfid)) + gf_uuid_unparse(local->gfid, gfid_local); + +- this_call_cnt = dht_frame_return (frame); +- + LOCK (&frame->lock); + { ++ index = dht_layout_index_for_subvol(layout, prev); + if ((op_ret < 0) && + (op_errno == ENOENT || op_errno == ESTALE)) { + local->selfheal.hole_cnt = !local->selfheal.hole_cnt ? 1 + : local->selfheal.hole_cnt + 1; ++ /* the status might have changed. 
Update the layout ++ * with the new status ++ */ ++ if (index >= 0) { ++ layout->list[index].err = op_errno; ++ } + } + + if (!op_ret) { + dht_iatt_merge (this, &local->stbuf, stbuf, prev); ++ check_mds = dht_dict_get_array (xattr, ++ conf->mds_xattr_key, ++ mds_xattr_val, 1, ++ &errst); ++ if (dict_get (xattr, conf->mds_xattr_key) && ++ check_mds && !errst) { ++ dict_unref (local->xattr); ++ local->xattr = dict_ref (xattr); ++ } ++ /* the status might have changed. Update the layout ++ * with the new status ++ */ ++ if (index >= 0) { ++ layout->list[index].err = -1; ++ } + } +- check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, +- mds_xattr_val, 1, &errst); +- if (dict_get (xattr, conf->mds_xattr_key) && check_mds && !errst) { +- dict_unref (local->xattr); +- local->xattr = dict_ref (xattr); +- } +- + } + UNLOCK (&frame->lock); + ++ this_call_cnt = dht_frame_return (frame); ++ + if (is_last_call (this_call_cnt)) { + if (local->selfheal.hole_cnt == layout->cnt) { + gf_msg_debug (this->name, op_errno, +-- +1.8.3.1 + diff --git a/SOURCES/0537-cluster-dht-Do-not-use-gfid-req-in-fresh-lookup.patch b/SOURCES/0537-cluster-dht-Do-not-use-gfid-req-in-fresh-lookup.patch new file mode 100644 index 0000000..0a7a38f --- /dev/null +++ b/SOURCES/0537-cluster-dht-Do-not-use-gfid-req-in-fresh-lookup.patch @@ -0,0 +1,191 @@ +From 3f7ff1a0343045c737fcca3ffbdc380be8a1f065 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Mon, 11 Mar 2019 11:30:05 +0530 +Subject: [PATCH 537/538] cluster/dht: Do not use gfid-req in fresh lookup + +Fuse sets a random gfid-req value for a fresh lookup. Posix +lookup will set this gfid on entries with missing gfids causing +a GFID mismatch for directories. +DHT will now ignore the Fuse provided gfid-req and use the GFID +returned from other subvols to heal the missing gfid. + +upstream: https://review.gluster.org/#/c/22112/ + +> Change-Id: I5f541978808f246ba4542564251e341ec490db14 +> fixes: bz#1670259 +> Signed-off-by: N Balachandran + +Change-Id: I50e5a9cb4db8164989082daab30a0d5cac70920c +BUG: 1661258 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/164947 +Tested-by: RHGS Build Bot +Reviewed-by: Mohit Agrawal +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 63 ++++++++++++++++++++++++++++++++++-- + xlators/cluster/dht/src/dht-common.h | 2 ++ + 2 files changed, 62 insertions(+), 3 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 1311a8d..12a17e6 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1347,7 +1347,7 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (!op_ret && gf_uuid_is_null (local->gfid)) { + memcpy (local->gfid, stbuf->ia_gfid, 16); + } +- if (local->gfid) ++ if (!gf_uuid_is_null (local->gfid)) + gf_uuid_unparse(local->gfid, gfid_local); + + /* Check if the gfid is different for file from other node */ +@@ -1376,9 +1376,13 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (op_ret == -1) { + local->op_errno = op_errno; + gf_msg_debug (this->name, op_errno, +- "lookup of %s on %s returned error", ++ "%s: lookup on %s returned error", + local->loc.path, prev->name); + ++ /* The GFID is missing on this subvol. Force a heal. 
*/ ++ if (op_errno == ENODATA) { ++ local->need_selfheal = 1; ++ } + goto unlock; + } + +@@ -1485,6 +1489,17 @@ unlock: + + if (local->need_selfheal) { + local->need_selfheal = 0; ++ /* Set the gfid-req so posix will set the GFID*/ ++ if (!gf_uuid_is_null(local->gfid)) { ++ ret = dict_set_static_bin(local->xattr_req, ++ "gfid-req", ++ local->gfid, 16); ++ } else { ++ if (!gf_uuid_is_null(local->gfid_req)) ++ ret = dict_set_static_bin(local->xattr_req, ++ "gfid-req", ++ local->gfid_req, 16); ++ } + dht_lookup_everywhere (frame, this, &local->loc); + return 0; + } +@@ -1686,6 +1701,12 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->need_lookup_everywhere = 1; + } + } ++ /* The GFID is missing on this subvol*/ ++ if ((op_errno == ENODATA) && ++ (IA_ISDIR(local->loc.inode->ia_type))) { ++ local->need_lookup_everywhere = 1; ++ } ++ + goto unlock; + } + +@@ -1931,6 +1952,13 @@ cont: + /* We know that current cached subvol is no more + valid, get the new one */ + local->cached_subvol = NULL; ++ if (local->xattr_req) { ++ if (!gf_uuid_is_null(local->gfid)) { ++ ret = dict_set_static_bin(local->xattr_req, ++ "gfid-req", ++ local->gfid, 16); ++ } ++ } + dht_lookup_everywhere (frame, this, &local->loc); + return 0; + } +@@ -2391,6 +2419,17 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) + return 0; + } + ++ if (local->op_ret && local->gfid_missing) { ++ if (gf_uuid_is_null(local->gfid_req)) { ++ DHT_STACK_UNWIND(lookup, frame, -1, ENODATA, NULL, ++ NULL, NULL, NULL); ++ return 0; ++ } ++ /* A hack */ ++ dht_lookup_directory(frame, this, &local->loc); ++ return 0; ++ } ++ + if (local->dir_count) { + dht_lookup_directory (frame, this, &local->loc); + return 0; +@@ -2751,6 +2790,8 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (op_ret == -1) { + if (op_errno != ENOENT) + local->op_errno = op_errno; ++ if (op_errno == ENODATA) ++ local->gfid_missing = _gf_true; + goto unlock; + } + +@@ -3292,7 +3333,8 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } + } + +- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) { ++ if (is_dir || (op_ret == -1 && ++ ((op_errno == ENOTCONN) || (op_errno == ENODATA)))) { + dht_lookup_directory (frame, this, &local->loc); + return 0; + } +@@ -3421,6 +3463,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this, + int call_cnt = 0; + int gen = 0; + loc_t new_loc = {0,}; ++ void *gfid_req_ptr = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); +@@ -3630,6 +3673,20 @@ dht_lookup (call_frame_t *frame, xlator_t *this, + /* need it for dir self-heal */ + dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req); + ++ /* Fuse sets a random value in gfid-req. If the gfid is missing ++ * on one or more subvols, posix will set the gfid to this ++ * value, causing GFID mismatches for directories. 
++ */ ++ ret = dict_get_ptr(local->xattr_req, "gfid-req", ++ &gfid_req_ptr); ++ if (ret) { ++ gf_msg_debug(this->name, 0, ++ "%s: No gfid-req available", loc->path); ++ } else { ++ memcpy(local->gfid_req, gfid_req_ptr, sizeof(uuid_t)); ++ dict_del(local->xattr_req, "gfid-req"); ++ } ++ + if (!hashed_subvol) { + + gf_msg_debug (this->name, 0, +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index b40815c..94a0869 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -346,6 +346,7 @@ struct dht_local { + + /* gfid related */ + uuid_t gfid; ++ uuid_t gfid_req; + + /* flag used to make sure we need to return estale in + {lookup,revalidate}_cbk */ +@@ -394,6 +395,7 @@ struct dht_local { + loc_t loc2_copy; + gf_boolean_t locked; + gf_boolean_t dont_create_linkto; ++ gf_boolean_t gfid_missing; + }; + typedef struct dht_local dht_local_t; + +-- +1.8.3.1 + diff --git a/SOURCES/0538-cluster-afr-Send-truncate-on-arbiter-brick-from-SHD.patch b/SOURCES/0538-cluster-afr-Send-truncate-on-arbiter-brick-from-SHD.patch new file mode 100644 index 0000000..4b5895c --- /dev/null +++ b/SOURCES/0538-cluster-afr-Send-truncate-on-arbiter-brick-from-SHD.patch @@ -0,0 +1,154 @@ +From 039d3b0631336ba2197fdf203226151a488d60bb Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 11 Mar 2019 17:03:28 +0530 +Subject: [PATCH 538/538] cluster/afr: Send truncate on arbiter brick from SHD + +Problem: +In an arbiter volume configuration SHD will not send any writes onto the arbiter +brick even if there is data pending marker for the arbiter brick. If we have a +arbiter setup on the geo-rep master and there are data pending markers for the files +on arbiter brick, SHD will not mark any data changelog during healing. While syncing +the data from master to slave, if the arbiter-brick is considered as ACTIVE, then +there is a chance that slave will miss out some data. If the arbiter brick is being +newly added or replaced there is a chance of slave missing all the data during sync. + +Fix: +If there is data pending marker for the arbiter brick, send truncate on the arbiter +brick during heal, so that it will record truncate as the data transaction in changelog. + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22325/ + +Change-Id: I174d5d557f1ae55dbe758bc92368c133f1ad0929 +BUG: 1683893 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/164978 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + ...bug-1686568-send-truncate-on-arbiter-from-shd.t | 38 ++++++++++++++++++++++ + tests/volume.rc | 2 +- + xlators/cluster/afr/src/afr-self-heal-data.c | 25 +++++++------- + 3 files changed, 51 insertions(+), 14 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t + +diff --git a/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t +new file mode 100644 +index 0000000..78581e9 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t +@@ -0,0 +1,38 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++cleanup; ++ ++CHANGELOG_PATH_0="$B0/${V0}2/.glusterfs/changelogs" ++ROLLOVER_TIME=100 ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 changelog.changelog on ++TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST dd if=/dev/zero of=$M0/file1 bs=128K count=5 ++ ++TEST $CLI volume profile $V0 start ++TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++TEST $CLI volume profile $V0 info ++truncate_count=$($CLI volume profile $V0 info | grep TRUNCATE | awk '{count += $8} END {print count}') ++ ++EXPECT "1" echo $truncate_count ++EXPECT "1" check_changelog_op ${CHANGELOG_PATH_0} "^ D " ++ ++cleanup; +diff --git a/tests/volume.rc b/tests/volume.rc +index 6a983fd..3af663c 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -874,5 +874,5 @@ function check_changelog_op { + local clog_path=$1 + local op=$2 + +- $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep $op | wc -l ++ $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index 2ac6e47..8bdea2a 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -399,17 +399,18 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this, + { + afr_local_t *local = NULL; + afr_private_t *priv = NULL; +- unsigned char arbiter_sink_status = 0; + int i = 0; + + local = frame->local; + priv = this->private; + +- if (priv->arbiter_count) { +- arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX]; +- healed_sinks[ARBITER_BRICK_INDEX] = 0; +- } +- ++ /* This will send truncate on the arbiter brick as well if it is marked ++ * as sink. If changelog is enabled on the volume it captures truncate ++ * as a data transactions on the arbiter brick. This will help geo-rep ++ * to properly sync the data from master to slave if arbiter is the ++ * ACTIVE brick during syncing and which had got some entries healed for ++ * data as part of self heal. 
++ */ + AFR_ONLIST (healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, + size, NULL); + +@@ -420,8 +421,6 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this, + */ + healed_sinks[i] = 0; + +- if (arbiter_sink_status) +- healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status; + return 0; + } + +@@ -733,6 +732,11 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, + goto unlock; + } + ++ ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks, ++ locked_replies[source].poststat.ia_size); ++ if (ret < 0) ++ goto unlock; ++ + if (priv->arbiter_count && + AFR_COUNT (healed_sinks, priv->child_count) == 1 && + healed_sinks[ARBITER_BRICK_INDEX]) { +@@ -740,11 +744,6 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, + goto restore_time; + } + +- ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks, +- locked_replies[source].poststat.ia_size); +- if (ret < 0) +- goto unlock; +- + ret = 0; + + } +-- +1.8.3.1 + diff --git a/SOURCES/0539-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch b/SOURCES/0539-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch new file mode 100644 index 0000000..ff8170b --- /dev/null +++ b/SOURCES/0539-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch @@ -0,0 +1,39 @@ +From abe7f5cd3a5da0f6dc5df0a93c4773cb7781498b Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Sun, 5 May 2019 21:17:24 +0530 +Subject: [PATCH 539/540] cluster/ec: Reopen shouldn't happen with O_TRUNC + +Problem: +Doing re-open with O_TRUNC will truncate the fragment even when it is not +needed needing extra heals + +Fix: +At the time of re-open don't use O_TRUNC. + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/22660 +fixes bz#1709174 +Change-Id: Idc6408968efaad897b95a5a52481c66e843d3fb8 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/170042 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index a7a8234..363aa0c 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -122,7 +122,7 @@ ec_fix_open (ec_fop_data_t *fop) + NULL, NULL, &fop->loc[0], fop->fd, NULL); + } else{ + ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, +- NULL, NULL, &loc, fop->fd->flags, fop->fd, NULL); ++ NULL, NULL, &loc, fop->fd->flags & (~O_TRUNC), fop->fd, NULL); + } + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0540-dht-NULL-check-before-setting-error-flag.patch b/SOURCES/0540-dht-NULL-check-before-setting-error-flag.patch new file mode 100644 index 0000000..6653352 --- /dev/null +++ b/SOURCES/0540-dht-NULL-check-before-setting-error-flag.patch @@ -0,0 +1,43 @@ +From b0f153893c9fd4328d51e32be4ecf8d75d5968d5 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Fri, 17 May 2019 12:49:59 +0530 +Subject: [PATCH 540/540] dht: NULL check before setting error flag + +Function dht_common_mark_mdsxattr blindly setting value for +an integer pointer without validating it. In fact there are +two callers of this function that passes NULL value to the +same pointer which leads to a crash. 
+ +Backport of : https://review.gluster.org/#/c/22345/ + +>Change-Id: Id94ffe216f6a21f007b3291bff0b1e1c1989075c +>fixes: bz#1687811 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: I3efb227619bef5f69570763848e09b2784371140 +BUG: 1711159 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/170622 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 12a17e6..e998417 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -890,7 +890,8 @@ dht_common_mark_mdsxattr (call_frame_t *frame, int *errst, int mark_during_fresh + "Failed to get hashed subvol for path %s" + "gfid is %s ", + local->loc.path, gfid_local); +- (*errst) = 1; ++ if (errst) ++ (*errst) = 1; + ret = -1; + goto out; + } +-- +1.8.3.1 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec index fc37231..97a16ff 100644 --- a/SPECS/glusterfs.spec +++ b/SPECS/glusterfs.spec @@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 3.12.2 -Release: 18%{?dist} +Release: 47.2%{?dist} %endif License: GPLv2 or LGPLv3+ Group: System Environment/Base @@ -627,6 +627,184 @@ Patch0359: 0359-storage-posix-Increment-trusted.pgfid-in-posix_mknod.patch Patch0360: 0360-geo-rep-Make-automatic-gfid-conflict-resolution-opti.patch Patch0361: 0361-ctr-skip-ctr-xlator-init-if-ctr-is-not-enabled.patch Patch0362: 0362-glusterd-glusterd_brick_start-shouldn-t-cleanup-pidf.patch +Patch0363: 0363-Update-rfc.sh-to-rhgs-3.4.1.patch +Patch0364: 0364-fips-Replace-md5sum-usage-to-enable-fips-support.patch +Patch0365: 0365-glusterd-ignore-importing-volume-which-is-undergoing.patch +Patch0366: 0366-glusterd-fail-volume-stop-operation-if-brick-detach-.patch +Patch0367: 0367-cluster-ec-Improve-logging-for-some-critical-error-m.patch +Patch0368: 0368-mount-fuse-convert-ENOENT-to-ESTALE-in-open-dir-_res.patch +Patch0369: 0369-geo-rep-Fix-deadlock-during-worker-start.patch +Patch0370: 0370-libgfchangelog-Fix-changelog-history-API.patch +Patch0371: 0371-performance-write-behind-remove-the-request-from-wip.patch +Patch0372: 0372-Revert-posix-disable-block-and-character-files.patch +Patch0373: 0373-posix-disable-open-read-write-on-special-files.patch +Patch0374: 0374-socket-set-42-as-default-tpc-user-timeout.patch +Patch0375: 0375-extras-Add-new-options-to-group-virt.patch +Patch0376: 0376-rchecksum-fips-Replace-MD5-usage-to-enable-fips-supp.patch +Patch0377: 0377-fips-geo-rep-Replace-MD5-with-SHA256.patch +Patch0378: 0378-posix-afr-handle-backward-compatibility-for-rchecksu.patch +Patch0379: 0379-glusterd-change-op-version-of-fips-mode-rchecksum.patch +Patch0380: 0380-cluster-afr-Batch-writes-in-same-lock-even-when-mult.patch +Patch0381: 0381-cluster-afr-Make-data-eager-lock-decision-based-on-n.patch +Patch0382: 0382-mount-fuse-make-fuse-dumping-available-as-mount-opti.patch +Patch0383: 0383-glusterd-bump-up-GD_OP_VERSION_MAX.patch +Patch0384: 0384-features-uss-Use-xxh64-to-generate-gfid-instead-of-m.patch +Patch0385: 0385-afr-fix-incorrect-reporting-of-directory-split-brain.patch +Patch0386: 0386-glusterd-make-sure-that-brickinfo-uuid-is-not-null.patch +Patch0387: 0387-georep-Fix-config-set-of-monitor-status.patch +Patch0388: 0388-glusterd-handshake-prevent-a-buffer-overflow.patch +Patch0389: 
0389-server-don-t-allow-in-basename.patch +Patch0390: 0390-core-glusterfsd-keeping-fd-open-in-index-xlator.patch +Patch0391: 0391-glusterd-Use-GF_ATOMIC-to-update-blockers-counter-at.patch +Patch0392: 0392-glusterd-don-t-wait-for-blockers-flag-for-stop-volum.patch +Patch0393: 0393-core-Pass-xlator_name-in-server_call_xlator_mem_clea.patch +Patch0394: 0394-io-stats-prevent-taking-file-dump-on-server-side.patch +Patch0395: 0395-index-prevent-arbitrary-file-creation-outside-entry-.patch +Patch0396: 0396-protocol-remove-the-option-verify-volfile-checksum.patch +Patch0397: 0397-features-locks-add-buffer-overflow-checks-in-pl_getx.patch +Patch0398: 0398-lock-Do-not-allow-meta-lock-count-to-be-more-than-on.patch +Patch0399: 0399-all-fix-the-format-string-exceptions.patch +Patch0400: 0400-all-fix-the-format-warnings-due-to-strict-check.patch +Patch0401: 0401-client_t.c-fix-the-format-error.patch +Patch0402: 0402-core-glusterfsd-keeping-fd-open-in-index-xlator.patch +Patch0403: 0403-afr-prevent-winding-inodelks-twice-for-arbiter-volum.patch +Patch0404: 0404-core-Resolve-some-warnings-to-release-a-build.patch +Patch0405: 0405-glusterfsd-add-missing-UNLOCK.patch +Patch0406: 0406-glusterd-improve-logging-for-stage_deleted-flag.patch +Patch0407: 0407-spec-update-RHGS-version-for-RHGSWA.patch +# Patch0408: 0408-Update-rfc.sh-to-rhgs-3.4.2.patch +Patch0409: 0409-Update-database-profile-group.patch +Patch0410: 0410-cli-fix-glusterd-memory-leak-cause-by-gluster-v-stat.patch +Patch0411: 0411-glusterd-ensure-volinfo-caps-is-set-to-correct-value.patch +Patch0412: 0412-glusterd-set-fsid-while-performing-replace-brick.patch +Patch0413: 0413-glusterfind-add-logs-to-identify-parsing-phases.patch +Patch0414: 0414-logrotate-utilize-the-new-maxsize-option.patch +Patch0415: 0415-statedump-fix-clang-null-dereference-error.patch +Patch0416: 0416-glusterd-ignore-RPC-events-when-glusterd-is-shutting.patch +Patch0417: 0417-cli-Add-warning-message-while-converting-to-replica-.patch +Patch0418: 0418-cli-correct-rebalance-status-elapsed-check.patch +Patch0419: 0419-glusterfs-During-reconfigure-set-log-level-per-xlato.patch +Patch0420: 0420-Modify-log-message-DH-ciphers-are-disabled-from-ERRO.patch +Patch0421: 0421-rpc-handle-EAGAIN-when-SSL_ERROR_SYSCALL-is-returned.patch +Patch0422: 0422-glusterd-raise-default-transport.listen-backlog.patch +Patch0423: 0423-glusterd-acquire-lock-to-update-volinfo-structure.patch +Patch0424: 0424-cluster-afr-Delegate-metadata-heal-with-pending-xatt.patch +Patch0425: 0425-cluster-afr-Delegate-name-heal-when-possible.patch +Patch0426: 0426-features-shard-Make-operations-on-internal-directori.patch +Patch0427: 0427-features-shard-Add-option-to-barrier-parallel-lookup.patch +Patch0428: 0428-libglusterfs-syncop-Handle-barrier_-init-destroy-in-.patch +Patch0429: 0429-features-shard-Introducing-.shard-.remove_me-for-ato.patch +Patch0430: 0430-features-shard-Perform-shards-deletion-in-the-backgr.patch +Patch0431: 0431-glusterd-Reset-op-version-for-features.shard-deletio.patch +Patch0432: 0432-features-shard-Fix-crash-and-test-case-in-RENAME-fop.patch +Patch0433: 0433-mgmt-glusterd-use-proper-path-to-the-volfile.patch +Patch0434: 0434-cluster-afr-s-uuid_is_null-gf_uuid_is_null.patch +Patch0435: 0435-geo-rep-Fix-traceback-with-symlink-metadata-sync.patch +Patch0436: 0436-geo-rep-Fix-issue-in-gfid-conflict-resolution.patch +Patch0437: 0437-geo-rep-Add-more-intelligence-to-automatic-error-han.patch +Patch0438: 0438-cluster-dht-In-rename-unlink-after-creating-linkto-f.patch +Patch0439: 
0439-cluster-dht-fixes-to-unlinking-invalid-linkto-file.patch +Patch0440: 0440-features-locks-Use-pthread_mutex_unlock-instead-of-p.patch +Patch0441: 0441-features-shard-Make-lru-limit-of-inode-list-configur.patch +Patch0442: 0442-glusterd-Reset-op-version-for-features.shard-lru-lim.patch +Patch0443: 0443-features-shard-Hold-a-ref-on-base-inode-when-adding-.patch +Patch0444: 0444-features-shard-fix-formatting-warning.patch +Patch0445: 0445-glusterd-don-t-call-svcs_reconfigure-for-all-volumes.patch +Patch0446: 0446-core-Portmap-entries-showing-stale-brick-entries-whe.patch +Patch0447: 0447-cluster-ec-Don-t-update-trusted.ec.version-if-fop-su.patch +Patch0448: 0448-core-Resolve-memory-leak-at-the-time-of-graph-init.patch +Patch0449: 0449-glusterd-mux-Optimize-brick-disconnect-handler-code.patch +Patch0450: 0450-glusterd-fix-Resource-leak-coverity-issue.patch +Patch0451: 0451-core-Resolve-memory-leak-at-the-time-of-graph-init.patch +Patch0452: 0452-glusterd-make-max-bricks-per-process-default-value-t.patch +Patch0453: 0453-server-Resolve-memory-leak-path-in-server_init.patch +Patch0454: 0454-glusterd-set-cluster.max-bricks-per-process-to-250.patch +Patch0455: 0455-glusterd-fix-get_mux_limit_per_process-to-read-defau.patch +# Patch0456: 0456-Update-rfc.sh-to-rhgs-3.4.3.patch +Patch0457: 0457-cluster-dht-sync-brick-root-perms-on-add-brick.patch +Patch0458: 0458-glusterd-fix-crash.patch +Patch0459: 0459-glfsheal-add-a-nolog-flag.patch +Patch0460: 0460-cli-add-a-warning-confirmation-message-in-peer-detac.patch +Patch0461: 0461-mount-fuse-Add-support-for-multi-threaded-fuse-reade.patch +Patch0462: 0462-posix-Do-not-log-ENXIO-errors-for-seek-fop.patch +Patch0463: 0463-build-glusterfs.spec-.in-firewalld-file-doesn-t-use-.patch +Patch0464: 0464-build-exclude-packaging-crypt.so.patch +Patch0465: 0465-build-add-missing-explicit-package-dependencies.patch +Patch0466: 0466-extras-Add-group-distributed-virt-for-single-brick-o.patch +Patch0467: 0467-glusterd-glusterd-to-regenerate-volfiles-when-GD_OP_.patch +Patch0468: 0468-core-move-invalid-port-logs-to-DEBUG-log-level.patch +Patch0469: 0469-nfs-set-ctx-for-every-inode-looked-up-nfs3_fh_resolv.patch +Patch0470: 0470-dht-fix-use-after-free-in-dht_rmdir_readdirp_cbk.patch +Patch0471: 0471-glusterd-migrating-profile-commands-to-mgmt_v3-frame.patch +Patch0472: 0472-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch +Patch0473: 0473-rpc-bump-up-server.event-threads.patch +Patch0474: 0474-afr-open_ftruncate_cbk-should-read-fd-from-local-con.patch +Patch0475: 0475-glusterd-perform-store-operation-in-cleanup-lock.patch +Patch0476: 0476-afr-add-checks-for-allowing-lookups.patch +Patch0477: 0477-glusterd-perform-rcu_read_lock-unlock-under-cleanup_.patch +Patch0478: 0478-libglusterfs-fix-memory-corruption-caused-by-per-thr.patch +Patch0479: 0479-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch +Patch0480: 0480-geo-rep-Make-slave-volume-read-only-by-default.patch +Patch0481: 0481-extras-hooks-Do-not-blindly-remove-volume-share-from.patch +Patch0482: 0482-extras-hooks-General-improvements-to-S30samba-start..patch +Patch0483: 0483-Do-not-blindly-add-volume-share-section-to-smb.conf.patch +Patch0484: 0484-extras-New-group-volume-set-command-for-Samba-integr.patch +Patch0485: 0485-cluster-ec-Prevent-volume-create-without-redundant-b.patch +Patch0486: 0486-performance-rda-Fixed-dict_t-memory-leak.patch +Patch0487: 0487-mem-pool-add-tracking-of-mem_pool-that-requested-the.patch +Patch0488: 0488-cluster-afr-Allow-lookup-on-root-if-it-is-from-ADD_R.patch 
+Patch0489: 0489-cluster-afr-Do-not-update-read_subvol-in-inode_ctx-a.patch +Patch0490: 0490-glusterd-migrating-rebalance-commands-to-mgmt_v3-fra.patch +Patch0491: 0491-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch +Patch0492: 0492-mem-pool-track-glusterfs_ctx_t-in-struct-mem_pool.patch +Patch0493: 0493-mem-pool-count-allocations-done-per-user-pool.patch +Patch0494: 0494-mem-pool-Resolve-crash-in-mem_pool_destroy.patch +Patch0495: 0495-build-add-conditional-dependency-on-server-for-devel.patch +Patch0496: 0496-glusterd-kill-the-process-without-releasing-the-clea.patch +Patch0497: 0497-cluster-dht-Use-percentages-for-space-check.patch +Patch0498: 0498-mem-pool-Code-refactor-in-mem_pool.c.patch +Patch0499: 0499-cluster-dht-Fix-incorrect-backport.patch +Patch0500: 0500-extras-Add-readdir-ahead-to-samba-group-command.patch +Patch0501: 0501-glusterd-aggregate-rsp-from-peers-for-profile-comman.patch +Patch0502: 0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch +Patch0503: 0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch +Patch0504: 0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch +Patch0505: 0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch +Patch0506: 0506-features-shard-Assign-fop-id-during-background-delet.patch +Patch0507: 0507-geo-rep-fix-rename-sync-on-hybrid-crawl.patch +Patch0508: 0508-glusterd-Resolve-multiple-leaks-in-glusterd-code-pat.patch +Patch0509: 0509-core-heketi-cli-is-throwing-error-target-is-busy.patch +Patch0510: 0510-features-shard-Ref-shard-inode-while-adding-to-fsync.patch +Patch0511: 0511-Update-rfc.sh-to-rhgs-3.4.4.patch +Patch0512: 0512-Test-fixes-for-rhgs-3.4-downstream.patch +Patch0513: 0513-core-heketi-cli-is-throwing-error-target-is-busy.patch +Patch0514: 0514-glusterd-display-gluster-volume-status-when-quorum-t.patch +Patch0515: 0515-cli-change-the-warning-message.patch +Patch0516: 0516-geo-rep-Fix-permissions-with-non-root-setup.patch +Patch0517: 0517-geo-rep-validate-the-config-checkpoint-date-format.patch +Patch0518: 0518-logging-create-parent-dir-if-not-available.patch +Patch0519: 0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch +Patch0520: 0520-spec-avoid-creation-of-temp-file-in-lua-script.patch +Patch0521: 0521-rpc-use-address-family-option-from-vol-file.patch +Patch0522: 0522-fuse-add-lru-limit-option.patch +Patch0523: 0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch +Patch0524: 0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch +Patch0525: 0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch +Patch0526: 0526-fuse-interrupt-handling-framework.patch +Patch0527: 0527-fuse-diagnostic-FLUSH-interrupt.patch +Patch0528: 0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch +Patch0529: 0529-fuse-SETLKW-interrupt.patch +Patch0530: 0530-spec-fix-lua-script-execution-during-install.patch +Patch0531: 0531-fuse-remove-the-duplicate-FUSE_FOP-calls.patch +Patch0532: 0532-dht-fix-double-extra-unref-of-inode-at-heal-path.patch +Patch0533: 0533-glusterd-glusterd-memory-leak-while-running-gluster-.patch +Patch0534: 0534-fuse-make-sure-the-send-lookup-on-root-instead-of-ge.patch +Patch0535: 0535-posix-Deletion-of-block-hosting-volume-throwing-erro.patch +Patch0536: 0536-cluster-dht-Fix-lookup-selfheal-and-rmdir-race.patch +Patch0537: 0537-cluster-dht-Do-not-use-gfid-req-in-fresh-lookup.patch +Patch0538: 0538-cluster-afr-Send-truncate-on-arbiter-brick-from-SHD.patch +Patch0539: 0539-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch +Patch0540: 
0540-dht-NULL-check-before-setting-error-flag.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -646,6 +824,7 @@ Summary: GlusterFS api library Group: System Environment/Daemons Requires: %{name}%{?_isa} = %{version}-%{release} Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} %description api GlusterFS is a distributed file-system capable of scaling to several @@ -664,6 +843,7 @@ Group: Development/Libraries Requires: %{name}%{?_isa} = %{version}-%{release} Requires: %{name}-devel%{?_isa} = %{version}-%{release} Requires: libacl-devel +Requires: %{name}-api%{?_isa} = %{version}-%{release} %description api-devel GlusterFS is a distributed file-system capable of scaling to several @@ -700,6 +880,10 @@ Requires: %{name}%{?_isa} = %{version}-%{release} %if ( 0%{!?_without_extra_xlators:1} ) Requires: %{name}-extra-xlators = %{version}-%{release} %endif +Requires: %{name}-libs%{?_isa} = %{version}-%{release} +%if ( 0%{?_build_server} ) +Requires: %{name}-server%{?_isa} = %{version}-%{release} +%endif %description devel GlusterFS is a distributed file-system capable of scaling to several @@ -749,6 +933,7 @@ Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} Obsoletes: %{name}-client < %{version}-%{release} Provides: %{name}-client = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} %description fuse GlusterFS is a distributed file-system capable of scaling to several @@ -818,6 +1003,7 @@ BuildRequires: python-ctypes Requires: python2-gluster = %{version}-%{release} Requires: rsync Requires: util-linux +Requires: %{name}-libs%{?_isa} = %{version}-%{release} %description geo-replication GlusterFS is a distributed file-system capable of scaling to several @@ -890,6 +1076,7 @@ BuildRequires: libibverbs-devel BuildRequires: librdmacm-devel >= 1.0.15 %endif Requires: %{name}%{?_isa} = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} %description rdma GlusterFS is a distributed file-system capable of scaling to several @@ -1020,6 +1207,7 @@ This package provides the glusterfs server daemon. 
 %package client-xlators
 Summary: GlusterFS client-side translators
 Group: Applications/File
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
 
 %description client-xlators
 GlusterFS is a distributed file-system capable of scaling to several
@@ -1098,16 +1286,39 @@ do
     for doc in ${EXCLUDE_DOCS}; do
         EXCLUDE_DOCS_OPT="--exclude=$doc $EXCLUDE_DOCS_OPT"
     done
-    # apply the patch with 'git apply'
-    git apply -p1 --exclude=rfc.sh \
-        --exclude=.gitignore \
-        --exclude=.testignore \
-        --exclude=MAINTAINERS \
-        --exclude=extras/checkpatch.pl \
-        --exclude=build-aux/checkpatch.pl \
-        --exclude='tests/*' \
-        ${EXCLUDE_DOCS_OPT} \
-        $p
+
+    # HACK to fix build
+    bn=$(basename $p)
+    if [ "$bn" == "0085-Revert-all-remove-code-which-is-not-being-considered.patch" ]; then
+        (patch -p1 -u -F3 < $p || :)
+        if [ -f libglusterfs/Makefile.am.rej ]; then
+            sed -i -e 's/^SUBDIRS = src/SUBDIRS = src src\/gfdb/g;s/^CLEANFILES = /CLEANFILES =/g' libglusterfs/Makefile.am
+        fi
+    elif [ "$bn" == "0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0117-spec-Remove-thin-arbiter-package.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0023-hooks-remove-selinux-hooks.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0042-spec-client-server-Builds-are-failing-on-rhel-6.patch" ]; then
+        (patch -p1 < $p || :)
+    else
+        # apply the patch with 'git apply'
+        git apply -p1 --exclude=rfc.sh \
+            --exclude=.gitignore \
+            --exclude=.testignore \
+            --exclude=MAINTAINERS \
+            --exclude=extras/checkpatch.pl \
+            --exclude=build-aux/checkpatch.pl \
+            --exclude='tests/*' \
+            ${EXCLUDE_DOCS_OPT} \
+            $p
+    fi
+
 done
 
 
@@ -1280,7 +1491,7 @@ install -p -m 0744 -D extras/command-completion/gluster.bash \
     %{buildroot}%{_sysconfdir}/bash_completion.d/gluster
 
 %if ( 0%{?_build_server} )
-echo "RHGS 3.4.0" > %{buildroot}%{_datadir}/glusterfs/release
+echo "RHGS 3.4" > %{buildroot}%{_datadir}/glusterfs/release
 %endif
 
 %clean
@@ -1582,7 +1793,7 @@ exit 0
 %exclude %{_tmpfilesdir}/gluster.conf
 %endif
 %if ( 0%{?_with_firewalld:1} )
-%exclude /usr/lib/firewalld/services/glusterfs.xml
+%exclude %{_prefix}/lib/firewalld/services/glusterfs.xml
 %endif
 %endif
 %doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README.md THANKS
@@ -1612,8 +1823,8 @@ exit 0
 %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/trace.so
 %if ( ! ( 0%{?rhel} && 0%{?rhel} < 6 ) )
 # RHEL-5 based distributions have a too old openssl
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption
- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/crypt.so
+%exclude %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/crypt.so
 %endif
 %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
 %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/access-control.so
@@ -1928,7 +2139,9 @@ exit 0
 %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/metadata-cache
 %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/gluster-block
 %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/db-workload
+%attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/distributed-virt
 %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/nl-cache
+%attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/samba
 %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind
 %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind/.keys
 %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd
@@ -2042,15 +2255,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2084,15 +2289,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2126,15 +2323,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2168,15 +2357,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2210,15 +2391,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2252,15 +2425,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2294,15 +2459,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2337,15 +2494,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2380,15 +2529,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2423,15 +2564,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2467,15 +2600,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2510,15 +2635,7 @@ if [ $? -eq 0 ]; then
 fi
 ]]
 
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
+ok, how, val = os.execute(script)
 if not (ok == 0) then
    error("Detected running glusterfs processes", ok)
 end
@@ -2575,8 +2692,104 @@ fi
 %endif
 
 %changelog
-* Tue Oct 30 2018 CentOS Sources - 3.12.2-18.el7.centos
-- remove vendor and/or packager lines
+* Fri May 17 2019 Rinku Kothiya - 3.12.2-47.2
+- fixes bugs bz#1711159
+
+* Tue May 14 2019 Rinku Kothiya - 3.12.2-47.1
+- fixes bugs bz#1709174
+
+* Wed Mar 13 2019 Milind Changire - 3.12.2-47
+- fixes bugs bz#1458215 bz#1661258 bz#1669020 bz#1683893
+
+* Thu Mar 07 2019 Milind Changire - 3.12.2-46
+- fixes bugs bz#1668327 bz#1684648
+
+* Thu Feb 21 2019 Milind Changire - 3.12.2-45
+- fixes bugs bz#1678232
+
+* Thu Feb 14 2019 Milind Changire - 3.12.2-43
+- fixes bugs bz#1676904
+
+* Tue Feb 12 2019 Milind Changire - 3.12.2-42
+- fixes bugs bz#1410145
+
+* Tue Feb 12 2019 Milind Changire - 3.12.2-41
+- fixes bugs bz#1390151 bz#1410145 bz#1429190 bz#1510752 bz#1511779
+  bz#1570958 bz#1574490 bz#1595246 bz#1618669 bz#1661393 bz#1668989 bz#1669020
+
+* Fri Jan 25 2019 Milind Changire - 3.12.2-40
+- fixes bugs bz#1668304 bz#1669020
+
+* Mon Jan 21 2019 Milind Changire - 3.12.2-39
+- fixes bugs bz#1667169
+
+* Fri Jan 18 2019 Milind Changire - 3.12.2-38
+- fixes bugs bz#1664235
+
+* Mon Jan 14 2019 Milind Changire - 3.12.2-37
+- fixes bugs bz#1662059 bz#1662828 bz#1664529
+
+* Fri Jan 04 2019 Milind Changire - 3.12.2-36
+- fixes bugs bz#1290124 bz#1655385 bz#1663232
+
+* Wed Jan 02 2019 Milind Changire - 3.12.2-35
+- fixes bugs bz#1654161
+
+* Wed Dec 19 2018 Milind Changire - 3.12.2-34
+- fixes bugs bz#1648893 bz#1656357
+
+* Tue Dec 18 2018 Milind Changire - 3.12.2-33
+- fixes bugs bz#1350745 bz#1362129 bz#1541568 bz#1597252 bz#1599220
+  bz#1633177 bz#1637564 bz#1639476 bz#1639568 bz#1643370 bz#1645480 bz#1648296
+  bz#1648893 bz#1651040 bz#1651460 bz#1652466 bz#1652537 bz#1653224 bz#1653613
+  bz#1654103 bz#1654161 bz#1655385 bz#1655578 bz#1656357 bz#1659439
+
+* Fri Dec 07 2018 Milind Changire - 3.12.2-32
+- fixes bugs bz#1656924
+
+* Wed Dec 05 2018 Milind Changire - 3.12.2-31
+- fixes bugs bz#1653073
+
+* Tue Dec 04 2018 Milind Changire - 3.12.2-30
+- fixes bugs bz#1650138 bz#1653073
+
+* Thu Nov 22 2018 Milind Changire - 3.12.2-29
+- fixes bugs bz#1650138
+
+* Tue Nov 20 2018 Milind Changire - 3.12.2-28
+- fixes bugs bz#1626350 bz#1648210 bz#1649651 bz#1650138
+
+* Fri Nov 09 2018 Milind Changire - 3.12.2-27
+- respin
+
+* Fri Nov 09 2018 Milind Changire - 3.12.2-26
+- fixes bugs bz#1479446 bz#1520882 bz#1579758 bz#1598407 bz#1599808
+  bz#1603118 bz#1619357 bz#1622001 bz#1622308 bz#1631166 bz#1631418 bz#1632563
+  bz#1634649 bz#1635071 bz#1635100 bz#1635136 bz#1636291 bz#1638069 bz#1640347
+  bz#1642854 bz#1643035 bz#1644120 bz#1644279 bz#1645916 bz#1647675
+
+* Thu Oct 25 2018 Milind Changire - 3.12.2-25
+- fixes bugs bz#1641586
+
+* Tue Oct 23 2018 Milind Changire - 3.12.2-24
+- fixes bugs bz#1618221 bz#1641489
+
+* Tue Oct 16 2018 Sunil Kumar Acharya - 3.12.2-23
+- fixes bugs bz#1631372 bz#1636902
+
+* Tue Oct 09 2018 Milind Changire - 3.12.2-22
+- fixes bugs bz#1631329 bz#1631372
+
+* Wed Oct 03 2018 Milind Changire - 3.12.2-21
+- fixes bugs bz#1623749 bz#1630997
+
+* Sat Sep 22 2018 Sunil Kumar Acharya - 3.12.2-20
+- Build respin with appropriate target version.
+
+* Sat Sep 22 2018 Sunil Kumar Acharya - 3.12.2-19
+- fixes bugs bz#1459709 bz#1610743 bz#1618221 bz#1619627 bz#1622649
+  bz#1623749 bz#1623874 bz#1624444 bz#1625622 bz#1626780 bz#1627098 bz#1627617
+  bz#1627639 bz#1630688
 
 * Mon Aug 27 2018 Milind Changire - 3.12.2-18
 - fixes bugs bz#1524336 bz#1622029 bz#1622452