diff --git a/README.debrand b/README.debrand deleted file mode 100644 index 01c46d2..0000000 --- a/README.debrand +++ /dev/null @@ -1,2 +0,0 @@ -Warning: This package was configured for automatic debranding, but the changes -failed to apply. diff --git a/SOURCES/0384-Update-rfc.sh-to-rhgs-3.5.3.patch b/SOURCES/0384-Update-rfc.sh-to-rhgs-3.5.3.patch new file mode 100644 index 0000000..4db2222 --- /dev/null +++ b/SOURCES/0384-Update-rfc.sh-to-rhgs-3.5.3.patch @@ -0,0 +1,26 @@ +From 27dc773af276e33fcca10788fae17d131c8d9bce Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Sun, 31 May 2020 15:46:24 -0400 +Subject: [PATCH 384/449] Update rfc.sh to rhgs-3.5.3 + +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index 37d551f..1dca29f 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.2"; ++branch="rhgs-3.5.3"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch b/SOURCES/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch new file mode 100644 index 0000000..2b194d3 --- /dev/null +++ b/SOURCES/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch @@ -0,0 +1,50 @@ +From 143f85f55ded7a9075408e97d05abd9568d56e7b Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Mon, 25 Nov 2019 16:35:42 +0530 +Subject: [PATCH 385/449] glusterd: start glusterd automatically on abnormal + shutdown + +If glusterd crashes or goes down abnormally, systemd should +automatically bring the glusterd up. + +With this change, systemd brings glusterd up for atmost 3 times +within time period of 1 hour. If the limit exceeds, we have to +start the glusterd manually and reset the failure count using +systemctl reset-failed. 
+ +credits: John Strunk <jstrunk@redhat.com> + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23751/ +> fixes: bz#1776264 +> Change-Id: I312d243652fb13ba028814a2ea615b67e3b10b6a +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1663557 +Change-Id: I312d243652fb13ba028814a2ea615b67e3b10b6a +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202251 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/systemd/glusterd.service.in | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in +index f604160..b944762 100644 +--- a/extras/systemd/glusterd.service.in ++++ b/extras/systemd/glusterd.service.in +@@ -15,6 +15,11 @@ ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-leve + KillMode=process + TimeoutSec=300 + SuccessExitStatus=15 ++Restart=on-abnormal ++RestartSec=60 ++StartLimitBurst=3 ++StartLimitIntervalSec=3600 ++StartLimitInterval=3600 + + [Install] + WantedBy=multi-user.target +-- +1.8.3.1 + diff --git a/SOURCES/0386-glusterd-increase-the-StartLimitBurst.patch b/SOURCES/0386-glusterd-increase-the-StartLimitBurst.patch new file mode 100644 index 0000000..ff6d0f9 --- /dev/null +++ b/SOURCES/0386-glusterd-increase-the-StartLimitBurst.patch @@ -0,0 +1,39 @@ +From 02e7afdfb740db7cfa1a2f0f79933172d172ff27 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Tue, 7 Jan 2020 15:32:13 +0530 +Subject: [PATCH 386/449] glusterd: increase the StartLimitBurst + +Based on https://bugzilla.redhat.com/show_bug.cgi?id=1782200#c6 +increasing the limit. + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23970/ +> fixes: bz#1782200 +> Change-Id: Ia885c7bdb2a90f0946c5268da894f6a4da5a69b7 +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1663557 +Change-Id: Ia885c7bdb2a90f0946c5268da894f6a4da5a69b7 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202252 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/systemd/glusterd.service.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in +index b944762..699aea3 100644 +--- a/extras/systemd/glusterd.service.in ++++ b/extras/systemd/glusterd.service.in +@@ -17,7 +17,7 @@ TimeoutSec=300 + SuccessExitStatus=15 + Restart=on-abnormal + RestartSec=60 +-StartLimitBurst=3 ++StartLimitBurst=6 + StartLimitIntervalSec=3600 + StartLimitInterval=3600 + +-- +1.8.3.1 + diff --git a/SOURCES/0387-To-fix-readdir-ahead-memory-leak.patch b/SOURCES/0387-To-fix-readdir-ahead-memory-leak.patch new file mode 100644 index 0000000..b685215 --- /dev/null +++ b/SOURCES/0387-To-fix-readdir-ahead-memory-leak.patch @@ -0,0 +1,47 @@ +From d54f087a2484695ff7ac214d39f2750fddcef2d5 Mon Sep 17 00:00:00 2001 +From: HuangShujun <549702281@qq.com> +Date: Thu, 5 Dec 2019 10:07:10 +0200 +Subject: [PATCH 387/449] To fix readdir-ahead memory leak + +Glusterfs client process has memory leak if create several files under +one folder, and delete the folder. According to statedump, the ref +counts of readdir-ahead is bigger than zero in the inode table. 
+ +Readdir-ahead get parent inode by inode_parent in rda_mark_inode_dirty +when each rda_writev_cbk,the inode ref count of parent folder will be +increased in inode_parent, but readdir-ahead do not unref it later. + +The correction is unref the parent inode at the end of +rda_mark_inode_dirty. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23815 +> Fixes: bz#1779055 +> Signed-off-by: HuangShujun <549702281@qq.com> +> Change-Id: Iee68ab1089cbc2fbc4185b93720fb1f66ee89524 + +BUG: 1781550 +Change-Id: Iee68ab1089cbc2fbc4185b93720fb1f66ee89524 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202312 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/readdir-ahead/src/readdir-ahead.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c +index 7fd4f8d..933941d 100644 +--- a/xlators/performance/readdir-ahead/src/readdir-ahead.c ++++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c +@@ -254,6 +254,7 @@ rda_mark_inode_dirty(xlator_t *this, inode_t *inode) + } + } + UNLOCK(&parent->lock); ++ inode_unref(parent); + } + + return; +-- +1.8.3.1 + diff --git a/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch b/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch new file mode 100644 index 0000000..dc23ba8 --- /dev/null +++ b/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch @@ -0,0 +1,142 @@ +From fbda9baaf7231e3237277348cc7e873f3113fd14 Mon Sep 17 00:00:00 2001 +From: l17zhou <cynthia.zhou@nokia-sbell.com.cn> +Date: Mon, 4 Nov 2019 08:45:52 +0200 +Subject: [PATCH 388/449] rpc: Cleanup SSL specific data at the time of freeing + rpc object + +Problem: At the time of cleanup rpc object ssl specific data + is not freeing so it has become a leak. + +Solution: To avoid the leak cleanup ssl specific data at the + time of cleanup rpc object + +> Credits: l17zhou <cynthia.zhou@nokia-sbell.com.cn> +> Fixes: bz#1768407 +> Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0 +> (Cherry pick from commit 54ed71dba174385ab0d8fa415e09262f6250430c) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23650/) + +Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0 +BUG: 1786516 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202308 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-transport/socket/src/socket.c | 22 ++++++++++++++++++++-- + tests/features/ssl-authz.t | 23 ++++++++++++++++++++--- + 2 files changed, 40 insertions(+), 5 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 65845ea..226b2e2 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -446,6 +446,7 @@ ssl_setup_connection_postfix(rpc_transport_t *this) + gf_log(this->name, GF_LOG_DEBUG, + "SSL verification succeeded (client: %s) (server: %s)", + this->peerinfo.identifier, this->myinfo.identifier); ++ X509_free(peer); + return gf_strdup(peer_CN); + + /* Error paths. 
*/ +@@ -1157,7 +1158,15 @@ __socket_reset(rpc_transport_t *this) + memset(&priv->incoming, 0, sizeof(priv->incoming)); + + event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx); +- ++ if (priv->use_ssl && priv->ssl_ssl) { ++ SSL_clear(priv->ssl_ssl); ++ SSL_free(priv->ssl_ssl); ++ priv->ssl_ssl = NULL; ++ } ++ if (priv->use_ssl && priv->ssl_ctx) { ++ SSL_CTX_free(priv->ssl_ctx); ++ priv->ssl_ctx = NULL; ++ } + priv->sock = -1; + priv->idx = -1; + priv->connected = -1; +@@ -3217,7 +3226,6 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + new_priv->sock = new_sock; + + new_priv->ssl_enabled = priv->ssl_enabled; +- new_priv->ssl_ctx = priv->ssl_ctx; + new_priv->connected = 1; + new_priv->is_server = _gf_true; + +@@ -4672,6 +4680,16 @@ fini(rpc_transport_t *this) + pthread_mutex_destroy(&priv->out_lock); + pthread_mutex_destroy(&priv->cond_lock); + pthread_cond_destroy(&priv->cond); ++ if (priv->use_ssl && priv->ssl_ssl) { ++ SSL_clear(priv->ssl_ssl); ++ SSL_free(priv->ssl_ssl); ++ priv->ssl_ssl = NULL; ++ } ++ if (priv->use_ssl && priv->ssl_ctx) { ++ SSL_CTX_free(priv->ssl_ctx); ++ priv->ssl_ctx = NULL; ++ } ++ + if (priv->ssl_private_key) { + GF_FREE(priv->ssl_private_key); + } +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index cae010c..132b598 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -25,6 +25,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume info; + ++TEST $CLI v set all cluster.brick-multiplex on + # Construct a cipher list that excludes CBC because of POODLE. + # http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566 + # +@@ -45,12 +46,12 @@ TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +-TEST $CLI volume create $V0 $H0:$B0/1 ++TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force + TEST $CLI volume set $V0 server.ssl on + TEST $CLI volume set $V0 client.ssl on + TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers) + TEST $CLI volume start $V0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count + + # This mount should SUCCEED because ssl-allow=* by default. This effectively + # disables SSL authorization, though authentication and encryption might still +@@ -59,11 +60,27 @@ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 + TEST ping_file $M0/before + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + ++glusterfsd_pid=`pgrep glusterfsd` ++TEST [ $glusterfsd_pid != 0 ] ++start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` ++echo "Memory consumption for glusterfsd process" ++for i in $(seq 1 100); do ++ gluster v heal $V0 info >/dev/null ++done ++ ++end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` ++diff=$((end-start)) ++ ++# If memory consumption is more than 5M some leak in SSL code path ++ ++TEST [ $diff -lt 5000 ] ++ ++ + # Set ssl-allow to a wildcard that includes our identity. + TEST $CLI volume stop $V0 + TEST $CLI volume set $V0 auth.ssl-allow Any* + TEST $CLI volume start $V0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count + + # This mount should SUCCEED because we match the wildcard. 
+ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 +-- +1.8.3.1 + diff --git a/SOURCES/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch b/SOURCES/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch new file mode 100644 index 0000000..7f20fb2 --- /dev/null +++ b/SOURCES/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch @@ -0,0 +1,297 @@ +From 50318713486e79d9258cf22e656caff402256dde Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Sun, 20 Oct 2019 22:01:01 +0530 +Subject: [PATCH 389/449] posix: Avoid diskpace error in case of overwriting + the data + +Problem: Sometime fops like posix_writev, posix_fallocate, posix_zerofile + failed and throw error ENOSPC if storage.reserve threshold limit + has reached even fops is overwriting the data + +Solution: Retry the fops in case of overwrite if diskspace check + is failed + +> Credits: kinsu <vpolakis@gmail.com> +> Change-Id: I987d73bcf47ed1bb27878df40c39751296e95fe8 +> Updates: #745 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit ca3e5905ac02fb9c373ac3de10b44f061d04cd6f) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23572/) + +Change-Id: I987d73bcf47ed1bb27878df40c39751296e95fe8 +BUG: 1787331 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202307 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/posix/bug-1651445.t | 1 + + xlators/storage/posix/src/posix-entry-ops.c | 1 - + xlators/storage/posix/src/posix-inode-fd-ops.c | 141 ++++++++++++++++++++++--- + 3 files changed, 126 insertions(+), 17 deletions(-) + +diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t +index 5248d47..4d08b69 100644 +--- a/tests/bugs/posix/bug-1651445.t ++++ b/tests/bugs/posix/bug-1651445.t +@@ -33,6 +33,7 @@ sleep 5 + # setup_lvm create lvm partition of 150M and 40M are reserve so after + # consuming more than 110M next dd should fail + TEST ! 
dd if=/dev/zero of=$M0/c bs=5M count=1 ++TEST dd if=/dev/urandom of=$M0/a bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc + + rm -rf $M0/* + +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index 283b305..bea0bbf 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -1634,7 +1634,6 @@ posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); +- DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_ENTRY_HANDLE(real_oldpath, par_oldpath, this, oldloc, NULL); +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index a2a518f..bcce06e 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -692,6 +692,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + gf_boolean_t locked = _gf_false; + posix_inode_ctx_t *ctx = NULL; + struct posix_private *priv = NULL; ++ gf_boolean_t check_space_error = _gf_false; ++ struct stat statbuf = { ++ 0, ++ }; + + DECLARE_OLD_FS_ID_VAR; + +@@ -711,7 +715,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + if (priv->disk_reserve) + posix_disk_space_check(this); + +- DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out); ++ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); ++ ++overwrite: ++ check_space_error = _gf_true; + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { +@@ -735,7 +742,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); +- goto out; ++ goto unlock; + } + + if (xdata) { +@@ -745,7 +752,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + ret = -EIO; +- goto out; ++ goto unlock; + } + } + +@@ -756,7 +763,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + "fallocate failed on %s offset: %jd, " + "len:%zu, flags: %d", + uuid_utoa(fd->inode->gfid), offset, len, flags); +- goto out; ++ goto unlock; + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); +@@ -764,16 +771,47 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); +- goto out; ++ goto unlock; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); + +-out: ++unlock: + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } ++ ++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { ++#ifdef FALLOC_FL_KEEP_SIZE ++ if (flags & FALLOC_FL_KEEP_SIZE) { ++ goto overwrite; ++ } ++#endif ++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, ++ "pfd is NULL from fd=%p", fd); ++ goto out; ++ } ++ ++ if (sys_fstat(pfd->fd, &statbuf) < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, ++ "%d", pfd->fd); ++ goto out; ++ } ++ ++ if (offset + len <= statbuf.st_size) { ++ 
gf_msg_debug(this->name, 0, ++ "io vector size will not" ++ " change disk size so allow overwrite for" ++ " fd %d", ++ pfd->fd); ++ goto overwrite; ++ } ++ } ++ ++out: + SET_TO_OLD_FS_ID(); + if (ret == ENOSPC) + ret = -ENOSPC; +@@ -1083,25 +1121,57 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + int op_ret = -1; + int op_errno = EINVAL; + dict_t *rsp_xdata = NULL; ++ gf_boolean_t check_space_error = _gf_false; ++ struct posix_fd *pfd = NULL; ++ struct stat statbuf = { ++ 0, ++ }; + +- VALIDATE_OR_GOTO(frame, out); +- VALIDATE_OR_GOTO(this, out); ++ VALIDATE_OR_GOTO(frame, unwind); ++ VALIDATE_OR_GOTO(this, unwind); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + ++overwrite: ++ check_space_error = _gf_true; + ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost, + xdata, &rsp_xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; +- goto out; ++ goto unwind; + } + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + + out: ++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { ++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, ++ "pfd is NULL from fd=%p", fd); ++ goto out; ++ } ++ ++ if (sys_fstat(pfd->fd, &statbuf) < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, ++ "%d", pfd->fd); ++ goto out; ++ } ++ ++ if (offset + len <= statbuf.st_size) { ++ gf_msg_debug(this->name, 0, ++ "io vector size will not" ++ " change disk size so allow overwrite for" ++ " fd %d", ++ pfd->fd); ++ goto overwrite; ++ } ++ } ++ ++unwind: + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, + rsp_xdata); + return 0; +@@ -1857,19 +1927,28 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_boolean_t write_append = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; ++ gf_boolean_t check_space_error = _gf_false; ++ struct stat statbuf = { ++ 0, ++ }; ++ int totlen = 0; ++ int idx = 0; + +- VALIDATE_OR_GOTO(frame, out); +- VALIDATE_OR_GOTO(this, out); +- VALIDATE_OR_GOTO(fd, out); +- VALIDATE_OR_GOTO(fd->inode, out); +- VALIDATE_OR_GOTO(vector, out); +- VALIDATE_OR_GOTO(this->private, out); ++ VALIDATE_OR_GOTO(frame, unwind); ++ VALIDATE_OR_GOTO(this, unwind); ++ VALIDATE_OR_GOTO(fd, unwind); ++ VALIDATE_OR_GOTO(fd->inode, unwind); ++ VALIDATE_OR_GOTO(vector, unwind); ++ VALIDATE_OR_GOTO(this->private, unwind); + + priv = this->private; + +- VALIDATE_OR_GOTO(priv, out); ++ VALIDATE_OR_GOTO(priv, unwind); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + ++overwrite: ++ ++ check_space_error = _gf_true; + if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "writev received on a block/char file (%s)", +@@ -2011,6 +2090,36 @@ out: + locked = _gf_false; + } + ++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { ++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, ++ "pfd is NULL from fd=%p", fd); ++ goto unwind; ++ } ++ ++ if (sys_fstat(pfd->fd, &statbuf) < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, ++ "%d", pfd->fd); ++ goto unwind; ++ } ++ ++ for (idx = 0; idx < count; idx++) { ++ totlen = vector[idx].iov_len; ++ } ++ ++ if ((offset + 
totlen <= statbuf.st_size) && ++ !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) { ++ gf_msg_debug(this->name, 0, ++ "io vector size will not" ++ " change disk size so allow overwrite for" ++ " fd %d", ++ pfd->fd); ++ goto overwrite; ++ } ++ } ++ ++unwind: + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + +-- +1.8.3.1 + diff --git a/SOURCES/0390-glusterd-deafult-options-after-volume-reset.patch b/SOURCES/0390-glusterd-deafult-options-after-volume-reset.patch new file mode 100644 index 0000000..d95ce71 --- /dev/null +++ b/SOURCES/0390-glusterd-deafult-options-after-volume-reset.patch @@ -0,0 +1,93 @@ +From 86df0ced1cac0e3c48f6149bb2f5442f8548f89e Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Wed, 25 Dec 2019 21:56:32 +0530 +Subject: [PATCH 390/449] glusterd: deafult options after volume reset + +Problem: default option itransport.address-family is disappered +in volume info output after a volume reset. + +Cause: with 3.8.0 onwards volume option transport.address-family +has default value, any volume which is created will have this +option set. So, volume info will show this in its output. But, +with reset volume, this option is not handled. + +Solution: In glusterd_enable_default_options(), we should add this +option along with other default options. This function is called +by glusterd_options_reset() with volume reset command. + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23921/ +> fixes: bz#1786478 +> Change-Id: I58f7aa24cf01f308c4efe6cae748cc3bc8b99b1d +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1781710 +Change-Id: I58f7aa24cf01f308c4efe6cae748cc3bc8b99b1d +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202258 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/optimized-basic-testcases.t | 5 +++++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 23 +++++++++++++++++++++++ + 2 files changed, 28 insertions(+) + +diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t +index d700b5e..c7e8c32 100644 +--- a/tests/bugs/glusterd/optimized-basic-testcases.t ++++ b/tests/bugs/glusterd/optimized-basic-testcases.t +@@ -69,6 +69,11 @@ TEST pidof glusterd; + TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}; + EXPECT 'Created' volinfo_field $V0 'Status'; + ++#bug-1786478 - default volume option after volume reset ++addr_family=`volinfo_field $V0 'transport.address-family'` ++TEST $CLI volume reset $V0 ++EXPECT $addr_family volinfo_field $V0 'transport.address-family' ++ + #bug-955588 - uuid validation + + uuid=`grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f2 -d=` +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index c92cdf3..6654741 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13032,6 +13032,11 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; ++#ifdef IPV6_DEFAULT ++ char *addr_family = "inet6"; ++#else ++ char *addr_family = "inet"; ++#endif + + this = THIS; + GF_ASSERT(this); +@@ -13109,6 +13114,24 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + } + } + } ++ ++ if (conf->op_version >= GD_OP_VERSION_3_9_0) { ++ if (!option 
|| !strcmp("transport.address-family", option)) { ++ if (volinfo->transport_type == GF_TRANSPORT_TCP) { ++ ret = dict_set_dynstr_with_alloc( ++ volinfo->dict, "transport.address-family", addr_family); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, ++ GD_MSG_DICT_SET_FAILED, ++ "failed to set transport." ++ "address-family on %s", ++ volinfo->volname); ++ goto out; ++ } ++ } ++ } ++ } ++ + if (conf->op_version >= GD_OP_VERSION_7_0) { + ret = dict_set_dynstr_with_alloc(volinfo->dict, + "storage.fips-mode-rchecksum", "on"); +-- +1.8.3.1 + diff --git a/SOURCES/0391-glusterd-unlink-the-file-after-killing-the-process.patch b/SOURCES/0391-glusterd-unlink-the-file-after-killing-the-process.patch new file mode 100644 index 0000000..2a88254 --- /dev/null +++ b/SOURCES/0391-glusterd-unlink-the-file-after-killing-the-process.patch @@ -0,0 +1,39 @@ +From d23859d5cbd5823b2587811aa57030436ce9e74c Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Tue, 17 Dec 2019 15:52:30 +0530 +Subject: [PATCH 391/449] glusterd: unlink the file after killing the process + +In glusterd_proc_stop(), after killing the pid +we should remove the pidfile. + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23890/ +> fixes: bz#1784375 +> Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686 +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1784211 +Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202257 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +index f55a5fd..a05c90d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +@@ -107,6 +107,8 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags) + "service, reason:%s", + proc->name, strerror(errno)); + } ++ } else { ++ (void)glusterd_unlink_file(proc->pidfile); + } + if (flags != PROC_STOP_FORCE) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch b/SOURCES/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch new file mode 100644 index 0000000..e295e4f --- /dev/null +++ b/SOURCES/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch @@ -0,0 +1,187 @@ +From a30a5fdef2e252eba9f44a3c671de8f3aa4f17d7 Mon Sep 17 00:00:00 2001 +From: Vishal Pandey <vpandey@redhat.com> +Date: Tue, 19 Nov 2019 11:39:22 +0530 +Subject: [PATCH 392/449] glusterd: Brick process fails to come up with + brickmux on + +Issue: +1- In a cluster of 3 Nodes N1, N2, N3. Create 3 volumes vol1, +vol2, vol3 with 3 bricks (one from each node) +2- Set cluster.brick-multiplex on +3- Start all 3 volumes +4- Check if all bricks on a node are running on same port +5- Kill N1 +6- Set performance.readdir-ahead for volumes vol1, vol2, vol3 +7- Bring N1 up and check volume status +8- All bricks processes not running on N1. + +Root Cause - +Since, There is a diff in volfile versions in N1 as compared +to N2 and N3 therefore glusterd_import_friend_volume() is called. +glusterd_import_friend_volume() copies the new_volinfo and deletes +old_volinfo and then calls glusterd_start_bricks(). 
+glusterd_start_bricks() looks for the volfiles and sends an rpc +request to glusterfs_handle_attach(). Now, since the volinfo +has been deleted by glusterd_delete_stale_volume() +from priv->volumes list before glusterd_start_bricks() and +glusterd_create_volfiles_and_notify_services() and +glusterd_list_add_order is called after glusterd_start_bricks(), +therefore the attach RPC req gets an empty volfile path +and that causes the brick to crash. + +Fix- Call glusterd_list_add_order() and +glusterd_create_volfiles_and_notify_services before +glusterd_start_bricks() cal is made in glusterd_import_friend_volume + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23724/ +> Change-Id: Idfe0e8710f7eb77ca3ddfa1cabeb45b2987f41aa +> Fixes: bz#1773856 +> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +BUG: 1683602 +Change-Id: Idfe0e8710f7eb77ca3ddfa1cabeb45b2987f41aa +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202255 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../glusterd/brick-mux-validation-in-cluster.t | 61 +++++++++++++++++++++- + xlators/mgmt/glusterd/src/glusterd-utils.c | 28 +++++----- + 2 files changed, 75 insertions(+), 14 deletions(-) + +diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t +index 4e57038..f088dbb 100644 +--- a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t ++++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t +@@ -7,6 +7,20 @@ function count_brick_processes { + pgrep glusterfsd | wc -l + } + ++function count_brick_pids { ++ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ ++ | grep -v "N/A" | sort | uniq | wc -l ++} ++ ++function count_N/A_brick_pids { ++ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ ++ | grep -- '\-1' | sort | uniq | wc -l ++} ++ ++function check_peers { ++ $CLI_2 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ + cleanup; + + TEST launch_cluster 3 +@@ -48,4 +62,49 @@ TEST $CLI_1 volume stop $V1 + + EXPECT 3 count_brick_processes + +-cleanup ++TEST $CLI_1 volume stop $META_VOL ++ ++TEST $CLI_1 volume delete $META_VOL ++TEST $CLI_1 volume delete $V0 ++TEST $CLI_1 volume delete $V1 ++ ++#bug-1773856 - Brick process fails to come up with brickmux on ++ ++TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 $H3:$B3/${V0}1 force ++TEST $CLI_1 volume start $V0 ++ ++ ++EXPECT 3 count_brick_processes ++ ++#create and start a new volume ++TEST $CLI_1 volume create $V1 $H1:$B1/${V1}2 $H2:$B2/${V1}2 $H3:$B3/${V1}2 force ++TEST $CLI_1 volume start $V1 ++ ++EXPECT 3 count_brick_processes ++ ++V2=patchy2 ++TEST $CLI_1 volume create $V2 $H1:$B1/${V2}3 $H2:$B2/${V2}3 $H3:$B3/${V2}3 force ++TEST $CLI_1 volume start $V2 ++ ++EXPECT 3 count_brick_processes ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids ++ ++TEST kill_node 1 ++ ++sleep 10 ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers; ++ ++$CLI_2 volume set $V0 performance.readdir-ahead on ++$CLI_2 volume set $V1 performance.readdir-ahead on ++ ++TEST $glusterd_1; ++ ++sleep 10 ++ ++EXPECT 4 count_brick_processes ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids ++ ++cleanup; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c 
+index 6654741..1b78812 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -4988,16 +4988,6 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + glusterd_volinfo_unref(old_volinfo); + } + +- if (glusterd_is_volume_started(new_volinfo)) { +- (void)glusterd_start_bricks(new_volinfo); +- if (glusterd_is_snapd_enabled(new_volinfo)) { +- svc = &(new_volinfo->snapd.svc); +- if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { +- gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); +- } +- } +- } +- + ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, +@@ -5007,19 +4997,31 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + goto out; + } + +- ret = glusterd_create_volfiles_and_notify_services(new_volinfo); ++ ret = glusterd_create_volfiles(new_volinfo); + if (ret) + goto out; + ++ glusterd_list_add_order(&new_volinfo->vol_list, &priv->volumes, ++ glusterd_compare_volume_name); ++ ++ if (glusterd_is_volume_started(new_volinfo)) { ++ (void)glusterd_start_bricks(new_volinfo); ++ if (glusterd_is_snapd_enabled(new_volinfo)) { ++ svc = &(new_volinfo->snapd.svc); ++ if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { ++ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); ++ } ++ } ++ } ++ + ret = glusterd_import_quota_conf(peer_data, count, new_volinfo, "volume"); + if (ret) { + gf_event(EVENT_IMPORT_QUOTA_CONF_FAILED, "volume=%s", + new_volinfo->volname); + goto out; + } +- glusterd_list_add_order(&new_volinfo->vol_list, &priv->volumes, +- glusterd_compare_volume_name); + ++ ret = glusterd_fetchspec_notify(this); + out: + gf_msg_debug("glusterd", 0, "Returning with ret: %d", ret); + return ret; +-- +1.8.3.1 + diff --git a/SOURCES/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch b/SOURCES/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch new file mode 100644 index 0000000..bb93180 --- /dev/null +++ b/SOURCES/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch @@ -0,0 +1,129 @@ +From b528c21e6fedc9ac841942828b82e0c808da5efb Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Thu, 2 Jan 2020 12:05:12 +0530 +Subject: [PATCH 393/449] afr: restore timestamp of files during metadata heal + +For files: During metadata heal, we restore timestamps +only for non-regular (char, block etc.) files. 
+Extenting it for regular files as timestamp is updated +via touch command also + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23953/ +> fixes: bz#1787274 +> Change-Id: I26fe4fb6dff679422ba4698a7f828bf62ca7ca18 +> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> + +BUG: 1761531 +Change-Id: I26fe4fb6dff679422ba4698a7f828bf62ca7ca18 +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202332 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bug-1761531-metadata-heal-restore-time.t | 74 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-metadata.c | 8 +-- + 2 files changed, 76 insertions(+), 6 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t + +diff --git a/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t +new file mode 100644 +index 0000000..7e24eae +--- /dev/null ++++ b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t +@@ -0,0 +1,74 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++cleanup ++ ++GET_MDATA_PATH=$(dirname $0)/../../utils ++build_tester $GET_MDATA_PATH/get-mdata-xattr.c ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0..2} ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++TEST touch $M0/a ++sleep 1 ++TEST kill_brick $V0 $H0 $B0/brick0 ++TEST touch $M0/a ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++mtime0=$(get_mtime $B0/brick0/a) ++mtime1=$(get_mtime $B0/brick1/a) ++TEST [ $mtime0 -eq $mtime1 ] ++ ++ctime0=$(get_ctime $B0/brick0/a) ++ctime1=$(get_ctime $B0/brick1/a) ++TEST [ $ctime0 -eq $ctime1 ] ++ ++############################################################################### ++# Repeat the test with ctime feature disabled. 
++TEST $CLI volume set $V0 features.ctime off ++ ++TEST touch $M0/b ++sleep 1 ++TEST kill_brick $V0 $H0 $B0/brick0 ++TEST touch $M0/b ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++mtime2=$(get_mtime $B0/brick0/b) ++mtime3=$(get_mtime $B0/brick1/b) ++TEST [ $mtime2 -eq $mtime3 ] ++ ++TEST rm $GET_MDATA_PATH/get-mdata-xattr ++ ++TEST force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index ecfa791..f4e31b6 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -421,12 +421,8 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode) + if (ret) + goto unlock; + +- /* Restore atime/mtime for files that don't need data heal as +- * restoring timestamps happens only as a part of data-heal. +- */ +- if (!IA_ISREG(locked_replies[source].poststat.ia_type)) +- afr_selfheal_restore_time(frame, this, inode, source, healed_sinks, +- locked_replies); ++ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks, ++ locked_replies); + + ret = afr_selfheal_undo_pending( + frame, this, inode, sources, sinks, healed_sinks, undid_pending, +-- +1.8.3.1 + diff --git a/SOURCES/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch b/SOURCES/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch new file mode 100644 index 0000000..96a8f74 --- /dev/null +++ b/SOURCES/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch @@ -0,0 +1,38 @@ +From 768a6d9bca86c0a50128b8776c11ef2b6d36388d Mon Sep 17 00:00:00 2001 +From: Vishal Pandey <vpandey@redhat.com> +Date: Thu, 21 Nov 2019 12:56:34 +0530 +Subject: [PATCH 394/449] man/gluster: Add volume top command to gluster man + page + +> Upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23735/ +> Change-Id: Ib74607d2b2e5a1a0316221f1176a7dcccea632d4 +> Fixes: bz#1774866 +> Signed-off-by: Vishal Pandey <vpandey@redhat.com> + +BUG: 1754391 +Change-Id: Ib74607d2b2e5a1a0316221f1176a7dcccea632d4 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202333 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + doc/gluster.8 | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/doc/gluster.8 b/doc/gluster.8 +index da6472d..88cbf44 100644 +--- a/doc/gluster.8 ++++ b/doc/gluster.8 +@@ -113,6 +113,9 @@ Rotate the log file for corresponding volume/brick. + \fB\ volume profile <VOLNAME> {start|info [peek|incremental [peek]|cumulative|clear]|stop} [nfs] \fR + Profile operations on the volume. Once started, volume profile <volname> info provides cumulative statistics of the FOPs performed. 
+ .TP ++\fB\ volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>] \fR ++Generates a profile of a volume representing the performance and bottlenecks/hotspots of each brick. ++.TP + \fB\ volume statedump <VOLNAME> [[nfs|quotad] [all|mem|iobuf|callpool|priv|fd|inode|history]... | [client <hostname:process-id>]] \fR + Dumps the in memory state of the specified process or the bricks of the volume. + .TP +-- +1.8.3.1 + diff --git a/SOURCES/0395-Cli-Removing-old-log-rotate-command.patch b/SOURCES/0395-Cli-Removing-old-log-rotate-command.patch new file mode 100644 index 0000000..0918777 --- /dev/null +++ b/SOURCES/0395-Cli-Removing-old-log-rotate-command.patch @@ -0,0 +1,111 @@ +From 5b3fcc8db86b4dc7af1eb63315ca2ff41c60fdea Mon Sep 17 00:00:00 2001 +From: kshithijiyer <kshithij.ki@gmail.com> +Date: Sat, 30 Nov 2019 15:25:11 +0530 +Subject: [PATCH 395/449] [Cli] Removing old log rotate command. + +The old command for log rotate is still present removing +it completely. Also adding testcase to test the +log rotate command with both the old as well as the new command +and fixing testcase which use the old syntax to use the new +one. + +Code to be removed: +1. In cli-cmd-volume.c from struct cli_cmd volume_cmds[]: +{"volume log rotate <VOLNAME> [BRICK]", cli_cmd_log_rotate_cbk, + "rotate the log file for corresponding volume/brick" + " NOTE: This is an old syntax, will be deprecated from next release."}, + +2. In cli-cmd-volume.c from cli_cmd_log_rotate_cbk(): + ||(strcmp("rotate", words[2]) == 0))) + +3. In cli-cmd-parser.c from cli_cmd_log_rotate_parse() +if (strcmp("rotate", words[2]) == 0) + volname = (char *)words[3]; +else + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23392/ +> fixes: bz#1750387 +> Change-Id: I56e4d295044e8d5fd1fc0d848bc87e135e9e32b4 +> Signed-off-by: kshithijiyer <kshithij.ki@gmail.com> + +BUG: 1784415 +Change-Id: I56e4d295044e8d5fd1fc0d848bc87e135e9e32b4 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202334 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 2 -- + cli/src/cli-cmd-volume.c | 7 +------ + tests/bugs/glusterd/optimized-basic-testcases.t | 3 ++- + tests/bugs/glusterfs-server/bug-852147.t | 2 +- + 4 files changed, 4 insertions(+), 10 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 4456a7b..ac0a263 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -2592,8 +2592,6 @@ cli_cmd_log_rotate_parse(const char **words, int wordcount, dict_t **options) + + if (strcmp("rotate", words[3]) == 0) + volname = (char *)words[2]; +- else if (strcmp("rotate", words[2]) == 0) +- volname = (char *)words[3]; + GF_ASSERT(volname); + + ret = dict_set_str(dict, "volname", volname); +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 754d333..f33fc99 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2349,8 +2349,7 @@ cli_cmd_log_rotate_cbk(struct cli_state *state, struct cli_cmd_word *word, + goto out; + } + +- if (!((strcmp("rotate", words[2]) == 0) || +- (strcmp("rotate", words[3]) == 0))) { ++ if (!(strcmp("rotate", words[3]) == 0)) { + cli_usage_out(word->pattern); + parse_error = 1; + goto out; +@@ -3401,10 +3400,6 @@ struct cli_cmd volume_cmds[] = { + {"volume 
log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk, + "rotate the log file for corresponding volume/brick"}, + +- {"volume log rotate <VOLNAME> [BRICK]", cli_cmd_log_rotate_cbk, +- "rotate the log file for corresponding volume/brick" +- " NOTE: This is an old syntax, will be deprecated from next release."}, +- + {"volume sync <HOSTNAME> [all|<VOLNAME>]", cli_cmd_sync_volume_cbk, + "sync the volume information from a peer"}, + +diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t +index c7e8c32..862f329 100644 +--- a/tests/bugs/glusterd/optimized-basic-testcases.t ++++ b/tests/bugs/glusterd/optimized-basic-testcases.t +@@ -129,7 +129,8 @@ TEST ! $CLI volume set all $V0 cluster.op-version $OP_VERS_NEW + + #bug-1022055 - validate log rotate command + +-TEST $CLI volume log rotate $V0; ++TEST ! $CLI volume log rotate $V0; ++TEST $CLI volume log $V0 rotate; + + #bug-1092841 - validating barrier enable/disable + +diff --git a/tests/bugs/glusterfs-server/bug-852147.t b/tests/bugs/glusterfs-server/bug-852147.t +index c644cfa..75db2a2 100755 +--- a/tests/bugs/glusterfs-server/bug-852147.t ++++ b/tests/bugs/glusterfs-server/bug-852147.t +@@ -66,7 +66,7 @@ ren_file=$log_file".*" + rm -rf $ren_file + + #Initiating log rotate +-TEST $CLI volume log rotate $V0 ++TEST $CLI volume log $V0 rotate + + #Capturing new log file's size + new_file_size=`file-size $log_file` +-- +1.8.3.1 + diff --git a/SOURCES/0396-Updating-gluster-manual.patch b/SOURCES/0396-Updating-gluster-manual.patch new file mode 100644 index 0000000..bb33d10 --- /dev/null +++ b/SOURCES/0396-Updating-gluster-manual.patch @@ -0,0 +1,56 @@ +From 728aab1c1cfcf352d4ca1fde0b80044dc24bd9fa Mon Sep 17 00:00:00 2001 +From: Rishubh Jain <risjain@redhat.com> +Date: Sun, 18 Aug 2019 18:02:57 +0530 +Subject: [PATCH 396/449] Updating gluster manual. + +Adding disperse-data to gluster manual under +volume create command + +> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23258/ +> Change-Id: Ic9eb47c9e71a1d7a11af9394c615c8e90f8d1d69 +> Fixes: bz#1668239 +> Signed-off-by: Rishubh Jain <risjain@redhat.com> +> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> + +BUG: 1667954 +Change-Id: Ic9eb47c9e71a1d7a11af9394c615c8e90f8d1d69 +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202342 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + doc/gluster.8 | 2 +- + tests/basic/glusterd/disperse-create.t | 4 ++++ + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/doc/gluster.8 b/doc/gluster.8 +index 88cbf44..66bdb48 100644 +--- a/doc/gluster.8 ++++ b/doc/gluster.8 +@@ -41,7 +41,7 @@ List all volumes in cluster + \fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad|tierd]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR + Display status of all or specified volume(s)/brick + .TP +-\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [disperse [<COUNT>]] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... \fR ++\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] [disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... <TA-BRICK> \fR + Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp). 
+ To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option. + .TP +diff --git a/tests/basic/glusterd/disperse-create.t b/tests/basic/glusterd/disperse-create.t +index 384c675..db8a621 100644 +--- a/tests/basic/glusterd/disperse-create.t ++++ b/tests/basic/glusterd/disperse-create.t +@@ -20,6 +20,10 @@ TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/b7 $H0:$B0/b8 $H0:$B + EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks" + + TEST $CLI volume delete $V0 ++TEST $CLI volume create $V0 disperse-data 2 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12 ++EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks" ++ ++TEST $CLI volume delete $V0 + TEST $CLI volume create $V0 redundancy 1 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12 + EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks" + +-- +1.8.3.1 + diff --git a/SOURCES/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch b/SOURCES/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch new file mode 100644 index 0000000..6694813 --- /dev/null +++ b/SOURCES/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch @@ -0,0 +1,52 @@ +From 73cef29731c0d7b8b4f3b880c032dc232b8fcc31 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Thu, 4 Jun 2020 16:06:44 +0530 +Subject: [PATCH 397/449] mgmt/brick-mux: Avoid sending two response when + attach is failed. + +We were sending two response back to glusterd when an attach is +failed. One from the handler function glusterfs_handle_attach and +another from rpcsvc_check_and_reply_error. It was causing problems +like ref leaks, transport disconnect etc. + +> Change-Id: I3bb5b59959530760b568d52becb519499b3dcd2b +> updates: bz#1785143 +> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +> (Cherry pick from commit 42f484dcecd9942611396d9bd2ad3a39019b0e1f) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23906/) + +Change-Id: I3bb5b59959530760b568d52becb519499b3dcd2b +BUG: 1776901 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202346 +Reviewed-by: Sanju Rakonde <srakonde@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfsd/src/glusterfsd-mgmt.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index 15acc10..61d1b21 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -954,7 +954,15 @@ glusterfs_handle_attach(rpcsvc_request_t *req) + ret = -1; + } + +- glusterfs_translator_info_response_send(req, ret, NULL, NULL); ++ ret = glusterfs_translator_info_response_send(req, ret, NULL, NULL); ++ if (ret) { ++ /* Response sent back to glusterd, req is already destroyed. So ++ * resetting the ret to 0. Otherwise another response will be ++ * send from rpcsvc_check_and_reply_error. Which will lead to ++ * double resource leak. 
++ */ ++ ret = 0; ++ } + + out: + UNLOCK(&ctx->volfile_lock); +-- +1.8.3.1 + diff --git a/SOURCES/0398-ec-change-error-message-for-heal-commands-for-disper.patch b/SOURCES/0398-ec-change-error-message-for-heal-commands-for-disper.patch new file mode 100644 index 0000000..5779539 --- /dev/null +++ b/SOURCES/0398-ec-change-error-message-for-heal-commands-for-disper.patch @@ -0,0 +1,75 @@ +From 03d2c7b52da5efd6ad660315a0548c8b91e51439 Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Sun, 22 Dec 2019 22:52:30 +0530 +Subject: [PATCH 398/449] ec: change error message for heal commands for + disperse volume + +Currently when we issue a heal statistics or similar commands +for disperse volume, it fails with message "Volume is not of +type replicate." Adding message "this command is supported for +volumes of type replicate" to reflect supportability and better +understanding of heal functionality for disperse volumes. + +> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23916/ +> fixes: bz#1785998 +> Change-Id: I9688a9fdf427cb6f657cfd5b8db2f76a6c56f6e2 +> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> + +BUG: 1487177 +Change-Id: I9688a9fdf427cb6f657cfd5b8db2f76a6c56f6e2 +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202344 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + heal/src/glfs-heal.c | 15 ++++++++++----- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 +++- + 2 files changed, 13 insertions(+), 6 deletions(-) + +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 7e37e47..125b12c 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -1726,14 +1726,19 @@ main(int argc, char **argv) + goto out; + } + ++ char *var_str = (heal_op == GF_SHD_OP_INDEX_SUMMARY || ++ heal_op == GF_SHD_OP_HEAL_SUMMARY) ++ ? "replicate/disperse" ++ : "replicate"; ++ + ret = glfsh_validate_volume(top_subvol, heal_op); + if (ret < 0) { + ret = -EINVAL; +- gf_asprintf(&op_errstr, "Volume %s is not of type %s", volname, +- (heal_op == GF_SHD_OP_INDEX_SUMMARY || +- heal_op == GF_SHD_OP_HEAL_SUMMARY) +- ? "replicate/disperse" +- : "replicate"); ++ gf_asprintf(&op_errstr, ++ "This command is supported " ++ "for only volumes of %s type. Volume %s " ++ "is not of type %s", ++ var_str, volname, var_str); + goto out; + } + rootloc.inode = inode_ref(top_subvol->itable->root); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 076bc80..93042ab 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2008,7 +2008,9 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + if (!glusterd_is_volume_replicate(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), +- "Volume %s is not of type " ++ "This command is supported " ++ "for only volume of replicated " ++ "type. 
Volume %s is not of type " + "replicate", + volinfo->volname); + *op_errstr = gf_strdup(msg); +-- +1.8.3.1 + diff --git a/SOURCES/0399-glusterd-coverity-fixes.patch b/SOURCES/0399-glusterd-coverity-fixes.patch new file mode 100644 index 0000000..8052a46 --- /dev/null +++ b/SOURCES/0399-glusterd-coverity-fixes.patch @@ -0,0 +1,79 @@ +From 1ebd2a3227469b1775f19c8f78af7d3d19f749a3 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Fri, 26 Apr 2019 08:47:12 +0530 +Subject: [PATCH 399/449] glusterd: coverity fixes + +1400775 - USE_AFTER_FREE +1400742 - Missing Unlock +1400736 - CHECKED_RETURN +1398470 - Missing Unlock + +Missing unlock is the tricky one, we have had annotation added, but +coverity still continued to complaint. Added pthread_mutex_unlock to +clean up the lock before destroying it to see if it makes coverity +happy. + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22634/ +> Updates: bz#789278 +> Change-Id: I1d892612a17f805144d96c1b15004a85a1639414 +> Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1787310 +Change-Id: I1d892612a17f805144d96c1b15004a85a1639414 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202343 +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 1 + + xlators/mgmt/glusterd/src/glusterd-sm.c | 1 - + xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +++++++- + 3 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +index f24c86e..8c1feeb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +@@ -48,6 +48,7 @@ glusterd_peerinfo_destroy(struct rcu_head *head) + } + + glusterd_sm_tr_log_delete(&peerinfo->sm_log); ++ pthread_mutex_unlock(&peerinfo->delete_lock); + pthread_mutex_destroy(&peerinfo->delete_lock); + GF_FREE(peerinfo); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 54a7bd1..044da3d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -868,7 +868,6 @@ glusterd_ac_friend_remove(glusterd_friend_sm_event_t *event, void *ctx) + "Cleanup returned: %d", ret); + } + out: +- /* coverity[ LOCK] */ + return 0; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 1b78812..a1299bc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5840,7 +5840,13 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count, + /* PID file is copied once brick has attached + successfully + */ +- glusterd_copy_file(pidfile1, pidfile2); ++ ret = glusterd_copy_file(pidfile1, pidfile2); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Could not copy file %s to %s", pidfile1, pidfile2); ++ goto out; ++ } ++ + brickinfo->status = GF_BRICK_STARTED; + brickinfo->rpc = rpc_clnt_ref(other_brick->rpc); + gf_log(THIS->name, GF_LOG_INFO, "brick %s is attached successfully", +-- +1.8.3.1 + diff --git a/SOURCES/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch b/SOURCES/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch new file mode 100644 index 
0000000..dd1ea52 --- /dev/null +++ b/SOURCES/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch @@ -0,0 +1,98 @@ +From 12ed9226fa24d073ab2b89692194b454a194c379 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Thu, 4 Jun 2020 15:14:29 +0530 +Subject: [PATCH 400/449] cli: throw a warning if replica count greater than 3 + +As volumes with replica count greater than 3 are not +supported, a warning message is be thrown to user +while creating the volume with replica count greater +than 3 or while converting a volume to replica > 3 +volume by add-brick/remove-brick operations. + +Label: DOWNSTREAM ONLY + +BUG: 1763129 +Change-Id: I5a32a5a2d99b5175fb692dfcab27396089f24b72 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202338 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index ac0a263..5e7ce53 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -619,6 +619,23 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words, + } + } + } ++ ++ if (replica_count > 3) { ++ if (strcmp(words[wordcount - 1], "force")) { ++ question = ++ "Volumes with replica count greater than 3 are" ++ "not supported. \nDo you still want to continue?\n"; ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "Volume create " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } ++ } ++ + ret = dict_set_int32(dict, "replica-count", replica_count); + if (ret) + goto out; +@@ -1815,6 +1832,20 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words, + goto out; + } + } ++ } else if (count > 3) { ++ if (strcmp(words[wordcount - 1], "force")) { ++ question = ++ "Volumes with replica count greater than 3 are" ++ "not supported. \nDo you still want to continue?\n"; ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "add-brick " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } + } + } else if ((strcmp(w, "stripe")) == 0) { + cli_err("stripe option not supported"); +@@ -2082,6 +2113,20 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words, + goto out; + } + } ++ } else if (count > 3) { ++ if (strcmp(words[wordcount - 1], "force")) { ++ ques = ++ "Volumes with replica count greater than 3 are" ++ "not supported. 
\nDo you still want to continue?\n"; ++ answer = cli_cmd_get_confirmation(state, ques); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "Remove-brick " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } + } + + ret = dict_set_int32(dict, "replica-count", count); +-- +1.8.3.1 + diff --git a/SOURCES/0401-cli-change-the-warning-message.patch b/SOURCES/0401-cli-change-the-warning-message.patch new file mode 100644 index 0000000..5c3e895 --- /dev/null +++ b/SOURCES/0401-cli-change-the-warning-message.patch @@ -0,0 +1,70 @@ +From 704bf84d432e1eea1534e35ee27d4116a7273146 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Thu, 4 Jun 2020 16:15:35 +0530 +Subject: [PATCH 401/449] cli: change the warning message + +while creating the replica 2 volume or converting +a volume to replica 2 volume, we issue a warning +saying "replica 2 volumes are prone to split brain". +As the support for replica 2 volumes has been deprecated, +warning message should be changed accordingly to reflect +the same. + +Label: DOWNSTREAM ONLY + +BUG: 1763124 +Change-Id: If55e5412cda2e4a21a6359492d8d704dd702530d +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202348 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 5e7ce53..7446b95 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -603,8 +603,8 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words, + if (replica_count == 2) { + if (strcmp(words[wordcount - 1], "force")) { + question = +- "Replica 2 volumes are prone" +- " to split-brain. Use " ++ "Support for replica 2 volumes stands deprecated as " ++ "they are prone to split-brain. Use " + "Arbiter or Replica 3 to " + "avoid this.\n" + "Do you still want to " +@@ -1817,9 +1817,9 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words, + if (count == 2) { + if (strcmp(words[wordcount - 1], "force")) { + question = +- "Replica 2 volumes are prone to " +- "split-brain. Use Arbiter or " +- "Replica 3 to avaoid this. See: " ++ "Support for replica 2 volumes stands deprecated as they " ++ "are prone to split-brain. Use Arbiter or " ++ "Replica 3 to avoid this. See: " + "http://docs.gluster.org/en/latest/Administrator%20Guide/" + "Split%20brain%20and%20ways%20to%20deal%20with%20it/." + "\nDo you still want to continue?\n"; +@@ -2098,9 +2098,9 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words, + if (count == 2) { + if (strcmp(words[wordcount - 1], "force")) { + ques = +- "Replica 2 volumes are prone to " +- "split-brain. Use Arbiter or Replica 3 " +- "to avaoid this. See: " ++ "Support for replica 2 volumes stands deprecated as they " ++ "are prone to split-brain. Use Arbiter or Replica 3 " ++ "to avoid this. See: " + "http://docs.gluster.org/en/latest/Administrator%20Guide/" + "Split%20brain%20and%20ways%20to%20deal%20with%20it/." 
+ "\nDo you still want to continue?\n"; +-- +1.8.3.1 + diff --git a/SOURCES/0402-afr-wake-up-index-healer-threads.patch b/SOURCES/0402-afr-wake-up-index-healer-threads.patch new file mode 100644 index 0000000..34ca329 --- /dev/null +++ b/SOURCES/0402-afr-wake-up-index-healer-threads.patch @@ -0,0 +1,198 @@ +From ecaa0f10820f4b6e803021919ce59a43aedf356b Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 4 Jun 2020 16:15:35 +0530 +Subject: [PATCH 402/449] afr: wake up index healer threads + +...whenever shd is re-enabled after disabling or there is a change in +`cluster.heal-timeout`, without needing to restart shd or waiting for the +current `cluster.heal-timeout` seconds to expire. + +> Upstream patch link:https://review.gluster.org/#/c/glusterfs/+/23288/ +> Change-Id: Ia5ebd7c8e9f5b54cba3199c141fdd1af2f9b9bfe +> fixes: bz#1744548 +> Reported-by: Glen Kiessling <glenk1973@hotmail.com> +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +BUG: 1764091 +Change-Id: I42aa0807f09b5a09510fe9efb4a1697dad3410a3 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202368 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/replicate/bug-1744548-heal-timeout.t | 42 +++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 6 ++-- + xlators/cluster/afr/src/afr-self-heald.c | 14 ++++++--- + xlators/cluster/afr/src/afr-self-heald.h | 3 -- + xlators/cluster/afr/src/afr.c | 10 ++++++ + xlators/cluster/afr/src/afr.h | 2 ++ + 6 files changed, 66 insertions(+), 11 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1744548-heal-timeout.t + +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +new file mode 100644 +index 0000000..3cb73bc +--- /dev/null ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -0,0 +1,42 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++TEST ! $CLI volume heal $V0 ++ ++# Enable shd and verify that index crawl is triggered immediately. ++TEST $CLI volume profile $V0 start ++TEST $CLI volume profile $V0 info clear ++TEST $CLI volume heal $V0 enable ++TEST $CLI volume heal $V0 ++# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes ++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` ++TEST [ "$COUNT" == "333" ] ++ ++# Check that a change in heal-timeout is honoured immediately. ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++sleep 10 ++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` ++# Two crawls must have happened. ++TEST [ "$COUNT" == "666" ] ++ ++# shd must not heal if it is disabled and heal-timeout is changed. 
++TEST $CLI volume heal $V0 disable ++TEST $CLI volume profile $V0 info clear ++TEST $CLI volume set $V0 cluster.heal-timeout 6 ++sleep 6 ++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` ++TEST [ -z $COUNT ] ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 3690b84..eef7fd2 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5613,10 +5613,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2) + * b) Already heard from everyone, but we now got a child-up + * event. + */ +- if (have_heard_from_all && priv->shd.iamshd) { +- for (i = 0; i < priv->child_count; i++) +- if (priv->child_up[i]) +- afr_selfheal_childup(this, i); ++ if (have_heard_from_all) { ++ afr_selfheal_childup(this, priv); + } + } + out: +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 7eb1207..95ac5f2 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -1258,12 +1258,18 @@ out: + return ret; + } + +-int +-afr_selfheal_childup(xlator_t *this, int subvol) ++void ++afr_selfheal_childup(xlator_t *this, afr_private_t *priv) + { +- afr_shd_index_healer_spawn(this, subvol); ++ int subvol = 0; + +- return 0; ++ if (!priv->shd.iamshd) ++ return; ++ for (subvol = 0; subvol < priv->child_count; subvol++) ++ if (priv->child_up[subvol]) ++ afr_shd_index_healer_spawn(this, subvol); ++ ++ return; + } + + int +diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h +index 7de7c43..1990539 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.h ++++ b/xlators/cluster/afr/src/afr-self-heald.h +@@ -60,9 +60,6 @@ typedef struct { + } afr_self_heald_t; + + int +-afr_selfheal_childup(xlator_t *this, int subvol); +- +-int + afr_selfheal_daemon_init(xlator_t *this); + + int +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 33258a0..8f9e71f 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -141,6 +141,7 @@ reconfigure(xlator_t *this, dict_t *options) + afr_private_t *priv = NULL; + xlator_t *read_subvol = NULL; + int read_subvol_index = -1; ++ int timeout_old = 0; + int ret = -1; + int index = -1; + char *qtype = NULL; +@@ -150,6 +151,7 @@ reconfigure(xlator_t *this, dict_t *options) + char *locking_scheme = NULL; + gf_boolean_t consistent_io = _gf_false; + gf_boolean_t choose_local_old = _gf_false; ++ gf_boolean_t enabled_old = _gf_false; + + priv = this->private; + +@@ -255,11 +257,13 @@ reconfigure(xlator_t *this, dict_t *options) + GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options, + bool, out); + ++ enabled_old = priv->shd.enabled; + GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out); + + GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool, + out); + ++ timeout_old = priv->shd.timeout; + GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out); + + GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options, +@@ -283,6 +287,12 @@ reconfigure(xlator_t *this, dict_t *options) + consistent_io = _gf_false; + priv->consistent_io = consistent_io; + ++ if (priv->shd.enabled) { ++ if ((priv->shd.enabled != enabled_old) || ++ (timeout_old != priv->shd.timeout)) ++ afr_selfheal_childup(this, priv); ++ } ++ + ret = 0; + out: + return ret; +diff --git a/xlators/cluster/afr/src/afr.h 
b/xlators/cluster/afr/src/afr.h +index e731cfa..18f1a6a 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -1332,4 +1332,6 @@ afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this, + void + afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this); + ++void ++afr_selfheal_childup(xlator_t *this, afr_private_t *priv); + #endif /* __AFR_H__ */ +-- +1.8.3.1 + diff --git a/SOURCES/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch b/SOURCES/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch new file mode 100644 index 0000000..569bdc0 --- /dev/null +++ b/SOURCES/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch @@ -0,0 +1,84 @@ +From b311385a3c4bd56d69d1fa7e9bd3d9a2ae5c344e Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 7 Oct 2019 12:27:01 +0530 +Subject: [PATCH 403/449] Fix spurious failure in bug-1744548-heal-timeout.t + +Script was assuming that the heal would have triggered +by the time test was executed, which may not be the case. +It can lead to following failures when the race happens: + +... +18:29:45 not ok 14 [ 85/ 1] < 26> '[ 331 == 333 ]' -> '' +... +18:29:45 not ok 16 [ 10097/ 1] < 33> '[ 668 == 666 ]' -> '' + +Heal on 3rd brick didn't start completely first time the command was executed. +So the extra count got added to the next profile info. + +Fixed it by depending on cumulative stats and waiting until the count is +satisfied using EXPECT_WITHIN + +> Upstream patch link:https://review.gluster.org/23523 +>fixes: bz#1759002 +>Change-Id: I3b410671c902d6b1458a757fa245613cb29d967d +>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1764091 +Change-Id: Ic4d16b6c8a1bbc35735567d60fd0383456b9f534 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202369 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/replicate/bug-1744548-heal-timeout.t | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +index 3cb73bc..0aaa3ea 100644 +--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -4,6 +4,11 @@ + . $(dirname $0)/../../volume.rc + . $(dirname $0)/../../afr.rc + ++function get_cumulative_opendir_count { ++#sed 'n:d' prints odd-numbered lines ++ $CLI volume profile $V0 info |grep OPENDIR|sed 'n;d' | awk '{print $8}'|tr -d '\n' ++} ++ + cleanup; + + TEST glusterd; +@@ -20,23 +25,23 @@ TEST ! $CLI volume heal $V0 + TEST $CLI volume profile $V0 start + TEST $CLI volume profile $V0 info clear + TEST $CLI volume heal $V0 enable +-TEST $CLI volume heal $V0 + # Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes +-COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +-TEST [ "$COUNT" == "333" ] ++EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count + + # Check that a change in heal-timeout is honoured immediately. + TEST $CLI volume set $V0 cluster.heal-timeout 5 + sleep 10 +-COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` + # Two crawls must have happened. +-TEST [ "$COUNT" == "666" ] ++EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count + + # shd must not heal if it is disabled and heal-timeout is changed. 
+ TEST $CLI volume heal $V0 disable ++#Wait for configuration update and any opendir fops to complete ++sleep 10 + TEST $CLI volume profile $V0 info clear + TEST $CLI volume set $V0 cluster.heal-timeout 6 +-sleep 6 ++#Better to wait for more than 6 seconds to account for configuration updates ++sleep 10 + COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` + TEST [ -z $COUNT ] + cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0404-tests-Fix-spurious-failure.patch b/SOURCES/0404-tests-Fix-spurious-failure.patch new file mode 100644 index 0000000..9cbb6ea --- /dev/null +++ b/SOURCES/0404-tests-Fix-spurious-failure.patch @@ -0,0 +1,38 @@ +From b65ca1045910bc18c601681788eb322dbb8ec2fa Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 14 Oct 2019 10:29:31 +0530 +Subject: [PATCH 404/449] tests: Fix spurious failure + +> Upstream patch:https://review.gluster.org/23546 +> fixes: bz#1759002 +> Change-Id: I4d49e1c2ca9b3c1d74b9dd5a30f1c66983a76529 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1764091 +Change-Id: I8b66f08cce7a87788867c6373aed71d6fc65155f +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202370 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/replicate/bug-1744548-heal-timeout.t | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +index 0aaa3ea..c208112 100644 +--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -5,8 +5,8 @@ + . $(dirname $0)/../../afr.rc + + function get_cumulative_opendir_count { +-#sed 'n:d' prints odd-numbered lines +- $CLI volume profile $V0 info |grep OPENDIR|sed 'n;d' | awk '{print $8}'|tr -d '\n' ++#sed command prints content between Cumulative and Interval, this keeps content from Cumulative stats ++ $CLI volume profile $V0 info |sed -n '/^Cumulative/,/^Interval/p'|grep OPENDIR| awk '{print $8}'|tr -d '\n' + } + + cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch b/SOURCES/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch new file mode 100644 index 0000000..765c154 --- /dev/null +++ b/SOURCES/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch @@ -0,0 +1,175 @@ +From 9c5f5b4ffd49e8c8631defb7b6873248bbfdaf9c Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Tue, 23 Jul 2019 13:16:04 +0000 +Subject: [PATCH 405/449] [core] fix return of local in __nlc_inode_ctx_get + +__nlc_inode_ctx_get assigns a value to nlc_pe_p which is never used by +its parent function or any of the predecessor hence remove the +assignment and also that function argument as it is not being used +anywhere. 
+ +> fixes: bz#1732496 +> Change-Id: I5b950e1e251bd50a646616da872a4efe9d2ff8c9 +> (Cherry pick from commit 84a55090123a7e3124100e5564da8c521c3c22ab ) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23093/) + +BUG: 1686897 + +Change-Id: I5b950e1e251bd50a646616da872a4efe9d2ff8c9 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202372 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/nl-cache/src/nl-cache-helper.c | 36 +++++++++------------- + 1 file changed, 14 insertions(+), 22 deletions(-) + +diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c +index 009f33a..4314038 100644 +--- a/xlators/performance/nl-cache/src/nl-cache-helper.c ++++ b/xlators/performance/nl-cache/src/nl-cache-helper.c +@@ -145,12 +145,10 @@ nlc_disable_cache(xlator_t *this) + } + + static int +-__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, +- nlc_pe_t **nlc_pe_p) ++__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) + { + int ret = 0; + nlc_ctx_t *nlc_ctx = NULL; +- nlc_pe_t *nlc_pe = NULL; + uint64_t nlc_ctx_int = 0; + uint64_t nlc_pe_int = 0; + +@@ -159,10 +157,6 @@ __nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, + nlc_ctx = (void *)(long)(nlc_ctx_int); + *nlc_ctx_p = nlc_ctx; + } +- if (ret == 0 && nlc_pe_p) { +- nlc_pe = (void *)(long)(nlc_pe_int); +- *nlc_pe_p = nlc_pe; +- } + return ret; + } + +@@ -186,14 +180,13 @@ nlc_inode_ctx_set(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx, + } + + static void +-nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, +- nlc_pe_t **nlc_pe_p) ++nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) + { + int ret = 0; + + LOCK(&inode->lock); + { +- ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p, nlc_pe_p); ++ ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p); + if (ret < 0) + gf_msg_debug(this->name, 0, + "inode ctx get failed for " +@@ -290,8 +283,7 @@ out: + } + + static nlc_ctx_t * +-nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, +- nlc_pe_t **nlc_pe_p) ++nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) + { + int ret = 0; + nlc_ctx_t *nlc_ctx = NULL; +@@ -301,7 +293,7 @@ nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, + + LOCK(&inode->lock); + { +- ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx, nlc_pe_p); ++ ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx); + if (nlc_ctx) + goto unlock; + +@@ -410,7 +402,7 @@ nlc_set_dir_state(xlator_t *this, inode_t *inode, uint64_t state) + goto out; + } + +- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -430,7 +422,7 @@ nlc_cache_timeout_handler(struct gf_tw_timer_list *timer, void *data, + nlc_timer_data_t *tmp = data; + nlc_ctx_t *nlc_ctx = NULL; + +- nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -696,7 +688,7 @@ nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason) + { + nlc_ctx_t *nlc_ctx = NULL; + +- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -883,7 +875,7 @@ nlc_dir_add_ne(xlator_t *this, inode_t *inode, 
const char *name) + goto out; + } + +- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -914,7 +906,7 @@ nlc_dir_remove_pe(xlator_t *this, inode_t *parent, inode_t *entry_ino, + goto out; + } + +- nlc_inode_ctx_get(this, parent, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, parent, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -945,7 +937,7 @@ nlc_dir_add_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino, + goto out; + } + +- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -1051,7 +1043,7 @@ nlc_is_negative_lookup(xlator_t *this, loc_t *loc) + goto out; + } + +- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -1102,7 +1094,7 @@ nlc_get_real_file_name(xlator_t *this, loc_t *loc, const char *fname, + goto out; + } + +- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -1152,7 +1144,7 @@ nlc_dump_inodectx(xlator_t *this, inode_t *inode) + nlc_ne_t *ne = NULL; + nlc_ne_t *tmp1 = NULL; + +- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, inode, &nlc_ctx); + + if (!nlc_ctx) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0406-afr-support-split-brain-CLI-for-replica-3.patch b/SOURCES/0406-afr-support-split-brain-CLI-for-replica-3.patch new file mode 100644 index 0000000..4b57e8a --- /dev/null +++ b/SOURCES/0406-afr-support-split-brain-CLI-for-replica-3.patch @@ -0,0 +1,185 @@ +From a75bb15fbe64f14580c44b8a33314c8bbeffdede Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 4 Jun 2020 18:54:46 +0530 +Subject: [PATCH 406/449] afr: support split-brain CLI for replica 3 + +Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/23502/ + +Ever since we added quorum checks for lookups in afr via commit +bd44d59741bb8c0f5d7a62c5b1094179dd0ce8a4, the split-brain resolution +commands would not work for replica 3 because there would be no +readables for the lookup fop. + +The argument was that split-brains do not occur in replica 3 but we do +see (data/metadata) split-brain cases once in a while which indicate that there are +a few bugs/corner cases yet to be discovered and fixed. + +Fortunately, commit 8016d51a3bbd410b0b927ed66be50a09574b7982 added +GF_CLIENT_PID_GLFS_HEALD as the pid for all fops made by glfsheal. If we +leverage this and allow lookups in afr when pid is GF_CLIENT_PID_GLFS_HEALD, +split-brain resolution commands will work for replica 3 volumes too. + +Likewise, the check is added in shard_lookup as well to permit resolving +split-brains by specifying "/.shard/shard-file.xx" as the file name +(which previously used to fail with EPERM). 
+ +BUG: 1759875 +Change-Id: I203735b909c7d30fc4faaf3ecd4f5b6b379ab266 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202375 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../replicate/bug-1756938-replica-3-sbrain-cli.t | 111 +++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 3 +- + xlators/features/shard/src/shard.c | 3 +- + 3 files changed, 115 insertions(+), 2 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t + +diff --git a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t +new file mode 100644 +index 0000000..c1bdf34 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t +@@ -0,0 +1,111 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard enable ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++#Create split-brain by setting afr xattrs/gfids manually. ++#file1 is non-sharded and will be in data split-brain. ++#file2 will have one shard which will be in data split-brain. ++#file3 will have one shard which will be in gfid split-brain. ++#file4 will have one shard which will be in data & metadata split-brain. 
++TEST dd if=/dev/zero of=$M0/file1 bs=1024 count=1024 oflag=direct ++TEST dd if=/dev/zero of=$M0/file2 bs=1M count=6 oflag=direct ++TEST dd if=/dev/zero of=$M0/file3 bs=1M count=6 oflag=direct ++TEST dd if=/dev/zero of=$M0/file4 bs=1M count=6 oflag=direct ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++#------------------------------------------------------------------------------- ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/file1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/file1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/file1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/file1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/file1 ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/file1 ++ ++#------------------------------------------------------------------------------- ++gfid_f2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file2)) ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1 ++ ++#------------------------------------------------------------------------------- ++TESTS_EXPECTED_IN_LOOP=5 ++function assign_new_gfid { ++ brickpath=$1 ++ filename=$2 ++ gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/$filename)) ++ gfid_shard=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/.shard/$gfid.1)) ++ ++ TEST rm $brickpath/.glusterfs/${gfid_shard:0:2}/${gfid_shard:2:2}/$gfid_shard ++ TEST setfattr -x trusted.gfid $brickpath/.shard/$gfid.1 ++ new_gfid=$(get_random_gfid) ++ new_gfid_str=$(gf_gfid_xattr_to_str $new_gfid) ++ TEST setfattr -n trusted.gfid -v $new_gfid $brickpath/.shard/$gfid.1 ++ TEST mkdir -p $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2} ++ TEST ln $brickpath/.shard/$gfid.1 $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}/$new_gfid_str ++} ++assign_new_gfid $B0/$V0"1" file3 ++assign_new_gfid $B0/$V0"2" file3 ++ ++#------------------------------------------------------------------------------- ++gfid_f4=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file4)) ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1 ++ ++#------------------------------------------------------------------------------- ++#Add entry to xattrop dir on first brick and check for split-brain. 
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++ ++gfid_f1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file1)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f1 ++ ++gfid_f2_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f2.1)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f2_shard1 ++ ++gfid_f3=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file3)) ++gfid_f3_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f3.1)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f3_shard1 ++ ++gfid_f4_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f4.1)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f4_shard1 ++ ++#------------------------------------------------------------------------------- ++#gfid split-brain won't show up in split-brain count. ++EXPECT "3" afr_get_split_brain_count $V0 ++EXPECT_NOT "^0$" get_pending_heal_count $V0 ++ ++#Resolve split-brains ++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file1 ++GFIDSTR="gfid:$gfid_f2_shard1" ++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR ++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f3.1 ++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f4.1 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index eef7fd2..32127c6 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2250,7 +2250,8 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, + if ((spb_choice >= 0) && + (AFR_COUNT(success_replies, child_count) == child_count)) { + *read_subvol = spb_choice; +- } else if (!priv->quorum_count) { ++ } else if (!priv->quorum_count || ++ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) { + *read_subvol = afr_first_up_child(frame, this); + } else if (priv->quorum_count && + afr_has_quorum(data_readable, this, NULL)) { +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 2e2ef5d..16d557b 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1472,7 +1472,8 @@ int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, + shard_local_t *local = NULL; + + this->itable = loc->inode->table; +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && ++ (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { + SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch b/SOURCES/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch new file mode 100644 index 0000000..459462d --- /dev/null +++ b/SOURCES/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch @@ -0,0 +1,60 @@ +From de31f2b0cb09a59941892c9981cb8a8b3aced9ec Mon Sep 17 00:00:00 2001 +From: kshithijiyer <kshithij.ki@gmail.com> +Date: Tue, 24 Dec 2019 13:02:21 +0530 +Subject: [PATCH 407/449] [geo-rep] Improving help message in + schedule_georep.py.in + +SLAVE positional argument doesn't provide a clear +picture of what it is when compared to mastervol and slavevol +in schedule_georep.py.in. 
It would be better if we change it to +something like "Slave hostame (<username>@SLAVEHOST or SLAVEHOST)" + +Present: +---------- +positional arguments: + mastervol Master Volume Name + SLAVE SLAVEHOST or root@SLAVEHOST or user@SLAVEHOST + slavevol Slave Volume Name + +Suggested: +----------- +positional arguments: + mastervol Master Volume Name + SLAVE Slave hostname (<username>@SLAVEHOST or SLAVEHOST) + slavevol Slave Volume Name + +Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/23919/ + >fixes: bz#1786276 + >Change-Id: I73d52247997d623f77d55e51cbb6eccc08eb95ff + >Signed-off-by: kshithijiyer <kshithij.ki@gmail.com> + +BUG: 1787994 +Change-Id: I73d52247997d623f77d55e51cbb6eccc08eb95ff +Signed-off-by: kshithijiyer <kshithij.ki@gmail.com> +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202454 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/geo-rep/schedule_georep.py.in | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in +index f29ae02..ac93716 100644 +--- a/extras/geo-rep/schedule_georep.py.in ++++ b/extras/geo-rep/schedule_georep.py.in +@@ -459,8 +459,8 @@ if __name__ == "__main__": + description=__doc__) + parser.add_argument("mastervol", help="Master Volume Name") + parser.add_argument("slave", +- help="SLAVEHOST or root@SLAVEHOST " +- "or user@SLAVEHOST", ++ help="Slave hostname " ++ "(<username>@SLAVEHOST or SLAVEHOST)", + metavar="SLAVE") + parser.add_argument("slavevol", help="Slave Volume Name") + parser.add_argument("--interval", help="Interval in Seconds. " +-- +1.8.3.1 + diff --git a/SOURCES/0408-geo-rep-Fix-ssh-port-validation.patch b/SOURCES/0408-geo-rep-Fix-ssh-port-validation.patch new file mode 100644 index 0000000..9fad8d1 --- /dev/null +++ b/SOURCES/0408-geo-rep-Fix-ssh-port-validation.patch @@ -0,0 +1,107 @@ +From 07ab5a460da007fc3809b1a943614d1c7f5fcfef Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Fri, 17 Jan 2020 11:03:46 +0000 +Subject: [PATCH 408/449] geo-rep: Fix ssh-port validation + +If non-standard ssh-port is used, Geo-rep can be configured to use ssh port +by using config option, the value should be in allowed port range and non negative. + +At present it can accept negative value and outside allowed port range which is incorrect. + +Many Linux kernels use the port range 32768 to 61000. +IANA suggests it should be in the range 1 to 2^16 - 1, so keeping the same. + +$ gluster volume geo-replication master 127.0.0.1::slave config ssh-port -22 +geo-replication config updated successfully +$ gluster volume geo-replication master 127.0.0.1::slave config ssh-port 22222222 +geo-replication config updated successfully + +This patch fixes the above issue and have added few validations around this +in test cases. 
+Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24035/ + >Change-Id: I9875ab3f00d7257370fbac6f5ed4356d2fed3f3c + >Fixes: bz#1792276 + >Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1796814 +Change-Id: I9875ab3f00d7257370fbac6f5ed4356d2fed3f3c +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202453 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/gsyncd.conf.in | 4 +++- + tests/00-geo-rep/00-georep-verify-non-root-setup.t | 16 ++++++++++++++++ + tests/00-geo-rep/georep-basic-dr-rsync.t | 13 +++++++++++++ + 3 files changed, 32 insertions(+), 1 deletion(-) + +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index 9155cd8..11e57fd 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -266,7 +266,9 @@ allowed_values=ERROR,INFO,WARNING,DEBUG + + [ssh-port] + value=22 +-validation=int ++validation=minmax ++min=1 ++max=65535 + help=Set SSH port + type=int + +diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +index c9fd8b2..12f0c01 100644 +--- a/tests/00-geo-rep/00-georep-verify-non-root-setup.t ++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +@@ -223,6 +223,22 @@ TEST $GEOREP_CLI $master $slave_url resume + #Validate failure of volume stop when geo-rep is running + TEST ! $CLI volume stop $GMV0 + ++#Negative test for ssh-port ++#Port should be integer and between 1-65535 range ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port -22 ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port abc ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 6875943 ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 4.5 ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 22a ++ ++#Config Set ssh-port to validate int validation ++TEST $GEOREP_CLI $master $slave config ssh-port 22 ++ + #Hybrid directory rename test BZ#1763439 + TEST $GEOREP_CLI $master $slave_url config change_detector xsync + mkdir ${master_mnt}/dir1 +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index b6fbf18..d785aa5 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -71,6 +71,19 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created" + #Config gluster-command-dir + TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} + ++#Negative test for ssh-port ++#Port should be integer and between 1-65535 range ++ ++TEST ! $GEOREP_CLI $master $slave config ssh-port -22 ++ ++TEST ! $GEOREP_CLI $master $slave config ssh-port abc ++ ++TEST ! $GEOREP_CLI $master $slave config ssh-port 6875943 ++ ++TEST ! $GEOREP_CLI $master $slave config ssh-port 4.5 ++ ++TEST ! 
$GEOREP_CLI $master $slave config ssh-port 22a ++ + #Config Set ssh-port to validate int validation + TEST $GEOREP_CLI $master $slave config ssh-port 22 + +-- +1.8.3.1 + diff --git a/SOURCES/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch b/SOURCES/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch new file mode 100644 index 0000000..ca1c25a --- /dev/null +++ b/SOURCES/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch @@ -0,0 +1,52 @@ +From a92b4f6373cb18544325436cf86abfebd6780d79 Mon Sep 17 00:00:00 2001 +From: Homma <homma@allworks.co.jp> +Date: Fri, 5 Jul 2019 16:10:41 +0530 +Subject: [PATCH 409/449] system/posix-acl: update ctx only if iatt is non-NULL + +We need to safe-guard against possible zero'ing out of iatt +structure in acl ctx, which can cause many issues. + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23003/ +> fixes: 1668286 +> Change-Id: Ie81a57d7453a6624078de3be8c0845bf4d432773 +> Signed-off-by: Amar Tumballi <amarts@redhat.com> + +BUG: 1781649 +Change-Id: I655b61551d30215b9f23cafc3ef9a5c0d98a43d0 +Signed-off-by: Raghavendra M <raghavendra@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202446 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/system/posix-acl/src/posix-acl.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xlators/system/posix-acl/src/posix-acl.c b/xlators/system/posix-acl/src/posix-acl.c +index 38e48b8..c6ba281 100644 +--- a/xlators/system/posix-acl/src/posix-acl.c ++++ b/xlators/system/posix-acl/src/posix-acl.c +@@ -875,6 +875,13 @@ posix_acl_ctx_update(inode_t *inode, xlator_t *this, struct iatt *buf, + int ret = 0; + int i = 0; + ++ if (!buf || !buf->ia_ctime) { ++ /* No need to update ctx if buf is empty */ ++ gf_log_callingfn(this->name, GF_LOG_DEBUG, "iatt struct is empty (%d)", ++ fop); ++ goto out; ++ } ++ + LOCK(&inode->lock); + { + ctx = __posix_acl_ctx_get(inode, this, _gf_true); +@@ -928,6 +935,7 @@ posix_acl_ctx_update(inode_t *inode, xlator_t *this, struct iatt *buf, + } + unlock: + UNLOCK(&inode->lock); ++out: + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch b/SOURCES/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch new file mode 100644 index 0000000..97bdc78 --- /dev/null +++ b/SOURCES/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch @@ -0,0 +1,249 @@ +From 2b2eb846c49caba13ab92ec66af20292e7780fc1 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Tue, 11 Feb 2020 14:34:48 +0530 +Subject: [PATCH 410/449] afr: prevent spurious entry heals leading to gfid + split-brain + +Problem: +In a hyperconverged setup with granular-entry-heal enabled, if a file is +recreated while one of the bricks is down, and an index heal is triggered +(with the brick still down), entry-self heal was doing a spurious heal +with just the 2 good bricks. It was doing a post-op leading to removal +of the filename from .glusterfs/indices/entry-changes as well as +erroneous setting of afr xattrs on the parent. When the brick came up, +the xattrs were cleared, resulting in the renamed file not getting +healed and leading to gfid split-brain and EIO on the mount. + +Fix: +Proceed with entry heal only when shd can connect to all bricks of the replica, +just like in data and metadata heal. 
+ +BUG: 1804164 + +> Upstream patch:https://review.gluster.org/#/c/glusterfs/+/24109/ +> fixes: bz#1801624 +> Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +Change-Id: I23f57e543cff1e3f35eb8dbc60a2babfae6838c7 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202395 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bug-1433571-undo-pending-only-on-up-bricks.t | 18 ++----- + tests/bugs/replicate/bug-1801624-entry-heal.t | 58 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 4 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 8 +-- + xlators/cluster/afr/src/afr-self-heal-entry.c | 6 +-- + xlators/cluster/afr/src/afr-self-heal-name.c | 2 +- + xlators/cluster/afr/src/afr-self-heal.h | 2 - + 7 files changed, 69 insertions(+), 29 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1801624-entry-heal.t + +diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t +index 0767f47..10ce013 100644 +--- a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t ++++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t +@@ -49,25 +49,15 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +-#Kill brick 0 and turn on the client side heal and do ls to trigger the heal. +-#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0. +-TEST kill_brick $V0 $H0 $B0/${V0}0 ++# We were killing one brick and checking that entry heal does not reset the ++# pending xattrs for the down brick. Now that we need all bricks to be up for ++# entry heal, I'm removing that test from the .t ++ + TEST $CLI volume set $V0 cluster.data-self-heal on + TEST $CLI volume set $V0 cluster.metadata-self-heal on + TEST $CLI volume set $V0 cluster.entry-self-heal on + + TEST ls $M0 +-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1 +-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2 +-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 +-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2 +- +-#Bring back all the bricks and trigger the heal again by doing ls. Now the +-#pending xattrs on all the bricks should be 0. +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +-TEST ls $M0 +- + TEST cat $M0/f1 + TEST cat $M0/f2 + TEST cat $M0/f3 +diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t +new file mode 100644 +index 0000000..94b4651 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1801624-entry-heal.t +@@ -0,0 +1,58 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2} ++TEST $CLI volume set $V0 heal-timeout 5 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 granular-entry-heal enable ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++echo "Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++ ++# Re-create the file when a brick is down. ++TEST kill_brick $V0 $H0 $B0/brick1 ++TEST rm $M0/FILE ++echo "New Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0 ++ ++# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices. ++$CLI volume heal $V0 # CLI will fail but heal is launched anyway. ++TEST sleep 5 # give index heal a chance to do one run. ++brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/) ++brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/) ++TEST [ $brick0_pending -eq "000000000000000000000002" ] ++TEST [ $brick2_pending -eq "000000000000000000000002" ] ++EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/ ++EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/ ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++$CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values. 
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++TEST cat $M0/FILE ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 32127c6..5806556 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -6629,7 +6629,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) ++ if (ret < priv->child_count) + goto data_unlock; + ret = __afr_selfheal_data_prepare( + heal_frame, this, inode, locked_on, sources, sinks, +@@ -6646,7 +6646,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, + LLONG_MAX - 1, 0, locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) ++ if (ret < priv->child_count) + goto mdata_unlock; + ret = __afr_selfheal_metadata_prepare( + heal_frame, this, inode, locked_on, sources, sinks, +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 81ef38a..ce1ea50 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + char *accused = NULL; /* Accused others without any self-accusal */ + char *pending = NULL; /* Have pending operations on others */ + char *self_accused = NULL; /* Accused itself */ +- int min_participants = -1; + + priv = this->private; + +@@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + } + } + +- if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { +- min_participants = priv->child_count; +- } else { +- min_participants = AFR_SH_MIN_PARTICIPANTS; +- } +- if (afr_success_count(replies, priv->child_count) < min_participants) { ++ if (afr_success_count(replies, priv->child_count) < priv->child_count) { + /* Treat this just like locks not being acquired */ + return -ENOTCONN; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 3ce882e..40be898 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -597,7 +597,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes " +@@ -991,7 +991,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + data_lock); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " +@@ -1115,7 +1115,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode) + ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain, + 
NULL, locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 36640b5..7d4f208 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -514,7 +514,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + ret = -ENOTCONN; + goto unlock; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 6555ec5..8234cec 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -11,8 +11,6 @@ + #ifndef _AFR_SELFHEAL_H + #define _AFR_SELFHEAL_H + +-#define AFR_SH_MIN_PARTICIPANTS 2 +- + /* Perform fop on all UP subvolumes and wait for all callbacks to return */ + + #define AFR_ONALL(frame, rfn, fop, args...) \ +-- +1.8.3.1 + diff --git a/SOURCES/0411-tools-glusterfind-validate-session-name.patch b/SOURCES/0411-tools-glusterfind-validate-session-name.patch new file mode 100644 index 0000000..db633f2 --- /dev/null +++ b/SOURCES/0411-tools-glusterfind-validate-session-name.patch @@ -0,0 +1,116 @@ +From 854defb4ff5e0d53f51545d20796aff662f9850f Mon Sep 17 00:00:00 2001 +From: Saravanakumar Arumugam <sarumuga@redhat.com> +Date: Thu, 9 Jul 2015 15:56:28 +0530 +Subject: [PATCH 411/449] tools/glusterfind : validate session name + +Validate a session name(during create) for the following: +1. minimum 2 character length. +2. Maximum 256 characters. +3. No special characters apart from underscore, hyphen allowed. + +Also, validate volume(expect, while using glusterfind list). 
+ +>Change-Id: I1b1e64e218f93d0a531d3cf69fc2ce7e2ed11d01 +>BUG: 1241494 +>Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com> +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/11602/ + +BUG: 1234220 +Change-Id: I1b1e64e218f93d0a531d3cf69fc2ce7e2ed11d01 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202469 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tools/glusterfind/src/main.py | 50 ++++++++++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 8 deletions(-) + +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index 5ca1fec..4b5466d 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -23,6 +23,7 @@ import tempfile + import signal + from datetime import datetime + import codecs ++import re + + from utils import execute, is_host_local, mkdirp, fail + from utils import setup_logger, human_time, handle_rm_error +@@ -520,11 +521,8 @@ def write_output(outfile, outfilemerger, field_separator): + else: + gfind_write(f, row[0], field_separator, p_rep) + +-def mode_create(session_dir, args): +- logger.debug("Init is called - Session: %s, Volume: %s" +- % (args.session, args.volume)) +- +- cmd = ["gluster", 'volume', 'info', args.volume, "--xml"] ++def validate_volume(volume): ++ cmd = ["gluster", 'volume', 'info', volume, "--xml"] + _, data, _ = execute(cmd, + exit_msg="Failed to Run Gluster Volume Info", + logger=logger) +@@ -532,11 +530,42 @@ def mode_create(session_dir, args): + tree = etree.fromstring(data) + statusStr = tree.find('volInfo/volumes/volume/statusStr').text + except (ParseError, AttributeError) as e: +- fail("Invalid Volume: %s" % e, logger=logger) +- ++ fail("Invalid Volume: Check the Volume name! %s" % e) + if statusStr != "Started": +- fail("Volume %s is not online" % args.volume, logger=logger) ++ fail("Volume %s is not online" % volume) ++ ++# The rules for a valid session name. ++SESSION_NAME_RULES = { ++ 'min_length': 2, ++ 'max_length': 256, # same as maximum volume length ++ # Specifies all alphanumeric characters, underscore, hyphen. 
++ 'valid_chars': r'0-9a-zA-Z_-', ++} ++ ++ ++# checks valid session name, fail otherwise ++def validate_session_name(session): ++ # Check for minimum length ++ if len(session) < SESSION_NAME_RULES['min_length']: ++ fail('session_name must be at least ' + ++ str(SESSION_NAME_RULES['min_length']) + ' characters long.') ++ # Check for maximum length ++ if len(session) > SESSION_NAME_RULES['max_length']: ++ fail('session_name must not exceed ' + ++ str(SESSION_NAME_RULES['max_length']) + ' characters length.') ++ ++ # Matches strings composed entirely of characters specified within ++ if not re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] + ++ ']+$', session): ++ fail('Session name can only contain these characters: ' + ++ SESSION_NAME_RULES['valid_chars']) ++ ++ ++def mode_create(session_dir, args): ++ validate_session_name(args.session) + ++ logger.debug("Init is called - Session: %s, Volume: %s" ++ % (args.session, args.volume)) + mkdirp(session_dir, exit_on_err=True, logger=logger) + mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, + logger=logger) +@@ -850,6 +879,11 @@ def main(): + args.mode not in ["create", "list", "query"]: + fail("Invalid session %s" % args.session) + ++ # volume involved, validate the volume first ++ if args.mode not in ["list"]: ++ validate_volume(args.volume) ++ ++ + # "default" is a system defined session name + if args.mode in ["create", "post", "pre", "delete"] and \ + args.session == "default": +-- +1.8.3.1 + diff --git a/SOURCES/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch b/SOURCES/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch new file mode 100644 index 0000000..865fddf --- /dev/null +++ b/SOURCES/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch @@ -0,0 +1,46 @@ +From 0769c5ddc78ea37b9a43ac35dd71ec8cea4b8da8 Mon Sep 17 00:00:00 2001 +From: yinkui <13965432176@163.com> +Date: Fri, 16 Aug 2019 10:15:07 +0800 +Subject: [PATCH 412/449] gluster-smb:add smb parameter when access gluster by + cifs + +Backport of https://review.gluster.org/23240 + +Change-Id: I9ff54f2ca6f86bb5b2f4740485a0159e1fd7785f +BUG: 1783232 +Signed-off-by: yinkui <13965432176@163.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202472 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/hook-scripts/set/post/S30samba-set.sh | 1 + + extras/hook-scripts/start/post/S30samba-start.sh | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh +index d2a62d3..e73f00f 100755 +--- a/extras/hook-scripts/set/post/S30samba-set.sh ++++ b/extras/hook-scripts/set/post/S30samba-set.sh +@@ -90,6 +90,7 @@ function add_samba_share () { + STRING+="path = /\n" + STRING+="read only = no\n" + STRING+="guest ok = yes\n" ++ STRING+="kernel share modes = no\n" + printf "$STRING" >> ${CONFIGFILE} + } + +diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh +index 2854bdd..0d5a5ed 100755 +--- a/extras/hook-scripts/start/post/S30samba-start.sh ++++ b/extras/hook-scripts/start/post/S30samba-start.sh +@@ -89,6 +89,7 @@ function add_samba_share () { + STRING+="path = /\n" + STRING+="read only = no\n" + STRING+="guest ok = yes\n" ++ STRING+="kernel share modes = no\n" + printf "$STRING" >> "${CONFIGFILE}" + } + +-- +1.8.3.1 + diff --git a/SOURCES/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch 
b/SOURCES/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch new file mode 100644 index 0000000..1ff6348 --- /dev/null +++ b/SOURCES/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch @@ -0,0 +1,46 @@ +From aec3dd00fa76547316fddd07e6ded428d945986c Mon Sep 17 00:00:00 2001 +From: Anoop C S <anoopcs@redhat.com> +Date: Fri, 22 Nov 2019 17:36:55 +0530 +Subject: [PATCH 413/449] extras/hooks: Remove smb.conf parameter allowing + guest access + +Backport of https://review.gluster.org/23745 + +Change-Id: I88f494f16153d27ab6e2f2faf4d557e075671b10 +BUG: 1775637 +Signed-off-by: Anoop C S <anoopcs@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202473 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/hook-scripts/set/post/S30samba-set.sh | 1 - + extras/hook-scripts/start/post/S30samba-start.sh | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh +index e73f00f..854f131 100755 +--- a/extras/hook-scripts/set/post/S30samba-set.sh ++++ b/extras/hook-scripts/set/post/S30samba-set.sh +@@ -89,7 +89,6 @@ function add_samba_share () { + STRING+="glusterfs:loglevel = 7\n" + STRING+="path = /\n" + STRING+="read only = no\n" +- STRING+="guest ok = yes\n" + STRING+="kernel share modes = no\n" + printf "$STRING" >> ${CONFIGFILE} + } +diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh +index 0d5a5ed..cac0cbf 100755 +--- a/extras/hook-scripts/start/post/S30samba-start.sh ++++ b/extras/hook-scripts/start/post/S30samba-start.sh +@@ -88,7 +88,6 @@ function add_samba_share () { + STRING+="glusterfs:loglevel = 7\n" + STRING+="path = /\n" + STRING+="read only = no\n" +- STRING+="guest ok = yes\n" + STRING+="kernel share modes = no\n" + printf "$STRING" >> "${CONFIGFILE}" + } +-- +1.8.3.1 + diff --git a/SOURCES/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch b/SOURCES/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch new file mode 100644 index 0000000..67b71dd --- /dev/null +++ b/SOURCES/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch @@ -0,0 +1,62 @@ +From 5b549cbf3f1873054c6d187b09aa9f9313971b1f Mon Sep 17 00:00:00 2001 +From: Kinglong Mee <kinglongmee@gmail.com> +Date: Mon, 18 Mar 2019 20:47:54 +0800 +Subject: [PATCH 414/449] cluster-syncop: avoid duplicate unlock of + inodelk/entrylk + +When using ec, there are many spam messages in brick and client +logs files. + +When shd does entry heal, it takes lock on a directory using +cluster_tiebreaker_inodelk(). If it does not get lock on all +the bricks because other clients has got lock on some bricks, +it will unlock the locks on those bricks which it got and then +will try blocking locks (If any one of the previous was successful). + +The problem come here. In case we do not get locks on all the +required bricks, we are sending unlock request twice on those +bricks where we got the locks. 
+ +BUG: 1750211 +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22377/ +> Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a +> Updates: bz#1689920 +> Signed-off-by: Kinglong Mee <mijinlong@open-fs.com> + +Change-Id: I1647548ba75fdd27fd4e20dec08db67774f43375 +Reviewed-on: https://code.engineering.redhat.com/gerrit/202477 +Tested-by: Ashish Pandey <aspandey@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/cluster-syncop.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c +index 5a08f26..6ee89dd 100644 +--- a/libglusterfs/src/cluster-syncop.c ++++ b/libglusterfs/src/cluster-syncop.c +@@ -1203,6 +1203,10 @@ cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on, + if (num_success) { + FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, + inodelk, dom, &loc, F_SETLKW, &flock, NULL); ++ } else { ++ loc_wipe(&loc); ++ memset(locked_on, 0, numsubvols); ++ return 0; + } + break; + } +@@ -1244,7 +1248,9 @@ cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on, + entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, + NULL); + } else { ++ loc_wipe(&loc); + memset(locked_on, 0, numsubvols); ++ return 0; + } + break; + } +-- +1.8.3.1 + diff --git a/SOURCES/0415-dht-Fix-stale-layout-and-create-issue.patch b/SOURCES/0415-dht-Fix-stale-layout-and-create-issue.patch new file mode 100644 index 0000000..476a8cc --- /dev/null +++ b/SOURCES/0415-dht-Fix-stale-layout-and-create-issue.patch @@ -0,0 +1,523 @@ +From ba23e6d8f4eff11a228816149a8a1ccd6df41146 Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Fri, 27 Dec 2019 12:06:19 +0530 +Subject: [PATCH 415/449] dht: Fix stale-layout and create issue + +Problem: With lookup-optimize set to on by default, a client with +stale-layout can create a new file on a wrong subvol. This will lead to +possible duplicate files if two different clients attempt to create the +same file with two different layouts. + +Solution: Send in-memory layout to be cross checked at posix before +commiting a "create". In case of a mismatch, sync the client layout with +that of the server and attempt the create fop one more time. + +test: Manual, testcase(attached) + +(Backport of https://review.gluster.org/#/c/glusterfs/+/23927/) + +BUG: 1748865 +Change-Id: I6c82c97418654ae8eb3b81ab65f1247aa4002ceb +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202465 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/distribute/bug-1786679.t | 69 +++++++++++ + xlators/cluster/dht/src/dht-common.c | 147 ++++++++++++++++++++--- + xlators/cluster/dht/src/dht-common.h | 6 + + xlators/protocol/client/src/client-rpc-fops_v2.c | 9 +- + xlators/storage/posix/src/posix-entry-ops.c | 29 ++++- + xlators/storage/posix/src/posix-helpers.c | 76 ++++++++++++ + xlators/storage/posix/src/posix.h | 4 + + 7 files changed, 321 insertions(+), 19 deletions(-) + create mode 100755 tests/bugs/distribute/bug-1786679.t + +diff --git a/tests/bugs/distribute/bug-1786679.t b/tests/bugs/distribute/bug-1786679.t +new file mode 100755 +index 0000000..219ce51 +--- /dev/null ++++ b/tests/bugs/distribute/bug-1786679.t +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++SCRIPT_TIMEOUT=250 ++ ++. 
$(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../dht.rc ++ ++ ++# create 2 subvols ++# create a dir ++# create a file ++# change layout ++# remove the file ++# execute create from a different mount ++# Without the patch, the file will be present on both of the bricks ++ ++cleanup ++ ++function get_layout () { ++ ++layout=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | gawk -F"=" '{print $2}'` ++ ++echo $layout ++ ++} ++ ++function set_layout() ++{ ++ setfattr -n "trusted.glusterfs.dht" -v $1 $2 ++} ++ ++TEST glusterd ++TEST pidof glusterd ++ ++BRICK1=$B0/${V0}-0 ++BRICK2=$B0/${V0}-1 ++ ++TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2 ++TEST $CLI volume start $V0 ++ ++# Mount FUSE and create symlink ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++TEST mkdir $M0/dir ++TEST touch $M0/dir/file ++TEST ! stat "$BRICK1/dir/file" ++TEST stat "$BRICK2/dir/file" ++ ++layout1="$(get_layout "$BRICK1/dir")" ++layout2="$(get_layout "$BRICK2/dir")" ++ ++TEST set_layout $layout1 "$BRICK2/dir" ++TEST set_layout $layout2 "$BRICK1/dir" ++ ++TEST rm $M0/dir/file -f ++TEST gluster v set $V0 client-log-level DEBUG ++ ++#Without the patch in place, this client will create the file in $BRICK2 ++#which will lead to two files being on both the bricks when a new client ++#create the file with the same name ++TEST touch $M0/dir/file ++ ++TEST glusterfs -s $H0 --volfile-id $V0 $M1 ++TEST touch $M1/dir/file ++ ++TEST stat "$BRICK1/dir/file" ++TEST ! stat "$BRICK2/dir/file" ++ ++cleanup +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 7890e7a..6aa18f3 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -8262,6 +8262,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + xlator_t *prev = NULL; + int ret = -1; + dht_local_t *local = NULL; ++ gf_boolean_t parent_layout_changed = _gf_false; ++ char pgfid[GF_UUID_BUF_SIZE] = {0}; ++ xlator_t *subvol = NULL; ++ ++ local = frame->local; + + local = frame->local; + if (!local) { +@@ -8270,8 +8275,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + goto out; + } + +- if (op_ret == -1) ++ if (op_ret == -1) { ++ local->op_errno = op_errno; ++ parent_layout_changed = (xdata && ++ dict_get(xdata, GF_PREOP_CHECK_FAILED)) ++ ? _gf_true ++ : _gf_false; ++ ++ if (parent_layout_changed) { ++ if (local && local->lock[0].layout.parent_layout.locks) { ++ /* Returning failure as the layout could not be fixed even under ++ * the lock */ ++ goto out; ++ } ++ ++ gf_uuid_unparse(local->loc.parent->gfid, pgfid); ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED, ++ "create (%s/%s) (path: %s): parent layout " ++ "changed. Attempting a layout refresh and then a " ++ "retry", ++ pgfid, local->loc.name, local->loc.path); ++ ++ /* ++ dht_refresh_layout needs directory info in local->loc.Hence, ++ storing the parent_loc in local->loc and storing the create ++ context in local->loc2. We will restore this information in ++ dht_creation_do. 
++ */ ++ ++ loc_wipe(&local->loc2); ++ ++ ret = loc_copy(&local->loc2, &local->loc); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, ++ "loc_copy failed %s", local->loc.path); ++ ++ goto out; ++ } ++ ++ loc_wipe(&local->loc); ++ ++ ret = dht_build_parent_loc(this, &local->loc, &local->loc2, ++ &op_errno); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED, ++ "parent loc build failed"); ++ goto out; ++ } ++ ++ subvol = dht_subvol_get_hashed(this, &local->loc2); ++ ++ ret = dht_create_lock(frame, subvol); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, ++ "locking parent failed"); ++ goto out; ++ } ++ ++ return 0; ++ } ++ + goto out; ++ } + + prev = cookie; + +@@ -8392,6 +8458,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + subvol->name); + ++ dht_set_parent_layout_in_dict(loc, this, local); ++ + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +@@ -8400,10 +8468,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + avail_subvol = dht_free_disk_available_subvol(this, subvol, local); + + if (avail_subvol != subvol) { +- local->params = dict_ref(params); +- local->flags = flags; +- local->mode = mode; +- local->umask = umask; + local->cached_subvol = avail_subvol; + local->hashed_subvol = subvol; + +@@ -8419,6 +8483,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + subvol->name); + ++ dht_set_parent_layout_in_dict(loc, this, local); ++ + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +@@ -8680,6 +8746,60 @@ err: + } + + int ++dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local) ++{ ++ dht_conf_t *conf = this->private; ++ dht_layout_t *parent_layout = NULL; ++ int *parent_disk_layout = NULL; ++ xlator_t *hashed_subvol = NULL; ++ char pgfid[GF_UUID_BUF_SIZE] = {0}; ++ int ret = 0; ++ ++ gf_uuid_unparse(loc->parent->gfid, pgfid); ++ ++ parent_layout = dht_layout_get(this, loc->parent); ++ hashed_subvol = dht_subvol_get_hashed(this, loc); ++ ++ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol, ++ &parent_disk_layout); ++ if (ret == -1) { ++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno, ++ DHT_MSG_PARENT_LAYOUT_CHANGED, ++ "%s (%s/%s) (path: %s): " ++ "extracting in-memory layout of parent failed. ", ++ gf_fop_list[local->fop], pgfid, loc->name, loc->path); ++ goto err; ++ } ++ ++ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY, ++ conf->xattr_name); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno, ++ DHT_MSG_PARENT_LAYOUT_CHANGED, ++ "%s (%s/%s) (path: %s): " ++ "setting %s key in params dictionary failed. ", ++ gf_fop_list[local->fop], pgfid, loc->name, loc->path, ++ GF_PREOP_PARENT_KEY); ++ goto err; ++ } ++ ++ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout, ++ 4 * 4); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno, ++ DHT_MSG_PARENT_LAYOUT_CHANGED, ++ "%s (%s/%s) (path: %s): " ++ "setting parent-layout in params dictionary failed. 
", ++ gf_fop_list[local->fop], pgfid, loc->name, loc->path); ++ goto err; ++ } ++ ++err: ++ dht_layout_unref(this, parent_layout); ++ return ret; ++} ++ ++int + dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) + { +@@ -8705,6 +8825,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + goto err; + } + ++ local->params = dict_ref(params); ++ local->flags = flags; ++ local->mode = mode; ++ local->umask = umask; ++ + if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "creating %s on %s (got create on %s)", local->loc.path, +@@ -8720,10 +8845,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + + if (hashed_subvol && (hashed_subvol != subvol)) { + /* Create the linkto file and then the data file */ +- local->params = dict_ref(params); +- local->flags = flags; +- local->mode = mode; +- local->umask = umask; + local->cached_subvol = subvol; + local->hashed_subvol = hashed_subvol; + +@@ -8736,6 +8857,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + * file as we expect a lookup everywhere if there are problems + * with the parent layout + */ ++ ++ dht_set_parent_layout_in_dict(loc, this, local); ++ + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, &local->loc, flags, mode, umask, + fd, params); +@@ -8787,11 +8911,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + goto err; + } + +- local->params = dict_ref(params); +- local->flags = flags; +- local->mode = mode; +- local->umask = umask; +- + loc_wipe(&local->loc); + + ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno); +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 8e65111..1b3e826 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -1549,4 +1549,10 @@ dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno); + int + dht_dir_layout_error_check(xlator_t *this, inode_t *inode); + ++int32_t ++dht_create_lock(call_frame_t *frame, xlator_t *subvol); ++ ++int ++dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local); ++ + #endif /* _DHT_H */ +diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c +index 2673b6e..613dda8 100644 +--- a/xlators/protocol/client/src/client-rpc-fops_v2.c ++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c +@@ -2094,11 +2094,12 @@ client4_0_create_cbk(struct rpc_req *req, struct iovec *iov, int count, + goto out; + } + ++ ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent, ++ local, &xdata); ++ if (ret < 0) ++ goto out; ++ + if (-1 != rsp.op_ret) { +- ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent, +- local, &xdata); +- if (ret < 0) +- goto out; + ret = client_add_fd_to_saved_fds(frame->this, fd, &local->loc, + local->flags, rsp.fd, 0); + if (ret) { +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index bea0bbf..65650b3 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -2070,6 +2070,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + mode_t mode_bit = 0; + 
++ dict_t *xdata_rsp = dict_ref(xdata); ++ + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); +@@ -2118,6 +2120,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + was_present = 0; + } + ++ if (!was_present) { ++ if (posix_is_layout_stale(xdata, par_path, this)) { ++ op_ret = -1; ++ op_errno = EIO; ++ if (!xdata_rsp) { ++ xdata_rsp = dict_new(); ++ if (!xdata_rsp) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ } ++ ++ if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) == ++ -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED, ++ "setting key %s in dict failed", GF_PREOP_CHECK_FAILED); ++ } ++ ++ goto out; ++ } ++ } ++ + if (priv->o_direct) + _flags |= O_DIRECT; + +@@ -2239,7 +2263,10 @@ out: + + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, + (loc) ? loc->inode : NULL, &stbuf, &preparent, +- &postparent, xdata); ++ &postparent, xdata_rsp); ++ ++ if (xdata_rsp) ++ dict_unref(xdata_rsp); + + return 0; + } +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 35dd3b6..2c27d22 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -3559,3 +3559,79 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) + } + } + } ++ ++gf_boolean_t ++posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) ++{ ++ int op_ret = 0; ++ ssize_t size = 0; ++ char value_buf[4096] = { ++ 0, ++ }; ++ gf_boolean_t have_val = _gf_false; ++ data_t *arg_data = NULL; ++ char *xattr_name = NULL; ++ gf_boolean_t is_stale = _gf_false; ++ ++ op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); ++ if (xattr_name == NULL) { ++ op_ret = 0; ++ goto out; ++ } ++ ++ arg_data = dict_get(xdata, xattr_name); ++ if (!arg_data) { ++ op_ret = 0; ++ goto out; ++ } ++ ++ size = sys_lgetxattr(par_path, xattr_name, value_buf, ++ sizeof(value_buf) - 1); ++ ++ if (size >= 0) { ++ have_val = _gf_true; ++ } else { ++ if (errno == ERANGE) { ++ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, ++ "getxattr on key (%s) path (%s) failed due to" ++ " buffer overflow", ++ xattr_name, par_path); ++ size = sys_lgetxattr(par_path, xattr_name, NULL, 0); ++ } ++ if (size < 0) { ++ op_ret = -1; ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, ++ "getxattr on key (%s) failed, path : %s", xattr_name, ++ par_path); ++ goto out; ++ } ++ } ++ ++ if (!have_val) { ++ size = sys_lgetxattr(par_path, xattr_name, value_buf, size); ++ if (size < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, ++ "getxattr on key (%s) failed (%s)", xattr_name, ++ strerror(errno)); ++ goto out; ++ } ++ } ++ ++ if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) { ++ gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, ++ "failing preop as on-disk xattr value differs from argument " ++ "value for key %s", ++ xattr_name); ++ op_ret = -1; ++ } ++ ++out: ++ dict_del_sizen(xdata, xattr_name); ++ dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); ++ ++ if (op_ret == -1) { ++ is_stale = _gf_true; ++ } ++ ++ return is_stale; ++} +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index dd51062..ac9d83c 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -671,4 +671,8 @@ posix_spawn_ctx_janitor_thread(xlator_t *this); + + void + posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); ++ 
++gf_boolean_t ++posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); ++ + #endif /* _POSIX_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch b/SOURCES/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch new file mode 100644 index 0000000..1954e6a --- /dev/null +++ b/SOURCES/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch @@ -0,0 +1,72 @@ +From 63cfdd987b1dfbf97486f0f884380faee0ae25d0 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Wed, 4 Sep 2019 11:27:30 +0530 +Subject: [PATCH 416/449] tests: fix spurious failure of + bug-1402841.t-mt-dir-scan-race.t + +Upstream patch: https://review.gluster.org/23352 + +Problem: +Since commit 600ba94183333c4af9b4a09616690994fd528478, shd starts +healing as soon as it is toggled from disabled to enabled. This was +causing the following line in the .t to fail on a 'fast' machine (always +on my laptop and sometimes on the jenkins slaves). + +EXPECT_NOT "^0$" get_pending_heal_count $V0 + +because by the time shd was disabled, the heal was already completed. + +Fix: +Increase the no. of files to be healed and make it a variable called +FILE_COUNT, should we need to bump it up further because the machines +become even faster. Also created pending metadata heals to increase the +time taken to heal a file. + +>fixes: bz#1748744 +>Change-Id: I5a26b08e45b8c19bce3c01ce67bdcc28ed48198d +Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +BUG: 1844359 +Change-Id: Ie3676c6c2c27e7574b958d2eaac23801dfaed3a9 +Reviewed-on: https://code.engineering.redhat.com/gerrit/202481 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t +index 6351ba2..a1b9a85 100755 +--- a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t ++++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t +@@ -3,6 +3,8 @@ + . 
$(dirname $0)/../../volume.rc + cleanup; + ++FILE_COUNT=500 ++ + TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +@@ -11,15 +13,14 @@ TEST $CLI volume set $V0 cluster.shd-wait-qlength 100 + TEST $CLI volume start $V0 + + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +-touch $M0/file{1..200} +- ++for i in `seq 1 $FILE_COUNT`; do touch $M0/file$i; done + TEST kill_brick $V0 $H0 $B0/${V0}1 +-for i in {1..200}; do echo hello>$M0/file$i; done ++for i in `seq 1 $FILE_COUNT`; do echo hello>$M0/file$i; chmod -x $M0/file$i; done + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +-EXPECT "200" get_pending_heal_count $V0 ++EXPECT "$FILE_COUNT" get_pending_heal_count $V0 + TEST $CLI volume set $V0 self-heal-daemon on + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-- +1.8.3.1 + diff --git a/SOURCES/0417-events-fix-IPv6-memory-corruption.patch b/SOURCES/0417-events-fix-IPv6-memory-corruption.patch new file mode 100644 index 0000000..cefb5bf --- /dev/null +++ b/SOURCES/0417-events-fix-IPv6-memory-corruption.patch @@ -0,0 +1,153 @@ +From 5e231ceb35bb763d6fafc7c3efe1c3c582929cc2 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Tue, 14 Jan 2020 13:28:47 +0100 +Subject: [PATCH 417/449] events: fix IPv6 memory corruption + +When an event was generated and the target host was resolved to an IPv6 +address, there was a memory overflow when that address was copied to a +fixed IPv4 structure (IPv6 addresses are longer than IPv4 ones). + +This fix correctly handles IPv4 and IPv6 addresses returned by +getaddrinfo() + +Backport of: +> Upstream-patch-link: https://review.gluster.org/24014 +> Change-Id: I5864a0c6e6f1b405bd85988529570140cf23b250 +> Fixes: bz#1790870 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1792873 +Change-Id: I5864a0c6e6f1b405bd85988529570140cf23b250 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202486 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/events.c | 56 +++++++++++++---------------------------------- + 1 file changed, 15 insertions(+), 41 deletions(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 4e2f8f9..6d1e383 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -34,7 +34,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + int ret = 0; + int sock = -1; + char *eventstr = NULL; +- struct sockaddr_in server; + va_list arguments; + char *msg = NULL; + glusterfs_ctx_t *ctx = NULL; +@@ -42,11 +41,10 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + struct addrinfo hints; + struct addrinfo *result = NULL; + xlator_t *this = THIS; +- int sin_family = AF_INET; + char *volfile_server_transport = NULL; + + /* Global context */ +- ctx = THIS->ctx; ++ ctx = this->ctx; + + if (event < 0 || event >= EVENT_LAST) { + ret = EVENT_ERROR_INVALID_INPUTS; +@@ -60,48 +58,31 @@ _gf_event(eventtypes_t event, const char *fmt, ...) 
+ goto out; + } + +- memset(&hints, 0, sizeof(hints)); +- hints.ai_family = AF_UNSPEC; +- + if (ctx) { + volfile_server_transport = ctx->cmd_args.volfile_server_transport; + } +- + if (!volfile_server_transport) { + volfile_server_transport = "tcp"; + } +- /* Get Host name to send message */ ++ ++ /* host = NULL returns localhost */ ++ host = NULL; + if (ctx && ctx->cmd_args.volfile_server && + (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set + use that information to push the events. */ +- if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints, +- &result)) != 0) { +- ret = EVENT_ERROR_RESOLVE; +- goto out; +- } +- +- if (get_ip_from_addrinfo(result, &host) == NULL) { +- ret = EVENT_ERROR_RESOLVE; +- goto out; +- } +- +- sin_family = result->ai_family; +- } else { +- /* Localhost, Use the defined IP for localhost */ +- host = gf_strdup(EVENT_HOST); ++ host = ctx->cmd_args.volfile_server; + } + +- /* Socket Configurations */ +- server.sin_family = sin_family; +- server.sin_port = htons(EVENT_PORT); +- ret = inet_pton(server.sin_family, host, &server.sin_addr); +- if (ret <= 0) { +- gf_msg(this->name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, +- "inet_pton failed with return code %d", ret); ++ memset(&hints, 0, sizeof(hints)); ++ hints.ai_family = AF_UNSPEC; ++ hints.ai_socktype = SOCK_DGRAM; ++ hints.ai_flags = AI_ADDRCONFIG; ++ ++ if ((getaddrinfo(host, TOSTRING(EVENT_PORT), &hints, &result)) != 0) { ++ ret = EVENT_ERROR_RESOLVE; + goto out; + } +- memset(&server.sin_zero, '\0', sizeof(server.sin_zero)); + + va_start(arguments, fmt); + ret = gf_vasprintf(&msg, fmt, arguments); +@@ -113,15 +94,15 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + } + + ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)time(NULL), event, msg); +- ++ GF_FREE(msg); + if (ret <= 0) { + ret = EVENT_ERROR_MSG_FORMAT; + goto out; + } + + /* Send Message */ +- if (sendto(sock, eventstr, strlen(eventstr), 0, (struct sockaddr *)&server, +- sizeof(server)) <= 0) { ++ if (sendto(sock, eventstr, strlen(eventstr), 0, result->ai_addr, ++ result->ai_addrlen) <= 0) { + ret = EVENT_ERROR_SEND; + goto out; + } +@@ -133,17 +114,10 @@ out: + sys_close(sock); + } + +- /* Allocated by gf_vasprintf */ +- if (msg) +- GF_FREE(msg); +- + /* Allocated by gf_asprintf */ + if (eventstr) + GF_FREE(eventstr); + +- if (host) +- GF_FREE(host); +- + if (result) + freeaddrinfo(result); + +-- +1.8.3.1 + diff --git a/SOURCES/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch b/SOURCES/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch new file mode 100644 index 0000000..45622d9 --- /dev/null +++ b/SOURCES/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch @@ -0,0 +1,439 @@ +From 7ad8c03a28fca67150972cda964ebe9233766b54 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Mon, 30 Mar 2020 11:09:39 +0200 +Subject: [PATCH 418/449] md-cache: avoid clearing cache when not necessary + +mdc_inode_xatt_set() blindly cleared current cache when dict was not +NULL, even if there was no xattr requested. + +This patch fixes this by only calling mdc_inode_xatt_set() when we have +explicitly requested something to cache. 
+ +Backport of: +> Upstream-patch-link: https://review.gluster.org/24267 +> Change-Id: Idc91a4693f1ff39f7059acde26682ccc361b947d +> Fixes: #1140 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1815434 +Change-Id: Idc91a4693f1ff39f7059acde26682ccc361b947d +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202487 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/md-cache/src/md-cache.c | 165 ++++++++++++++++------------ + 1 file changed, 93 insertions(+), 72 deletions(-) + +diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c +index a6b363f..bbbee3b 100644 +--- a/xlators/performance/md-cache/src/md-cache.c ++++ b/xlators/performance/md-cache/src/md-cache.c +@@ -133,6 +133,7 @@ struct mdc_local { + char *key; + dict_t *xattr; + uint64_t incident_time; ++ bool update_cache; + }; + + int +@@ -969,7 +970,7 @@ out: + return ret; + } + +-void ++static bool + mdc_load_reqs(xlator_t *this, dict_t *dict) + { + struct mdc_conf *conf = this->private; +@@ -978,6 +979,7 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) + char *tmp = NULL; + char *tmp1 = NULL; + int ret = 0; ++ bool loaded = false; + + tmp1 = conf->mdc_xattr_str; + if (!tmp1) +@@ -995,13 +997,17 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) + conf->mdc_xattr_str = NULL; + gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE, + "Disabled cache for xattrs, dict_set failed"); ++ goto out; + } + pattern = strtok_r(NULL, ",", &tmp); + } + +- GF_FREE(mdc_xattr_str); ++ loaded = true; ++ + out: +- return; ++ GF_FREE(mdc_xattr_str); ++ ++ return loaded; + } + + struct checkpair { +@@ -1092,6 +1098,25 @@ err: + return ret; + } + ++static dict_t * ++mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata) ++{ ++ if (xdata == NULL) { ++ xdata = dict_new(); ++ if (xdata == NULL) { ++ local->update_cache = false; ++ ++ return NULL; ++ } ++ } else { ++ dict_ref(xdata); ++ } ++ ++ local->update_cache = mdc_load_reqs(this, xdata); ++ ++ return xdata; ++} ++ + int + mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, +@@ -1201,7 +1226,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + if (local->loc.inode) { + mdc_inode_iatt_set(this, local->loc.inode, stbuf, local->incident_time); +- mdc_inode_xatt_set(this, local->loc.inode, dict); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->loc.inode, dict); ++ } + } + out: + MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict, +@@ -1220,7 +1247,6 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + 0, + }; + dict_t *xattr_rsp = NULL; +- dict_t *xattr_alloc = NULL; + mdc_local_t *local = NULL; + struct mdc_conf *conf = this->private; + +@@ -1271,18 +1297,18 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + return 0; + + uncached: +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + + if (xattr_rsp) + dict_unref(xattr_rsp); +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -1305,7 +1331,9 @@ mdc_stat_cbk(call_frame_t 
*frame, void *cookie, xlator_t *this, int32_t op_ret, + } + + mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time); +- mdc_inode_xatt_set(this, local->loc.inode, xdata); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->loc.inode, xdata); ++ } + + out: + MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); +@@ -1319,7 +1347,6 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + int ret; + struct iatt stbuf; + mdc_local_t *local = NULL; +- dict_t *xattr_alloc = NULL; + struct mdc_conf *conf = this->private; + + local = mdc_local_get(frame, loc->inode); +@@ -1343,17 +1370,16 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + return 0; + + uncached: +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + GF_ATOMIC_INC(conf->mdc_counter.stat_miss); + STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -1376,7 +1402,9 @@ mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + } + + mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time); +- mdc_inode_xatt_set(this, local->fd->inode, xdata); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->fd->inode, xdata); ++ } + + out: + MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); +@@ -1390,7 +1418,6 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + int ret; + struct iatt stbuf; + mdc_local_t *local = NULL; +- dict_t *xattr_alloc = NULL; + struct mdc_conf *conf = this->private; + + local = mdc_local_get(frame, fd->inode); +@@ -1409,17 +1436,16 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + return 0; + + uncached: +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + GF_ATOMIC_INC(conf->mdc_counter.stat_miss); + STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -2393,7 +2419,9 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto out; + } + +- mdc_inode_xatt_set(this, local->loc.inode, xdata); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->loc.inode, xdata); ++ } + + out: + MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); +@@ -2410,7 +2438,6 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + mdc_local_t *local = NULL; + dict_t *xattr = NULL; + struct mdc_conf *conf = this->private; +- dict_t *xattr_alloc = NULL; + gf_boolean_t key_satisfied = _gf_true; + + local = mdc_local_get(frame, loc->inode); +@@ -2443,18 +2470,17 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + + uncached: + if (key_satisfied) { +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + } + + GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); + STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, key, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (key_satisfied && 
(xdata != NULL)) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -2481,7 +2507,9 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto out; + } + +- mdc_inode_xatt_set(this, local->fd->inode, xdata); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->fd->inode, xdata); ++ } + + out: + MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata); +@@ -2498,7 +2526,6 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + dict_t *xattr = NULL; + int op_errno = ENODATA; + struct mdc_conf *conf = this->private; +- dict_t *xattr_alloc = NULL; + gf_boolean_t key_satisfied = _gf_true; + + local = mdc_local_get(frame, fd->inode); +@@ -2531,18 +2558,17 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + + uncached: + if (key_satisfied) { +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + } + + GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); + STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (key_satisfied && (xdata != NULL)) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -2752,27 +2778,22 @@ int + mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) + { +- dict_t *xattr_alloc = NULL; + mdc_local_t *local = NULL; + + local = mdc_local_get(frame, loc->inode); + + loc_copy(&local->loc, loc); + +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- +- if (xdata) { +- /* Tell readdir-ahead to include these keys in xdata when it +- * internally issues readdirp() in it's opendir_cbk */ +- mdc_load_reqs(this, xdata); +- } ++ /* Tell readdir-ahead to include these keys in xdata when it ++ * internally issues readdirp() in it's opendir_cbk */ ++ xdata = mdc_prepare_request(this, local, xdata); + + STACK_WIND(frame, mdc_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } + + return 0; + } +@@ -2800,7 +2821,9 @@ mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + continue; + mdc_inode_iatt_set(this, entry->inode, &entry->d_stat, + local->incident_time); +- mdc_inode_xatt_set(this, entry->inode, entry->dict); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, entry->inode, entry->dict); ++ } + } + + unwind: +@@ -2812,7 +2835,6 @@ int + mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) + { +- dict_t *xattr_alloc = NULL; + mdc_local_t *local = NULL; + + local = mdc_local_get(frame, fd->inode); +@@ -2821,15 +2843,15 @@ mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + + local->fd = fd_ref(fd); + +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } ++ + return 0; + out: + MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); +@@ -2860,7 +2882,6 @@ int + mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) + { +- int need_unref = 0; + 
mdc_local_t *local = NULL; + struct mdc_conf *conf = this->private; + +@@ -2876,19 +2897,14 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + return 0; + } + +- if (!xdata) { +- xdata = dict_new(); +- need_unref = 1; +- } +- +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); + +- if (need_unref && xdata) ++ if (xdata != NULL) { + dict_unref(xdata); ++ } + + return 0; + unwind: +@@ -3468,7 +3484,12 @@ mdc_register_xattr_inval(xlator_t *this) + goto out; + } + +- mdc_load_reqs(this, xattr); ++ if (!mdc_load_reqs(this, xattr)) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY, ++ "failed to populate cache entries"); ++ ret = -1; ++ goto out; ++ } + + frame = create_frame(this, this->ctx->pool); + if (!frame) { +-- +1.8.3.1 + diff --git a/SOURCES/0419-cluster-afr-fix-race-when-bricks-come-up.patch b/SOURCES/0419-cluster-afr-fix-race-when-bricks-come-up.patch new file mode 100644 index 0000000..ea8c2ea --- /dev/null +++ b/SOURCES/0419-cluster-afr-fix-race-when-bricks-come-up.patch @@ -0,0 +1,104 @@ +From b9b479de2a7fd1c5eefa7aa1142e0a39e0c96ca9 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Sun, 1 Mar 2020 19:49:04 +0100 +Subject: [PATCH 419/449] cluster/afr: fix race when bricks come up + +The was a problem when self-heal was sending lookups at the same time +that one of the bricks was coming up. In this case there was a chance +that the number of 'up' bricks changes in the middle of sending the +requests to subvolumes which caused a discrepancy in the expected +number of replies and the actual number of sent requests. + +This discrepancy caused that AFR continued executing requests before +all requests were complete. Eventually, the frame of the pending +request was destroyed when the operation terminated, causing a use- +after-free issue when the answer was finally received. + +In theory the same thing could happen in the reverse way, i.e. AFR +tries to wait for more replies than sent requests, causing a hang. 
+ +Backport of: +> Upstream-patch-link: https://review.gluster.org/24191 +> Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +> Fixes: bz#1808875 + +BUG: 1794663 +Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202489 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/afr/src/afr-self-heal-common.c | 6 +++--- + xlators/cluster/afr/src/afr-self-heal-name.c | 4 +++- + xlators/cluster/afr/src/afr-self-heal.h | 7 +++++-- + 3 files changed, 11 insertions(+), 6 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index ce1ea50..d942ccf 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1869,12 +1869,12 @@ int + afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, + struct afr_reply *replies) + { +- afr_private_t *priv = NULL; ++ afr_local_t *local = NULL; + +- priv = frame->this->private; ++ local = frame->local; + + return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies, +- priv->child_up); ++ local->child_up); + } + + unsigned int +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 7d4f208..dace071 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -560,13 +560,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this, + struct afr_reply *replies = NULL; + inode_t *inode = NULL; + int first_idx = -1; ++ afr_local_t *local = NULL; + + priv = this->private; ++ local = frame->local; + + replies = alloca0(sizeof(*replies) * priv->child_count); + + inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies, +- priv->child_up, NULL); ++ local->child_up, NULL); + if (!inode) + return -ENOMEM; + +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 8234cec..f7ecf5d 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -46,13 +46,16 @@ + afr_local_t *__local = frame->local; \ + afr_private_t *__priv = frame->this->private; \ + int __i = 0; \ +- int __count = AFR_COUNT(list, __priv->child_count); \ ++ int __count = 0; \ ++ unsigned char *__list = alloca(__priv->child_count); \ + \ ++ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \ ++ __count = AFR_COUNT(__list, __priv->child_count); \ + __local->barrier.waitfor = __count; \ + afr_local_replies_wipe(__local, __priv); \ + \ + for (__i = 0; __i < __priv->child_count; __i++) { \ +- if (!list[__i]) \ ++ if (!__list[__i]) \ + continue; \ + STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \ + __priv->children[__i], \ +-- +1.8.3.1 + diff --git a/SOURCES/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch b/SOURCES/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch new file mode 100644 index 0000000..cb27b33 --- /dev/null +++ b/SOURCES/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch @@ -0,0 +1,46 @@ +From 42a05c7f8464f529f53bced31a64ea373e16f58b Mon Sep 17 00:00:00 2001 +From: Hari Gowtham <hgowtham@redhat.com> +Date: Thu, 24 Oct 2019 17:40:44 +0530 +Subject: [PATCH 420/449] scripts: quota_fsck script TypeError: 
%d format:not + dict + +Problem: One of the prints in the script have been using +%i as the format for printing which doesn't work. + +Fix: use %s as the format in the place of %i + +>Fixes: bz#1764129 +>Change-Id: I4480ede7bf62906ddedbe5f880a1e89c76946641 +>Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23586/ + +BUG: 1786681 +Change-Id: I4480ede7bf62906ddedbe5f880a1e89c76946641 +Signed-off-by: hari gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202484 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunny Kumar <sunkumar@redhat.com> +--- + extras/quota/quota_fsck.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py +index 485a37a..174f2a2 100755 +--- a/extras/quota/quota_fsck.py ++++ b/extras/quota/quota_fsck.py +@@ -58,10 +58,10 @@ def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None): + elif log_type == QUOTA_SIZE_MISMATCH: + print("mismatch") + if dir_size is not None: +- print('%24s %60s %12s %12s' % ("Size Mismatch", path, ++ print('%24s %60s %12s %12s' % ("Size Mismatch", path, + xattr_dict, dir_size)) + else: +- print('%-24s %-60s %-12i %-12i' % ("Size Mismatch", path, xattr_dict, ++ print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict, + stbuf.st_size)) + + def size_differs_lot(s1, s2): +-- +1.8.3.1 + diff --git a/SOURCES/0421-Improve-logging-in-EC-client-and-lock-translator.patch b/SOURCES/0421-Improve-logging-in-EC-client-and-lock-translator.patch new file mode 100644 index 0000000..06f0304 --- /dev/null +++ b/SOURCES/0421-Improve-logging-in-EC-client-and-lock-translator.patch @@ -0,0 +1,93 @@ +From 8267e5e97327633bf21fd02df8d52e3a97f0f9ea Mon Sep 17 00:00:00 2001 +From: Ashish Pandey <aspandey@redhat.com> +Date: Wed, 4 Dec 2019 17:06:18 +0530 +Subject: [PATCH 421/449] Improve logging in EC, client and lock translator + +BUG: 1787294 +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23814/ +> Change-Id: I98af8672a25ff9fd9dba91a2e1384719f9155255 +> Fixes: bz#1779760 + +Change-Id: I5cb04993f12d6248f2349a0c5a9e2c0ceecaf528 +Reviewed-on: https://code.engineering.redhat.com/gerrit/202533 +Tested-by: Ashish Pandey <aspandey@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-combine.c | 5 +++-- + xlators/cluster/ec/src/ec-common.c | 2 +- + xlators/features/locks/src/inodelk.c | 12 ++++++++---- + 3 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c +index c5af2ab..99e5534 100644 +--- a/xlators/cluster/ec/src/ec-combine.c ++++ b/xlators/cluster/ec/src/ec-combine.c +@@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src, + "links: %u-%u, uid: %u-%u, gid: %u-%u, " + "rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64 + ", " +- "mode: %o-%o)", ++ "mode: %o-%o), %s", + dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink, + src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid, + src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev, + dst[i].ia_size, src[i].ia_size, + st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type), +- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type)); ++ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type), ++ ec_msg_str(fop)); + + return 0; + } +diff --git a/xlators/cluster/ec/src/ec-common.c 
b/xlators/cluster/ec/src/ec-common.c +index 5cae37b..e580bfb 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2240,7 +2240,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED, +- "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id)); ++ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop)); + } else { + ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock); + } +diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c +index df00ede..a9c42f1 100644 +--- a/xlators/features/locks/src/inodelk.c ++++ b/xlators/features/locks/src/inodelk.c +@@ -502,22 +502,26 @@ static pl_inode_lock_t * + __inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) + { + pl_inode_lock_t *conf = NULL; ++ inode_t *inode = NULL; ++ ++ inode = lock->pl_inode->inode; + + conf = find_matching_inodelk(lock, dom); + if (!conf) { + gf_log(this->name, GF_LOG_ERROR, + " Matching lock not found for unlock %llu-%llu, by %s " +- "on %p", ++ "on %p for gfid:%s", + (unsigned long long)lock->fl_start, + (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner), +- lock->client); ++ lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN"); + goto out; + } + __delete_inode_lock(conf); + gf_log(this->name, GF_LOG_DEBUG, +- " Matching lock found for unlock %llu-%llu, by %s on %p", ++ " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s", + (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, +- lkowner_utoa(&lock->owner), lock->client); ++ lkowner_utoa(&lock->owner), lock->client, ++ inode ? uuid_utoa(inode->gfid) : "UNKNOWN"); + + out: + return conf; +-- +1.8.3.1 + diff --git a/SOURCES/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch b/SOURCES/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch new file mode 100644 index 0000000..400ba67 --- /dev/null +++ b/SOURCES/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch @@ -0,0 +1,236 @@ +From 8b11ac1575ef167af2a47a96f7b7ed0f32bb5897 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Fri, 5 Jun 2020 17:20:04 +0530 +Subject: [PATCH 422/449] cluster/afr: Prioritize ENOSPC over other errors + +Backport of: https://review.gluster.org/#/c/glusterfs/+/24477/ + +Problem: +In a replicate/arbiter volume if file creations or writes fails on +quorum number of bricks and on one brick it is due to ENOSPC and +on other brick it fails for a different reason, it may fail with +errors other than ENOSPC in some cases. + +Fix: +Prioritize ENOSPC over other lesser priority errors and do not set +op_errno in posix_gfid_set if op_ret is 0 to avoid receiving any +error_no which can be misinterpreted by __afr_dir_write_finalize(). + +Also removing the function afr_has_arbiter_fop_cbk_quorum() which +might consider a successful reply form a single brick as quorum +success in some cases, whereas we always need fop to be successful +on quorum number of bricks in arbiter configuration. 
+ +Change-Id: I4dd2bff17e6812bc7c8372130976e365e2407d88 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +BUG: 1837467 +Reviewed-on: https://code.engineering.redhat.com/gerrit/202526 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bugs/replicate/issue-1254-prioritize-enospc.t | 80 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 4 +- + xlators/cluster/afr/src/afr-transaction.c | 48 +------------ + xlators/storage/posix/src/posix-helpers.c | 2 +- + 4 files changed, 86 insertions(+), 48 deletions(-) + create mode 100644 tests/bugs/replicate/issue-1254-prioritize-enospc.t + +diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t +new file mode 100644 +index 0000000..fab94b7 +--- /dev/null ++++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t +@@ -0,0 +1,80 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup ++ ++function create_bricks { ++ TEST truncate -s 100M $B0/brick0 ++ TEST truncate -s 100M $B0/brick1 ++ TEST truncate -s 20M $B0/brick2 ++ LO1=`SETUP_LOOP $B0/brick0` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO1 ++ LO2=`SETUP_LOOP $B0/brick1` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO2 ++ LO3=`SETUP_LOOP $B0/brick2` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO3 ++ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2 ++ TEST MOUNT_LOOP $LO1 $B0/${V0}0 ++ TEST MOUNT_LOOP $LO2 $B0/${V0}1 ++ TEST MOUNT_LOOP $LO3 $B0/${V0}2 ++} ++ ++function create_files { ++ local i=1 ++ while (true) ++ do ++ touch $M0/file$i ++ if [ -e $B0/${V0}2/file$i ]; ++ then ++ ((i++)) ++ else ++ break ++ fi ++ done ++} ++ ++TESTS_EXPECTED_IN_LOOP=13 ++ ++#Arbiter volume: Check for ENOSPC when arbiter brick becomes full# ++TEST glusterd ++create_bricks ++TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++create_files ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++error1=$(touch $M0/file-1 2>&1) ++EXPECT "No space left on device" echo $error1 ++error2=$(mkdir $M0/dir-1 2>&1) ++EXPECT "No space left on device" echo $error2 ++error3=$((echo "Test" > $M0/file-3) 2>&1) ++EXPECT "No space left on device" echo $error3 ++ ++cleanup ++ ++#Replica-3 volume: Check for ENOSPC when one of the brick becomes full# ++#Keeping the third brick of lower size to simulate disk full scenario# ++TEST glusterd ++create_bricks ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++create_files ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++error1=$(touch $M0/file-1 2>&1) ++EXPECT "No space left on device" echo $error1 ++error2=$(mkdir $M0/dir-1 2>&1) ++EXPECT "No space left on device" echo $error2 ++error3=$((cat /dev/zero > $M0/file1) 2>&1) ++EXPECT "No space left on device" echo $error3 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 5806556..59710aa 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2464,7 +2464,7 @@ error: + * others in that they must be given higher priority while + * returning to the user. 
+ * +- * The hierarchy is ENODATA > ENOENT > ESTALE > others ++ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others + */ + + int +@@ -2476,6 +2476,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno) + return ENOENT; + if (old_errno == ESTALE || new_errno == ESTALE) + return ESTALE; ++ if (old_errno == ENOSPC || new_errno == ENOSPC) ++ return ENOSPC; + + return new_errno; + } +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 15f3a7e..8e65ae2 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -514,42 +514,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this) + local->transaction.pre_op_sources[j] = 0; + } + +-gf_boolean_t +-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame) +-{ +- afr_local_t *local = NULL; +- afr_private_t *priv = NULL; +- xlator_t *this = NULL; +- gf_boolean_t fop_failed = _gf_false; +- unsigned char *pre_op_sources = NULL; +- int i = 0; +- +- local = frame->local; +- this = frame->this; +- priv = this->private; +- pre_op_sources = local->transaction.pre_op_sources; +- +- /* If the fop failed on the brick, it is not a source. */ +- for (i = 0; i < priv->child_count; i++) +- if (local->transaction.failed_subvols[i]) +- pre_op_sources[i] = 0; +- +- switch (AFR_COUNT(pre_op_sources, priv->child_count)) { +- case 1: +- if (pre_op_sources[ARBITER_BRICK_INDEX]) +- fop_failed = _gf_true; +- break; +- case 0: +- fop_failed = _gf_true; +- break; +- } +- +- if (fop_failed) +- return _gf_false; +- +- return _gf_true; +-} +- + void + afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this) + { +@@ -968,12 +932,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this) + priv->child_count) + return _gf_false; + +- if (priv->arbiter_count) { +- if (!afr_has_arbiter_fop_cbk_quorum(frame)) +- need_dirty = _gf_true; +- } else if (!afr_has_fop_cbk_quorum(frame)) { ++ if (!afr_has_fop_cbk_quorum(frame)) + need_dirty = _gf_true; +- } + + return need_dirty; + } +@@ -1023,12 +983,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this) + * no split-brain with the fix. The problem is eliminated completely. + */ + +- if (priv->arbiter_count) { +- if (afr_has_arbiter_fop_cbk_quorum(frame)) +- return; +- } else if (afr_has_fop_cbk_quorum(frame)) { ++ if (afr_has_fop_cbk_quorum(frame)) + return; +- } + + if (afr_need_dirty_marking(frame, this)) + goto set_response; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 2c27d22..949c799 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1059,7 +1059,7 @@ verify_handle: + ret = posix_handle_soft(this, path, loc, uuid_curr, &stat); + + out: +- if (!(*op_errno)) ++ if (ret && !(*op_errno)) + *op_errno = errno; + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch b/SOURCES/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch new file mode 100644 index 0000000..6a547ea --- /dev/null +++ b/SOURCES/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch @@ -0,0 +1,128 @@ +From c140d30382306d08eaf2bc5c53e5be26d3e381e1 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Mon, 18 Nov 2019 05:24:33 -0500 +Subject: [PATCH 423/449] ctime: Fix ctime inconsisteny with utimensat + +Problem: +When touch is used to create a file, the ctime is not matching +atime and mtime which ideally should match. 
There is a difference +in nano seconds. + +Cause: +When touch is used modify atime or mtime to current time (UTIME_NOW), +the current time is taken from kernel. The ctime gets updated to current +time when atime or mtime is updated. But the current time to update +ctime is taken from utime xlator. Hence the difference in nano seconds. + +Fix: +When utimesat uses UTIME_NOW, use the current time from kernel. + +>fixes: bz#1773530 +>Change-Id: I9ccfa47dcd39df23396852b4216f1773c49250ce +>Signed-off-by: Kotresh HR <khiremat@redhat.com> + +backport of: https://review.gluster.org/#/c/glusterfs/+/23719/ +BUG: 1761932 +Change-Id: I9ccfa47dcd39df23396852b4216f1773c49250ce +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202541 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/xlator.h | 2 ++ + tests/basic/ctime/ctime-utimesat.t | 28 ++++++++++++++++++++++++++ + xlators/features/utime/src/utime-gen-fops-c.py | 10 +++++++++ + xlators/mount/fuse/src/fuse-bridge.c | 8 ++++++++ + 4 files changed, 48 insertions(+) + create mode 100644 tests/basic/ctime/ctime-utimesat.t + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index da551e9..db04c4d 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -35,6 +35,8 @@ + #define GF_SET_ATTR_ATIME 0x10 + #define GF_SET_ATTR_MTIME 0x20 + #define GF_SET_ATTR_CTIME 0x40 ++#define GF_ATTR_ATIME_NOW 0x80 ++#define GF_ATTR_MTIME_NOW 0x100 + + #define gf_attr_mode_set(mode) ((mode)&GF_SET_ATTR_MODE) + #define gf_attr_uid_set(mode) ((mode)&GF_SET_ATTR_UID) +diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t +new file mode 100644 +index 0000000..540e57a +--- /dev/null ++++ b/tests/basic/ctime/ctime-utimesat.t +@@ -0,0 +1,28 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.read-after-open off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.io-cache off ++ ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++touch $M0/FILE ++ ++atime=$(stat -c "%.X" $M0/FILE) ++EXPECT $atime stat -c "%.Y" $M0/FILE ++EXPECT $atime stat -c "%.Z" $M0/FILE ++ ++cleanup +diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py +index 8730a51..9fb3e1b 100755 +--- a/xlators/features/utime/src/utime-gen-fops-c.py ++++ b/xlators/features/utime/src/utime-gen-fops-c.py +@@ -95,6 +95,16 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + frame->root->flags |= MDATA_CTIME; + } + ++ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { ++ if (valid & GF_ATTR_ATIME_NOW) { ++ frame->root->ctime.tv_sec = stbuf->ia_atime; ++ frame->root->ctime.tv_nsec = stbuf->ia_atime_nsec; ++ } else if (valid & GF_ATTR_MTIME_NOW) { ++ frame->root->ctime.tv_sec = stbuf->ia_mtime; ++ frame->root->ctime.tv_nsec = stbuf->ia_mtime_nsec; ++ } ++ } ++ + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 6e99053..fdeec49 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -1706,6 +1706,14 @@ fattr_to_gf_set_attr(int32_t valid) + gf_valid |= GF_SET_ATTR_CTIME; + #endif + ++#if FUSE_KERNEL_MINOR_VERSION >= 9 ++ if (valid & FATTR_ATIME_NOW) ++ gf_valid |= GF_ATTR_ATIME_NOW; ++ ++ if (valid & FATTR_MTIME_NOW) ++ gf_valid |= GF_ATTR_MTIME_NOW; ++#endif ++ + if (valid & FATTR_SIZE) + gf_valid |= GF_SET_ATTR_SIZE; + +-- +1.8.3.1 + diff --git a/SOURCES/0424-afr-make-heal-info-lockless.patch b/SOURCES/0424-afr-make-heal-info-lockless.patch new file mode 100644 index 0000000..593fa34 --- /dev/null +++ b/SOURCES/0424-afr-make-heal-info-lockless.patch @@ -0,0 +1,884 @@ +From 54d4ea44fec96560aad9c41f7e4f5aad164ffb8b Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Fri, 5 Jun 2020 14:14:15 +0530 +Subject: [PATCH 424/449] afr: make heal info lockless + +Changes in locks xlator: +Added support for per-domain inodelk count requests. +Caller needs to set GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS key in the +dict and then set each key with name +'GLUSTERFS_INODELK_DOM_PREFIX:<domain name>'. +In the response dict, the xlator will send the per domain count as +values for each of these keys. + +Changes in AFR: +Replaced afr_selfheal_locked_inspect() with afr_lockless_inspect(). Logic has +been added to make the latter behave same as the former, thus not +breaking the current heal info output behaviour. 
+ +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23771/ +> fixes: bz#1774011 +> Change-Id: Ie9e83c162aa77f44a39c2ba7115de558120ada4d + +BUG: 1721355 +Change-Id: I8ed4b504880b19e00068312efd90cd0706787404 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202490 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + heal/src/glfs-heal.c | 17 +- + libglusterfs/src/glusterfs/glusterfs.h | 2 + + xlators/cluster/afr/src/afr-common.c | 367 +++++++++++-------------- + xlators/cluster/afr/src/afr-self-heal-common.c | 43 ++- + xlators/cluster/afr/src/afr-self-heal.h | 3 +- + xlators/features/locks/src/common.h | 4 + + xlators/features/locks/src/locks.h | 8 + + xlators/features/locks/src/posix.c | 117 +++++++- + 8 files changed, 338 insertions(+), 223 deletions(-) + +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 125b12c..5af9e31 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -775,7 +775,8 @@ static int + glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries, + uint64_t *offset, num_entries_t *num_entries, + print_status glfsh_print_status, +- gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode) ++ gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode, ++ dict_t *xattr_req) + { + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; +@@ -807,7 +808,7 @@ glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries, + + gf_uuid_parse(entry->d_name, gfid); + gf_uuid_copy(loc.gfid, gfid); +- ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, NULL, NULL); ++ ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, xattr_req, NULL); + if (ret) { + if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && (ret == -ENOTCONN)) + goto out; +@@ -876,19 +877,19 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + if (heal_op == GF_SHD_OP_INDEX_SUMMARY) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_heal_status, +- ignore, mode); ++ ignore, mode, xattr_req); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_spb_status, +- ignore, mode); ++ ignore, mode, xattr_req); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_summary_status, +- ignore, mode); ++ ignore, mode, xattr_req); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) { +@@ -897,7 +898,7 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_heal_status_boolean, +- ignore, mode); ++ ignore, mode, xattr_req); + if (ret < 0) + goto out; + } +@@ -951,6 +952,10 @@ glfsh_print_pending_heals_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + int32_t op_errno = 0; + gf_boolean_t ignore = _gf_false; + ++ ret = dict_set_str(xattr_req, "index-vgfid", vgfid); ++ if (ret) ++ return ret; ++ + if (!strcmp(vgfid, GF_XATTROP_DIRTY_GFID)) + ignore = _gf_true; + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 3b594c0..177a020 100644 +--- 
a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -217,6 +217,8 @@ enum gf_internal_fop_indicator { + #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count" + #define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk" + #define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count" ++#define GLUSTERFS_INODELK_DOM_PREFIX "glusterfs.inodelk-dom-prefix" ++#define GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS "glusterfs.multi-dom-lk-cnt-req" + #define GFID_TO_PATH_KEY "glusterfs.gfid2path" + #define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime" + #define GF_XATTR_XTIME_PATTERN "trusted.glusterfs.*.xtime" +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 59710aa..c355ec5 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5908,259 +5908,218 @@ out: + return _gf_true; + } + +-int +-afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this, +- inode_t *inode, gf_boolean_t *msh, +- unsigned char *pending) ++static dict_t * ++afr_set_heal_info(char *status) + { ++ dict_t *dict = NULL; + int ret = -1; +- unsigned char *locked_on = NULL; +- unsigned char *sources = NULL; +- unsigned char *sinks = NULL; +- unsigned char *healed_sinks = NULL; +- unsigned char *undid_pending = NULL; +- struct afr_reply *locked_replies = NULL; +- +- afr_private_t *priv = this->private; + +- locked_on = alloca0(priv->child_count); +- sources = alloca0(priv->child_count); +- sinks = alloca0(priv->child_count); +- healed_sinks = alloca0(priv->child_count); +- undid_pending = alloca0(priv->child_count); ++ dict = dict_new(); ++ if (!dict) { ++ ret = -ENOMEM; ++ goto out; ++ } + +- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); ++ ret = dict_set_dynstr_sizen(dict, "heal-info", status); ++ if (ret) ++ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, ++ "Failed to set heal-info key to " ++ "%s", ++ status); ++out: ++ /* Any error other than EINVAL, dict_set_dynstr frees status */ ++ if (ret == -ENOMEM || ret == -EINVAL) { ++ GF_FREE(status); ++ } + +- ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, +- locked_on); +- { +- if (ret == 0) { +- /* Not a single lock */ +- ret = -afr_final_errno(frame->local, priv); +- if (ret == 0) +- ret = -ENOTCONN; /* all invalid responses */ +- goto out; +- } +- ret = __afr_selfheal_metadata_prepare( +- frame, this, inode, locked_on, sources, sinks, healed_sinks, +- undid_pending, locked_replies, pending); +- *msh = afr_decide_heal_info(priv, sources, ret); ++ if (ret && dict) { ++ dict_unref(dict); ++ dict = NULL; + } +- afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, +- locked_on); +-out: +- if (locked_replies) +- afr_replies_wipe(locked_replies, priv->child_count); +- return ret; ++ return dict; + } + +-int +-afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd, +- gf_boolean_t *dsh, unsigned char *pflag) ++static gf_boolean_t ++afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies, ++ afr_transaction_type type) + { +- int ret = -1; +- unsigned char *data_lock = NULL; +- unsigned char *sources = NULL; +- unsigned char *sinks = NULL; +- unsigned char *healed_sinks = NULL; +- unsigned char *undid_pending = NULL; +- afr_private_t *priv = NULL; +- struct afr_reply *locked_replies = NULL; +- inode_t *inode = fd->inode; ++ afr_private_t *priv = this->private; ++ int *dirty = alloca0(priv->child_count * 
sizeof(int)); ++ int i = 0; + +- priv = this->private; +- data_lock = alloca0(priv->child_count); +- sources = alloca0(priv->child_count); +- sinks = alloca0(priv->child_count); +- healed_sinks = alloca0(priv->child_count); +- undid_pending = alloca0(priv->child_count); ++ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL); ++ for (i = 0; i < priv->child_count; i++) { ++ if (dirty[i] > 1) ++ return _gf_true; ++ } + +- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); ++ return _gf_false; ++} + +- ret = afr_selfheal_inodelk(frame, this, inode, this->name, 0, 0, data_lock); +- { +- if (ret == 0) { +- ret = -afr_final_errno(frame->local, priv); +- if (ret == 0) +- ret = -ENOTCONN; /* all invalid responses */ +- goto out; +- } +- ret = __afr_selfheal_data_prepare(frame, this, inode, data_lock, +- sources, sinks, healed_sinks, +- undid_pending, locked_replies, pflag); +- *dsh = afr_decide_heal_info(priv, sources, ret); ++static gf_boolean_t ++afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies, ++ ia_type_t ia_type) ++{ ++ gf_boolean_t data_chk = _gf_false; ++ gf_boolean_t mdata_chk = _gf_false; ++ gf_boolean_t entry_chk = _gf_false; ++ ++ switch (ia_type) { ++ case IA_IFDIR: ++ mdata_chk = _gf_true; ++ entry_chk = _gf_true; ++ break; ++ case IA_IFREG: ++ mdata_chk = _gf_true; ++ data_chk = _gf_true; ++ break; ++ default: ++ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/ ++ mdata_chk = _gf_true; ++ break; + } +- afr_selfheal_uninodelk(frame, this, inode, this->name, 0, 0, data_lock); +-out: +- if (locked_replies) +- afr_replies_wipe(locked_replies, priv->child_count); +- return ret; ++ ++ if (data_chk && afr_is_dirty_count_non_unary_for_txn( ++ this, replies, AFR_DATA_TRANSACTION)) { ++ return _gf_true; ++ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn( ++ this, replies, AFR_METADATA_TRANSACTION)) { ++ return _gf_true; ++ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn( ++ this, replies, AFR_ENTRY_TRANSACTION)) { ++ return _gf_true; ++ } ++ ++ return _gf_false; + } + +-int +-afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this, +- inode_t *inode, gf_boolean_t *esh, +- unsigned char *pflag) ++static int ++afr_update_heal_status(xlator_t *this, struct afr_reply *replies, ++ char *index_vgfid, ia_type_t ia_type, gf_boolean_t *esh, ++ gf_boolean_t *dsh, gf_boolean_t *msh) + { + int ret = -1; +- int source = -1; ++ GF_UNUSED int ret1 = 0; ++ int i = 0; ++ int io_domain_lk_count = 0; ++ int shd_domain_lk_count = 0; + afr_private_t *priv = NULL; +- unsigned char *locked_on = NULL; +- unsigned char *data_lock = NULL; +- unsigned char *sources = NULL; +- unsigned char *sinks = NULL; +- unsigned char *healed_sinks = NULL; +- struct afr_reply *locked_replies = NULL; +- gf_boolean_t granular_locks = _gf_false; ++ char *key1 = NULL; ++ char *key2 = NULL; + + priv = this->private; +- granular_locks = priv->granular_locks; /*Assign to local variable so that +- reconfigure doesn't change this +- value between locking and unlocking +- below*/ +- locked_on = alloca0(priv->child_count); +- data_lock = alloca0(priv->child_count); +- sources = alloca0(priv->child_count); +- sinks = alloca0(priv->child_count); +- healed_sinks = alloca0(priv->child_count); +- +- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); ++ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + ++ strlen(this->name)); ++ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + ++ strlen(priv->sh_domain)); ++ 
sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name); ++ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain); + +- if (!granular_locks) { +- ret = afr_selfheal_tryentrylk(frame, this, inode, priv->sh_domain, NULL, +- locked_on); +- } +- { +- if (!granular_locks && ret == 0) { +- ret = -afr_final_errno(frame->local, priv); +- if (ret == 0) +- ret = -ENOTCONN; /* all invalid responses */ +- goto out; ++ for (i = 0; i < priv->child_count; i++) { ++ if ((replies[i].valid != 1) || (replies[i].op_ret != 0)) ++ continue; ++ if (!io_domain_lk_count) { ++ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count); + } ++ if (!shd_domain_lk_count) { ++ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count); ++ } ++ } + +- ret = afr_selfheal_entrylk(frame, this, inode, this->name, NULL, +- data_lock); +- { +- if (ret == 0) { +- ret = -afr_final_errno(frame->local, priv); +- if (ret == 0) +- ret = -ENOTCONN; +- /* all invalid responses */ +- goto unlock; +- } +- ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock, +- sources, sinks, healed_sinks, +- locked_replies, &source, pflag); +- if ((ret == 0) && (*pflag & PFLAG_SBRAIN)) +- ret = -EIO; +- *esh = afr_decide_heal_info(priv, sources, ret); ++ if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) { ++ if (shd_domain_lk_count) { ++ ret = -EAGAIN; /*For 'possibly-healing'. */ ++ } else { ++ ret = 0; /*needs heal. Just set a non -ve value so that it is ++ assumed as the source index.*/ ++ } ++ } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) { ++ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) || ++ (!io_domain_lk_count)) { ++ /* Needs heal. */ ++ ret = 0; ++ } else { ++ /* No heal needed. */ ++ *dsh = *esh = *msh = 0; + } +- afr_selfheal_unentrylk(frame, this, inode, this->name, NULL, data_lock, +- NULL); + } +-unlock: +- if (!granular_locks) +- afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL, +- locked_on, NULL); +-out: +- if (locked_replies) +- afr_replies_wipe(locked_replies, priv->child_count); + return ret; + } + ++/*return EIO, EAGAIN or pending*/ + int +-afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, +- inode_t **inode, gf_boolean_t *entry_selfheal, +- gf_boolean_t *data_selfheal, +- gf_boolean_t *metadata_selfheal, +- unsigned char *pending) +- ++afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, ++ inode_t **inode, char *index_vgfid, ++ gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal, ++ gf_boolean_t *metadata_selfheal, unsigned char *pending) + { + int ret = -1; +- fd_t *fd = NULL; ++ int i = 0; ++ afr_private_t *priv = NULL; ++ struct afr_reply *replies = NULL; + gf_boolean_t dsh = _gf_false; + gf_boolean_t msh = _gf_false; + gf_boolean_t esh = _gf_false; ++ unsigned char *sources = NULL; ++ unsigned char *sinks = NULL; ++ unsigned char *valid_on = NULL; ++ uint64_t *witness = NULL; ++ ++ priv = this->private; ++ replies = alloca0(sizeof(*replies) * priv->child_count); ++ sources = alloca0(sizeof(*sources) * priv->child_count); ++ sinks = alloca0(sizeof(*sinks) * priv->child_count); ++ witness = alloca0(sizeof(*witness) * priv->child_count); ++ valid_on = alloca0(sizeof(*valid_on) * priv->child_count); + + ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh, +- &esh); ++ &esh, replies); + if (ret) + goto out; +- +- /* For every heal type hold locks and check if it indeed needs heal */ +- +- /* Heal-info does an open() on the file being examined so that the +- * 
current eager-lock holding client, if present, at some point sees +- * open-fd count being > 1 and releases the eager-lock so that heal-info +- * doesn't remain blocked forever until IO completes. +- */ +- if ((*inode)->ia_type == IA_IFREG) { +- ret = afr_selfheal_data_open(this, *inode, &fd); +- if (ret < 0) { +- gf_msg_debug(this->name, -ret, "%s: Failed to open", +- uuid_utoa((*inode)->gfid)); +- goto out; ++ for (i = 0; i < priv->child_count; i++) { ++ if (replies[i].valid && replies[i].op_ret == 0) { ++ valid_on[i] = 1; + } + } +- + if (msh) { +- ret = afr_selfheal_locked_metadata_inspect(frame, this, *inode, &msh, +- pending); +- if (ret == -EIO) ++ ret = afr_selfheal_find_direction(frame, this, replies, ++ AFR_METADATA_TRANSACTION, valid_on, ++ sources, sinks, witness, pending); ++ if (*pending & PFLAG_SBRAIN) ++ ret = -EIO; ++ if (ret) + goto out; + } +- + if (dsh) { +- ret = afr_selfheal_locked_data_inspect(frame, this, fd, &dsh, pending); +- if (ret == -EIO || (ret == -EAGAIN)) ++ ret = afr_selfheal_find_direction(frame, this, replies, ++ AFR_DATA_TRANSACTION, valid_on, ++ sources, sinks, witness, pending); ++ if (*pending & PFLAG_SBRAIN) ++ ret = -EIO; ++ if (ret) + goto out; + } +- + if (esh) { +- ret = afr_selfheal_locked_entry_inspect(frame, this, *inode, &esh, +- pending); ++ ret = afr_selfheal_find_direction(frame, this, replies, ++ AFR_ENTRY_TRANSACTION, valid_on, ++ sources, sinks, witness, pending); ++ if (*pending & PFLAG_SBRAIN) ++ ret = -EIO; ++ if (ret) ++ goto out; + } + ++ ret = afr_update_heal_status(this, replies, index_vgfid, (*inode)->ia_type, ++ &esh, &dsh, &msh); + out: + *data_selfheal = dsh; + *entry_selfheal = esh; + *metadata_selfheal = msh; +- if (fd) +- fd_unref(fd); ++ if (replies) ++ afr_replies_wipe(replies, priv->child_count); + return ret; + } + +-static dict_t * +-afr_set_heal_info(char *status) +-{ +- dict_t *dict = NULL; +- int ret = -1; +- +- dict = dict_new(); +- if (!dict) { +- ret = -ENOMEM; +- goto out; +- } +- +- ret = dict_set_dynstr_sizen(dict, "heal-info", status); +- if (ret) +- gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, +- "Failed to set heal-info key to " +- "%s", +- status); +-out: +- /* Any error other than EINVAL, dict_set_dynstr frees status */ +- if (ret == -ENOMEM || ret == -EINVAL) { +- GF_FREE(status); +- } +- +- if (ret && dict) { +- dict_unref(dict); +- dict = NULL; +- } +- return dict; +-} +- + int + afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) + { +@@ -6174,10 +6133,18 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) + inode_t *inode = NULL; + char *substr = NULL; + char *status = NULL; ++ afr_local_t *local = NULL; ++ char *index_vgfid = NULL; ++ ++ local = frame->local; ++ if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) { ++ ret = -1; ++ goto out; ++ } + +- ret = afr_selfheal_locked_inspect(frame, this, loc->gfid, &inode, +- &entry_selfheal, &data_selfheal, +- &metadata_selfheal, &pending); ++ ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, index_vgfid, ++ &entry_selfheal, &data_selfheal, ++ &metadata_selfheal, &pending); + + if (ret == -ENOMEM) { + ret = -1; +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index d942ccf..1608f75 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1827,6 +1827,37 @@ afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent, + return inode; + } + ++static int 
++afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict) ++{ ++ int ret = 0; ++ afr_private_t *priv = NULL; ++ char *key1 = NULL; ++ char *key2 = NULL; ++ ++ priv = this->private; ++ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + ++ strlen(this->name)); ++ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + ++ strlen(priv->sh_domain)); ++ ++ ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1); ++ if (ret) ++ return ret; ++ ++ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name); ++ ret = dict_set_uint32(dict, key1, 1); ++ if (ret) ++ return ret; ++ ++ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain); ++ ret = dict_set_uint32(dict, key2, 1); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ + int + afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, +@@ -1851,6 +1882,11 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + return -ENOMEM; + } + ++ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) { ++ dict_unref(xattr_req); ++ return -1; ++ } ++ + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, gfid); + +@@ -2241,7 +2277,8 @@ int + afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + inode_t **link_inode, gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, +- gf_boolean_t *entry_selfheal) ++ gf_boolean_t *entry_selfheal, ++ struct afr_reply *replies_dst) + { + afr_private_t *priv = NULL; + inode_t *inode = NULL; +@@ -2377,6 +2414,8 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + + ret = 0; + out: ++ if (replies && replies_dst) ++ afr_replies_copy(replies_dst, replies, priv->child_count); + if (inode) + inode_unref(inode); + if (replies) +@@ -2493,7 +2532,7 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid) + + ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode, + &data_selfheal, &metadata_selfheal, +- &entry_selfheal); ++ &entry_selfheal, NULL); + if (ret) + goto out; + +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index f7ecf5d..b39af02 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -327,7 +327,8 @@ int + afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + inode_t **link_inode, gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, +- gf_boolean_t *entry_selfheal); ++ gf_boolean_t *entry_selfheal, ++ struct afr_reply *replies); + + int + afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid); +diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h +index 3a74967..ea86b96 100644 +--- a/xlators/features/locks/src/common.h ++++ b/xlators/features/locks/src/common.h +@@ -45,6 +45,10 @@ + fd_unref(__local->fd); \ + if (__local->inode) \ + inode_unref(__local->inode); \ ++ if (__local->xdata) { \ ++ dict_unref(__local->xdata); \ ++ __local->xdata = NULL; \ ++ } \ + mem_put(__local); \ + } \ + } while (0) +diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h +index b817960..aa267de 100644 +--- a/xlators/features/locks/src/locks.h ++++ b/xlators/features/locks/src/locks.h +@@ -239,6 +239,7 @@ typedef struct { + gf_boolean_t inodelk_count_req; + gf_boolean_t posixlk_count_req; + gf_boolean_t parent_entrylk_req; ++ gf_boolean_t multiple_dom_lk_requests; + int update_mlock_enforced_flag; + } 
pl_local_t; + +@@ -260,6 +261,13 @@ typedef struct _locks_ctx { + struct list_head metalk_list; + } pl_ctx_t; + ++typedef struct _multi_dom_lk_data { ++ xlator_t *this; ++ inode_t *inode; ++ dict_t *xdata_rsp; ++ gf_boolean_t keep_max; ++} multi_dom_lk_data; ++ + typedef enum { DECREMENT, INCREMENT } pl_count_op_t; + + pl_ctx_t * +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 4592240..9a14c64 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -150,13 +150,20 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + gf_boolean_t + pl_has_xdata_requests(dict_t *xdata) + { +- static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT, +- GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT, +- GLUSTERFS_PARENT_ENTRYLK, NULL}; +- static int reqs_size[] = { +- SLEN(GLUSTERFS_ENTRYLK_COUNT), SLEN(GLUSTERFS_INODELK_COUNT), +- SLEN(GLUSTERFS_INODELK_DOM_COUNT), SLEN(GLUSTERFS_POSIXLK_COUNT), +- SLEN(GLUSTERFS_PARENT_ENTRYLK), 0}; ++ static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, ++ GLUSTERFS_INODELK_COUNT, ++ GLUSTERFS_INODELK_DOM_COUNT, ++ GLUSTERFS_POSIXLK_COUNT, ++ GLUSTERFS_PARENT_ENTRYLK, ++ GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, ++ NULL}; ++ static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT), ++ SLEN(GLUSTERFS_INODELK_COUNT), ++ SLEN(GLUSTERFS_INODELK_DOM_COUNT), ++ SLEN(GLUSTERFS_POSIXLK_COUNT), ++ SLEN(GLUSTERFS_PARENT_ENTRYLK), ++ SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS), ++ 0}; + int i = 0; + + if (!xdata) +@@ -169,12 +176,22 @@ pl_has_xdata_requests(dict_t *xdata) + return _gf_false; + } + ++static int ++dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data) ++{ ++ dict_del(dict, key); ++ return 0; ++} ++ + void + pl_get_xdata_requests(pl_local_t *local, dict_t *xdata) + { + if (!local || !xdata) + return; + ++ GF_ASSERT(local->xdata == NULL); ++ local->xdata = dict_copy_with_ref(xdata, NULL); ++ + if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) { + local->entrylk_count_req = 1; + dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT); +@@ -183,6 +200,12 @@ pl_get_xdata_requests(pl_local_t *local, dict_t *xdata) + local->inodelk_count_req = 1; + dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT); + } ++ if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) { ++ local->multiple_dom_lk_requests = 1; ++ dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS); ++ dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", ++ dict_delete_domain_key, NULL); ++ } + + local->inodelk_dom_count_req = dict_get_sizen(xdata, + GLUSTERFS_INODELK_DOM_COUNT); +@@ -210,7 +233,7 @@ pl_needs_xdata_response(pl_local_t *local) + + if (local->parent_entrylk_req || local->entrylk_count_req || + local->inodelk_dom_count_req || local->inodelk_count_req || +- local->posixlk_count_req) ++ local->posixlk_count_req || local->multiple_dom_lk_requests) + return _gf_true; + + return _gf_false; +@@ -411,6 +434,75 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, + } + + void ++pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict, ++ char *domname, gf_boolean_t keep_max, char *key) ++{ ++ int32_t count = 0; ++ int32_t maxcount = -1; ++ int ret = -1; ++ ++ if (keep_max) { ++ ret = dict_get_int32(dict, key, &maxcount); ++ if (ret < 0) ++ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", ++ GLUSTERFS_INODELK_COUNT); ++ } ++ count = get_inodelk_count(this, inode, domname); ++ if (maxcount >= count) ++ return; ++ ++ 
ret = dict_set_int32(dict, key, count); ++ if (ret < 0) { ++ gf_msg_debug(this->name, 0, ++ "Failed to set count for " ++ "key %s", ++ key); ++ } ++ ++ return; ++} ++ ++static int ++pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, ++ void *data) ++{ ++ multi_dom_lk_data *d = data; ++ char *tmp_key = NULL; ++ char *save_ptr = NULL; ++ ++ tmp_key = gf_strdup(key); ++ strtok_r(tmp_key, ":", &save_ptr); ++ if (!*save_ptr) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL, ++ "Could not tokenize domain string from key %s", key); ++ return -1; ++ } ++ ++ pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr, ++ d->keep_max, key); ++ if (tmp_key) ++ GF_FREE(tmp_key); ++ ++ return 0; ++} ++ ++void ++pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local, ++ inode_t *inode, dict_t *dict, ++ gf_boolean_t keep_max) ++{ ++ multi_dom_lk_data data; ++ ++ data.this = this; ++ data.inode = inode; ++ data.xdata_rsp = dict; ++ data.keep_max = keep_max; ++ ++ dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", ++ pl_inodelk_xattr_fill_multiple, &data); ++} ++ ++void + pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, + inode_t *inode, char *name, dict_t *xdata, + gf_boolean_t max_lock) +@@ -437,6 +529,9 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, + + if (local->posixlk_count_req) + pl_posixlk_xattr_fill(this, inode, xdata, max_lock); ++ ++ if (local->multiple_dom_lk_requests) ++ pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock); + } + + /* Checks whether the region where fop is acting upon conflicts +@@ -773,9 +868,6 @@ pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + { + pl_local_t *local = frame->local; + +- if (local->xdata) +- dict_unref(local->xdata); +- + pl_track_io_fop_count(local, this, DECREMENT); + + if (local->op == GF_FOP_TRUNCATE) +@@ -932,9 +1024,6 @@ unwind: + "ret: %d, error: %s", + op_ret, strerror(op_errno)); + +- if (local->xdata) +- dict_unref(local->xdata); +- + switch (local->op) { + case GF_FOP_TRUNCATE: + PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf, +-- +1.8.3.1 + diff --git a/SOURCES/0425-tests-Fix-spurious-self-heald.t-failure.patch b/SOURCES/0425-tests-Fix-spurious-self-heald.t-failure.patch new file mode 100644 index 0000000..7bfc04a --- /dev/null +++ b/SOURCES/0425-tests-Fix-spurious-self-heald.t-failure.patch @@ -0,0 +1,187 @@ +From 2c582ea6c76031463501b31d9250e739d5aeda79 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Fri, 5 Jun 2020 14:28:11 +0530 +Subject: [PATCH 425/449] tests: Fix spurious self-heald.t failure + +Problem: +heal-info code assumes that all indices in xattrop directory +definitely need heal. There is one corner case. +The very first xattrop on the file will lead to adding the +gfid to 'xattrop' index in fop path and in _cbk path it is +removed because the fop is zero-xattr xattrop in success case. +These gfids could be read by heal-info and shown as needing heal. + +Fix: +Check the pending flag to see if the file definitely needs or +not instead of which index is being crawled at the moment. 
+ +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24110/ +> fixes: bz#1801623 +> Change-Id: I79f00dc7366fedbbb25ec4bec838dba3b34c7ad5 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1721355 +Change-Id: I7efdf45a5158fadfdbdd21c91837f193d80fa6c7 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202491 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> +--- + heal/src/glfs-heal.c | 17 ++++++---------- + xlators/cluster/afr/src/afr-common.c | 38 ++++++++++++++---------------------- + 2 files changed, 21 insertions(+), 34 deletions(-) + +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 5af9e31..125b12c 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -775,8 +775,7 @@ static int + glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries, + uint64_t *offset, num_entries_t *num_entries, + print_status glfsh_print_status, +- gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode, +- dict_t *xattr_req) ++ gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode) + { + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; +@@ -808,7 +807,7 @@ glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries, + + gf_uuid_parse(entry->d_name, gfid); + gf_uuid_copy(loc.gfid, gfid); +- ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, xattr_req, NULL); ++ ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, NULL, NULL); + if (ret) { + if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && (ret == -ENOTCONN)) + goto out; +@@ -877,19 +876,19 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + if (heal_op == GF_SHD_OP_INDEX_SUMMARY) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_heal_status, +- ignore, mode, xattr_req); ++ ignore, mode); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_spb_status, +- ignore, mode, xattr_req); ++ ignore, mode); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_summary_status, +- ignore, mode, xattr_req); ++ ignore, mode); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) { +@@ -898,7 +897,7 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_heal_status_boolean, +- ignore, mode, xattr_req); ++ ignore, mode); + if (ret < 0) + goto out; + } +@@ -952,10 +951,6 @@ glfsh_print_pending_heals_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + int32_t op_errno = 0; + gf_boolean_t ignore = _gf_false; + +- ret = dict_set_str(xattr_req, "index-vgfid", vgfid); +- if (ret) +- return ret; +- + if (!strcmp(vgfid, GF_XATTROP_DIRTY_GFID)) + ignore = _gf_true; + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index c355ec5..89e2483 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5995,8 +5995,8 @@ afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies, + + static int + afr_update_heal_status(xlator_t *this, struct afr_reply *replies, +- char *index_vgfid, ia_type_t ia_type, gf_boolean_t 
*esh, +- gf_boolean_t *dsh, gf_boolean_t *msh) ++ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh, ++ gf_boolean_t *msh, unsigned char pending) + { + int ret = -1; + GF_UNUSED int ret1 = 0; +@@ -6026,14 +6026,7 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, + } + } + +- if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) { +- if (shd_domain_lk_count) { +- ret = -EAGAIN; /*For 'possibly-healing'. */ +- } else { +- ret = 0; /*needs heal. Just set a non -ve value so that it is +- assumed as the source index.*/ +- } +- } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) { ++ if (!pending) { + if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) || + (!io_domain_lk_count)) { + /* Needs heal. */ +@@ -6042,6 +6035,13 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, + /* No heal needed. */ + *dsh = *esh = *msh = 0; + } ++ } else { ++ if (shd_domain_lk_count) { ++ ret = -EAGAIN; /*For 'possibly-healing'. */ ++ } else { ++ ret = 0; /*needs heal. Just set a non -ve value so that it is ++ assumed as the source index.*/ ++ } + } + return ret; + } +@@ -6049,8 +6049,8 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, + /*return EIO, EAGAIN or pending*/ + int + afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, +- inode_t **inode, char *index_vgfid, +- gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal, ++ inode_t **inode, gf_boolean_t *entry_selfheal, ++ gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, unsigned char *pending) + { + int ret = -1; +@@ -6109,8 +6109,8 @@ afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + goto out; + } + +- ret = afr_update_heal_status(this, replies, index_vgfid, (*inode)->ia_type, +- &esh, &dsh, &msh); ++ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh, ++ &msh, *pending); + out: + *data_selfheal = dsh; + *entry_selfheal = esh; +@@ -6133,16 +6133,8 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) + inode_t *inode = NULL; + char *substr = NULL; + char *status = NULL; +- afr_local_t *local = NULL; +- char *index_vgfid = NULL; +- +- local = frame->local; +- if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) { +- ret = -1; +- goto out; +- } + +- ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, index_vgfid, ++ ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, + &entry_selfheal, &data_selfheal, + &metadata_selfheal, &pending); + +-- +1.8.3.1 + diff --git a/SOURCES/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch b/SOURCES/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch new file mode 100644 index 0000000..a96b66e --- /dev/null +++ b/SOURCES/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch @@ -0,0 +1,216 @@ +From 91936fe5ef854bd9d2f91e643795d0e7791b97ba Mon Sep 17 00:00:00 2001 +From: Harpreet Kaur <hlalwani@redhat.com> +Date: Mon, 7 Jan 2019 16:38:25 +0530 +Subject: [PATCH 426/449] geo-rep: Fix for "Transport End Point not connected" + issue + +problem: Geo-rep gsyncd process mounts the master and slave volume + on master nodes and slave nodes respectively and starts + the sync. But it doesn't wait for the mount to be in ready + state to accept I/O. The gluster mount is considered to be + ready when all the distribute sub-volumes is up. If the all + the distribute subvolumes are not up, it can cause ENOTCONN + error, when lookup on file comes and file is on the subvol + that is down. 
+ +solution: Added a Virtual Xattr "dht.subvol.status" which returns "1" + if all subvols are up and "0" if all subvols are not up. + Geo-rep then uses this virtual xattr after a fresh mount, to + check whether all subvols are up or not and then starts the + I/O. + +>fixes: bz#1664335 +>Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91 +>Signed-off-by: Harpreet Kaur <hlalwani@redhat.com> +>Signed-off-by: Kotresh HR <khiremat@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/22001/ +BUG: 1640573 +Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202554 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/resource.py | 11 ++++++ + geo-replication/syncdaemon/syncdutils.py | 20 +++++++++-- + xlators/cluster/dht/src/dht-common.c | 59 ++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.h | 4 +++ + 4 files changed, 91 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index 189d8a1..0c61de9 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -37,6 +37,7 @@ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable + from syncdutils import get_changelog_log_level, get_rsync_version + from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION + from syncdutils import GX_GFID_CANONICAL_LEN ++from syncdutils import gf_mount_ready + from gsyncdstatus import GeorepStatus + from syncdutils import lf, Popen, sup + from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt +@@ -950,6 +951,16 @@ class Mounter(object): + logging.exception('mount cleanup failure:') + rv = 200 + os._exit(rv) ++ ++ #Polling the dht.subvol.status value. 
++ RETRIES = 10 ++ while not gf_mount_ready(): ++ if RETRIES < 0: ++ logging.error('Subvols are not up') ++ break ++ RETRIES -= 1 ++ time.sleep(0.2) ++ + logging.debug('auxiliary glusterfs mount prepared') + + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index b08098e..7560fa1 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -21,8 +21,8 @@ import subprocess + import socket + from subprocess import PIPE + from threading import Lock, Thread as baseThread +-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED +-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode ++from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED ++from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode + from signal import signal, SIGTERM + import select as oselect + from os import waitpid as owaitpid +@@ -55,6 +55,8 @@ from rconf import rconf + + from hashlib import sha256 as sha256 + ++ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') ++ + # auxiliary gfid based access prefix + _CL_AUX_GFID_PFX = ".gfid/" + ROOT_GFID = "00000000-0000-0000-0000-000000000001" +@@ -100,6 +102,19 @@ def unescape_space_newline(s): + .replace(NEWLINE_ESCAPE_CHAR, "\n")\ + .replace(PERCENTAGE_ESCAPE_CHAR, "%") + ++# gf_mount_ready() returns 1 if all subvols are up, else 0 ++def gf_mount_ready(): ++ ret = errno_wrap(Xattr.lgetxattr, ++ ['.', 'dht.subvol.status', 16], ++ [ENOENT, ENOTSUP, ENODATA], [ENOMEM]) ++ ++ if isinstance(ret, int): ++ logging.error("failed to get the xattr value") ++ return 1 ++ ret = ret.rstrip('\x00') ++ if ret == "1": ++ return 1 ++ return 0 + + def norm(s): + if s: +@@ -564,7 +579,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]): + def lstat(e): + return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY]) + +- + def get_gfid_from_mnt(gfidpath): + return errno_wrap(Xattr.lgetxattr, + [gfidpath, 'glusterfs.gfid.string', +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 6aa18f3..23cc80c 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -4858,6 +4858,60 @@ out: + return 0; + } + ++/* Virtual Xattr which returns 1 if all subvols are up, ++ else returns 0. Geo-rep then uses this virtual xattr ++ after a fresh mount and starts the I/O. 
++*/ ++ ++enum dht_vxattr_subvol { ++ DHT_VXATTR_SUBVOLS_UP = 1, ++ DHT_VXATTR_SUBVOLS_DOWN = 0, ++}; ++ ++int ++dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this, ++ const char *key) ++{ ++ dht_local_t *local = NULL; ++ int ret = -1; ++ int op_errno = ENODATA; ++ int value = DHT_VXATTR_SUBVOLS_UP; ++ int i = 0; ++ dht_conf_t *conf = NULL; ++ ++ conf = this->private; ++ local = frame->local; ++ ++ if (!key) { ++ op_errno = EINVAL; ++ goto out; ++ } ++ local->xattr = dict_new(); ++ if (!local->xattr) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (!conf->subvolume_status[i]) { ++ value = DHT_VXATTR_SUBVOLS_DOWN; ++ gf_msg_debug(this->name, 0, "subvol %s is down ", ++ conf->subvolumes[i]->name); ++ break; ++ } ++ } ++ ret = dict_set_int8(local->xattr, (char *)key, value); ++ if (ret < 0) { ++ op_errno = -ret; ++ ret = -1; ++ goto out; ++ } ++ ret = 0; ++ ++out: ++ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL); ++ return 0; ++} ++ + int + dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + dict_t *xdata) +@@ -4915,6 +4969,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + goto err; + } + ++ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) { ++ dht_vgetxattr_subvol_status(frame, this, key); ++ return 0; ++ } ++ + /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */ + if (!DHT_IS_DIR(layout)) + goto no_dht_is_dir; +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 1b3e826..9ec5b51 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -45,6 +45,10 @@ + #define DHT_DIR_STAT_BLOCKS 8 + #define DHT_DIR_STAT_SIZE 4096 + ++/* Virtual xattr for subvols status */ ++ ++#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status" ++ + /* Virtual xattrs for debugging */ + + #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*" +-- +1.8.3.1 + diff --git a/SOURCES/0427-storage-posix-Fixing-a-coverity-issue.patch b/SOURCES/0427-storage-posix-Fixing-a-coverity-issue.patch new file mode 100644 index 0000000..ebeb556 --- /dev/null +++ b/SOURCES/0427-storage-posix-Fixing-a-coverity-issue.patch @@ -0,0 +1,38 @@ +From 3943fce5818a353117fc1c492e6383434d742979 Mon Sep 17 00:00:00 2001 +From: Barak Sason <bsasonro@redhat.com> +Date: Sun, 18 Aug 2019 17:52:04 +0300 +Subject: [PATCH 427/449] storage/posix - Fixing a coverity issue + +Fixed a resource leak of variable 'pfd' + +backport of https://review.gluster.org/#/c/glusterfs/+/23261/ +>CID: 1400673 +>Updates: bz#789278 +>Change-Id: I78e1e8a89e0604b56e35a75c25d436b35db096c3 +>Signed-off-by: Barak Sason <bsasonro@redhat.com> + +BUG: 1787310 +Change-Id: I78e1e8a89e0604b56e35a75c25d436b35db096c3 +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202563 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/storage/posix/src/posix-inode-fd-ops.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index bcce06e..5748b9f 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1603,6 +1603,7 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + if (op_ret == -1) { + 
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); ++ GF_FREE(pfd); + goto out; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch b/SOURCES/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch new file mode 100644 index 0000000..dba8f3c --- /dev/null +++ b/SOURCES/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch @@ -0,0 +1,48 @@ +From 7e1bf1e338a6effe209f57b1b92a70d5d25a73bf Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Mon, 26 Aug 2019 11:32:18 +0530 +Subject: [PATCH 428/449] glusterd/ganesha: fixing resource leak in + tear_down_cluster() + +backport of https://review.gluster.org/#/c/glusterfs/+/23295/ +>CID: 1370947 +>Updates: bz#789278 +>Change-Id: Ib694056430ff0536ed705a0e77e5ace22486891e +>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> + +BUG: 1787310 +Change-Id: Ib694056430ff0536ed705a0e77e5ace22486891e +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202561 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 0a16925..06f028f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -659,10 +659,18 @@ tear_down_cluster(gf_boolean_t run_teardown) + "Failed to close dir %s. Reason :" + " %s", + CONFDIR, strerror(errno)); ++ goto exit; + } + } + + out: ++ if (dir && sys_closedir(dir)) { ++ gf_msg_debug(THIS->name, 0, ++ "Failed to close dir %s. Reason :" ++ " %s", ++ CONFDIR, strerror(errno)); ++ } ++exit: + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch b/SOURCES/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch new file mode 100644 index 0000000..8ac6529 --- /dev/null +++ b/SOURCES/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch @@ -0,0 +1,61 @@ +From 1370db202a2a60810409f74c390448bf8fbd6998 Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Sun, 9 Feb 2020 15:09:30 +0200 +Subject: [PATCH 429/449] dht/rebalance - fixing failure occurace due to + rebalance stop + +Probelm description: +When topping rebalance, the following error messages appear in the +rebalance log file: +[2020-01-28 14:31:42.452070] W [dht-rebalance.c:3447:gf_defrag_process_dir] 0-distrep-dht: Found error from gf_defrag_get_entry +[2020-01-28 14:31:42.452764] E [MSGID: 109111] [dht-rebalance.c:3971:gf_defrag_fix_layout] 0-distrep-dht: gf_defrag_process_dir failed for directory: /0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31 +[2020-01-28 14:31:42.453498] E [MSGID: 109016] [dht-rebalance.c:3906:gf_defrag_fix_layout] 0-distrep-dht: Fix layout failed for /0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30 + +In order to avoid seing these error messages, a modification to the +error handling mechanism has been made. 
+In addition, several log messages had been added in order to improve debugging efficiency + +backport of https://review.gluster.org/#/c/glusterfs/+/24103/ +>fixes: bz#1800956 +>Change-Id: Ifc82dae79ab3da9fe22ee25088a2a6b855afcfcf +>Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> + +BUG: 1286171 +Change-Id: Ifc82dae79ab3da9fe22ee25088a2a6b855afcfcf +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202562 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-rebalance.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 8f31dca..88b6b54 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -3479,6 +3479,10 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + migrate_data, dir_dfmeta, xattr_req, + &should_commit_hash, perrno); + ++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { ++ goto out; ++ } ++ + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Found " +@@ -3935,6 +3939,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout, + migrate_data); + ++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { ++ goto out; ++ } ++ + if (ret && ret != 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, + "Fix layout failed for %s", entry_loc.path); +-- +1.8.3.1 + diff --git a/SOURCES/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch b/SOURCES/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch new file mode 100644 index 0000000..6ff69e8 --- /dev/null +++ b/SOURCES/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch @@ -0,0 +1,291 @@ +From 7fe500a03d42dba6082c28ef7284c950c44fbfa3 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Wed, 22 May 2019 17:46:19 +0200 +Subject: [PATCH 430/449] Fix some "Null pointer dereference" coverity issues + +This patch fixes the following CID's: + + * 1124829 + * 1274075 + * 1274083 + * 1274128 + * 1274135 + * 1274141 + * 1274143 + * 1274197 + * 1274205 + * 1274210 + * 1274211 + * 1288801 + * 1398629 + +Backport of: +> Upstream-patch-link: https://review.gluster.org/22767 +> Change-Id: Ia7c86cfab3245b20777ffa296e1a59748040f558 +> Updates: bz#789278 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1787310 +Change-Id: Ia7c86cfab3245b20777ffa296e1a59748040f558 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202616 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-system.c | 2 +- + cli/src/cli-xml-output.c | 2 +- + glusterfsd/src/glusterfsd.c | 24 +++++++++++++----------- + libglusterfs/src/inode.c | 3 +++ + rpc/rpc-lib/src/rpcsvc.c | 4 ++++ + xlators/cluster/dht/src/dht-shared.c | 4 ++++ + xlators/cluster/dht/src/switch.c | 9 +++++++++ + xlators/features/trash/src/trash.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 7 +++++-- + xlators/nfs/server/src/mount3.c | 6 ++++++ + xlators/protocol/client/src/client.c | 7 ++++++- + xlators/storage/posix/src/posix-helpers.c | 3 +++ + 12 files changed, 56 insertions(+), 17 deletions(-) + +diff --git a/cli/src/cli-cmd-system.c 
b/cli/src/cli-cmd-system.c +index 8cd1542..cb3a9ea 100644 +--- a/cli/src/cli-cmd-system.c ++++ b/cli/src/cli-cmd-system.c +@@ -446,7 +446,7 @@ cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word, + dict_t *dict = NULL; + cli_local_t *local = NULL; + +- if (wordcount < 3) { ++ if ((wordcount < 3) || (words[2] == NULL)) { + cli_usage_out(word->pattern); + goto out; + } +diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c +index 006e2fb..903997c 100644 +--- a/cli/src/cli-xml-output.c ++++ b/cli/src/cli-xml-output.c +@@ -64,7 +64,7 @@ cli_begin_xml_output(xmlTextWriterPtr *writer, xmlDocPtr *doc) + int ret = -1; + + *writer = xmlNewTextWriterDoc(doc, 0); +- if (writer == NULL) { ++ if (*writer == NULL) { + ret = -1; + goto out; + } +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 974fb88..9821180 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1235,19 +1235,21 @@ parse_opts(int key, char *arg, struct argp_state *state) + case ARGP_BRICK_PORT_KEY: + n = 0; + +- port_str = strtok_r(arg, ",", &tmp_str); +- if (gf_string2uint_base10(port_str, &n) == 0) { +- cmd_args->brick_port = n; +- port_str = strtok_r(NULL, ",", &tmp_str); +- if (port_str) { +- if (gf_string2uint_base10(port_str, &n) == 0) { +- cmd_args->brick_port2 = n; +- break; ++ if (arg != NULL) { ++ port_str = strtok_r(arg, ",", &tmp_str); ++ if (gf_string2uint_base10(port_str, &n) == 0) { ++ cmd_args->brick_port = n; ++ port_str = strtok_r(NULL, ",", &tmp_str); ++ if (port_str) { ++ if (gf_string2uint_base10(port_str, &n) == 0) { ++ cmd_args->brick_port2 = n; ++ break; ++ } ++ argp_failure(state, -1, 0, ++ "wrong brick (listen) port %s", arg); + } +- argp_failure(state, -1, 0, "wrong brick (listen) port %s", +- arg); ++ break; + } +- break; + } + + argp_failure(state, -1, 0, "unknown brick (listen) port %s", arg); +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 9dbb25b..4c3c546 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -899,6 +899,9 @@ inode_resolve(inode_table_t *table, char *path) + + parent = inode_ref(table->root); + str = tmp = gf_strdup(path); ++ if (str == NULL) { ++ goto out; ++ } + + while (1) { + bname = strtok_r(str, "/", &saveptr); +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 5a35139..b058932 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -2874,6 +2874,10 @@ rpcsvc_transport_peer_check_search(dict_t *options, char *pattern, char *ip, + } + + dup_addrstr = gf_strdup(addrstr); ++ if (dup_addrstr == NULL) { ++ ret = -1; ++ goto err; ++ } + addrtok = strtok_r(dup_addrstr, ",", &svptr); + while (addrtok) { + /* CASEFOLD not present on Solaris */ +diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c +index ea4b7c6..58e3339 100644 +--- a/xlators/cluster/dht/src/dht-shared.c ++++ b/xlators/cluster/dht/src/dht-shared.c +@@ -278,6 +278,10 @@ dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf, + goto out; + + dup_brick = gf_strdup(bricks); ++ if (dup_brick == NULL) { ++ goto out; ++ } ++ + node = strtok_r(dup_brick, ",", &tmpstr); + while (node) { + for (i = 0; i < conf->subvolume_cnt; i++) { +diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c +index a782fcd..207d109 100644 +--- a/xlators/cluster/dht/src/switch.c ++++ b/xlators/cluster/dht/src/switch.c +@@ -610,9 +610,15 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str) 
+ /* Get the pattern for considering switch case. + "option block-size *avi:10MB" etc */ + option_string = gf_strdup(pattern_str); ++ if (option_string == NULL) { ++ goto err; ++ } + switch_str = strtok_r(option_string, ";", &tmp_str); + while (switch_str) { + dup_str = gf_strdup(switch_str); ++ if (dup_str == NULL) { ++ goto err; ++ } + switch_opt = GF_CALLOC(1, sizeof(struct switch_struct), + gf_switch_mt_switch_struct); + if (!switch_opt) { +@@ -647,6 +653,9 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str) + + if (childs) { + dup_childs = gf_strdup(childs); ++ if (dup_childs == NULL) { ++ goto err; ++ } + child = strtok_r(dup_childs, ",", &tmp); + while (child) { + if (gf_switch_valid_child(this, child)) { +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index d668436..f96ed73 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -170,7 +170,7 @@ store_eliminate_path(char *str, trash_elim_path **eliminate) + int ret = 0; + char *strtokptr = NULL; + +- if (eliminate == NULL) { ++ if ((str == NULL) || (eliminate == NULL)) { + ret = EINVAL; + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +index 0f40bea..85c06c1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c ++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +@@ -5981,7 +5981,7 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname, + GF_ASSERT(this); + + ret = glusterd_urltransform_single(slave, "normalize", &linearr); +- if (ret == -1) { ++ if ((ret == -1) || (linearr[0] == NULL)) { + ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Url: %s", slave); + errmsg[ret] = '\0'; + *op_errstr = gf_strdup(errmsg); +@@ -5992,7 +5992,10 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname, + + tmp = strtok_r(linearr[0], "/", &save_ptr); + tmp = strtok_r(NULL, "/", &save_ptr); +- slave = strtok_r(tmp, ":", &save_ptr); ++ slave = NULL; ++ if (tmp != NULL) { ++ slave = strtok_r(tmp, ":", &save_ptr); ++ } + if (slave) { + ret = glusterd_geo_rep_parse_slave(slave, hostname, op_errstr); + if (ret) { +diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c +index 396809c..734453c 100644 +--- a/xlators/nfs/server/src/mount3.c ++++ b/xlators/nfs/server/src/mount3.c +@@ -3205,6 +3205,12 @@ mnt3_export_parse_auth_param(struct mnt3_export *exp, char *exportpath) + struct host_auth_spec *host = NULL; + int ret = 0; + ++ if (exportpath == NULL) { ++ gf_msg(GF_MNT, GF_LOG_ERROR, EINVAL, NFS_MSG_PARSE_HOSTSPEC_FAIL, ++ "Export path is NULL"); ++ return -1; ++ } ++ + /* Using exportpath directly in strtok_r because we want + * to strip off AUTH parameter from exportpath. 
*/ + token = strtok_r(exportpath, "(", &savPtr); +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index e156d4d..ed855ca 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -1222,9 +1222,12 @@ client_set_remote_options(char *value, xlator_t *this) + char *remote_port_str = NULL; + char *tmp = NULL; + int remote_port = 0; +- int ret = 0; ++ int ret = -1; + + dup_value = gf_strdup(value); ++ if (dup_value == NULL) { ++ goto out; ++ } + host = strtok_r(dup_value, ":", &tmp); + subvol = strtok_r(NULL, ":", &tmp); + remote_port_str = strtok_r(NULL, ":", &tmp); +@@ -1238,6 +1241,7 @@ client_set_remote_options(char *value, xlator_t *this) + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED, + "failed to set remote-host with %s", host); ++ GF_FREE(host_dup); + goto out; + } + } +@@ -1252,6 +1256,7 @@ client_set_remote_options(char *value, xlator_t *this) + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED, + "failed to set remote-host with %s", host); ++ GF_FREE(subvol_dup); + goto out; + } + } +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 949c799..2336add 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -390,6 +390,9 @@ _posix_get_marker_quota_contributions(posix_xattr_filler_t *filler, char *key) + int i = 0, ret = 0; + + tmp_key = ptr = gf_strdup(key); ++ if (tmp_key == NULL) { ++ return -1; ++ } + for (i = 0; i < 4; i++) { + token = strtok_r(tmp_key, ".", &saveptr); + tmp_key = NULL; +-- +1.8.3.1 + diff --git a/SOURCES/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch b/SOURCES/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch new file mode 100644 index 0000000..341cfc1 --- /dev/null +++ b/SOURCES/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch @@ -0,0 +1,638 @@ +From d7c52ddd2cbadb1d9a55767c2f7fe6ba38d9a2ed Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Wed, 20 Nov 2019 12:42:12 +0530 +Subject: [PATCH 431/449] glusterd: check for same node while adding bricks in + disperse volume + +The optimal way for configuring disperse and replicate volumes +is to have all bricks in different nodes. + +During create operation it fails saying it is not optimal, user +must use force to over-ride this behavior. Implementing same +during add-brick operation to avoid situation where all the added +bricks end up from same host. Operation will error out accordingly. +and this can be over-ridden by using force same as create. 
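To illustrate the check described above, here is a hypothetical, stripped-down sketch (not the glusterd code). The real glusterd_check_brick_order() in the hunks below resolves every brick's host with getaddrinfo()/getnameinfo() and compares the resolved addresses within each replica or disperse subvolume; plain string comparison of the "host:" prefix stands in for that resolution here.

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical illustration only: bricks of one subvolume are consecutive
     * groups of sub_count entries, and two bricks of the same group must not
     * share a host unless the user passes 'force'. */
    static int
    same_host(const char *a, const char *b)
    {
        const char *ca = strrchr(a, ':');
        const char *cb = strrchr(b, ':');
        size_t la = ca ? (size_t)(ca - a) : strlen(a);
        size_t lb = cb ? (size_t)(cb - b) : strlen(b);
        return la == lb && strncmp(a, b, la) == 0;
    }

    /* Returns -1 (bad order) if any two bricks of the same subvolume share a host. */
    static int
    check_brick_order(const char *bricks[], int count, int sub_count)
    {
        for (int i = 0; i < count; i += sub_count)
            for (int j = i; j < i + sub_count && j < count; j++)
                for (int k = j + 1; k < i + sub_count && k < count; k++)
                    if (same_host(bricks[j], bricks[k]))
                        return -1;
        return 0;
    }

    int main(void)
    {
        const char *bricks[] = { "node1:/b1", "node1:/b2", "node2:/b3" };
        /* replica 3: both node1 bricks land in one subvolume -> not optimal */
        printf("%s\n", check_brick_order(bricks, 3, 3) ? "bad brick order" : "ok");
        return 0;
    }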
+ +> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23729 +> fixes: #1047 +> Change-Id: I3ee9c97c1a14b73f4532893bc00187ef9355238b +> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> + +BUG: 1524457 +Change-Id: I3ee9c97c1a14b73f4532893bc00187ef9355238b +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202621 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 20 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 224 ++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-utils.h | 4 + + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 293 +++--------------------- + 4 files changed, 276 insertions(+), 265 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index c5141de..d424f31 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -21,7 +21,6 @@ + #include "glusterd-messages.h" + #include "glusterd-server-quorum.h" + #include <glusterfs/run.h> +-#include "glusterd-volgen.h" + #include <glusterfs/syscall.h> + #include <sys/signal.h> + +@@ -1575,6 +1574,25 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + + is_force = dict_get_str_boolean(dict, "force", _gf_false); + ++ /* Check brick order if the volume type is replicate or disperse. If ++ * force at the end of command not given then check brick order. ++ */ ++ ++ if (!is_force) { ++ if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || ++ (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { ++ ret = glusterd_check_brick_order(dict, msg, volinfo->type); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, ++ "Not adding brick because of " ++ "bad brick order. 
%s", ++ msg); ++ *op_errstr = gf_strdup(msg); ++ goto out; ++ } ++ } ++ } ++ + if (volinfo->replica_count < replica_count && !is_force) { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index a1299bc..14e23d1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -14759,3 +14759,227 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo) + return _gf_true; + return _gf_false; + } ++ ++static gf_ai_compare_t ++glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) ++{ ++ int ret = -1; ++ struct addrinfo *tmp1 = NULL; ++ struct addrinfo *tmp2 = NULL; ++ char firstip[NI_MAXHOST] = {0.}; ++ char nextip[NI_MAXHOST] = { ++ 0, ++ }; ++ ++ for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { ++ ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, ++ NULL, 0, NI_NUMERICHOST); ++ if (ret) ++ return GF_AI_COMPARE_ERROR; ++ for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { ++ ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, ++ NI_MAXHOST, NULL, 0, NI_NUMERICHOST); ++ if (ret) ++ return GF_AI_COMPARE_ERROR; ++ if (!strcmp(firstip, nextip)) { ++ return GF_AI_COMPARE_MATCH; ++ } ++ } ++ } ++ return GF_AI_COMPARE_NO_MATCH; ++} ++ ++/* Check for non optimal brick order for Replicate/Disperse : ++ * Checks if bricks belonging to a replicate or disperse ++ * volume are present on the same server ++ */ ++int32_t ++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) ++{ ++ int ret = -1; ++ int i = 0; ++ int j = 0; ++ int k = 0; ++ xlator_t *this = NULL; ++ addrinfo_list_t *ai_list = NULL; ++ addrinfo_list_t *ai_list_tmp1 = NULL; ++ addrinfo_list_t *ai_list_tmp2 = NULL; ++ char *brick = NULL; ++ char *brick_list = NULL; ++ char *brick_list_dup = NULL; ++ char *brick_list_ptr = NULL; ++ char *tmpptr = NULL; ++ char *volname = NULL; ++ int32_t brick_count = 0; ++ int32_t sub_count = 0; ++ struct addrinfo *ai_info = NULL; ++ char brick_addr[128] = { ++ 0, ++ }; ++ int addrlen = 0; ++ ++ const char failed_string[2048] = ++ "Failed to perform brick order " ++ "check. Use 'force' at the end of the command" ++ " if you want to override this behavior. "; ++ const char found_string[2048] = ++ "Multiple bricks of a %s " ++ "volume are present on the same server. This " ++ "setup is not optimal. Bricks should be on " ++ "different nodes to have best fault tolerant " ++ "configuration. Use 'force' at the end of the " ++ "command if you want to override this " ++ "behavior. 
"; ++ ++ this = THIS; ++ ++ GF_ASSERT(this); ++ ++ ai_list = MALLOC(sizeof(addrinfo_list_t)); ++ ai_list->info = NULL; ++ CDS_INIT_LIST_HEAD(&ai_list->list); ++ ++ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Unable to get volume name"); ++ goto out; ++ } ++ ++ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could not " ++ "retrieve bricks list"); ++ goto out; ++ } ++ ++ ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could not " ++ "retrieve brick count"); ++ goto out; ++ } ++ ++ if (type != GF_CLUSTER_TYPE_DISPERSE) { ++ ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), ++ &sub_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could" ++ " not retrieve replica count"); ++ goto out; ++ } ++ gf_msg_debug(this->name, 0, ++ "Replicate cluster type " ++ "found. Checking brick order."); ++ } else { ++ ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), ++ &sub_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could" ++ " not retrieve disperse count"); ++ goto out; ++ } ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, ++ "Disperse cluster type" ++ " found. Checking brick order."); ++ } ++ brick_list_dup = brick_list_ptr = gf_strdup(brick_list); ++ /* Resolve hostnames and get addrinfo */ ++ while (i < brick_count) { ++ ++i; ++ brick = strtok_r(brick_list_dup, " \n", &tmpptr); ++ brick_list_dup = tmpptr; ++ if (brick == NULL) ++ goto check_failed; ++ tmpptr = strrchr(brick, ':'); ++ if (tmpptr == NULL) ++ goto check_failed; ++ addrlen = strlen(brick) - strlen(tmpptr); ++ strncpy(brick_addr, brick, addrlen); ++ brick_addr[addrlen] = '\0'; ++ ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); ++ if (ret != 0) { ++ ret = 0; ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, ++ "unable to resolve host name for addr %s", brick_addr); ++ goto out; ++ } ++ ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); ++ if (ai_list_tmp1 == NULL) { ++ ret = 0; ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "failed to allocate " ++ "memory"); ++ freeaddrinfo(ai_info); ++ goto out; ++ } ++ ai_list_tmp1->info = ai_info; ++ cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); ++ ai_list_tmp1 = NULL; ++ } ++ ++ i = 0; ++ ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); ++ ++ /* Check for bad brick order */ ++ while (i < brick_count) { ++ ++i; ++ ai_info = ai_list_tmp1->info; ++ ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, ++ list); ++ if (0 == i % sub_count) { ++ j = 0; ++ continue; ++ } ++ ai_list_tmp2 = ai_list_tmp1; ++ k = j; ++ while (k < sub_count - 1) { ++ ++k; ++ ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); ++ if (GF_AI_COMPARE_ERROR == ret) ++ goto check_failed; ++ if (GF_AI_COMPARE_MATCH == ret) ++ goto found_bad_brick_order; ++ ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, ++ addrinfo_list_t, list); ++ } ++ ++j; ++ } ++ gf_msg_debug(this->name, 0, "Brick order okay"); ++ ret = 0; ++ goto out; ++ ++check_failed: ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, ++ "Failed bad brick order 
check"); ++ snprintf(err_str, sizeof(failed_string), failed_string); ++ ret = -1; ++ goto out; ++ ++found_bad_brick_order: ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, ++ "Bad brick order found"); ++ if (type == GF_CLUSTER_TYPE_DISPERSE) { ++ snprintf(err_str, sizeof(found_string), found_string, "disperse"); ++ } else { ++ snprintf(err_str, sizeof(found_string), found_string, "replicate"); ++ } ++ ++ ret = -1; ++out: ++ ai_list_tmp2 = NULL; ++ GF_FREE(brick_list_ptr); ++ cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) ++ { ++ if (ai_list_tmp1->info) ++ freeaddrinfo(ai_list_tmp1->info); ++ free(ai_list_tmp2); ++ ai_list_tmp2 = ai_list_tmp1; ++ } ++ free(ai_list_tmp2); ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index ead16b2..e2e2454 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -881,4 +881,8 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo); + + char * + search_brick_path_from_proc(pid_t brick_pid, char *brickpath); ++ ++int32_t ++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type); ++ + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 93042ab..8da2ff3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -41,240 +41,6 @@ + #define glusterd_op_start_volume_args_get(dict, volname, flags) \ + glusterd_op_stop_volume_args_get(dict, volname, flags) + +-gf_ai_compare_t +-glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) +-{ +- int ret = -1; +- struct addrinfo *tmp1 = NULL; +- struct addrinfo *tmp2 = NULL; +- char firstip[NI_MAXHOST] = {0.}; +- char nextip[NI_MAXHOST] = { +- 0, +- }; +- +- for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { +- ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, +- NULL, 0, NI_NUMERICHOST); +- if (ret) +- return GF_AI_COMPARE_ERROR; +- for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { +- ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, +- NI_MAXHOST, NULL, 0, NI_NUMERICHOST); +- if (ret) +- return GF_AI_COMPARE_ERROR; +- if (!strcmp(firstip, nextip)) { +- return GF_AI_COMPARE_MATCH; +- } +- } +- } +- return GF_AI_COMPARE_NO_MATCH; +-} +- +-/* Check for non optimal brick order for replicate : +- * Checks if bricks belonging to a replicate volume +- * are present on the same server +- */ +-int32_t +-glusterd_check_brick_order(dict_t *dict, char *err_str) +-{ +- int ret = -1; +- int i = 0; +- int j = 0; +- int k = 0; +- xlator_t *this = NULL; +- addrinfo_list_t *ai_list = NULL; +- addrinfo_list_t *ai_list_tmp1 = NULL; +- addrinfo_list_t *ai_list_tmp2 = NULL; +- char *brick = NULL; +- char *brick_list = NULL; +- char *brick_list_dup = NULL; +- char *brick_list_ptr = NULL; +- char *tmpptr = NULL; +- char *volname = NULL; +- int32_t brick_count = 0; +- int32_t type = GF_CLUSTER_TYPE_NONE; +- int32_t sub_count = 0; +- struct addrinfo *ai_info = NULL; +- char brick_addr[128] = { +- 0, +- }; +- int addrlen = 0; +- +- const char failed_string[2048] = +- "Failed to perform brick order " +- "check. Use 'force' at the end of the command" +- " if you want to override this behavior. "; +- const char found_string[2048] = +- "Multiple bricks of a %s " +- "volume are present on the same server. This " +- "setup is not optimal. 
Bricks should be on " +- "different nodes to have best fault tolerant " +- "configuration. Use 'force' at the end of the " +- "command if you want to override this " +- "behavior. "; +- +- this = THIS; +- +- GF_ASSERT(this); +- +- ai_list = MALLOC(sizeof(addrinfo_list_t)); +- ai_list->info = NULL; +- CDS_INIT_LIST_HEAD(&ai_list->list); +- +- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Unable to get volume name"); +- goto out; +- } +- +- ret = dict_get_int32n(dict, "type", SLEN("type"), &type); +- if (ret) { +- snprintf(err_str, 512, "Unable to get type of volume %s", volname); +- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", +- err_str); +- goto out; +- } +- +- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could not " +- "retrieve bricks list"); +- goto out; +- } +- +- ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could not " +- "retrieve brick count"); +- goto out; +- } +- +- if (type != GF_CLUSTER_TYPE_DISPERSE) { +- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), +- &sub_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could" +- " not retrieve replica count"); +- goto out; +- } +- gf_msg_debug(this->name, 0, +- "Replicate cluster type " +- "found. Checking brick order."); +- } else { +- ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), +- &sub_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could" +- " not retrieve disperse count"); +- goto out; +- } +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, +- "Disperse cluster type" +- " found. 
Checking brick order."); +- } +- +- brick_list_dup = brick_list_ptr = gf_strdup(brick_list); +- /* Resolve hostnames and get addrinfo */ +- while (i < brick_count) { +- ++i; +- brick = strtok_r(brick_list_dup, " \n", &tmpptr); +- brick_list_dup = tmpptr; +- if (brick == NULL) +- goto check_failed; +- tmpptr = strrchr(brick, ':'); +- if (tmpptr == NULL) +- goto check_failed; +- addrlen = strlen(brick) - strlen(tmpptr); +- strncpy(brick_addr, brick, addrlen); +- brick_addr[addrlen] = '\0'; +- ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); +- if (ret != 0) { +- ret = 0; +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, +- "unable to resolve host name for addr %s", brick_addr); +- goto out; +- } +- ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); +- if (ai_list_tmp1 == NULL) { +- ret = 0; +- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, +- "failed to allocate " +- "memory"); +- freeaddrinfo(ai_info); +- goto out; +- } +- ai_list_tmp1->info = ai_info; +- cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); +- ai_list_tmp1 = NULL; +- } +- +- i = 0; +- ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); +- +- /* Check for bad brick order */ +- while (i < brick_count) { +- ++i; +- ai_info = ai_list_tmp1->info; +- ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, +- list); +- if (0 == i % sub_count) { +- j = 0; +- continue; +- } +- ai_list_tmp2 = ai_list_tmp1; +- k = j; +- while (k < sub_count - 1) { +- ++k; +- ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); +- if (GF_AI_COMPARE_ERROR == ret) +- goto check_failed; +- if (GF_AI_COMPARE_MATCH == ret) +- goto found_bad_brick_order; +- ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, +- addrinfo_list_t, list); +- } +- ++j; +- } +- gf_msg_debug(this->name, 0, "Brick order okay"); +- ret = 0; +- goto out; +- +-check_failed: +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, +- "Failed bad brick order check"); +- snprintf(err_str, sizeof(failed_string), failed_string); +- ret = -1; +- goto out; +- +-found_bad_brick_order: +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, +- "Bad brick order found"); +- if (type == GF_CLUSTER_TYPE_DISPERSE) { +- snprintf(err_str, sizeof(found_string), found_string, "disperse"); +- } else { +- snprintf(err_str, sizeof(found_string), found_string, "replicate"); +- } +- +- ret = -1; +-out: +- ai_list_tmp2 = NULL; +- GF_FREE(brick_list_ptr); +- cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) +- { +- if (ai_list_tmp1->info) +- freeaddrinfo(ai_list_tmp1->info); +- free(ai_list_tmp2); +- ai_list_tmp2 = ai_list_tmp1; +- } +- free(ai_list_tmp2); +- return ret; +-} +- + int + __glusterd_handle_create_volume(rpcsvc_request_t *req) + { +@@ -1337,6 +1103,35 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + } + } + ++ /*Check brick order if the volume type is replicate or disperse. If ++ * force at the end of command not given then check brick order. 
++ */ ++ if (is_origin_glusterd(dict)) { ++ ret = dict_get_int32n(dict, "type", SLEN("type"), &type); ++ if (ret) { ++ snprintf(msg, sizeof(msg), ++ "Unable to get type of " ++ "volume %s", ++ volname); ++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", ++ msg); ++ goto out; ++ } ++ ++ if (!is_force) { ++ if ((type == GF_CLUSTER_TYPE_REPLICATE) || ++ (type == GF_CLUSTER_TYPE_DISPERSE)) { ++ ret = glusterd_check_brick_order(dict, msg, type); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, ++ "Not creating volume because of " ++ "bad brick order"); ++ goto out; ++ } ++ } ++ } ++ } ++ + while (i < brick_count) { + i++; + brick = strtok_r(brick_list, " \n", &tmpptr); +@@ -1423,36 +1218,6 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + brick_info = NULL; + } + +- /*Check brick order if the volume type is replicate or disperse. If +- * force at the end of command not given then check brick order. +- */ +- if (is_origin_glusterd(dict)) { +- ret = dict_get_int32n(dict, "type", SLEN("type"), &type); +- if (ret) { +- snprintf(msg, sizeof(msg), +- "Unable to get type of " +- "volume %s", +- volname); +- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", +- msg); +- goto out; +- } +- +- if (!is_force) { +- if ((type == GF_CLUSTER_TYPE_REPLICATE) || +- (type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = glusterd_check_brick_order(dict, msg); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, +- "Not " +- "creating volume because of " +- "bad brick order"); +- goto out; +- } +- } +- } +- } +- + ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), + local_brick_count); + if (ret) { +-- +1.8.3.1 + diff --git a/SOURCES/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch b/SOURCES/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch new file mode 100644 index 0000000..ef589de --- /dev/null +++ b/SOURCES/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch @@ -0,0 +1,503 @@ +From aa215163cb7d806dc98bef2386a4e282a5e54a31 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Thu, 25 Apr 2019 12:00:52 +0530 +Subject: [PATCH 432/449] glusterd: Fix coverity defects & put coverity + annotations + +Along with fixing few defect, put the required annotations for the defects which +are marked ignore/false positive/intentional as per the coverity defect sheet. +This should avoid the per component graph showing many defects as open in the +coverity glusterfs web page. 
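For readers unfamiliar with the annotation style, here is a hypothetical, minimal example of what such a marker looks like; the patch itself adds comments of exactly this form (for example /* coverity[SLEEP] */ and /* coverity[MIXED_ENUMS] */) directly above the flagged call sites shown in the hunks below.

    #include <pthread.h>
    #include <unistd.h>

    /* Hypothetical example, not part of the patch: a one-line comment naming the
     * Coverity checker sits directly above the flagged statement, marking the
     * finding as triaged (intentional / false positive) in the component report. */
    static pthread_mutex_t restart_lock = PTHREAD_MUTEX_INITIALIZER;

    void restart_brick(void)
    {
        pthread_mutex_lock(&restart_lock);
        /* coverity[SLEEP] */
        sleep(1); /* intentional: starting the brick may block while the lock is held */
        pthread_mutex_unlock(&restart_lock);
    }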
+ +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22619/ +> Updates: bz#789278 +> Change-Id: I19461dc3603a3bd8f88866a1ab3db43d783af8e4 +> Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1787310 +Change-Id: I19461dc3603a3bd8f88866a1ab3db43d783af8e4 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202631 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 7 +++-- + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 3 +- + .../glusterd/src/glusterd-gfproxyd-svc-helper.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-handler.c | 8 ++++- + xlators/mgmt/glusterd/src/glusterd-mountbroker.c | 5 ++- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 8 +++++ + xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 2 ++ + xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 1 + + xlators/mgmt/glusterd/src/glusterd-store.c | 4 --- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 4 +-- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 + + .../mgmt/glusterd/src/glusterd-tierd-svc-helper.c | 4 +-- + xlators/mgmt/glusterd/src/glusterd-utils.c | 9 ++++-- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 36 +++++++++++++--------- + 14 files changed, 63 insertions(+), 31 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index d424f31..121346c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2032,7 +2032,6 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + case GF_OP_CMD_STATUS: + ret = 0; + goto out; +- + case GF_OP_CMD_DETACH_START: + if (volinfo->type != GF_CLUSTER_TYPE_TIER) { + snprintf(msg, sizeof(msg), +@@ -2044,7 +2043,7 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + errstr); + goto out; + } +- ++ /* Fall through */ + case GF_OP_CMD_START: { + if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && + dict_getn(dict, "replica-count", SLEN("replica-count"))) { +@@ -2259,7 +2258,8 @@ out: + if (op_errstr) + *op_errstr = errstr; + } +- ++ if (!op_errstr && errstr) ++ GF_FREE(errstr); + return ret; + } + +@@ -2687,6 +2687,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr) + * Update defrag_cmd as well or it will only be done + * for nodes on which the brick to be removed exists. 
+ */ ++ /* coverity[MIXED_ENUMS] */ + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; + ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY, +diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +index 85c06c1..5a91df4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c ++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +@@ -4107,6 +4107,7 @@ gd_pause_or_resume_gsync(dict_t *dict, char *master, char *slave, + + out: + sys_close(pfd); ++ /* coverity[INTEGER_OVERFLOW] */ + return ret; + } + +@@ -4183,7 +4184,7 @@ stop_gsync(char *master, char *slave, char **msg, char *conf_path, + + out: + sys_close(pfd); +- ++ /* coverity[INTEGER_OVERFLOW] */ + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c +index 67e3f41..e338bf4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c +@@ -111,7 +111,7 @@ glusterd_svc_get_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(*tmpvol); + if (tmp_fd < 0) { + gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 2e73c98..1f31e72 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -930,6 +930,7 @@ __glusterd_handle_cluster_lock(rpcsvc_request_t *req) + + op_ctx = dict_new(); + if (!op_ctx) { ++ ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + "Unable to set new dict"); + goto out; +@@ -956,6 +957,9 @@ out: + glusterd_friend_sm(); + glusterd_op_sm(); + ++ if (ret) ++ GF_FREE(ctx); ++ + return ret; + } + +@@ -3470,6 +3474,7 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options, + GF_ASSERT(this); + + GF_ASSERT(options); ++ GF_VALIDATE_OR_GOTO(this->name, rpc, out); + + if (force && rpc && *rpc) { + (void)rpc_clnt_unref(*rpc); +@@ -3482,7 +3487,6 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options, + goto out; + + ret = rpc_clnt_register_notify(new_rpc, notify_fn, notify_data); +- *rpc = new_rpc; + if (ret) + goto out; + ret = rpc_clnt_start(new_rpc); +@@ -3491,6 +3495,8 @@ out: + if (new_rpc) { + (void)rpc_clnt_unref(new_rpc); + } ++ } else { ++ *rpc = new_rpc; + } + + gf_msg_debug(this->name, 0, "returning %d", ret); +diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +index 332ddef..c017ccb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +@@ -334,7 +334,10 @@ make_ghadoop_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user, + if (ret == -1) + return ret; + +- return parse_mount_pattern_desc(mspec, hadoop_mnt_desc); ++ ret = parse_mount_pattern_desc(mspec, hadoop_mnt_desc); ++ GF_FREE(hadoop_mnt_desc); ++ ++ return ret; + } + + static gf_boolean_t +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 6475611..46fc607 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2467,6 +2467,7 @@ 
glusterd_start_bricks(glusterd_volinfo_t *volinfo) + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { ++ /* coverity[SLEEP] */ + ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } +@@ -3466,6 +3467,7 @@ _add_task_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) + + switch (op) { + case GD_OP_REMOVE_TIER_BRICK: ++ /* Fall through */ + case GD_OP_REMOVE_BRICK: + snprintf(key, sizeof(key), "task%d", index); + ret = _add_remove_bricks_to_dict(dict, volinfo, key); +@@ -7550,6 +7552,7 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + glusterd_op_t op = GD_OP_NONE; + glusterd_req_ctx_t *req_ctx = NULL; + char *op_errstr = NULL; ++ gf_boolean_t free_req_ctx = _gf_false; + + this = THIS; + priv = this->private; +@@ -7558,6 +7561,9 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + req_ctx = ctx; + } else { + req_ctx = GF_CALLOC(1, sizeof(*req_ctx), gf_gld_mt_op_allack_ctx_t); ++ if (!req_ctx) ++ goto out; ++ free_req_ctx = _gf_true; + op = glusterd_op_get_op(); + req_ctx->op = op; + gf_uuid_copy(req_ctx->uuid, MY_UUID); +@@ -7588,6 +7594,8 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + } + + out: ++ if (ret && req_ctx && free_req_ctx) ++ GF_FREE(req_ctx); + gf_msg_debug(this->name, 0, "Returning with %d", ret); + + return ret; +diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +index 8c1feeb..1a65359 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +@@ -82,6 +82,7 @@ glusterd_peerinfo_cleanup(glusterd_peerinfo_t *peerinfo) + call_rcu(&peerinfo->rcu_head.head, glusterd_peerinfo_destroy); + + if (quorum_action) ++ /* coverity[SLEEP] */ + glusterd_do_quorum_action(); + return 0; + } +@@ -358,6 +359,7 @@ glusterd_uuid_to_hostname(uuid_t uuid) + + if (!gf_uuid_compare(MY_UUID, uuid)) { + hostname = gf_strdup("localhost"); ++ return hostname; + } + RCU_READ_LOCK; + if (!cds_list_empty(&priv->peers)) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c +index fd334e6..f378187 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c ++++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c +@@ -372,6 +372,7 @@ glusterd_do_volume_quorum_action(xlator_t *this, glusterd_volinfo_t *volinfo, + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { ++ /* coverity[SLEEP] */ + ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index b3b5ee9..4fa8116 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -4764,10 +4764,6 @@ glusterd_store_retrieve_peers(xlator_t *this) + */ + address = cds_list_entry(peerinfo->hostnames.next, + glusterd_peer_hostname_t, hostname_list); +- if (!address) { +- ret = -1; +- goto next; +- } + peerinfo->hostname = gf_strdup(address->hostname); + + ret = glusterd_friend_add_from_peerinfo(peerinfo, 1, NULL); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index ca19a75..1d1f42d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -179,7 +179,7 @@ 
glusterd_svc_check_volfile_identical(char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(tmpvol); + if (tmp_fd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +@@ -241,7 +241,7 @@ glusterd_svc_check_topology_identical(char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmpfd = mkstemp(tmpvol); + if (tmpfd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 618d8bc..9e47d14 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1752,6 +1752,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + if (dict_get(op_ctx, "client-count")) + break; + } ++ /* coverity[MIXED_ENUMS] */ + } else if (cmd == GF_OP_CMD_DETACH_START) { + op = GD_OP_REMOVE_BRICK; + dict_del(req_dict, "rebalance-command"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c +index 922eae7..59843a0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c +@@ -116,7 +116,7 @@ glusterd_svc_check_tier_volfile_identical(char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(tmpvol); + if (tmp_fd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +@@ -177,7 +177,7 @@ glusterd_svc_check_tier_topology_identical(char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmpfd = mkstemp(tmpvol); + if (tmpfd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 14e23d1..8b0fc9a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -908,6 +908,7 @@ glusterd_create_sub_tier_volinfo(glusterd_volinfo_t *volinfo, + (*dup_volinfo)->brick_count = tier_info->cold_brick_count; + } + out: ++ /* coverity[REVERSE_NULL] */ + if (ret && *dup_volinfo) { + glusterd_volinfo_delete(*dup_volinfo); + *dup_volinfo = NULL; +@@ -2738,6 +2739,7 @@ glusterd_readin_file(const char *filepath, int *line_count) + /* Reduce allocation to minimal size. 
*/ + p = GF_REALLOC(lines, (counter + 1) * sizeof(char *)); + if (!p) { ++ /* coverity[TAINTED_SCALAR] */ + free_lines(lines, counter); + lines = NULL; + goto out; +@@ -6782,6 +6784,7 @@ glusterd_restart_bricks(void *opaque) + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { ++ /* coverity[SLEEP] */ + glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } +@@ -8886,7 +8889,7 @@ glusterd_nfs_statedump(char *options, int option_cnt, char **op_errstr) + kill(pid, SIGUSR1); + + sleep(1); +- ++ /* coverity[TAINTED_STRING] */ + sys_unlink(dumpoptions_path); + ret = 0; + out: +@@ -9012,6 +9015,7 @@ glusterd_quotad_statedump(char *options, int option_cnt, char **op_errstr) + + sleep(1); + ++ /* coverity[TAINTED_STRING] */ + sys_unlink(dumpoptions_path); + ret = 0; + out: +@@ -13423,7 +13427,7 @@ glusterd_get_global_options_for_all_vols(rpcsvc_request_t *req, dict_t *ctx, + if (key_fixed) + key = key_fixed; + } +- ++ /* coverity[CONSTANT_EXPRESSION_RESULT] */ + ALL_VOLUME_OPTION_CHECK("all", _gf_true, key, ret, op_errstr, out); + + for (i = 0; valid_all_vol_opts[i].option; i++) { +@@ -14153,6 +14157,7 @@ glusterd_disallow_op_for_tier(glusterd_volinfo_t *volinfo, glusterd_op_t op, + break; + case GD_OP_REMOVE_BRICK: + switch (cmd) { ++ /* coverity[MIXED_ENUMS] */ + case GF_DEFRAG_CMD_DETACH_START: + case GF_OP_CMD_DETACH_COMMIT_FORCE: + case GF_OP_CMD_DETACH_COMMIT: +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 539e8a5..6852f8e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -322,7 +322,7 @@ volopt_trie_cbk(char *word, void *param) + } + + static int +-process_nodevec(struct trienodevec *nodevec, char **hint) ++process_nodevec(struct trienodevec *nodevec, char **outputhint, char *inputhint) + { + int ret = 0; + char *hint1 = NULL; +@@ -331,14 +331,14 @@ process_nodevec(struct trienodevec *nodevec, char **hint) + trienode_t **nodes = nodevec->nodes; + + if (!nodes[0]) { +- *hint = NULL; ++ *outputhint = NULL; + return 0; + } + + #if 0 + /* Limit as in git */ + if (trienode_get_dist (nodes[0]) >= 6) { +- *hint = NULL; ++ *outputhint = NULL; + return 0; + } + #endif +@@ -347,23 +347,30 @@ process_nodevec(struct trienodevec *nodevec, char **hint) + return -1; + + if (nodevec->cnt < 2 || !nodes[1]) { +- *hint = hint1; ++ *outputhint = hint1; + return 0; + } + +- if (trienode_get_word(nodes[1], &hint2)) ++ if (trienode_get_word(nodes[1], &hint2)) { ++ GF_FREE(hint1); + return -1; ++ } + +- if (*hint) +- hintinfx = *hint; +- ret = gf_asprintf(hint, "%s or %s%s", hint1, hintinfx, hint2); ++ if (inputhint) ++ hintinfx = inputhint; ++ ret = gf_asprintf(outputhint, "%s or %s%s", hint1, hintinfx, hint2); + if (ret > 0) + ret = 0; ++ if (hint1) ++ GF_FREE(hint1); ++ if (hint2) ++ GF_FREE(hint2); + return ret; + } + + static int +-volopt_trie_section(int lvl, char **patt, char *word, char **hint, int hints) ++volopt_trie_section(int lvl, char **patt, char *word, char **outputhint, ++ char *inputhint, int hints) + { + trienode_t *nodes[] = {NULL, NULL}; + struct trienodevec nodevec = {nodes, 2}; +@@ -384,7 +391,7 @@ volopt_trie_section(int lvl, char **patt, char *word, char **hint, int hints) + nodevec.cnt = hints; + ret = trie_measure_vec(trie, word, &nodevec); + if (!ret && nodevec.nodes[0]) +- ret = process_nodevec(&nodevec, hint); ++ ret = process_nodevec(&nodevec, outputhint, inputhint); + + trie_destroy(trie); + +@@ -396,6 
+403,7 @@ volopt_trie(char *key, char **hint) + { + char *patt[] = {NULL}; + char *fullhint = NULL; ++ char *inputhint = NULL; + char *dot = NULL; + char *dom = NULL; + int len = 0; +@@ -405,7 +413,7 @@ volopt_trie(char *key, char **hint) + + dot = strchr(key, '.'); + if (!dot) +- return volopt_trie_section(1, patt, key, hint, 2); ++ return volopt_trie_section(1, patt, key, hint, inputhint, 2); + + len = dot - key; + dom = gf_strdup(key); +@@ -413,7 +421,7 @@ volopt_trie(char *key, char **hint) + return -1; + dom[len] = '\0'; + +- ret = volopt_trie_section(0, NULL, dom, patt, 1); ++ ret = volopt_trie_section(0, NULL, dom, patt, inputhint, 1); + GF_FREE(dom); + if (ret) { + patt[0] = NULL; +@@ -422,8 +430,8 @@ volopt_trie(char *key, char **hint) + if (!patt[0]) + goto out; + +- *hint = "..."; +- ret = volopt_trie_section(1, patt, dot + 1, hint, 2); ++ inputhint = "..."; ++ ret = volopt_trie_section(1, patt, dot + 1, hint, inputhint, 2); + if (ret) + goto out; + if (*hint) { +-- +1.8.3.1 + diff --git a/SOURCES/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch b/SOURCES/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch new file mode 100644 index 0000000..d980eb5 --- /dev/null +++ b/SOURCES/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch @@ -0,0 +1,54 @@ +From 0e3871a57b7a621444dc5cfd49935a1e412f6436 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Mon, 8 Jun 2020 13:27:50 +0530 +Subject: [PATCH 433/449] socket: Resolve ssl_ctx leak for a brick while only + mgmt SSL is enabled + +Problem: While only mgmt SSL is enabled for a brick process use_ssl flag + is false for a brick process and socket api's cleanup ssl_ctx only + while use_ssl and ssl_ctx both are valid + +Solution: To avoid a leak check only ssl_ctx, if it is valid cleanup + ssl_ctx + +> Fixes: #1196 +> Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit 9873baee34afdf0c20f5fc98a7dbf2a9f07447e2) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24366/) + +BUG: 1810924 +Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202625 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-transport/socket/src/socket.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 226b2e2..54cd5df 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -1163,7 +1163,7 @@ __socket_reset(rpc_transport_t *this) + SSL_free(priv->ssl_ssl); + priv->ssl_ssl = NULL; + } +- if (priv->use_ssl && priv->ssl_ctx) { ++ if (priv->ssl_ctx) { + SSL_CTX_free(priv->ssl_ctx); + priv->ssl_ctx = NULL; + } +@@ -4685,7 +4685,7 @@ fini(rpc_transport_t *this) + SSL_free(priv->ssl_ssl); + priv->ssl_ssl = NULL; + } +- if (priv->use_ssl && priv->ssl_ctx) { ++ if (priv->ssl_ctx) { + SSL_CTX_free(priv->ssl_ctx); + priv->ssl_ctx = NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch b/SOURCES/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch new file mode 100644 index 0000000..3f038a3 --- /dev/null +++ b/SOURCES/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch @@ -0,0 +1,39 @@ +From 2ea3fc203671429d0aa9994e5bbd57f6a604523d Mon 
Sep 17 00:00:00 2001 +From: Xie Changlong <xiechanglong@cmss.chinamobile.com> +Date: Mon, 28 Oct 2019 17:43:28 +0800 +Subject: [PATCH 434/449] glusterd/ganesha: fix Coverity CID 1405785 + +To avoid double free + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23630/ +> updates: bz#789278 +> Change-Id: I15ae54ed696295d4cb015668722e77983b062ccb +> Signed-off-by: Xie Changlong <xiechanglong@cmss.chinamobile.com> + +BUG: 1787310 +Change-Id: I15ae54ed696295d4cb015668722e77983b062ccb +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202623 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 06f028f..caba34f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -659,8 +659,8 @@ tear_down_cluster(gf_boolean_t run_teardown) + "Failed to close dir %s. Reason :" + " %s", + CONFDIR, strerror(errno)); +- goto exit; + } ++ goto exit; + } + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0435-glusterd-coverity-fix.patch b/SOURCES/0435-glusterd-coverity-fix.patch new file mode 100644 index 0000000..f587107 --- /dev/null +++ b/SOURCES/0435-glusterd-coverity-fix.patch @@ -0,0 +1,38 @@ +From 9425fd5a49a17a8f91c13632ae055a6510b0b44c Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Fri, 17 May 2019 14:27:58 +0530 +Subject: [PATCH 435/449] glusterd: coverity fix + +CID: 1401345 - Unused value + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22737/ +> updates: bz#789278 +> Change-Id: I6b8f2611151ce0174042384b7632019c312ebae3 +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1787310 +Change-Id: I6b8f2611151ce0174042384b7632019c312ebae3 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202622 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 8b0fc9a..2eb2a76 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3740,7 +3740,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + * deleting a volume, so we shouldn't be trying to create a + * fresh volume here which would lead to a stale entry + */ +- if (stage_deleted == 0) ++ if (!ret && stage_deleted == 0) + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + ret = 0; + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0436-glusterd-coverity-fixes.patch b/SOURCES/0436-glusterd-coverity-fixes.patch new file mode 100644 index 0000000..799681f --- /dev/null +++ b/SOURCES/0436-glusterd-coverity-fixes.patch @@ -0,0 +1,187 @@ +From 179213798496448316547506da65dbd9fd741dfa Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Wed, 24 Apr 2019 22:02:51 +0530 +Subject: [PATCH 436/449] glusterd: coverity fixes + +Addresses the following: + +* CID 1124776: Resource leaks (RESOURCE_LEAK) - Variable "aa" going out +of scope leaks the storage it points to in glusterd-volgen.c + +* Bunch of 
CHECKED_RETURN defects in the callers of synctask_barrier_init + +* CID 1400742: Program hangs (LOCK) - adding annotation to fix this +false positive + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22615 +> Updates: bz#789278 +> Change-Id: I02f16e7eeb8c5cf72f7d0b29d00df4f03b3718b3 +> Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1787310 +Change-Id: I02f16e7eeb8c5cf72f7d0b29d00df4f03b3718b3 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202626 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 6 ++++++ + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 24 +++++++++++++++++++----- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 22 ++++++++++++++++++---- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 5 +++-- + 4 files changed, 46 insertions(+), 11 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 1f31e72..b8799ab 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3458,6 +3458,12 @@ glusterd_friend_remove(uuid_t uuid, char *hostname) + ret = glusterd_peerinfo_cleanup(peerinfo); + out: + gf_msg_debug(THIS->name, 0, "returning %d", ret); ++ /* We don't need to do a mutex unlock of peerinfo->delete_lock as the same ++ * will be anyway destroyed within glusterd_peerinfo_cleanup, coverity ++ * though cries about it ++ */ ++ /* coverity[LOCK] */ ++ + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index a4915f3..1e185d7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -757,7 +757,10 @@ glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict, + + /* Sending mgmt_v3 lock req to other nodes in the cluster */ + gd_syncargs_init(&args, NULL); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1108,7 +1111,10 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict, + + /* Sending Pre Validation req to other nodes in the cluster */ + gd_syncargs_init(&args, req_dict); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1458,7 +1464,10 @@ glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + + /* Sending brick op req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1722,7 +1731,9 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + + /* Sending commit req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1963,7 +1974,10 @@ glusterd_mgmt_v3_post_validate(glusterd_op_t op, int32_t op_ret, dict_t *dict, + + /* Sending Post Validation req to other nodes in the cluster */ + gd_syncargs_init(&args, req_dict); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +diff --git 
a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 9e47d14..c78983a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1191,7 +1191,12 @@ gd_lock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + struct syncargs args = {0}; + + this = THIS; +- synctask_barrier_init((&args)); ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1321,7 +1326,10 @@ stage_done: + } + + gd_syncargs_init(&args, aggr_dict); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1449,7 +1457,10 @@ commit_done: + } + + gd_syncargs_init(&args, op_ctx); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + origin_glusterd = is_origin_glusterd(req_dict); + +@@ -1541,7 +1552,10 @@ gd_unlock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, + goto out; + } + +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + if (cluster_lock) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 6852f8e..16346e7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4808,9 +4808,10 @@ nfs_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + + if (ret != -1) { + ret = gf_canonicalize_path(vme->value); +- if (ret) ++ if (ret) { ++ GF_FREE(aa); + return -1; +- ++ } + ret = xlator_set_option(xl, aa, ret, vme->value); + GF_FREE(aa); + } +-- +1.8.3.1 + diff --git a/SOURCES/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch b/SOURCES/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch new file mode 100644 index 0000000..21fcd8a --- /dev/null +++ b/SOURCES/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch @@ -0,0 +1,48 @@ +From ffd428d07036531b7ed98c7393b87490aaa223ec Mon Sep 17 00:00:00 2001 +From: Niels de Vos <ndevos@redhat.com> +Date: Fri, 3 May 2019 09:18:31 +0200 +Subject: [PATCH 437/449] glusterd: prevent use-after-free in + glusterd_op_ac_send_brick_op() + +Coverity reported that GF_FREE(req_ctx) could be called 2x on req_ctx. 
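In outline, the fix keeps a single guarded release at the exit label instead of also freeing in the earlier error branch. A hypothetical, reduced model of that pattern (not the glusterd source; the real hunk follows below):

    #include <stdlib.h>

    /* Hypothetical model: the context is released exactly once, at the single
     * "out:" exit, and only when this function allocated it (tracked by a flag,
     * as the real code does with free_req_ctx). */
    struct req_ctx { int op; };

    int send_brick_op(struct req_ctx *ctx)
    {
        int ret = -1;
        int free_ctx = 0;

        if (ctx == NULL) {
            ctx = calloc(1, sizeof(*ctx));
            if (ctx == NULL)
                goto out;
            free_ctx = 1;       /* we own it, so we free it */
        }

        ret = 0;                /* build and send the payload here */

    out:
        if (ret != 0 && free_ctx)
            free(ctx);          /* freed once, never in an earlier branch as well */
        return ret;
    }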
+ +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22656/ +> Change-Id: I9120686e5920de8c27688e10de0db6aa26292064 +> CID: 1401115 +> Updates: bz#789278 +> Signed-off-by: Niels de Vos <ndevos@redhat.com> + +BUG: 1787310 +Change-Id: I9120686e5920de8c27688e10de0db6aa26292064 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202619 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 46fc607..1e84f5f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -7575,7 +7575,6 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + opinfo.op_errstr = op_errstr; +- GF_FREE(req_ctx); + goto out; + } + } +@@ -7594,7 +7593,7 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + } + + out: +- if (ret && req_ctx && free_req_ctx) ++ if (ret && free_req_ctx) + GF_FREE(req_ctx); + gf_msg_debug(this->name, 0, "Returning with %d", ret); + +-- +1.8.3.1 + diff --git a/SOURCES/0438-dht-sparse-files-rebalance-enhancements.patch b/SOURCES/0438-dht-sparse-files-rebalance-enhancements.patch new file mode 100644 index 0000000..6e10ce6 --- /dev/null +++ b/SOURCES/0438-dht-sparse-files-rebalance-enhancements.patch @@ -0,0 +1,324 @@ +From 7b2f1bd4e5a57ea3abd5f14a7d81b120735faecd Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Wed, 6 May 2020 13:28:40 +0300 +Subject: [PATCH 438/449] dht - sparse files rebalance enhancements + +Currently data migration in rebalance reads sparse file sequentially, +disregarding which segments are holes and which are data. This can lead +to extremely long migration time for large sparse file. +Data migration mechanism needs to be enhanced so only data segments are +read and migrated. This can be achieved using lseek to seek for holes +and data in the file. +This enhancement is a consequence of +https://bugzilla.redhat.com/show_bug.cgi?id=1823703 + +> fixes: #1222 +> Change-Id: If5f448a0c532926464e1f34f504c5c94749b08c3 +> Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +> (Cherry pick from commit 7b7559733ca0c25c63f9d56cb7f4650dbd694c40) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24409/) + +BUG: 1836099 +Change-Id: If5f448a0c532926464e1f34f504c5c94749b08c3 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202647 +Reviewed-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/distribute/spare_file_rebalance.t | 51 ++++++++ + xlators/cluster/dht/src/dht-rebalance.c | 172 ++++++++++++-------------- + 2 files changed, 130 insertions(+), 93 deletions(-) + create mode 100644 tests/basic/distribute/spare_file_rebalance.t + +diff --git a/tests/basic/distribute/spare_file_rebalance.t b/tests/basic/distribute/spare_file_rebalance.t +new file mode 100644 +index 0000000..061c02f +--- /dev/null ++++ b/tests/basic/distribute/spare_file_rebalance.t +@@ -0,0 +1,51 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../dht.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}; ++ ++# Verify volume creation ++EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++# Start volume and verify successful start ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++#------------------------------------------------------------ ++ ++# Test case - Create sparse files on MP and verify ++# file info after rebalance ++#------------------------------------------------------------ ++ ++# Create some sparse files and get their size ++TEST cd $M0; ++dd if=/dev/urandom of=sparse_file bs=10k count=1 seek=2M ++cp --sparse=always sparse_file sparse_file_3; ++ ++# Add a 3rd brick ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3; ++ ++# Trigger rebalance ++TEST $CLI volume rebalance $V0 start force; ++EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed; ++ ++# Compare original and rebalanced files ++TEST cd $B0/${V0}2 ++TEST cmp sparse_file $B0/${V0}3/sparse_file_3 ++EXPECT_WITHIN 30 ""; ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 88b6b54..d0c21b4 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -18,8 +18,8 @@ + #include <glusterfs/events.h> + + #define GF_DISK_SECTOR_SIZE 512 +-#define DHT_REBALANCE_PID 4242 /* Change it if required */ +-#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ ++#define DHT_REBALANCE_PID 4242 /* Change it if required */ ++#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */ + #define MAX_MIGRATE_QUEUE_COUNT 500 + #define MIN_MIGRATE_QUEUE_COUNT 200 + #define MAX_REBAL_TYPE_SIZE 16 +@@ -178,75 +178,6 @@ dht_strip_out_acls(dict_t *dict) + } + } + +-static int +-dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count, +- int32_t size, off_t offset, struct iobref *iobref, +- int *fop_errno) +-{ +- int i = 0; +- int ret = -1; +- int start_idx = 0; +- int tmp_offset = 0; +- int write_needed = 0; +- int buf_len = 0; +- int size_pending = 0; +- char *buf = NULL; +- +- /* loop through each vector */ +- for (i = 0; i < count; i++) { +- buf = vec[i].iov_base; +- buf_len = vec[i].iov_len; +- +- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len; +- start_idx += GF_DISK_SECTOR_SIZE) { +- if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) { +- write_needed = 1; +- continue; +- } +- +- if (write_needed) { +- ret = syncop_write( +- to, fd, (buf + tmp_offset), (start_idx - tmp_offset), +- (offset + tmp_offset), iobref, 0, NULL, NULL); +- /* 'path' will be logged in calling function */ +- if (ret < 0) { +- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", +- strerror(-ret)); +- *fop_errno = -ret; +- ret = -1; +- goto out; +- } +- +- write_needed = 0; +- } +- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE; +- } +- +- if ((start_idx < buf_len) || write_needed) { +- /* This means, last chunk is not yet written.. 
write it */ +- ret = syncop_write(to, fd, (buf + tmp_offset), +- (buf_len - tmp_offset), (offset + tmp_offset), +- iobref, 0, NULL, NULL); +- if (ret < 0) { +- /* 'path' will be logged in calling function */ +- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", +- strerror(-ret)); +- *fop_errno = -ret; +- ret = -1; +- goto out; +- } +- } +- +- size_pending = (size - buf_len); +- if (!size_pending) +- break; +- } +- +- ret = size; +-out: +- return ret; +-} +- + /* + return values: + -1 : failure +@@ -1101,32 +1032,97 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + int ret = 0; + int count = 0; + off_t offset = 0; ++ off_t data_offset = 0; ++ off_t hole_offset = 0; + struct iovec *vector = NULL; + struct iobref *iobref = NULL; + uint64_t total = 0; + size_t read_size = 0; ++ size_t data_block_size = 0; + dict_t *xdata = NULL; + dht_conf_t *conf = NULL; + + conf = this->private; ++ + /* if file size is '0', no need to enter this loop */ + while (total < ia_size) { +- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) +- ? DHT_REBALANCE_BLKSIZE +- : (ia_size - total)); ++ /* This is a regular file - read it sequentially */ ++ if (!hole_exists) { ++ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ++ ? DHT_REBALANCE_BLKSIZE ++ : (ia_size - total)); ++ } else { ++ /* This is a sparse file - read only the data segments in the file ++ */ ++ ++ /* If the previous data block is fully copied, find the next data ++ * segment ++ * starting at the offset of the last read and written byte, */ ++ if (data_block_size <= 0) { ++ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL, ++ &data_offset); ++ if (ret) { ++ if (ret == -ENXIO) ++ ret = 0; /* No more data segments */ ++ else ++ *fop_errno = -ret; /* Error occurred */ ++ ++ break; ++ } ++ ++ /* If the position of the current data segment is greater than ++ * the position of the next hole, find the next hole in order to ++ * calculate the length of the new data segment */ ++ if (data_offset > hole_offset) { ++ /* Starting at the offset of the last data segment, find the ++ * next hole */ ++ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE, ++ NULL, &hole_offset); ++ if (ret) { ++ /* If an error occurred here it's a real error because ++ * if the seek for a data segment was successful then ++ * necessarily another hole must exist (EOF is a hole) ++ */ ++ *fop_errno = -ret; ++ break; ++ } ++ ++ /* Calculate the total size of the current data block */ ++ data_block_size = hole_offset - data_offset; ++ } ++ } else { ++ /* There is still data in the current segment, move the ++ * data_offset to the position of the last written byte */ ++ data_offset = offset; ++ } ++ ++ /* Calculate how much data needs to be read and written. If the data ++ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and ++ * write DHT_REBALANCE_BLKSIZE data length and the rest in the ++ * next iteration(s) */ ++ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE) ++ ? 
DHT_REBALANCE_BLKSIZE ++ : data_block_size); ++ ++ /* Calculate the remaining size of the data block - maybe there's no ++ * need to seek for data in the next iteration */ ++ data_block_size -= read_size; ++ ++ /* Set offset to the offset of the data segment so read and write ++ * will have the correct position */ ++ offset = data_offset; ++ } + + ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count, + &iobref, NULL, NULL, NULL); ++ + if (!ret || (ret < 0)) { + *fop_errno = -ret; + break; + } + +- if (hole_exists) { +- ret = dht_write_with_holes(to, dst, vector, count, ret, offset, +- iobref, fop_errno); +- } else { +- if (!conf->force_migration && !dht_is_tier_xlator(this)) { ++ if (!conf->force_migration && !dht_is_tier_xlator(this)) { ++ if (!xdata) { + xdata = dict_new(); + if (!xdata) { + gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, +@@ -1146,7 +1142,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + * https://github.com/gluster/glusterfs/issues/308 + * for more details. + */ +- ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1); ++ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1); + if (ret) { + gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM, + "failed to set dict"); +@@ -1155,22 +1151,12 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + break; + } + } +- +- ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, +- NULL, xdata, NULL); +- if (ret < 0) { +- *fop_errno = -ret; +- } +- } +- +- if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && +- (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) { +- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, +- "Migrate file paused"); +- ret = -1; + } + ++ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, ++ NULL, xdata, NULL); + if (ret < 0) { ++ *fop_errno = -ret; + break; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0439-cluster-afr-Delay-post-op-for-fsync.patch b/SOURCES/0439-cluster-afr-Delay-post-op-for-fsync.patch new file mode 100644 index 0000000..dc1593b --- /dev/null +++ b/SOURCES/0439-cluster-afr-Delay-post-op-for-fsync.patch @@ -0,0 +1,438 @@ +From 3ed98fc9dcb39223032e343fd5b0ad17fa3cae14 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Fri, 29 May 2020 14:24:53 +0530 +Subject: [PATCH 439/449] cluster/afr: Delay post-op for fsync + +Problem: +AFR doesn't delay post-op for fsync fop. For fsync heavy workloads +this leads to un-necessary fxattrop/finodelk for every fsync leading +to bad performance. + +Fix: +Have delayed post-op for fsync. Add special flag in xdata to indicate +that afr shouldn't delay post-op in cases where either the +process will terminate or graph-switch would happen. Otherwise it leads +to un-necessary heals when the graph-switch/process-termination +happens before delayed-post-op completes. 
+ +> Upstream-patch: https://review.gluster.org/c/glusterfs/+/24473 +> Fixes: #1253 + +BUG: 1838479 +Change-Id: I531940d13269a111c49e0510d49514dc169f4577 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202676 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + api/src/glfs-resolve.c | 14 ++- + tests/basic/afr/durability-off.t | 2 + + tests/basic/gfapi/gfapi-graph-switch-open-fd.t | 44 +++++++++ + tests/basic/gfapi/gfapi-keep-writing.c | 129 +++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-inode-write.c | 11 ++- + xlators/cluster/afr/src/afr-transaction.c | 9 +- + xlators/cluster/afr/src/afr.h | 2 +- + xlators/cluster/dht/src/dht-rebalance.c | 15 ++- + xlators/mount/fuse/src/fuse-bridge.c | 23 ++++- + 9 files changed, 239 insertions(+), 10 deletions(-) + create mode 100644 tests/basic/gfapi/gfapi-graph-switch-open-fd.t + create mode 100644 tests/basic/gfapi/gfapi-keep-writing.c + +diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c +index a79f490..062b7dc 100644 +--- a/api/src/glfs-resolve.c ++++ b/api/src/glfs-resolve.c +@@ -722,6 +722,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) + 0, + }; + char uuid1[64]; ++ dict_t *xdata = NULL; + + oldinode = oldfd->inode; + oldsubvol = oldinode->table->xl; +@@ -730,7 +731,15 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) + return fd_ref(oldfd); + + if (!oldsubvol->switched) { +- ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, NULL, NULL); ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_msg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED, ++ "last-fsync set failed on %s graph %s (%d)", ++ uuid_utoa_r(oldfd->inode->gfid, uuid1), ++ graphid_str(oldsubvol), oldsubvol->graph->id); ++ } ++ ++ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL); + DECODE_SYNCOP_ERR(ret); + if (ret) { + gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED, +@@ -809,6 +818,9 @@ out: + newfd = NULL; + } + ++ if (xdata) ++ dict_unref(xdata); ++ + return newfd; + } + +diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t +index 155ffa0..6e0f18b 100644 +--- a/tests/basic/afr/durability-off.t ++++ b/tests/basic/afr/durability-off.t +@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l) + ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + #Test that fsyncs happen when durability is on + TEST $CLI volume set $V0 cluster.ensure-durability on + TEST $CLI volume set $V0 performance.strict-write-ordering on +diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t +new file mode 100644 +index 0000000..2e666be +--- /dev/null ++++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t +@@ -0,0 +1,44 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2}; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST touch $M0/sync ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi ++ ++ ++#Launch a program to keep doing writes on an fd ++./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync & ++p=$! ++sleep 1 #Let some writes go through ++#Check if graph switch will lead to any pending markers for ever ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++ ++ ++TEST rm -f $M0/sync #Make sure the glfd is closed ++TEST wait #Wait for background process to die ++#Goal is to check if there is permanent FOOL changelog ++sleep 5 ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty ++ ++cleanup_tester $(dirname $0)/gfapi-async-calls-test ++ ++cleanup; +diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c +new file mode 100644 +index 0000000..91b59ce +--- /dev/null ++++ b/tests/basic/gfapi/gfapi-keep-writing.c +@@ -0,0 +1,129 @@ ++#include <fcntl.h> ++#include <unistd.h> ++#include <time.h> ++#include <limits.h> ++#include <string.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <errno.h> ++#include <glusterfs/api/glfs.h> ++#include <glusterfs/api/glfs-handles.h> ++ ++#define LOG_ERR(msg) \ ++ do { \ ++ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \ ++ } while (0) ++ ++glfs_t * ++init_glfs(const char *hostname, const char *volname, const char *logfile) ++{ ++ int ret = -1; ++ glfs_t *fs = NULL; ++ ++ fs = glfs_new(volname); ++ if (!fs) { ++ LOG_ERR("glfs_new failed"); ++ return NULL; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); ++ if (ret < 0) { ++ LOG_ERR("glfs_set_volfile_server failed"); ++ goto out; ++ } ++ ++ ret = glfs_set_logging(fs, logfile, 7); ++ if (ret < 0) { ++ LOG_ERR("glfs_set_logging failed"); ++ goto out; ++ } ++ ++ ret = glfs_init(fs); ++ if (ret < 0) { ++ LOG_ERR("glfs_init failed"); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ if (ret) { ++ glfs_fini(fs); ++ fs = NULL; ++ } ++ ++ return fs; ++} ++ ++int ++glfs_test_function(const char *hostname, const char *volname, ++ const char *logfile, const char *syncfile) ++{ ++ int ret = -1; ++ int flags = O_CREAT | O_RDWR; ++ glfs_t *fs = NULL; ++ glfs_fd_t *glfd = NULL; ++ const char *buff = "This is from my prog\n"; ++ const char *filename = "glfs_test.txt"; ++ struct stat buf = {0}; ++ ++ fs = init_glfs(hostname, volname, logfile); ++ if (fs == NULL) { ++ LOG_ERR("init_glfs failed"); ++ return -1; ++ } ++ ++ glfd = glfs_creat(fs, filename, flags, 0644); ++ if (glfd == NULL) { ++ LOG_ERR("glfs_creat failed"); ++ goto out; ++ } ++ ++ while (glfs_stat(fs, syncfile, &buf) == 0) { ++ ret = glfs_write(glfd, buff, strlen(buff), flags); ++ if (ret < 0) { ++ LOG_ERR("glfs_write failed"); ++ goto out; ++ } ++ } ++ ++ ret = glfs_close(glfd); ++ if (ret < 0) { 
++ LOG_ERR("glfs_write failed"); ++ goto out; ++ } ++ ++out: ++ ret = glfs_fini(fs); ++ if (ret) { ++ LOG_ERR("glfs_fini failed"); ++ } ++ ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ int ret = 0; ++ char *hostname = NULL; ++ char *volname = NULL; ++ char *logfile = NULL; ++ char *syncfile = NULL; ++ ++ if (argc != 5) { ++ fprintf(stderr, "Invalid argument\n"); ++ exit(1); ++ } ++ ++ hostname = argv[1]; ++ volname = argv[2]; ++ logfile = argv[3]; ++ syncfile = argv[4]; ++ ++ ret = glfs_test_function(hostname, volname, logfile, syncfile); ++ if (ret) { ++ LOG_ERR("glfs_test_function failed"); ++ } ++ ++ return ret; ++} +diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c +index 7fcc9d4..df82b6e 100644 +--- a/xlators/cluster/afr/src/afr-inode-write.c ++++ b/xlators/cluster/afr/src/afr-inode-write.c +@@ -2492,6 +2492,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + call_frame_t *transaction_frame = NULL; + int ret = -1; + int32_t op_errno = ENOMEM; ++ int8_t last_fsync = 0; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) +@@ -2501,10 +2502,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + if (!local) + goto out; + +- if (xdata) ++ if (xdata) { + local->xdata_req = dict_copy_with_ref(xdata, NULL); +- else ++ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) { ++ if (last_fsync) { ++ local->transaction.disable_delayed_post_op = _gf_true; ++ } ++ } ++ } else { + local->xdata_req = dict_new(); ++ } + + if (!local->xdata_req) + goto out; +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 8e65ae2..ffd0ab8 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -2385,8 +2385,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this, + goto out; + } + +- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) { +- /*Only allow writes but shard does [f]xattrops on writes, so ++ if (local->transaction.disable_delayed_post_op) { ++ goto out; ++ } ++ ++ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) && ++ (local->op != GF_FOP_FSYNC)) { ++ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so + * they are fine too*/ + goto out; + } +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 18f1a6a..ff96246 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -854,7 +854,7 @@ typedef struct _afr_local { + + int (*unwind)(call_frame_t *frame, xlator_t *this); + +- /* post-op hook */ ++ gf_boolean_t disable_delayed_post_op; + } transaction; + + syncbarrier_t barrier; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index d0c21b4..e9974cd 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1550,6 +1550,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + xlator_t *old_target = NULL; + xlator_t *hashed_subvol = NULL; + fd_t *linkto_fd = NULL; ++ dict_t *xdata = NULL; + + if (from == to) { + gf_msg_debug(this->name, 0, +@@ -1868,7 +1869,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + + /* TODO: Sync the locks */ + +- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL); ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ 
gf_log(this->name, GF_LOG_ERROR, ++ "%s: failed to set last-fsync flag on " ++ "%s (%s)", ++ loc->path, to->name, strerror(ENOMEM)); ++ } ++ ++ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", + loc->path, to->name, strerror(-ret)); +@@ -2342,11 +2351,15 @@ out: + + if (dst_fd) + syncop_close(dst_fd); ++ + if (src_fd) + syncop_close(src_fd); + if (linkto_fd) + syncop_close(linkto_fd); + ++ if (xdata) ++ dict_unref(xdata); ++ + loc_wipe(&tmp_loc); + loc_wipe(&parent_loc); + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index fdeec49..4264fad 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -5559,6 +5559,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, + char create_in_progress = 0; + fuse_fd_ctx_t *basefd_ctx = NULL; + fd_t *oldfd = NULL; ++ dict_t *xdata = NULL; + + basefd_ctx = fuse_fd_ctx_get(this, basefd); + GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out); +@@ -5595,10 +5596,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, + } + + if (oldfd->inode->table->xl == old_subvol) { +- if (IA_ISDIR(oldfd->inode->ia_type)) ++ if (IA_ISDIR(oldfd->inode->ia_type)) { + ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL); +- else +- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL); ++ } else { ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_log("glusterfs-fuse", GF_LOG_WARNING, ++ "last-fsync set failed (%s) on fd (%p)" ++ "(basefd:%p basefd-inode.gfid:%s) " ++ "(old-subvolume:%s-%d new-subvolume:%s-%d)", ++ strerror(ENOMEM), oldfd, basefd, ++ uuid_utoa(basefd->inode->gfid), old_subvol->name, ++ old_subvol->graph->id, new_subvol->name, ++ new_subvol->graph->id); ++ } ++ ++ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL); ++ } + + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, +@@ -5653,6 +5667,9 @@ out: + + fd_unref(oldfd); + ++ if (xdata) ++ dict_unref(xdata); ++ + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch b/SOURCES/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch new file mode 100644 index 0000000..a7c1869 --- /dev/null +++ b/SOURCES/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch @@ -0,0 +1,62 @@ +From 9cbab9110523cfafe23d6c6b3080d0d744062b85 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 21 May 2020 16:04:33 +0530 +Subject: [PATCH 440/449] glusterd/snapshot: Improve log message during + snapshot clone + +While taking a snapshot clone, if the snapshot is not activated, +the cli was returning that the bricks are down. +This patch clearly print tha the error is due to the snapshot +state. 
+ +>Change-Id: Ia840e6e071342e061ad38bf15e2e2ff2b0dacdfa +>Fixes: #1255 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24478/ + +BUG: 1837926 +Change-Id: Ia840e6e071342e061ad38bf15e2e2ff2b0dacdfa +Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202707 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index c56be91..5b8ae97 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -2238,7 +2238,6 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, + char *clonename = NULL; + char *snapname = NULL; + char device_name[64] = ""; +- char key[PATH_MAX] = ""; + glusterd_snap_t *snap = NULL; + char err_str[PATH_MAX] = ""; + int ret = -1; +@@ -2299,8 +2298,18 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, + goto out; + } + +- snprintf(key, sizeof(key) - 1, "vol1_volid"); +- ret = dict_get_bin(dict, key, (void **)&snap_volid); ++ ++ if (!glusterd_is_volume_started(snap_vol)) { ++ snprintf(err_str, sizeof(err_str), ++ "Snapshot %s is " ++ "not activated", ++ snap->snapname); ++ loglevel = GF_LOG_WARNING; ++ *op_errno = EG_VOLSTP; ++ goto out; ++ } ++ ++ ret = dict_get_bin(dict, "vol1_volid", (void **)&snap_volid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_volid"); +-- +1.8.3.1 + diff --git a/SOURCES/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch b/SOURCES/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch new file mode 100644 index 0000000..1e49684 --- /dev/null +++ b/SOURCES/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch @@ -0,0 +1,195 @@ +From 1bde083cbd1e06be66d00e4ca52075687cee0d60 Mon Sep 17 00:00:00 2001 +From: Csaba Henk <csaba@redhat.com> +Date: Fri, 8 May 2020 23:01:04 +0200 +Subject: [PATCH 441/449] fuse: occasional logging for fuse device 'weird' + write errors + +This change is a followup to +I510158843e4b1d482bdc496c2e97b1860dc1ba93. + +In referred change we pushed log messages about 'weird' +write errors to fuse device out of sight, by reporting +them at Debug loglevel instead of Error (where +'weird' means errno is not POSIX compliant but having +meaningful semantics for FUSE protocol). + +This solved the issue of spurious error reporting. +And so far so good: these messages don't indicate +an error condition by themselves. However, when they +come in high repetitions, that indicates a suboptimal +condition which should be reported.[1] + +Therefore now we shall emit a Warning if a certain +errno occurs a certain number of times[2] as the +outcome of a write to the fuse device. + +___ +[1] typically ENOENTs and ENOTDIRs accumulate +when glusterfs' inode invalidation lags behind +the kernel's internal inode garbage collection +(in this case above errnos mean that the inode +which we requested to be invalidated is not found +in kernel). This can be mitigated with the +invalidate-limit command line / mount option, +cf. bz#1732717. + +[2] 256, as of the current implementation. 
+ +Upstream on https://review.gluster.org/24415 +> Change-Id: I8cc7fe104da43a88875f93b0db49d5677cc16045 +> Updates: #1000 +> Signed-off-by: Csaba Henk <csaba@redhat.com> + +BUG: 1839137 +Change-Id: I8448d6d328d47cb01d560cd99a2f43cd8dab312d +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202646 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/src/fuse-bridge.c | 36 +++++++++++++++++++++++++++++++++++- + xlators/mount/fuse/src/fuse-bridge.h | 18 ++++++++++++++++++ + 2 files changed, 53 insertions(+), 1 deletion(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 4264fad..2e7584c 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -218,14 +218,30 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + if (res == -1) { + const char *errdesc = NULL; + gf_loglevel_t loglevel = GF_LOG_ERROR; ++ gf_boolean_t errno_degraded = _gf_false; ++ gf_boolean_t errno_promoted = _gf_false; ++ ++#define ACCOUNT_ERRNO(eno) \ ++ do { \ ++ if (errno_degraded) { \ ++ pthread_mutex_lock(&priv->fusedev_errno_cnt_mutex); \ ++ { \ ++ if (!++priv->fusedev_errno_cnt[FUSEDEV_##eno]) \ ++ errno_promoted = _gf_true; \ ++ } \ ++ pthread_mutex_unlock(&priv->fusedev_errno_cnt_mutex); \ ++ } \ ++ } while (0) + + /* If caller masked the errno, then it + * does not indicate an error at the application + * level, so we degrade the log severity to DEBUG. + */ + if (errnomask && errno < ERRNOMASK_MAX && +- GET_ERRNO_MASK(errnomask, errno)) ++ GET_ERRNO_MASK(errnomask, errno)) { + loglevel = GF_LOG_DEBUG; ++ errno_degraded = _gf_true; ++ } + + switch (errno) { + /* The listed errnos are FUSE status indicators, +@@ -235,33 +251,43 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + */ + case ENOENT: + errdesc = "ENOENT"; ++ ACCOUNT_ERRNO(ENOENT); + break; + case ENOTDIR: + errdesc = "ENOTDIR"; ++ ACCOUNT_ERRNO(ENOTDIR); + break; + case ENODEV: + errdesc = "ENODEV"; ++ ACCOUNT_ERRNO(ENODEV); + break; + case EPERM: + errdesc = "EPERM"; ++ ACCOUNT_ERRNO(EPERM); + break; + case ENOMEM: + errdesc = "ENOMEM"; ++ ACCOUNT_ERRNO(ENOMEM); + break; + case ENOTCONN: + errdesc = "ENOTCONN"; ++ ACCOUNT_ERRNO(ENOTCONN); + break; + case ECONNREFUSED: + errdesc = "ECONNREFUSED"; ++ ACCOUNT_ERRNO(ECONNREFUSED); + break; + case EOVERFLOW: + errdesc = "EOVERFLOW"; ++ ACCOUNT_ERRNO(EOVERFLOW); + break; + case EBUSY: + errdesc = "EBUSY"; ++ ACCOUNT_ERRNO(EBUSY); + break; + case ENOTEMPTY: + errdesc = "ENOTEMPTY"; ++ ACCOUNT_ERRNO(ENOTEMPTY); + break; + default: + errdesc = strerror(errno); +@@ -269,7 +295,13 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + + gf_log_callingfn("glusterfs-fuse", loglevel, + "writing to fuse device failed: %s", errdesc); ++ if (errno_promoted) ++ gf_log("glusterfs-fuse", GF_LOG_WARNING, ++ "writing to fuse device yielded %s %d times", errdesc, ++ UINT8_MAX + 1); + return errno; ++ ++#undef ACCOUNT_ERRNO + } + + fouh = iov_out[0].iov_base; +@@ -6584,6 +6616,8 @@ init(xlator_t *this_xl) + INIT_LIST_HEAD(&priv->interrupt_list); + pthread_mutex_init(&priv->interrupt_mutex, NULL); + ++ pthread_mutex_init(&priv->fusedev_errno_cnt_mutex, NULL); ++ + /* get options from option dictionary */ + ret = dict_get_str(options, ZR_MOUNTPOINT_OPT, &value_string); + if (ret == -1 || value_string == NULL) { 
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index d2d462c..2fb15a6 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -78,6 +78,20 @@ typedef struct fuse_in_header fuse_in_header_t; + typedef void(fuse_handler_t)(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf); + ++enum fusedev_errno { ++ FUSEDEV_ENOENT, ++ FUSEDEV_ENOTDIR, ++ FUSEDEV_ENODEV, ++ FUSEDEV_EPERM, ++ FUSEDEV_ENOMEM, ++ FUSEDEV_ENOTCONN, ++ FUSEDEV_ECONNREFUSED, ++ FUSEDEV_EOVERFLOW, ++ FUSEDEV_EBUSY, ++ FUSEDEV_ENOTEMPTY, ++ FUSEDEV_EMAXPLUS ++}; ++ + struct fuse_private { + int fd; + uint32_t proto_minor; +@@ -192,6 +206,10 @@ struct fuse_private { + /* LRU Limit, if not set, default is 64k for now */ + uint32_t lru_limit; + uint32_t invalidate_limit; ++ ++ /* counters for fusdev errnos */ ++ uint8_t fusedev_errno_cnt[FUSEDEV_EMAXPLUS]; ++ pthread_mutex_t fusedev_errno_cnt_mutex; + }; + typedef struct fuse_private fuse_private_t; + +-- +1.8.3.1 + diff --git a/SOURCES/0442-fuse-correctly-handle-setxattr-values.patch b/SOURCES/0442-fuse-correctly-handle-setxattr-values.patch new file mode 100644 index 0000000..4be3b85 --- /dev/null +++ b/SOURCES/0442-fuse-correctly-handle-setxattr-values.patch @@ -0,0 +1,139 @@ +From 56c8ef4a64506c64aeb95d5a2c38d7107f90ac3a Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Tue, 5 Feb 2019 16:57:52 +0100 +Subject: [PATCH 442/449] fuse: correctly handle setxattr values + +The setxattr function receives a pointer to raw data, which may not be +null-terminated. When this data needs to be interpreted as a string, an +explicit null termination needs to be added before using the value. + +Upstream patch https://review.gluster.org/#/c/glusterfs/+/22157 +> Change-Id: Id110f9b215b22786da5782adec9449ce38d0d563 +> updates: bz#1193929 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Note: this change is not addressing the issue of bz 1787310, +indeed it is prerequisite for other changes that do. 
+ +BUG: 1787310 +Change-Id: I56417b130eb2a1f388108456c905a577eb658793 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202758 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/xlator.h | 2 +- + libglusterfs/src/xlator.c | 28 +++++++++++++++++++++++++--- + xlators/mount/fuse/src/fuse-bridge.c | 20 ++++++++++++++++---- + 3 files changed, 42 insertions(+), 8 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index db04c4d..8650ccc 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1043,7 +1043,7 @@ xlator_mem_acct_init(xlator_t *xl, int num_types); + void + xlator_mem_acct_unref(struct mem_acct *mem_acct); + int +-is_gf_log_command(xlator_t *trans, const char *name, char *value); ++is_gf_log_command(xlator_t *trans, const char *name, char *value, size_t size); + int + glusterd_check_log_level(const char *value); + int +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 6bd4f09..108b96a 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1278,8 +1278,21 @@ xlator_destroy(xlator_t *xl) + return 0; + } + ++static int32_t ++gf_bin_to_string(char *dst, size_t size, void *src, size_t len) ++{ ++ if (len >= size) { ++ return EINVAL; ++ } ++ ++ memcpy(dst, src, len); ++ dst[len] = 0; ++ ++ return 0; ++} ++ + int +-is_gf_log_command(xlator_t *this, const char *name, char *value) ++is_gf_log_command(xlator_t *this, const char *name, char *value, size_t size) + { + xlator_t *trav = NULL; + char key[1024] = { +@@ -1291,7 +1304,11 @@ is_gf_log_command(xlator_t *this, const char *name, char *value) + glusterfs_ctx_t *ctx = NULL; + + if (!strcmp("trusted.glusterfs.syslog", name)) { +- ret = gf_string2boolean(value, &syslog_flag); ++ ret = gf_bin_to_string(key, sizeof(key), value, size); ++ if (ret != 0) { ++ goto out; ++ } ++ ret = gf_string2boolean(key, &syslog_flag); + if (ret) { + ret = EOPNOTSUPP; + goto out; +@@ -1307,7 +1324,12 @@ is_gf_log_command(xlator_t *this, const char *name, char *value) + if (fnmatch("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE)) + goto out; + +- log_level = glusterd_check_log_level(value); ++ ret = gf_bin_to_string(key, sizeof(key), value, size); ++ if (ret != 0) { ++ goto out; ++ } ++ ++ log_level = glusterd_check_log_level(key); + if (log_level == -1) { + ret = EOPNOTSUPP; + goto out; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 2e7584c..cfad2b4 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4112,7 +4112,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + + /* Check if the command is for changing the log + level of process or specific xlator */ +- ret = is_gf_log_command(this, name, value); ++ ret = is_gf_log_command(this, name, value, fsi->size); + if (ret >= 0) { + op_errno = ret; + goto done; +@@ -4159,11 +4159,23 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + * fixups to make sure that's the case. To avoid nasty + * surprises, allocate an extra byte and add a NUL here. 
+ */ +- dict_value = memdup(value, fsi->size + 1); ++ dict_value = GF_MALLOC(fsi->size + 1, gf_common_mt_char); ++ if (dict_value == NULL) { ++ gf_log("glusterfs-fuse", GF_LOG_ERROR, ++ "%" PRIu64 ": SETXATTR value allocation failed", ++ finh->unique); ++ op_errno = ENOMEM; ++ goto done; ++ } ++ memcpy(dict_value, value, fsi->size); + dict_value[fsi->size] = '\0'; + } +- dict_set(state->xattr, newkey, +- data_from_dynptr((void *)dict_value, fsi->size)); ++ ret = dict_set_dynptr(state->xattr, newkey, dict_value, fsi->size); ++ if (ret < 0) { ++ op_errno = -ret; ++ GF_FREE(dict_value); ++ goto done; ++ } + + state->flags = fsi->flags; + state->name = newkey; +-- +1.8.3.1 + diff --git a/SOURCES/0443-fuse-fix-high-sev-coverity-issue.patch b/SOURCES/0443-fuse-fix-high-sev-coverity-issue.patch new file mode 100644 index 0000000..7c5e9c0 --- /dev/null +++ b/SOURCES/0443-fuse-fix-high-sev-coverity-issue.patch @@ -0,0 +1,55 @@ +From 3ac3312d63b9dc3c15cd8765ab8b7c601b007500 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Tue, 19 Mar 2019 22:51:14 +0530 +Subject: [PATCH 443/449] fuse : fix high sev coverity issue + +This patch fixed coverity issue in fuse-bridge.c. + +CID : 1398630 : Resource leak +CID : 1399757 : Uninitialized pointer read + +Upstream patch https://review.gluster.org/c/glusterfs/+/22382 +> updates: bz#789278 +> +> Change-Id: I69f8591400ee56a5d215eeac443a8e3d7777db27 +> Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1787310 +Change-Id: Ib2c9af25019ee57131b3d384fc4b557437e75d3e +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202759 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/src/fuse-bridge.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index cfad2b4..d17320b 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4174,6 +4174,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + if (ret < 0) { + op_errno = -ret; + GF_FREE(dict_value); ++ GF_FREE(newkey); + goto done; + } + +@@ -5963,7 +5964,12 @@ fuse_thread_proc(void *data) + ssize_t res = 0; + struct iobuf *iobuf = NULL; + fuse_in_header_t *finh = NULL; +- struct iovec iov_in[2]; ++ struct iovec iov_in[2] = { ++ { ++ 0, ++ }, ++ }; ++ + void *msg = NULL; + /* we need 512 extra buffer size for BATCH_FORGET fop. 
By tests, it is + found to be reduces 'REALLOC()' in the loop */ +-- +1.8.3.1 + diff --git a/SOURCES/0444-mount-fuse-Fixing-a-coverity-issue.patch b/SOURCES/0444-mount-fuse-Fixing-a-coverity-issue.patch new file mode 100644 index 0000000..c8e3e8c --- /dev/null +++ b/SOURCES/0444-mount-fuse-Fixing-a-coverity-issue.patch @@ -0,0 +1,40 @@ +From 53a6aed98aad73ff51f884bf815bccfa337eb524 Mon Sep 17 00:00:00 2001 +From: Barak Sason <bsasonro@redhat.com> +Date: Sun, 18 Aug 2019 17:38:09 +0300 +Subject: [PATCH 444/449] mount/fuse - Fixing a coverity issue + +Fixed resource leak of dict_value and newkey variables + +CID: 1398630 + +Upstream patch https://review.gluster.org/c/glusterfs/+/23260 +> Updates: bz#789278 +> +> Change-Id: I589fdc0aecaeb4f446cd29f95bad36ccd7a35beb +> Signed-off-by: Barak Sason <bsasonro@redhat.com> + +BUG: 1787310 +Change-Id: Id191face7b082e2e8d6e62f60b56248688d396f6 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202760 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/src/fuse-bridge.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index d17320b..f61fa39 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4165,6 +4165,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + "%" PRIu64 ": SETXATTR value allocation failed", + finh->unique); + op_errno = ENOMEM; ++ GF_FREE(newkey); + goto done; + } + memcpy(dict_value, value, fsi->size); +-- +1.8.3.1 + diff --git a/SOURCES/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch b/SOURCES/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch new file mode 100644 index 0000000..dea23f2 --- /dev/null +++ b/SOURCES/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch @@ -0,0 +1,481 @@ +From dc03340654d921916ac3890d713fc84ef4bb1e28 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Sat, 29 Sep 2018 13:15:35 +0530 +Subject: [PATCH 445/449] feature/changelog: Avoid thread creation if xlator is + not enabled + +Problem: +Changelog creates threads even if the changelog is not enabled + +Background: +Changelog xlator broadly does two things + 1. Journalling - Cosumers are geo-rep and glusterfind + 2. Event Notification for registered events like (open, release etc) - + Consumers are bitrot, geo-rep + +The existing option "changelog.changelog" controls journalling and +there is no option to control event notification and is enabled by +default. So when bitrot/geo-rep is not enabled on the volume, threads +and resources(rpc and rbuf) related to event notifications consumes +resources and cpu cycle which is unnecessary. + +Solution: +The solution is to have two different options as below. + 1. changelog-notification : Event notifications + 2. changelog : Journalling + +This patch introduces the option "changelog-notification" which is +not exposed to user. When either bitrot or changelog (journalling) +is enabled, it internally enbales 'changelog-notification'. But +once the 'changelog-notification' is enabled, it will not be disabled +for the life time of the brick process even after bitrot and changelog +is disabled. As of now, rpc resource cleanup has lot of races and is +difficult to cleanup cleanly. 
If allowed, it leads to memory leaks +and crashes on enable/disable of bitrot or changelog (journal) in a +loop. Hence to be safer, the event notification is not disabled within +lifetime of process once enabled. + +> Change-Id: Ifd00286e0966049e8eb9f21567fe407cf11bb02a +> Updates: #475 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry pick from commit 6de80bcd6366778ac34ce58ec496fa08cc02bd0b) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21896/) + +BUG: 1790336 +Change-Id: Ifd00286e0966049e8eb9f21567fe407cf11bb02a +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202778 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-lib/src/rpcsvc.c | 26 ++-- + tests/basic/changelog/changelog-history.t | 12 +- + tests/bugs/bitrot/bug-1227996.t | 1 - + tests/bugs/bitrot/bug-1245981.t | 4 +- + xlators/features/changelog/src/changelog-helpers.h | 4 + + .../features/changelog/src/changelog-rpc-common.c | 3 + + xlators/features/changelog/src/changelog.c | 149 +++++++++++++++------ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 13 ++ + 8 files changed, 154 insertions(+), 58 deletions(-) + +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index b058932..3f184bf 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -1865,6 +1865,18 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) + goto out; + } + ++ pthread_rwlock_rdlock(&svc->rpclock); ++ { ++ list_for_each_entry(prog, &svc->programs, program) ++ { ++ if ((prog->prognum == program->prognum) && ++ (prog->progver == program->progver)) { ++ break; ++ } ++ } ++ } ++ pthread_rwlock_unlock(&svc->rpclock); ++ + ret = rpcsvc_program_unregister_portmap(program); + if (ret == -1) { + gf_log(GF_RPCSVC, GF_LOG_ERROR, +@@ -1881,17 +1893,6 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) + goto out; + } + #endif +- pthread_rwlock_rdlock(&svc->rpclock); +- { +- list_for_each_entry(prog, &svc->programs, program) +- { +- if ((prog->prognum == program->prognum) && +- (prog->progver == program->progver)) { +- break; +- } +- } +- } +- pthread_rwlock_unlock(&svc->rpclock); + + gf_log(GF_RPCSVC, GF_LOG_DEBUG, + "Program unregistered: %s, Num: %d," +@@ -1912,6 +1913,9 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) + + ret = 0; + out: ++ if (prog) ++ GF_FREE(prog); ++ + if (ret == -1) { + if (program) { + gf_log(GF_RPCSVC, GF_LOG_ERROR, +diff --git a/tests/basic/changelog/changelog-history.t b/tests/basic/changelog/changelog-history.t +index 3ce4098..b56e247 100644 +--- a/tests/basic/changelog/changelog-history.t ++++ b/tests/basic/changelog/changelog-history.t +@@ -5,6 +5,7 @@ + + cleanup; + ++SCRIPT_TIMEOUT=300 + HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog + build_tester $HISTORY_BIN_PATH/get-history.c -lgfchangelog + +@@ -68,18 +69,21 @@ TEST $CLI volume set $V0 changelog.changelog off + sleep 3 + time_after_disable=$(date '+%s') + ++TEST $CLI volume set $V0 changelog.changelog on ++sleep 5 ++ + #Passes, gives the changelogs till continuous changelogs are available + # but returns 1 +-EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2 ++EXPECT_WITHIN 10 "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2 + + #Fails as start falls between htime files +-EXPECT "-3" 
$HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1 ++EXPECT_WITHIN 10 "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1 + + #Passes as start and end falls in same htime file +-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2 ++EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2 + + #Passes, gives the changelogs till continuous changelogs are available +-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable ++EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable + + TEST rm $HISTORY_BIN_PATH/get-history + +diff --git a/tests/bugs/bitrot/bug-1227996.t b/tests/bugs/bitrot/bug-1227996.t +index 47ebc42..121c7b5 100644 +--- a/tests/bugs/bitrot/bug-1227996.t ++++ b/tests/bugs/bitrot/bug-1227996.t +@@ -17,7 +17,6 @@ TEST pidof glusterd; + ## Lets create and start the volume + TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 + TEST $CLI volume start $V0 +- + ## Enable bitrot on volume $V0 + TEST $CLI volume bitrot $V0 enable + +diff --git a/tests/bugs/bitrot/bug-1245981.t b/tests/bugs/bitrot/bug-1245981.t +index 2bed4d9..f395525 100644 +--- a/tests/bugs/bitrot/bug-1245981.t ++++ b/tests/bugs/bitrot/bug-1245981.t +@@ -47,9 +47,9 @@ touch $M0/5 + sleep `expr $SLEEP_TIME \* 2` + + backpath=$(get_backend_paths $fname) +-TEST getfattr -m . -n trusted.bit-rot.signature $backpath ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath + + backpath=$(get_backend_paths $M0/new_file) +-TEST getfattr -m . -n trusted.bit-rot.signature $backpath ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath + + cleanup; +diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h +index 517c4dc..3afacc9 100644 +--- a/xlators/features/changelog/src/changelog-helpers.h ++++ b/xlators/features/changelog/src/changelog-helpers.h +@@ -190,8 +190,12 @@ typedef struct changelog_ev_selector { + + /* changelog's private structure */ + struct changelog_priv { ++ /* changelog journalling */ + gf_boolean_t active; + ++ /* changelog live notifications */ ++ gf_boolean_t rpc_active; ++ + /* to generate unique socket file per brick */ + char *changelog_brick; + +diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c +index dcdcfb1..f2d1853 100644 +--- a/xlators/features/changelog/src/changelog-rpc-common.c ++++ b/xlators/features/changelog/src/changelog-rpc-common.c +@@ -263,6 +263,9 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile, + struct rpcsvc_program *prog = NULL; + rpc_transport_t *trans = NULL; + ++ if (!rpc) ++ return; ++ + while (*progs) { + prog = *progs; + (void)rpcsvc_program_unregister(rpc, prog); +diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c +index d9025f3..ff06c09 100644 +--- a/xlators/features/changelog/src/changelog.c ++++ b/xlators/features/changelog/src/changelog.c +@@ -34,6 +34,12 @@ static struct changelog_bootstrap cb_bootstrap[] = { + }, + }; + ++static int ++changelog_init_rpc(xlator_t *this, changelog_priv_t *priv); ++ ++static int ++changelog_init(xlator_t *this, changelog_priv_t *priv); ++ + /* Entry operations - TYPE III */ + + /** +@@ -2008,6 +2014,11 @@ notify(xlator_t *this, int event, void 
*data, ...) + uint64_t clntcnt = 0; + changelog_clnt_t *conn = NULL; + gf_boolean_t cleanup_notify = _gf_false; ++ char sockfile[UNIX_PATH_MAX] = { ++ 0, ++ }; ++ rpcsvc_listener_t *listener = NULL; ++ rpcsvc_listener_t *next = NULL; + + INIT_LIST_HEAD(&queue); + +@@ -2021,23 +2032,40 @@ notify(xlator_t *this, int event, void *data, ...) + "cleanup changelog rpc connection of brick %s", + priv->victim->name); + +- this->cleanup_starting = 1; +- changelog_destroy_rpc_listner(this, priv); +- conn = &priv->connections; +- if (conn) +- changelog_ev_cleanup_connections(this, conn); +- xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); +- clntcnt = GF_ATOMIC_GET(priv->clntcnt); +- +- if (!xprtcnt && !clntcnt) { +- LOCK(&priv->lock); +- { +- cleanup_notify = priv->notify_down; +- priv->notify_down = _gf_true; ++ if (priv->rpc_active) { ++ this->cleanup_starting = 1; ++ changelog_destroy_rpc_listner(this, priv); ++ conn = &priv->connections; ++ if (conn) ++ changelog_ev_cleanup_connections(this, conn); ++ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); ++ clntcnt = GF_ATOMIC_GET(priv->clntcnt); ++ if (!xprtcnt && !clntcnt) { ++ LOCK(&priv->lock); ++ { ++ cleanup_notify = priv->notify_down; ++ priv->notify_down = _gf_true; ++ } ++ UNLOCK(&priv->lock); ++ list_for_each_entry_safe(listener, next, &priv->rpc->listeners, ++ list) ++ { ++ if (listener->trans) { ++ rpc_transport_unref(listener->trans); ++ } ++ } ++ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, ++ UNIX_PATH_MAX); ++ sys_unlink(sockfile); ++ if (priv->rpc) { ++ rpcsvc_destroy(priv->rpc); ++ priv->rpc = NULL; ++ } ++ if (!cleanup_notify) ++ default_notify(this, GF_EVENT_PARENT_DOWN, data); + } +- UNLOCK(&priv->lock); +- if (!cleanup_notify) +- default_notify(this, GF_EVENT_PARENT_DOWN, data); ++ } else { ++ default_notify(this, GF_EVENT_PARENT_DOWN, data); + } + goto out; + } +@@ -2425,6 +2453,22 @@ changelog_barrier_pthread_destroy(changelog_priv_t *priv) + LOCK_DESTROY(&priv->bflags.lock); + } + ++static void ++changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) ++{ ++ /* terminate rpc server */ ++ if (!this->cleanup_starting) ++ changelog_destroy_rpc_listner(this, priv); ++ ++ (void)changelog_cleanup_rpc_threads(this, priv); ++ /* cleanup rot buffs */ ++ rbuf_dtor(priv->rbuf); ++ ++ /* cleanup poller thread */ ++ if (priv->poller) ++ (void)changelog_thread_cleanup(this, priv->poller); ++} ++ + int + reconfigure(xlator_t *this, dict_t *options) + { +@@ -2433,6 +2477,9 @@ reconfigure(xlator_t *this, dict_t *options) + changelog_priv_t *priv = NULL; + gf_boolean_t active_earlier = _gf_true; + gf_boolean_t active_now = _gf_true; ++ gf_boolean_t rpc_active_earlier = _gf_true; ++ gf_boolean_t rpc_active_now = _gf_true; ++ gf_boolean_t iniate_rpc = _gf_false; + changelog_time_slice_t *slice = NULL; + changelog_log_data_t cld = { + 0, +@@ -2454,6 +2501,7 @@ reconfigure(xlator_t *this, dict_t *options) + + ret = -1; + active_earlier = priv->active; ++ rpc_active_earlier = priv->rpc_active; + + /* first stop the rollover and the fsync thread */ + changelog_cleanup_helper_threads(this, priv); +@@ -2487,6 +2535,29 @@ reconfigure(xlator_t *this, dict_t *options) + goto out; + + GF_OPTION_RECONF("changelog", active_now, options, bool, out); ++ GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool, ++ out); ++ ++ /* If journalling is enabled, enable rpc notifications */ ++ if (active_now && !active_earlier) { ++ if (!rpc_active_earlier) ++ iniate_rpc = _gf_true; ++ } ++ ++ if (rpc_active_now && !rpc_active_earlier) { ++ 
iniate_rpc = _gf_true; ++ } ++ ++ /* TODO: Disable of changelog-notifications is not supported for now ++ * as there is no clean way of cleaning up of rpc resources ++ */ ++ ++ if (iniate_rpc) { ++ ret = changelog_init_rpc(this, priv); ++ if (ret) ++ goto out; ++ priv->rpc_active = _gf_true; ++ } + + /** + * changelog_handle_change() handles changes that could possibly +@@ -2618,6 +2689,7 @@ changelog_init_options(xlator_t *this, changelog_priv_t *priv) + goto dealloc_2; + + GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2); ++ GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2); + GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2); + + GF_OPTION_INIT("op-mode", tmp, str, dealloc_2); +@@ -2656,22 +2728,6 @@ error_return: + return -1; + } + +-static void +-changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) +-{ +- /* terminate rpc server */ +- if (!this->cleanup_starting) +- changelog_destroy_rpc_listner(this, priv); +- +- (void)changelog_cleanup_rpc_threads(this, priv); +- /* cleanup rot buffs */ +- rbuf_dtor(priv->rbuf); +- +- /* cleanup poller thread */ +- if (priv->poller) +- (void)changelog_thread_cleanup(this, priv->poller); +-} +- + static int + changelog_init_rpc(xlator_t *this, changelog_priv_t *priv) + { +@@ -2768,10 +2824,13 @@ init(xlator_t *this) + INIT_LIST_HEAD(&priv->queue); + priv->barrier_enabled = _gf_false; + +- /* RPC ball rolling.. */ +- ret = changelog_init_rpc(this, priv); +- if (ret) +- goto cleanup_barrier; ++ if (priv->rpc_active || priv->active) { ++ /* RPC ball rolling.. */ ++ ret = changelog_init_rpc(this, priv); ++ if (ret) ++ goto cleanup_barrier; ++ priv->rpc_active = _gf_true; ++ } + + ret = changelog_init(this, priv); + if (ret) +@@ -2783,7 +2842,9 @@ init(xlator_t *this) + return 0; + + cleanup_rpc: +- changelog_cleanup_rpc(this, priv); ++ if (priv->rpc_active) { ++ changelog_cleanup_rpc(this, priv); ++ } + cleanup_barrier: + changelog_barrier_pthread_destroy(priv); + cleanup_options: +@@ -2808,9 +2869,10 @@ fini(xlator_t *this) + priv = this->private; + + if (priv) { +- /* terminate RPC server/threads */ +- changelog_cleanup_rpc(this, priv); +- ++ if (priv->active || priv->rpc_active) { ++ /* terminate RPC server/threads */ ++ changelog_cleanup_rpc(this, priv); ++ } + /* call barrier_disable to cancel timer */ + if (priv->barrier_enabled) + __chlog_barrier_disable(this, &queue); +@@ -2879,6 +2941,13 @@ struct volume_options options[] = { + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_BASIC, + .tags = {"journal", "georep", "glusterfind"}}, ++ {.key = {"changelog-notification"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .default_value = "off", ++ .description = "enable/disable changelog live notification", ++ .op_version = {3}, ++ .level = OPT_STATUS_BASIC, ++ .tags = {"bitrot", "georep"}}, + {.key = {"changelog-brick"}, + .type = GF_OPTION_TYPE_PATH, + .description = "brick path to generate unique socket file name." 
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 16346e7..13f84ea 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -1876,6 +1876,19 @@ brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + ret = xlator_set_fixed_option(xl, "changelog-dir", changelog_basepath); + if (ret) + goto out; ++ ++ ret = glusterd_is_bitrot_enabled(volinfo); ++ if (ret == -1) { ++ goto out; ++ } else if (ret) { ++ ret = xlator_set_fixed_option(xl, "changelog-notification", "on"); ++ if (ret) ++ goto out; ++ } else { ++ ret = xlator_set_fixed_option(xl, "changelog-notification", "off"); ++ if (ret) ++ goto out; ++ } + out: + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0446-bitrot-Make-number-of-signer-threads-configurable.patch b/SOURCES/0446-bitrot-Make-number-of-signer-threads-configurable.patch new file mode 100644 index 0000000..8eb2089 --- /dev/null +++ b/SOURCES/0446-bitrot-Make-number-of-signer-threads-configurable.patch @@ -0,0 +1,594 @@ +From 866a4c49ad9c5a9125814a9f843d4c7fd967ab2b Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Mon, 3 Feb 2020 18:10:17 +0530 +Subject: [PATCH 446/449] bitrot: Make number of signer threads configurable + +The number of signing process threads (glfs_brpobj) +is set to 4 by default. The recommendation is to set +it to number of cores available. This patch makes it +configurable as follows + +gluster vol bitrot <volname> signer-threads <count> + +> fixes: bz#1797869 +> Change-Id: Ia883b3e5e34e0bc8d095243508d320c9c9c58adc +> Signed-off-by: Kotresh HR <khiremat@redhat.com> +> (Cherry pick from commit 8fad76650bd85463708f59d2518f5b764ae4c702) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24091/) + +BUG: 1790336 +Change-Id: Ia883b3e5e34e0bc8d095243508d320c9c9c58adc +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202780 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 29 +++++++- + cli/src/cli-cmd-volume.c | 12 +++ + doc/gluster.8 | 6 ++ + libglusterfs/src/glusterfs/common-utils.h | 1 + + rpc/xdr/src/cli1-xdr.x | 1 + + tests/bitrot/br-signer-threads-config-1797869.t | 73 +++++++++++++++++++ + xlators/features/bit-rot/src/bitd/bit-rot.c | 45 +++++++++--- + xlators/features/bit-rot/src/bitd/bit-rot.h | 20 ++--- + .../bit-rot/src/stub/bit-rot-stub-mem-types.h | 1 + + xlators/mgmt/glusterd/src/glusterd-bitrot.c | 85 ++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 16 ++-- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 9 +++ + 12 files changed, 270 insertions(+), 28 deletions(-) + create mode 100644 tests/bitrot/br-signer-threads-config-1797869.t + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 7446b95..5fd05f4 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -5661,7 +5661,7 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + char *volname = NULL; + char *opwords[] = { + "enable", "disable", "scrub-throttle", "scrub-frequency", "scrub", +- "signing-time", NULL}; ++ "signing-time", "signer-threads", NULL}; + char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL}; + char *scrub_freq_values[] = {"hourly", "daily", "weekly", "biweekly", + "monthly", "minute", NULL}; +@@ -5669,6 +5669,7 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t 
**options) + dict_t *dict = NULL; + gf_bitrot_type type = GF_BITROT_OPTION_TYPE_NONE; + int32_t expiry_time = 0; ++ int32_t signer_th_count = 0; + + GF_ASSERT(words); + GF_ASSERT(options); +@@ -5849,6 +5850,31 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + } + goto set_type; + } ++ } else if (!strcmp(words[3], "signer-threads")) { ++ if (!words[4]) { ++ cli_err( ++ "Missing signer-thread value for bitrot " ++ "option"); ++ ret = -1; ++ goto out; ++ } else { ++ type = GF_BITROT_OPTION_TYPE_SIGNER_THREADS; ++ ++ signer_th_count = strtol(words[4], NULL, 0); ++ if (signer_th_count < 1) { ++ cli_err("signer-thread count should not be less than 1"); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = dict_set_uint32(dict, "signer-threads", ++ (unsigned int)signer_th_count); ++ if (ret) { ++ cli_out("Failed to set dict for bitrot"); ++ goto out; ++ } ++ goto set_type; ++ } + } else { + cli_err( + "Invalid option %s for bitrot. Please enter valid " +@@ -5857,7 +5883,6 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + ret = -1; + goto out; + } +- + set_type: + ret = dict_set_int32(dict, "type", type); + if (ret < 0) +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index f33fc99..72504ca 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3236,6 +3236,16 @@ struct cli_cmd bitrot_cmds[] = { + {"volume bitrot <VOLNAME> {enable|disable}", NULL, /*cli_cmd_bitrot_cbk,*/ + "Enable/disable bitrot for volume <VOLNAME>"}, + ++ {"volume bitrot <VOLNAME> signing-time <time-in-secs>", ++ NULL, /*cli_cmd_bitrot_cbk,*/ ++ "Waiting time for an object after last fd is closed to start signing " ++ "process"}, ++ ++ {"volume bitrot <VOLNAME> signer-threads <count>", ++ NULL, /*cli_cmd_bitrot_cbk,*/ ++ "Number of signing process threads. Usually set to number of available " ++ "cores"}, ++ + {"volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive}", + NULL, /*cli_cmd_bitrot_cbk,*/ + "Set the speed of the scrubber for volume <VOLNAME>"}, +@@ -3251,6 +3261,8 @@ struct cli_cmd bitrot_cmds[] = { + "the scrubber. ondemand starts the scrubber immediately."}, + + {"volume bitrot <VOLNAME> {enable|disable}\n" ++ "volume bitrot <VOLNAME> signing-time <time-in-secs>\n" ++ "volume bitrot <VOLNAME> signer-threads <count>\n" + "volume bitrot <volname> scrub-throttle {lazy|normal|aggressive}\n" + "volume bitrot <volname> scrub-frequency {hourly|daily|weekly|biweekly" + "|monthly}\n" +diff --git a/doc/gluster.8 b/doc/gluster.8 +index 66bdb48..084346d 100644 +--- a/doc/gluster.8 ++++ b/doc/gluster.8 +@@ -244,6 +244,12 @@ Use "!<OPTION>" to reset option <OPTION> to default value. + \fB\ volume bitrot <VOLNAME> {enable|disable} \fR + Enable/disable bitrot for volume <VOLNAME> + .TP ++\fB\ volume bitrot <VOLNAME> signing-time <time-in-secs> \fR ++Waiting time for an object after last fd is closed to start signing process. ++.TP ++\fB\ volume bitrot <VOLNAME> signer-threads <count> \fR ++Number of signing process threads. Usually set to number of available cores. 
++.TP + \fB\ volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive} \fR + Scrub-throttle value is a measure of how fast or slow the scrubber scrubs the filesystem for volume <VOLNAME> + .TP +diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h +index 0e2ecc7..f0a0a41 100644 +--- a/libglusterfs/src/glusterfs/common-utils.h ++++ b/libglusterfs/src/glusterfs/common-utils.h +@@ -126,6 +126,7 @@ trap(void); + + /* Default value of signing waiting time to sign a file for bitrot */ + #define SIGNING_TIMEOUT "120" ++#define BR_WORKERS "4" + + /* xxhash */ + #define GF_XXH64_DIGEST_LENGTH 8 +diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x +index a32c864..777cb00 100644 +--- a/rpc/xdr/src/cli1-xdr.x ++++ b/rpc/xdr/src/cli1-xdr.x +@@ -68,6 +68,7 @@ enum gf_bitrot_type { + GF_BITROT_OPTION_TYPE_EXPIRY_TIME, + GF_BITROT_CMD_SCRUB_STATUS, + GF_BITROT_CMD_SCRUB_ONDEMAND, ++ GF_BITROT_OPTION_TYPE_SIGNER_THREADS, + GF_BITROT_OPTION_TYPE_MAX + }; + +diff --git a/tests/bitrot/br-signer-threads-config-1797869.t b/tests/bitrot/br-signer-threads-config-1797869.t +new file mode 100644 +index 0000000..657ef3e +--- /dev/null ++++ b/tests/bitrot/br-signer-threads-config-1797869.t +@@ -0,0 +1,73 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../cluster.rc ++ ++function get_bitd_count_1 { ++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | wc -l ++} ++ ++function get_bitd_count_2 { ++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | wc -l ++} ++ ++function get_bitd_pid_1 { ++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | awk '{print $2}' ++} ++ ++function get_bitd_pid_2 { ++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | awk '{print $2}' ++} ++ ++function get_signer_th_count_1 { ++ ps -eL | grep $(get_bitd_pid_1) | grep glfs_brpobj | wc -l ++} ++ ++function get_signer_th_count_2 { ++ ps -eL | grep $(get_bitd_pid_2) | grep glfs_brpobj | wc -l ++} ++ ++cleanup; ++ ++TEST launch_cluster 2 ++ ++TEST $CLI_1 peer probe $H2; ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count; ++ ++TEST $CLI_1 volume create $V0 $H1:$B1 ++TEST $CLI_1 volume create $V1 $H2:$B2 ++EXPECT 'Created' volinfo_field_1 $V0 'Status'; ++EXPECT 'Created' volinfo_field_1 $V1 'Status'; ++ ++TEST $CLI_1 volume start $V0 ++TEST $CLI_1 volume start $V1 ++EXPECT 'Started' volinfo_field_1 $V0 'Status'; ++EXPECT 'Started' volinfo_field_1 $V1 'Status'; ++ ++#Enable bitrot ++TEST $CLI_1 volume bitrot $V0 enable ++TEST $CLI_1 volume bitrot $V1 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2 ++ ++old_bitd_pid_1=$(get_bitd_pid_1) ++old_bitd_pid_2=$(get_bitd_pid_2) ++TEST $CLI_1 volume bitrot $V0 signer-threads 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1 ++EXPECT_NOT "$old_bitd_pid_1" get_bitd_pid_1; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2 ++EXPECT "$old_bitd_pid_2" get_bitd_pid_2; ++ ++old_bitd_pid_1=$(get_bitd_pid_1) ++old_bitd_pid_2=$(get_bitd_pid_2) ++TEST $CLI_1 volume bitrot $V1 signer-threads 2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_signer_th_count_2 ++EXPECT_NOT "$old_bitd_pid_2" get_bitd_pid_2; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1 ++EXPECT "$old_bitd_pid_1" get_bitd_pid_1; ++ ++cleanup; +diff 
--git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c +index 7b1c5dc..b8feef7 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.c ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c +@@ -1734,22 +1734,26 @@ out: + return 0; + } + +-/** +- * Initialize signer specific structures, spawn worker threads. +- */ +- + static void + br_fini_signer(xlator_t *this, br_private_t *priv) + { + int i = 0; + +- for (; i < BR_WORKERS; i++) { ++ if (priv == NULL) ++ return; ++ ++ for (; i < priv->signer_th_count; i++) { + (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); + } ++ GF_FREE(priv->obj_queue->workers); + + pthread_cond_destroy(&priv->object_cond); + } + ++/** ++ * Initialize signer specific structures, spawn worker threads. ++ */ ++ + static int32_t + br_init_signer(xlator_t *this, br_private_t *priv) + { +@@ -1769,7 +1773,12 @@ br_init_signer(xlator_t *this, br_private_t *priv) + goto cleanup_cond; + INIT_LIST_HEAD(&priv->obj_queue->objects); + +- for (i = 0; i < BR_WORKERS; i++) { ++ priv->obj_queue->workers = GF_CALLOC( ++ priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t); ++ if (!priv->obj_queue->workers) ++ goto cleanup_obj_queue; ++ ++ for (i = 0; i < priv->signer_th_count; i++) { + ret = gf_thread_create(&priv->obj_queue->workers[i], NULL, + br_process_object, this, "brpobj"); + if (ret != 0) { +@@ -1787,7 +1796,9 @@ cleanup_threads: + for (i--; i >= 0; i--) { + (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); + } ++ GF_FREE(priv->obj_queue->workers); + ++cleanup_obj_queue: + GF_FREE(priv->obj_queue); + + cleanup_cond: +@@ -1840,7 +1851,7 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) + if (contribution == 0) + contribution = 1; + spec.rate = BR_HASH_CALC_READ_SIZE * contribution; +- spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE; ++ spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE; + + #endif + +@@ -1860,11 +1871,16 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) + static int32_t + br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options) + { +- if (options) ++ if (options) { + GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32, + error_return); +- else ++ GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options, ++ uint32, error_return); ++ } else { + GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); ++ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, ++ error_return); ++ } + + return 0; + +@@ -1880,6 +1896,8 @@ br_signer_init(xlator_t *this, br_private_t *priv) + + GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); + GF_OPTION_INIT("brick-count", numbricks, int32, error_return); ++ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, ++ error_return); + + ret = br_rate_limit_signer(this, priv->child_count, numbricks); + if (ret) +@@ -2210,6 +2228,15 @@ struct volume_options options[] = { + .description = "Pause/Resume scrub. Upon resume, scrubber " + "continues from where it left off.", + }, ++ { ++ .key = {"signer-threads"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = BR_WORKERS, ++ .op_version = {GD_OP_VERSION_7_0}, ++ .flags = OPT_FLAG_SETTABLE, ++ .description = "Number of signing process threads. 
As a best " ++ "practice, set this to the number of processor cores", ++ }, + {.key = {NULL}}, + }; + +diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h +index a4d4fd7..8ac7dcd 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.h ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.h +@@ -30,12 +30,6 @@ + + #include <openssl/sha.h> + +-/** +- * TODO: make this configurable. As a best practice, set this to the +- * number of processor cores. +- */ +-#define BR_WORKERS 4 +- + typedef enum scrub_throttle { + BR_SCRUB_THROTTLE_VOID = -1, + BR_SCRUB_THROTTLE_LAZY = 0, +@@ -108,12 +102,12 @@ struct br_child { + typedef struct br_child br_child_t; + + struct br_obj_n_workers { +- struct list_head objects; /* queue of objects expired from the +- timer wheel and ready to be picked +- up for signing */ +- pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects +- from the above queue and start +- signing each object */ ++ struct list_head objects; /* queue of objects expired from the ++ timer wheel and ready to be picked ++ up for signing */ ++ pthread_t *workers; /* Threads which pick up the objects ++ from the above queue and start ++ signing each object */ + }; + + struct br_scrubber { +@@ -209,6 +203,8 @@ struct br_private { + + uint32_t expiry_time; /* objects "wait" time */ + ++ uint32_t signer_th_count; /* Number of signing process threads */ ++ + tbf_t *tbf; /* token bucket filter */ + + gf_boolean_t iamscrubber; /* function as a fs scrubber */ +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +index 40bcda1..9d93caf 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +@@ -29,6 +29,7 @@ enum br_mem_types { + gf_br_stub_mt_sigstub_t, + gf_br_mt_br_child_event_t, + gf_br_stub_mt_misc, ++ gf_br_mt_br_worker_t, + gf_br_stub_mt_end, + }; + +diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c +index c653249..f79af2d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-bitrot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c +@@ -34,6 +34,7 @@ const char *gd_bitrot_op_list[GF_BITROT_OPTION_TYPE_MAX] = { + [GF_BITROT_OPTION_TYPE_SCRUB_FREQ] = "scrub-frequency", + [GF_BITROT_OPTION_TYPE_SCRUB] = "scrub", + [GF_BITROT_OPTION_TYPE_EXPIRY_TIME] = "expiry-time", ++ [GF_BITROT_OPTION_TYPE_SIGNER_THREADS] = "signer-threads", + }; + + int +@@ -354,6 +355,81 @@ out: + return ret; + } + ++static gf_boolean_t ++is_bitd_configure_noop(xlator_t *this, glusterd_volinfo_t *volinfo) ++{ ++ gf_boolean_t noop = _gf_true; ++ glusterd_brickinfo_t *brickinfo = NULL; ++ ++ if (!glusterd_is_bitrot_enabled(volinfo)) ++ goto out; ++ else if (volinfo->status != GLUSTERD_STATUS_STARTED) ++ goto out; ++ else { ++ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) ++ { ++ if (!glusterd_is_local_brick(this, volinfo, brickinfo)) ++ continue; ++ noop = _gf_false; ++ return noop; ++ } ++ } ++out: ++ return noop; ++} ++ ++static int ++glusterd_bitrot_signer_threads(glusterd_volinfo_t *volinfo, dict_t *dict, ++ char *key, char **op_errstr) ++{ ++ int32_t ret = -1; ++ uint32_t signer_th_count = 0; ++ uint32_t existing_th_count = 0; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ char dkey[32] = { ++ 0, ++ }; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ ++ 
ret = dict_get_uint32(dict, "signer-threads", &signer_th_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Unable to get bitrot signer thread count."); ++ goto out; ++ } ++ ++ ret = dict_get_uint32(volinfo->dict, key, &existing_th_count); ++ if (ret == 0 && signer_th_count == existing_th_count) { ++ goto out; ++ } ++ ++ snprintf(dkey, sizeof(dkey), "%d", signer_th_count); ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set option %s", key); ++ goto out; ++ } ++ ++ if (!is_bitd_configure_noop(this, volinfo)) { ++ ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL, ++ PROC_START_NO_WAIT); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL, ++ "Failed to reconfigure bitrot services"); ++ goto out; ++ } ++ } ++out: ++ return ret; ++} ++ + static int + glusterd_bitrot_enable(glusterd_volinfo_t *volinfo, char **op_errstr) + { +@@ -594,6 +670,15 @@ glusterd_op_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + volinfo, dict, "features.expiry-time", op_errstr); + if (ret) + goto out; ++ break; ++ ++ case GF_BITROT_OPTION_TYPE_SIGNER_THREADS: ++ ret = glusterd_bitrot_signer_threads( ++ volinfo, dict, "features.signer-threads", op_errstr); ++ if (ret) ++ goto out; ++ break; ++ + case GF_BITROT_CMD_SCRUB_STATUS: + case GF_BITROT_CMD_SCRUB_ONDEMAND: + break; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 13f84ea..094a71f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4658,6 +4658,12 @@ bitrot_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + return -1; + } + ++ if (!strcmp(vme->option, "signer-threads")) { ++ ret = xlator_set_fixed_option(xl, "signer-threads", vme->value); ++ if (ret) ++ return -1; ++ } ++ + return ret; + } + +@@ -4940,18 +4946,18 @@ glusterd_prepare_shd_volume_options_for_tier(glusterd_volinfo_t *volinfo, + dict_t *set_dict) + { + int ret = -1; +- char *key = NULL; ++ char *key = NULL; + +- key = volgen_get_shd_key (volinfo->tier_info.cold_type); ++ key = volgen_get_shd_key(volinfo->tier_info.cold_type); + if (key) { +- ret = dict_set_str (set_dict, key, "enable"); ++ ret = dict_set_str(set_dict, key, "enable"); + if (ret) + goto out; + } + +- key = volgen_get_shd_key (volinfo->tier_info.hot_type); ++ key = volgen_get_shd_key(volinfo->tier_info.hot_type); + if (key) { +- ret = dict_set_str (set_dict, key, "enable"); ++ ret = dict_set_str(set_dict, key, "enable"); + if (ret) + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 9001b88..62acadf 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3379,6 +3379,15 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_3_7_0, + .type = NO_DOC, + }, ++ { ++ .key = "features.signer-threads", ++ .voltype = "features/bit-rot", ++ .value = BR_WORKERS, ++ .option = "signer-threads", ++ .op_version = GD_OP_VERSION_7_0, ++ .type = NO_DOC, ++ }, ++ /* Upcall translator options */ + /* Upcall translator options */ + { + .key = "features.cache-invalidation", +-- +1.8.3.1 + diff --git a/SOURCES/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch b/SOURCES/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch new file 
mode 100644 index 0000000..a39b61b --- /dev/null +++ b/SOURCES/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch @@ -0,0 +1,359 @@ +From 51090a4b3cb000d601083f12d1875547819fc03f Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Wed, 4 Mar 2020 09:17:26 +0530 +Subject: [PATCH 447/449] core[brick_mux]: brick crashed when creating and + deleting volumes over time + +Problem: In brick_mux environment, while volumes are created/stopped in a loop + after running a long time the main brick is crashed.The brick is crashed + because the main brick process was not cleaned up memory for all objects + at the time of detaching a volume. + Below are the objects that are missed at the time of detaching a volume + 1) xlator object for a brick graph + 2) local_pool for posix_lock xlator + 3) rpc object cleanup at quota xlator + 4) inode leak at brick xlator + +Solution: To avoid the crash resolve all leak at the time of detaching a brick +> Change-Id: Ibb6e46c5fba22b9441a88cbaf6b3278823235913 +> updates: #977 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit e589d8de66d3325da8fbbbe44d1a5bd6335e08ab) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24209/) + +BUG: 1790336 +Change-Id: Ibb6e46c5fba22b9441a88cbaf6b3278823235913 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202782 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + libglusterfs/src/glusterfs/glusterfs.h | 1 + + libglusterfs/src/graph.c | 1 + + libglusterfs/src/graph.y | 2 +- + libglusterfs/src/xlator.c | 29 ++++++++---- + xlators/features/changelog/src/changelog.c | 1 + + xlators/features/locks/src/posix.c | 4 ++ + xlators/features/quota/src/quota-enforcer-client.c | 14 +++++- + xlators/features/quota/src/quota.c | 54 ++++++++++++++++++++-- + xlators/features/quota/src/quota.h | 3 ++ + xlators/protocol/server/src/server.c | 12 +++-- + 10 files changed, 103 insertions(+), 18 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 177a020..584846e 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -603,6 +603,7 @@ struct _glusterfs_graph { + int used; /* Should be set when fuse gets + first CHILD_UP */ + uint32_t volfile_checksum; ++ pthread_mutex_t mutex; + }; + typedef struct _glusterfs_graph glusterfs_graph_t; + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index bb5e67a..1cd92db 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1092,6 +1092,7 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph) + ret = xlator_tree_free_memacct(graph->first); + + list_del_init(&graph->list); ++ pthread_mutex_destroy(&graph->mutex); + GF_FREE(graph); + + return ret; +diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y +index 5b92985..5733515 100644 +--- a/libglusterfs/src/graph.y ++++ b/libglusterfs/src/graph.y +@@ -541,7 +541,7 @@ glusterfs_graph_new () + return NULL; + + INIT_LIST_HEAD (&graph->list); +- ++ pthread_mutex_init(&graph->mutex, NULL); + gettimeofday (&graph->dob, NULL); + + return graph; +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 108b96a..36cc32c 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -938,6 +938,8 @@ xlator_mem_cleanup(xlator_t *this) + xlator_list_t **trav_p = NULL; + xlator_t *top = 
NULL; + xlator_t *victim = NULL; ++ glusterfs_graph_t *graph = NULL; ++ gf_boolean_t graph_cleanup = _gf_false; + + if (this->call_cleanup || !this->ctx) + return; +@@ -945,6 +947,12 @@ xlator_mem_cleanup(xlator_t *this) + this->call_cleanup = 1; + ctx = this->ctx; + ++ inode_table = this->itable; ++ if (inode_table) { ++ inode_table_destroy(inode_table); ++ this->itable = NULL; ++ } ++ + xlator_call_fini(trav); + + while (prev) { +@@ -953,12 +961,6 @@ xlator_mem_cleanup(xlator_t *this) + prev = trav; + } + +- inode_table = this->itable; +- if (inode_table) { +- inode_table_destroy(inode_table); +- this->itable = NULL; +- } +- + if (this->fini) { + this->fini(this); + } +@@ -968,17 +970,28 @@ xlator_mem_cleanup(xlator_t *this) + if (ctx->active) { + top = ctx->active->first; + LOCK(&ctx->volfile_lock); +- /* TODO here we have leak for xlator node in a graph */ +- /* Need to move only top xlator from a graph */ + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + victim = (*trav_p)->xlator; + if (victim->call_cleanup && !strcmp(victim->name, this->name)) { ++ graph_cleanup = _gf_true; + (*trav_p) = (*trav_p)->next; + break; + } + } + UNLOCK(&ctx->volfile_lock); + } ++ ++ if (graph_cleanup) { ++ prev = this; ++ graph = ctx->active; ++ pthread_mutex_lock(&graph->mutex); ++ while (prev) { ++ trav = prev->next; ++ GF_FREE(prev); ++ prev = trav; ++ } ++ pthread_mutex_unlock(&graph->mutex); ++ } + } + + void +diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c +index ff06c09..b54112c 100644 +--- a/xlators/features/changelog/src/changelog.c ++++ b/xlators/features/changelog/src/changelog.c +@@ -2872,6 +2872,7 @@ fini(xlator_t *this) + if (priv->active || priv->rpc_active) { + /* terminate RPC server/threads */ + changelog_cleanup_rpc(this, priv); ++ GF_FREE(priv->ev_dispatcher); + } + /* call barrier_disable to cancel timer */ + if (priv->barrier_enabled) +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 9a14c64..50f1265 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -4102,6 +4102,10 @@ fini(xlator_t *this) + if (!priv) + return; + this->private = NULL; ++ if (this->local_pool) { ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; ++ } + GF_FREE(priv->brickname); + GF_FREE(priv); + +diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c +index 1a4c2e3..097439d 100644 +--- a/xlators/features/quota/src/quota-enforcer-client.c ++++ b/xlators/features/quota/src/quota-enforcer-client.c +@@ -362,16 +362,28 @@ quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata, + { + xlator_t *this = NULL; + int ret = 0; ++ quota_priv_t *priv = NULL; + + this = mydata; +- ++ priv = this->private; + switch (event) { + case RPC_CLNT_CONNECT: { ++ pthread_mutex_lock(&priv->conn_mutex); ++ { ++ priv->conn_status = _gf_true; ++ } ++ pthread_mutex_unlock(&priv->conn_mutex); + gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT"); + break; + } + + case RPC_CLNT_DISCONNECT: { ++ pthread_mutex_lock(&priv->conn_mutex); ++ { ++ priv->conn_status = _gf_false; ++ pthread_cond_signal(&priv->conn_cond); ++ } ++ pthread_mutex_unlock(&priv->conn_mutex); + gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT"); + break; + } +diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c +index a0c236d..d1123ce 100644 +--- a/xlators/features/quota/src/quota.c ++++ 
b/xlators/features/quota/src/quota.c +@@ -5014,6 +5014,43 @@ quota_forget(xlator_t *this, inode_t *inode) + return 0; + } + ++int ++notify(xlator_t *this, int event, void *data, ...) ++{ ++ quota_priv_t *priv = NULL; ++ int ret = 0; ++ rpc_clnt_t *rpc = NULL; ++ gf_boolean_t conn_status = _gf_true; ++ xlator_t *victim = data; ++ ++ priv = this->private; ++ if (!priv || !priv->is_quota_on) ++ goto out; ++ ++ if (event == GF_EVENT_PARENT_DOWN) { ++ rpc = priv->rpc_clnt; ++ if (rpc) { ++ rpc_clnt_disable(rpc); ++ pthread_mutex_lock(&priv->conn_mutex); ++ { ++ conn_status = priv->conn_status; ++ while (conn_status) { ++ (void)pthread_cond_wait(&priv->conn_cond, ++ &priv->conn_mutex); ++ conn_status = priv->conn_status; ++ } ++ } ++ pthread_mutex_unlock(&priv->conn_mutex); ++ gf_log(this->name, GF_LOG_INFO, ++ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); ++ } ++ } ++ ++out: ++ ret = default_notify(this, event, data); ++ return ret; ++} ++ + int32_t + init(xlator_t *this) + { +@@ -5056,6 +5093,10 @@ init(xlator_t *this) + goto err; + } + ++ pthread_mutex_init(&priv->conn_mutex, NULL); ++ pthread_cond_init(&priv->conn_cond, NULL); ++ priv->conn_status = _gf_false; ++ + if (priv->is_quota_on) { + rpc = quota_enforcer_init(this, this->options); + if (rpc == NULL) { +@@ -5169,20 +5210,22 @@ fini(xlator_t *this) + { + quota_priv_t *priv = NULL; + rpc_clnt_t *rpc = NULL; +- int i = 0, cnt = 0; + + priv = this->private; + if (!priv) + return; + rpc = priv->rpc_clnt; + priv->rpc_clnt = NULL; +- this->private = NULL; + if (rpc) { +- cnt = GF_ATOMIC_GET(rpc->refcount); +- for (i = 0; i < cnt; i++) +- rpc_clnt_unref(rpc); ++ rpc_clnt_connection_cleanup(&rpc->conn); ++ rpc_clnt_unref(rpc); + } ++ ++ this->private = NULL; + LOCK_DESTROY(&priv->lock); ++ pthread_mutex_destroy(&priv->conn_mutex); ++ pthread_cond_destroy(&priv->conn_cond); ++ + GF_FREE(priv); + if (this->local_pool) { + mem_pool_destroy(this->local_pool); +@@ -5314,6 +5357,7 @@ struct volume_options options[] = { + xlator_api_t xlator_api = { + .init = init, + .fini = fini, ++ .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ +diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h +index a5a99ca..e51ffd4 100644 +--- a/xlators/features/quota/src/quota.h ++++ b/xlators/features/quota/src/quota.h +@@ -217,6 +217,9 @@ struct quota_priv { + char *volume_uuid; + uint64_t validation_count; + int32_t quotad_conn_status; ++ pthread_mutex_t conn_mutex; ++ pthread_cond_t conn_cond; ++ gf_boolean_t conn_status; + }; + typedef struct quota_priv quota_priv_t; + +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index a5f09fe..54d9c0f 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -409,7 +409,13 @@ server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name) + + arg = calloc(1, sizeof(*arg)); + arg->this = this; +- arg->victim_name = gf_strdup(victim_name); ++ arg->victim_name = strdup(victim_name); ++ if (!arg->victim_name) { ++ gf_smsg(this->name, GF_LOG_CRITICAL, ENOMEM, LG_MSG_NO_MEMORY, ++ "Memory allocation is failed"); ++ return; ++ } ++ + th_ret = gf_thread_create_detached(&th_id, server_graph_janitor_threads, + arg, "graphjanitor"); + if (th_ret) { +@@ -417,7 +423,7 @@ server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name) + "graph janitor Thread" + " creation is failed for brick %s", + victim_name); +- 
GF_FREE(arg->victim_name); ++ free(arg->victim_name); + free(arg); + } + } +@@ -628,7 +634,7 @@ server_graph_janitor_threads(void *data) + } + + out: +- GF_FREE(arg->victim_name); ++ free(arg->victim_name); + free(arg); + return NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/0448-Posix-Use-simple-approach-to-close-fd.patch b/SOURCES/0448-Posix-Use-simple-approach-to-close-fd.patch new file mode 100644 index 0000000..f030358 --- /dev/null +++ b/SOURCES/0448-Posix-Use-simple-approach-to-close-fd.patch @@ -0,0 +1,341 @@ +From 175c99dccc47d2b4267a8819404e5cbeb8cfba11 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Thu, 12 Mar 2020 21:12:13 +0530 +Subject: [PATCH 448/449] Posix: Use simple approach to close fd + +Problem: posix_release(dir) functions add the fd's into a ctx->janitor_fds + and janitor thread closes the fd's.In brick_mux environment it is + difficult to handle race condition in janitor threads because brick + spawns a single janitor thread for all bricks. + +Solution: Use synctask to execute posix_release(dir) functions instead of + using background a thread to close fds. + +> Credits: Pranith Karampuri <pkarampu@redhat.com> +> Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e +> Fixes: bz#1811631 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry pick from commit fb20713b380e1df8d7f9e9df96563be2f9144fd6) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24221/) + +BUG: 1790336 +Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202791 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/glusterfs.h | 6 +- + libglusterfs/src/glusterfs/syncop.h | 7 +- + rpc/rpc-lib/src/rpcsvc.c | 6 ++ + run-tests.sh | 2 +- + tests/features/ssl-authz.t | 7 +- + xlators/storage/posix/src/posix-common.c | 4 -- + xlators/storage/posix/src/posix-helpers.c | 98 -------------------------- + xlators/storage/posix/src/posix-inode-fd-ops.c | 28 ++------ + xlators/storage/posix/src/posix.h | 3 - + 9 files changed, 20 insertions(+), 141 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 584846e..495a4d7 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -734,11 +734,7 @@ struct _glusterfs_ctx { + + struct list_head volfile_list; + +- /* Add members to manage janitor threads for cleanup fd */ +- struct list_head janitor_fds; +- pthread_cond_t janitor_cond; +- pthread_mutex_t janitor_lock; +- pthread_t janitor; ++ char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ + }; + typedef struct _glusterfs_ctx glusterfs_ctx_t; + +diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h +index 3011b4c..1e4c73b 100644 +--- a/libglusterfs/src/glusterfs/syncop.h ++++ b/libglusterfs/src/glusterfs/syncop.h +@@ -254,7 +254,7 @@ struct syncopctx { + task = synctask_get(); \ + stb->task = task; \ + if (task) \ +- frame = task->opframe; \ ++ frame = copy_frame(task->opframe); \ + else \ + frame = syncop_create_frame(THIS); \ + \ +@@ -269,10 +269,7 @@ struct syncopctx { + STACK_WIND_COOKIE(frame, cbk, (void *)stb, subvol, fn_op, params); \ + \ + __yield(stb); \ +- if (task) \ +- STACK_RESET(frame->root); \ +- else \ +- STACK_DESTROY(frame->root); \ ++ STACK_DESTROY(frame->root); \ + } while (0) 
+ + /* +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 3f184bf..23ca1fd 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -375,6 +375,12 @@ rpcsvc_program_actor(rpcsvc_request_t *req) + + req->ownthread = program->ownthread; + req->synctask = program->synctask; ++ if (((req->procnum == GFS3_OP_RELEASE) || ++ (req->procnum == GFS3_OP_RELEASEDIR)) && ++ (program->prognum == GLUSTER_FOP_PROGRAM)) { ++ req->ownthread = _gf_false; ++ req->synctask = _gf_true; ++ } + + err = SUCCESS; + gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s", +diff --git a/run-tests.sh b/run-tests.sh +index 5683b21..c835d93 100755 +--- a/run-tests.sh ++++ b/run-tests.sh +@@ -356,7 +356,7 @@ function run_tests() + selected_tests=$((selected_tests+1)) + echo + echo $section_separator$section_separator +- if [[ $(get_test_status $t) == "BAD_TEST" ]] && \ ++ if [[ $(get_test_status $t) =~ "BAD_TEST" ]] && \ + [[ $skip_bad_tests == "yes" ]] + then + skipped_bad_tests=$((skipped_bad_tests+1)) +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index 132b598..497083e 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -67,13 +67,14 @@ echo "Memory consumption for glusterfsd process" + for i in $(seq 1 100); do + gluster v heal $V0 info >/dev/null + done +- ++#Wait to cleanup memory ++sleep 10 + end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` + diff=$((end-start)) + +-# If memory consumption is more than 5M some leak in SSL code path ++# If memory consumption is more than 15M some leak in SSL code path + +-TEST [ $diff -lt 5000 ] ++TEST [ $diff -lt 15000 ] + + + # Set ssl-allow to a wildcard that includes our identity. +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index 2cb58ba..ac53796 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -1041,10 +1041,6 @@ posix_init(xlator_t *this) + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); + INIT_LIST_HEAD(&_private->fsyncs); +- ret = posix_spawn_ctx_janitor_thread(this); +- if (ret) +- goto out; +- + ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 2336add..39dbcce 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1582,104 +1582,6 @@ unlock: + return; + } + +-static struct posix_fd * +-janitor_get_next_fd(glusterfs_ctx_t *ctx, int32_t janitor_sleep) +-{ +- struct posix_fd *pfd = NULL; +- +- struct timespec timeout; +- +- pthread_mutex_lock(&ctx->janitor_lock); +- { +- if (list_empty(&ctx->janitor_fds)) { +- time(&timeout.tv_sec); +- timeout.tv_sec += janitor_sleep; +- timeout.tv_nsec = 0; +- +- pthread_cond_timedwait(&ctx->janitor_cond, &ctx->janitor_lock, +- &timeout); +- goto unlock; +- } +- +- pfd = list_entry(ctx->janitor_fds.next, struct posix_fd, list); +- +- list_del(ctx->janitor_fds.next); +- } +-unlock: +- pthread_mutex_unlock(&ctx->janitor_lock); +- +- return pfd; +-} +- +-static void * +-posix_ctx_janitor_thread_proc(void *data) +-{ +- xlator_t *this = NULL; +- struct posix_fd *pfd; +- glusterfs_ctx_t *ctx = NULL; +- struct posix_private *priv = NULL; +- int32_t sleep_duration = 0; +- +- this = data; +- ctx = THIS->ctx; +- THIS = this; +- +- priv = this->private; +- 
sleep_duration = priv->janitor_sleep_duration; +- while (1) { +- pfd = janitor_get_next_fd(ctx, sleep_duration); +- if (pfd) { +- if (pfd->dir == NULL) { +- gf_msg_trace(this->name, 0, "janitor: closing file fd=%d", +- pfd->fd); +- sys_close(pfd->fd); +- } else { +- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", +- pfd->dir); +- sys_closedir(pfd->dir); +- } +- +- GF_FREE(pfd); +- } +- } +- +- return NULL; +-} +- +-int +-posix_spawn_ctx_janitor_thread(xlator_t *this) +-{ +- struct posix_private *priv = NULL; +- int ret = 0; +- glusterfs_ctx_t *ctx = NULL; +- +- priv = this->private; +- ctx = THIS->ctx; +- +- LOCK(&priv->lock); +- { +- if (!ctx->janitor) { +- pthread_mutex_init(&ctx->janitor_lock, NULL); +- pthread_cond_init(&ctx->janitor_cond, NULL); +- INIT_LIST_HEAD(&ctx->janitor_fds); +- +- ret = gf_thread_create(&ctx->janitor, NULL, +- posix_ctx_janitor_thread_proc, this, +- "posixctxjan"); +- +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, +- "spawning janitor " +- "thread failed"); +- goto unlock; +- } +- } +- } +-unlock: +- UNLOCK(&priv->lock); +- return ret; +-} +- + static int + is_fresh_file(int64_t ctime_sec) + { +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 5748b9f..d135d8b 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1358,7 +1358,6 @@ posix_releasedir(xlator_t *this, fd_t *fd) + struct posix_fd *pfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; +- glusterfs_ctx_t *ctx = NULL; + + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); +@@ -1376,21 +1375,11 @@ posix_releasedir(xlator_t *this, fd_t *fd) + goto out; + } + +- ctx = THIS->ctx; +- +- pthread_mutex_lock(&ctx->janitor_lock); +- { +- INIT_LIST_HEAD(&pfd->list); +- list_add_tail(&pfd->list, &ctx->janitor_fds); +- pthread_cond_signal(&ctx->janitor_cond); +- } +- pthread_mutex_unlock(&ctx->janitor_lock); +- +- /*gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); ++ gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); + + sys_closedir(pfd->dir); + GF_FREE(pfd); +- */ ++ + out: + return 0; + } +@@ -2510,13 +2499,11 @@ posix_release(xlator_t *this, fd_t *fd) + struct posix_fd *pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; +- glusterfs_ctx_t *ctx = NULL; + + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; +- ctx = THIS->ctx; + + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { +@@ -2531,13 +2518,10 @@ posix_release(xlator_t *this, fd_t *fd) + "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); + } + +- pthread_mutex_lock(&ctx->janitor_lock); +- { +- INIT_LIST_HEAD(&pfd->list); +- list_add_tail(&pfd->list, &ctx->janitor_fds); +- pthread_cond_signal(&ctx->janitor_cond); +- } +- pthread_mutex_unlock(&ctx->janitor_lock); ++ gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); ++ ++ sys_close(pfd->fd); ++ GF_FREE(pfd); + + if (!priv) + goto out; +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index ac9d83c..61495a7 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -666,9 +666,6 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, + int + posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + +-int +-posix_spawn_ctx_janitor_thread(xlator_t *this); +- + void + posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, 
dict_t *xdata); + +-- +1.8.3.1 + diff --git a/SOURCES/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch b/SOURCES/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch new file mode 100644 index 0000000..6a161bf --- /dev/null +++ b/SOURCES/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch @@ -0,0 +1,107 @@ +From 6e15fca1621b06270983f57ac146f0f8e52f0797 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Tue, 9 Jun 2020 15:38:12 +0530 +Subject: [PATCH 449/449] test: Test case brick-mux-validation-in-cluster.t is + failing on RHEL-8 + +Brick process are not properly attached on any cluster node while +some volume options are changed on peer node and glusterd is down on +that specific node. + +Solution: At the time of restart glusterd it got a friend update request +from a peer node if peer node having some changes on volume.If the brick +process is started before received a friend update request in that case +brick_mux behavior is not workingproperly. All bricks are attached to +the same process even volumes options are not the same. To avoid the +issue introduce an atomic flag volpeerupdate and update the value while +glusterd has received a friend update request from peer for a specific +volume.If volpeerupdate flag is 1 volume is started by +glusterd_import_friend_volume synctask + +> Change-Id: I4c026f1e7807ded249153670e6967a2be8d22cb7 +> Credit: Sanju Rakaonde <srakonde@redhat.com> +> fixes: #1290 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24540/) +> (Cherry pick from commit 955bfd567329cf7fe63e9c3b89d333a55e5e9a20) + +BUG: 1844359 +Change-Id: I4c026f1e7807ded249153670e6967a2be8d22cb7 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202812 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sanju Rakonde <srakonde@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/brick-mux-validation-in-cluster.t | 4 +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 7 +++++-- + xlators/mgmt/glusterd/src/glusterd.h | 4 ++++ + 3 files changed, 10 insertions(+), 5 deletions(-) + +diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t +index f088dbb..b6af487 100644 +--- a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t ++++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t +@@ -100,10 +100,8 @@ $CLI_2 volume set $V0 performance.readdir-ahead on + $CLI_2 volume set $V1 performance.readdir-ahead on + + TEST $glusterd_1; ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +-sleep 10 +- +-EXPECT 4 count_brick_processes + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2eb2a76..6f904ae 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3758,6 +3758,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + "Version of volume %s differ. 
local version = %d, " + "remote version = %d on peer %s", + volinfo->volname, volinfo->version, version, hostname); ++ GF_ATOMIC_INIT(volinfo->volpeerupdate, 1); + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + goto out; + } else if (version < volinfo->version) { +@@ -4784,7 +4785,8 @@ glusterd_volinfo_stop_stale_bricks(glusterd_volinfo_t *new_volinfo, + * or if it's part of the new volume and is pending a snap, + * then stop the brick process + */ +- if (ret || (new_brickinfo->snap_status == -1)) { ++ if (ret || (new_brickinfo->snap_status == -1) || ++ GF_ATOMIC_GET(old_volinfo->volpeerupdate)) { + /*TODO: may need to switch to 'atomic' flavour of + * brick_stop, once we make peer rpc program also + * synctask enabled*/ +@@ -6490,7 +6492,8 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo, + * three different triggers for an attempt to start the brick process + * due to the quorum handling code in glusterd_friend_sm. + */ +- if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered) { ++ if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered || ++ GF_ATOMIC_GET(volinfo->volpeerupdate)) { + gf_msg_debug(this->name, 0, + "brick %s is already in starting " + "phase", +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 1c6c3b1..f739b5d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -523,6 +523,10 @@ struct glusterd_volinfo_ { + pthread_mutex_t store_volinfo_lock; /* acquire lock for + * updating the volinfo + */ ++ gf_atomic_t volpeerupdate; ++ /* Flag to check about volume has received updates ++ from peer ++ */ + }; + + typedef enum gd_snap_status_ { +-- +1.8.3.1 + diff --git a/SOURCES/0450-tests-basic-ctime-enable-ctime-before-testing.patch b/SOURCES/0450-tests-basic-ctime-enable-ctime-before-testing.patch new file mode 100644 index 0000000..96de5a1 --- /dev/null +++ b/SOURCES/0450-tests-basic-ctime-enable-ctime-before-testing.patch @@ -0,0 +1,35 @@ +From 09dce9ce8e946a86209b6f057bf14323036fa12a Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Wed, 10 Jun 2020 11:44:56 +0530 +Subject: [PATCH 450/451] tests/basic/ctime: enable ctime before testing + +This is to ensure that this test successfully runs, even if +ctime is disabled by default (which is the case in downstream.) 
+ +Label: DOWNSTREAM ONLY + +BUG: 1844359 +Change-Id: I91e80b3d8a56fc089aeb58b0254812111d394842 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202874 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/ctime/ctime-utimesat.t | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t +index 540e57a..da12fbe 100644 +--- a/tests/basic/ctime/ctime-utimesat.t ++++ b/tests/basic/ctime/ctime-utimesat.t +@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.read-after-open off + TEST $CLI volume set $V0 performance.open-behind off + TEST $CLI volume set $V0 performance.write-behind off + TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 ctime on + + TEST $CLI volume start $V0 + +-- +1.8.3.1 + diff --git a/SOURCES/0451-extras-Modify-group-virt-to-include-network-related-.patch b/SOURCES/0451-extras-Modify-group-virt-to-include-network-related-.patch new file mode 100644 index 0000000..bba69e1 --- /dev/null +++ b/SOURCES/0451-extras-Modify-group-virt-to-include-network-related-.patch @@ -0,0 +1,44 @@ +From 96d9b659fd0367abe1666a5ac6203208e0dc056d Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Mon, 4 May 2020 14:30:57 +0530 +Subject: [PATCH 451/451] extras: Modify group 'virt' to include + network-related options + +This is needed to work around an issue seen where vms running on +online hosts are getting killed when a different host is rebooted +in ovirt-gluster hyperconverged environments. Actual RCA is quite +lengthy and documented in the github issue. Please refer to it +for more details. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24400 +> Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1 +> Fixes: #1217 +> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1 +BUG: 1845064 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/203291 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/group-virt.example | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/extras/group-virt.example b/extras/group-virt.example +index c2ce89d..3a441eb 100644 +--- a/extras/group-virt.example ++++ b/extras/group-virt.example +@@ -16,3 +16,8 @@ cluster.choose-local=off + client.event-threads=4 + server.event-threads=4 + performance.client-io-threads=on ++network.ping-timeout=20 ++server.tcp-user-timeout=20 ++server.keepalive-time=10 ++server.keepalive-interval=2 ++server.keepalive-count=5 +-- +1.8.3.1 + diff --git a/SOURCES/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch b/SOURCES/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch new file mode 100644 index 0000000..0b115bb --- /dev/null +++ b/SOURCES/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch @@ -0,0 +1,48 @@ +From c184943bdf38de5b4cbf165fd1cd98ce7bd9e976 Mon Sep 17 00:00:00 2001 +From: hari gowtham <hgowtham@redhat.com> +Date: Tue, 16 Jun 2020 14:47:53 +0530 +Subject: [PATCH 452/456] Tier/DHT: Handle the pause case missed out + +Problem: While backporting a change from master +the changes related to tier were removed. This started affecting +the tier pause functionality. 
Backporting it +to downstream left this usecase messed up as we still support tier. +patch that caused this: https://code.engineering.redhat.com/gerrit/#/c/202647/2 + +Fix: add the condition back for tier pause to work. + +Label: DOWNSTREAM ONLY + +BUG: 1844359 +Change-Id: I46c6c179b09c7e1a729be9fd257fa4a490f0287e +Signed-off-by: hari gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/203560 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-rebalance.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index e9974cd..abc10fc 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1160,6 +1160,15 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + break; + } + ++ if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && ++ (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) { ++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, ++ "Migrate file paused"); ++ ret = -1; ++ break; ++ } ++ ++ + offset += ret; + total += ret; + +-- +1.8.3.1 + diff --git a/SOURCES/0453-glusterd-add-brick-command-failure.patch b/SOURCES/0453-glusterd-add-brick-command-failure.patch new file mode 100644 index 0000000..dd21350 --- /dev/null +++ b/SOURCES/0453-glusterd-add-brick-command-failure.patch @@ -0,0 +1,300 @@ +From a04592cce9aaa6ccb8a038bc3b4e31bc125d1d10 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Tue, 16 Jun 2020 18:03:21 +0530 +Subject: [PATCH 453/456] glusterd: add-brick command failure + +Problem: add-brick operation is failing when replica or disperse +count is not mentioned in the add-brick command. + +Reason: with commit a113d93 we are checking brick order while +doing add-brick operation for replica and disperse volumes. If +replica count or disperse count is not mentioned in the command, +the dict get is failing and resulting add-brick operation failure. + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/24581/ +> fixes: #1306 +> Change-Id: Ie957540e303bfb5f2d69015661a60d7e72557353 +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1847081 +Change-Id: Ie957540e303bfb5f2d69015661a60d7e72557353 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/203867 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/brick-order-check-add-brick.t | 40 ++++++++++++++++++++++ + tests/cluster.rc | 11 ++++-- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 39 ++++++++++++++------- + xlators/mgmt/glusterd/src/glusterd-utils.c | 30 ++--------------- + xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 41 +++++++++++++++++++---- + 6 files changed, 115 insertions(+), 49 deletions(-) + create mode 100644 tests/bugs/glusterd/brick-order-check-add-brick.t + +diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t +new file mode 100644 +index 0000000..29f0ed1 +--- /dev/null ++++ b/tests/bugs/glusterd/brick-order-check-add-brick.t +@@ -0,0 +1,40 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../cluster.rc ++. 
$(dirname $0)/../../snapshot.rc ++ ++cleanup; ++ ++TEST verify_lvm_version; ++#Create cluster with 3 nodes ++TEST launch_cluster 3 -NO_DEBUG -NO_FORCE ++TEST setup_lvm 3 ++ ++TEST $CLI_1 peer probe $H2 ++TEST $CLI_1 peer probe $H3 ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++ ++TEST $CLI_1 volume create $V0 replica 3 $H1:$L1/$V0 $H2:$L2/$V0 $H3:$L3/$V0 ++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' ++EXPECT 'Created' volinfo_field $V0 'Status' ++ ++TEST $CLI_1 volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++#add-brick with or without mentioning the replica count should not fail ++TEST $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}_1 $H2:$L2/${V0}_1 $H3:$L3/${V0}_1 ++EXPECT '2 x 3 = 6' volinfo_field $V0 'Number of Bricks' ++ ++TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_2 $H2:$L2/${V0}_2 $H3:$L3/${V0}_2 ++EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks' ++ ++#adding bricks from same host should fail the brick order check ++TEST ! $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 ++EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks' ++ ++#adding bricks from same host with force should succeed ++TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force ++EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks' ++ ++cleanup +diff --git a/tests/cluster.rc b/tests/cluster.rc +index 99be8e7..8b73153 100644 +--- a/tests/cluster.rc ++++ b/tests/cluster.rc +@@ -11,7 +11,7 @@ function launch_cluster() { + define_backends $count; + define_hosts $count; + define_glusterds $count $2; +- define_clis $count; ++ define_clis $count $3; + + start_glusterds; + } +@@ -133,8 +133,13 @@ function define_clis() { + lopt1="--log-file=$logdir/$logfile1" + + +- eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'"; +- eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'"; ++ if [ "$2" == "-NO_FORCE" ]; then ++ eval "CLI_$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt'"; ++ eval "CLI$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'"; ++ else ++ eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'"; ++ eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'"; ++ fi + done + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index 121346c..5ae577a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -1576,20 +1576,35 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + + /* Check brick order if the volume type is replicate or disperse. If + * force at the end of command not given then check brick order. ++ * doing this check at the originator node is sufficient. + */ + +- if (!is_force) { +- if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || +- (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = glusterd_check_brick_order(dict, msg, volinfo->type); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, +- "Not adding brick because of " +- "bad brick order. %s", +- msg); +- *op_errstr = gf_strdup(msg); +- goto out; +- } ++ if (is_origin_glusterd(dict) && !is_force) { ++ ret = 0; ++ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { ++ gf_msg_debug(this->name, 0, ++ "Replicate cluster type " ++ "found. 
Checking brick order."); ++ if (replica_count) ++ ret = glusterd_check_brick_order(dict, msg, volinfo->type, ++ replica_count); ++ else ++ ret = glusterd_check_brick_order(dict, msg, volinfo->type, ++ volinfo->replica_count); ++ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { ++ gf_msg_debug(this->name, 0, ++ "Disperse cluster type" ++ " found. Checking brick order."); ++ ret = glusterd_check_brick_order(dict, msg, volinfo->type, ++ volinfo->disperse_count); ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, ++ "Not adding brick because of " ++ "bad brick order. %s", ++ msg); ++ *op_errstr = gf_strdup(msg); ++ goto out; + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 6f904ae..545e688 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -14802,7 +14802,8 @@ glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) + * volume are present on the same server + */ + int32_t +-glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) ++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, ++ int32_t sub_count) + { + int ret = -1; + int i = 0; +@@ -14819,7 +14820,6 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) + char *tmpptr = NULL; + char *volname = NULL; + int32_t brick_count = 0; +- int32_t sub_count = 0; + struct addrinfo *ai_info = NULL; + char brick_addr[128] = { + 0, +@@ -14870,31 +14870,6 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) + goto out; + } + +- if (type != GF_CLUSTER_TYPE_DISPERSE) { +- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), +- &sub_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could" +- " not retrieve replica count"); +- goto out; +- } +- gf_msg_debug(this->name, 0, +- "Replicate cluster type " +- "found. Checking brick order."); +- } else { +- ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), +- &sub_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could" +- " not retrieve disperse count"); +- goto out; +- } +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, +- "Disperse cluster type" +- " found. 
Checking brick order."); +- } + brick_list_dup = brick_list_ptr = gf_strdup(brick_list); + /* Resolve hostnames and get addrinfo */ + while (i < brick_count) { +@@ -14989,5 +14964,6 @@ out: + ai_list_tmp2 = ai_list_tmp1; + } + free(ai_list_tmp2); ++ gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index e2e2454..5f5de82 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -883,6 +883,7 @@ char * + search_brick_path_from_proc(pid_t brick_pid, char *brickpath); + + int32_t +-glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type); ++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, ++ int32_t sub_count); + + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 8da2ff3..134b04c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1024,6 +1024,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + int32_t local_brick_count = 0; + int32_t i = 0; + int32_t type = 0; ++ int32_t replica_count = 0; ++ int32_t disperse_count = 0; + char *brick = NULL; + char *tmpptr = NULL; + xlator_t *this = NULL; +@@ -1119,15 +1121,42 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + } + + if (!is_force) { +- if ((type == GF_CLUSTER_TYPE_REPLICATE) || +- (type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = glusterd_check_brick_order(dict, msg, type); ++ if (type == GF_CLUSTER_TYPE_REPLICATE) { ++ ret = dict_get_int32n(dict, "replica-count", ++ SLEN("replica-count"), &replica_count); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, +- "Not creating volume because of " +- "bad brick order"); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could" ++ " not retrieve replica count"); ++ goto out; ++ } ++ gf_msg_debug(this->name, 0, ++ "Replicate cluster type " ++ "found. Checking brick order."); ++ ret = glusterd_check_brick_order(dict, msg, type, ++ replica_count); ++ } else if (type == GF_CLUSTER_TYPE_DISPERSE) { ++ ret = dict_get_int32n(dict, "disperse-count", ++ SLEN("disperse-count"), &disperse_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could" ++ " not retrieve disperse count"); + goto out; + } ++ gf_msg_debug(this->name, 0, ++ "Disperse cluster type" ++ " found. Checking brick order."); ++ ret = glusterd_check_brick_order(dict, msg, type, ++ disperse_count); ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, ++ "Not creating the volume because of " ++ "bad brick order. 
%s", ++ msg); ++ *op_errstr = gf_strdup(msg); ++ goto out; + } + } + } +-- +1.8.3.1 + diff --git a/SOURCES/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch b/SOURCES/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch new file mode 100644 index 0000000..6ad460d --- /dev/null +++ b/SOURCES/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch @@ -0,0 +1,152 @@ +From cddd253c5e3f0a7c3b91c35cea8ad1921cb43b98 Mon Sep 17 00:00:00 2001 +From: Kinglong Mee <kinglongmee@gmail.com> +Date: Thu, 18 Jul 2019 11:43:01 +0800 +Subject: [PATCH 454/456] features/locks: avoid use after freed of frame for + blocked lock + +The fop contains blocked lock may use freed frame info when other +unlock fop has unwind the blocked lock. + +Because the blocked lock is added to block list in inode lock(or +other lock), after that, when out of the inode lock, the fop +contains the blocked lock should not use it. + +Upstream Patch - https://review.gluster.org/#/c/glusterfs/+/23155/ + +>Change-Id: Icb309a1cc78380dc982b26d50c18d67e4f2c8915 +>fixes: bz#1737291 +>Signed-off-by: Kinglong Mee <mijinlong@horiscale.com> + +Change-Id: Icb309a1cc78380dc982b26d50c18d67e4f2c8915 +BUG: 1812789 +Reviewed-on: https://code.engineering.redhat.com/gerrit/206465 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + xlators/features/locks/src/common.c | 4 ++++ + xlators/features/locks/src/entrylk.c | 4 ++-- + xlators/features/locks/src/inodelk.c | 7 +++++-- + xlators/features/locks/src/posix.c | 5 +++-- + xlators/features/locks/src/reservelk.c | 2 -- + 5 files changed, 14 insertions(+), 8 deletions(-) + +diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c +index 6e7fb4b..1406e70 100644 +--- a/xlators/features/locks/src/common.c ++++ b/xlators/features/locks/src/common.c +@@ -1080,6 +1080,10 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); ++ ++ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, ++ &lock->user_flock, NULL); ++ + lock->blocked = 1; + __insert_lock(pl_inode, lock); + ret = -1; +diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c +index ced5eca..93c649c 100644 +--- a/xlators/features/locks/src/entrylk.c ++++ b/xlators/features/locks/src/entrylk.c +@@ -552,6 +552,8 @@ __lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", + pinode, lock->basename); + ++ entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename, ++ ENTRYLK_LOCK, lock->type); + out: + return -EAGAIN; + } +@@ -932,8 +934,6 @@ out: + op_ret, op_errno); + unwind: + STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL); +- } else { +- entrylk_trace_block(this, frame, volume, fd, loc, basename, cmd, type); + } + + if (pcontend != NULL) { +diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c +index a9c42f1..24dee49 100644 +--- a/xlators/features/locks/src/inodelk.c ++++ b/xlators/features/locks/src/inodelk.c +@@ -420,6 +420,8 @@ __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); + ++ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, ++ lock->volume); + out: + return -EAGAIN; + } +@@ -959,6 +961,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + int ret = -1; + GF_UNUSED int dict_ret = -1; + int can_block = 0; ++ short lock_type = 0; + pl_inode_t *pinode = NULL; + pl_inode_lock_t *reqlock = NULL; + pl_dom_list_t *dom = NULL; +@@ -1024,13 +1027,13 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + /* fall through */ + + case F_SETLK: ++ lock_type = flock->l_type; + memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock)); + ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom, + inode); + + if (ret < 0) { +- if ((can_block) && (F_UNLCK != flock->l_type)) { +- pl_trace_block(this, frame, fd, loc, cmd, flock, volume); ++ if ((can_block) && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 50f1265..7887b82 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -2557,6 +2557,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + uint32_t lk_flags = 0; + posix_locks_private_t *priv = this->private; + pl_local_t *local = NULL; ++ short lock_type = 0; + + int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags); + if (ret == 0) { +@@ -2701,6 +2702,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + case F_SETLK: + reqlock->frame = frame; + reqlock->this = this; ++ lock_type = flock->l_type; + + pthread_mutex_lock(&pl_inode->mutex); + { +@@ -2738,8 +2740,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + + ret = pl_setlk(this, pl_inode, reqlock, can_block); + if (ret == -1) { +- if ((can_block) && (F_UNLCK != flock->l_type)) { +- pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL); ++ if ((can_block) && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); +diff 
--git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c +index 51076d7..604691f 100644 +--- a/xlators/features/locks/src/reservelk.c ++++ b/xlators/features/locks/src/reservelk.c +@@ -312,8 +312,6 @@ grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode) + ret = pl_setlk(this, pl_inode, lock, can_block); + if (ret == -1) { + if (can_block) { +- pl_trace_block(this, lock->frame, fd, NULL, cmd, +- &lock->user_flock, NULL); + continue; + } else { + gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); +-- +1.8.3.1 + diff --git a/SOURCES/0455-locks-prevent-deletion-of-locked-entries.patch b/SOURCES/0455-locks-prevent-deletion-of-locked-entries.patch new file mode 100644 index 0000000..5960690 --- /dev/null +++ b/SOURCES/0455-locks-prevent-deletion-of-locked-entries.patch @@ -0,0 +1,1253 @@ +From 3f6ff474db3934f43d9963dfe4dda7d201211e75 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Fri, 12 Jun 2020 00:06:36 +0200 +Subject: [PATCH 455/456] locks: prevent deletion of locked entries + +To keep consistency inside transactions started by locking an entry or +an inode, this change delays the removal of entries that are currently +locked by one or more clients. Once all locks are released, the removal +is processed. + +It has also been improved the detection of stale inodes in the locking +code of EC. + +>Upstream patch - https://review.gluster.org/#/c/glusterfs/+/20025/ +>Fixes: #990 + +Change-Id: Ic8ba23d9480f80c7f74e7a310bf8a15922320fd5 +BUG: 1812789 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/206442 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/cluster/ec/src/ec-locks.c | 69 ++++++-- + xlators/features/locks/src/common.c | 316 ++++++++++++++++++++++++++++++++++- + xlators/features/locks/src/common.h | 43 +++++ + xlators/features/locks/src/entrylk.c | 19 +-- + xlators/features/locks/src/inodelk.c | 150 ++++++++++------- + xlators/features/locks/src/locks.h | 23 ++- + xlators/features/locks/src/posix.c | 183 ++++++++++++++++++-- + 7 files changed, 689 insertions(+), 114 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c +index ffcac07..db86296 100644 +--- a/xlators/cluster/ec/src/ec-locks.c ++++ b/xlators/cluster/ec/src/ec-locks.c +@@ -28,9 +28,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) + ec_t *ec = fop->xl->private; + ec_cbk_data_t *ans = NULL; + ec_cbk_data_t *cbk = NULL; +- uintptr_t locked = 0, notlocked = 0; ++ uintptr_t locked = 0; ++ int32_t good = 0; ++ int32_t eagain = 0; ++ int32_t estale = 0; + int32_t error = -1; + ++ /* There are some errors that we'll handle in an special way while trying ++ * to acquire a lock. ++ * ++ * EAGAIN: If it's found during a parallel non-blocking lock request, we ++ * consider that there's contention on the inode, so we consider ++ * the acquisition a failure and try again with a sequential ++ * blocking lock request. This will ensure that we get a lock on ++ * as many bricks as possible (ignoring EAGAIN here would cause ++ * unnecessary triggers of self-healing). ++ * ++ * If it's found during a sequential blocking lock request, it's ++ * considered an error. Lock will only succeed if there are ++ * enough other bricks locked. ++ * ++ * ESTALE: This can appear during parallel or sequential lock request if ++ * the inode has just been unlinked. We consider this error is ++ * not recoverable, but we also don't consider it as fatal. 
So, ++ * if it happens during parallel lock, we won't attempt a ++ * sequential one unless there are EAGAIN errors on other ++ * bricks (and are enough to form a quorum), but if we reach ++ * quorum counting the ESTALE bricks, we consider the whole ++ * result of the operation is ESTALE instead of EIO. ++ */ ++ + list_for_each_entry(ans, &fop->cbk_list, list) + { + if (ans->op_ret >= 0) { +@@ -38,24 +65,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) + error = EIO; + } + locked |= ans->mask; ++ good = ans->count; + cbk = ans; +- } else { +- if (ans->op_errno == EAGAIN) { +- switch (fop->uint32) { +- case EC_LOCK_MODE_NONE: +- case EC_LOCK_MODE_ALL: +- /* Goal is to treat non-blocking lock as failure +- * even if there is a single EAGAIN*/ +- notlocked |= ans->mask; +- break; +- } +- } ++ } else if (ans->op_errno == ESTALE) { ++ estale += ans->count; ++ } else if ((ans->op_errno == EAGAIN) && ++ (fop->uint32 != EC_LOCK_MODE_INC)) { ++ eagain += ans->count; + } + } + + if (error == -1) { +- if (gf_bits_count(locked | notlocked) >= ec->fragments) { +- if (notlocked == 0) { ++ /* If we have enough quorum with succeeded and EAGAIN answers, we ++ * ignore for now any ESTALE answer. If there are EAGAIN answers, ++ * we retry with a sequential blocking lock request if needed. ++ * Otherwise we succeed. */ ++ if ((good + eagain) >= ec->fragments) { ++ if (eagain == 0) { + if (fop->answer == NULL) { + fop->answer = cbk; + } +@@ -68,21 +94,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) + case EC_LOCK_MODE_NONE: + error = EAGAIN; + break; +- + case EC_LOCK_MODE_ALL: + fop->uint32 = EC_LOCK_MODE_INC; + break; +- + default: ++ /* This shouldn't happen because eagain cannot be > 0 ++ * when fop->uint32 is EC_LOCK_MODE_INC. */ + error = EIO; + break; + } + } + } else { +- if (fop->answer && fop->answer->op_ret < 0) ++ /* We have been unable to find enough candidates that will be able ++ * to take the lock. If we have quorum on some answer, we return ++ * it. Otherwise we check if ESTALE answers allow us to reach ++ * quorum. If so, we return ESTALE. */ ++ if (fop->answer && fop->answer->op_ret < 0) { + error = fop->answer->op_errno; +- else ++ } else if ((good + eagain + estale) >= ec->fragments) { ++ error = ESTALE; ++ } else { + error = EIO; ++ } + } + } + +diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c +index 1406e70..0c52853 100644 +--- a/xlators/features/locks/src/common.c ++++ b/xlators/features/locks/src/common.c +@@ -462,11 +462,16 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) + INIT_LIST_HEAD(&pl_inode->blocked_calls); + INIT_LIST_HEAD(&pl_inode->metalk_list); + INIT_LIST_HEAD(&pl_inode->queued_locks); ++ INIT_LIST_HEAD(&pl_inode->waiting); + gf_uuid_copy(pl_inode->gfid, inode->gfid); + + pl_inode->check_mlock_info = _gf_true; + pl_inode->mlock_enforced = _gf_false; + ++ /* -2 means never looked up. -1 means something went wrong and link ++ * tracking is disabled. 
*/ ++ pl_inode->links = -2; ++ + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); + if (ret) { + pthread_mutex_destroy(&pl_inode->mutex); +@@ -1276,4 +1281,313 @@ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) + } + + return 0; +-} +\ No newline at end of file ++} ++ ++gf_boolean_t ++pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client) ++{ ++ if (client && (client->opversion < GD_OP_VERSION_7_0)) { ++ return _gf_true; ++ } ++ ++ if (is_lk_owner_null(owner)) { ++ return _gf_false; ++ } ++ return _gf_true; ++} ++ ++static int32_t ++pl_inode_from_loc(loc_t *loc, inode_t **pinode) ++{ ++ inode_t *inode = NULL; ++ int32_t error = 0; ++ ++ if (loc->inode != NULL) { ++ inode = inode_ref(loc->inode); ++ goto done; ++ } ++ ++ if (loc->parent == NULL) { ++ error = EINVAL; ++ goto done; ++ } ++ ++ if (!gf_uuid_is_null(loc->gfid)) { ++ inode = inode_find(loc->parent->table, loc->gfid); ++ if (inode != NULL) { ++ goto done; ++ } ++ } ++ ++ if (loc->name == NULL) { ++ error = EINVAL; ++ goto done; ++ } ++ ++ inode = inode_grep(loc->parent->table, loc->parent, loc->name); ++ if (inode == NULL) { ++ /* We haven't found any inode. This means that the file doesn't exist ++ * or that even if it exists, we don't have any knowledge about it, so ++ * we don't have locks on it either, which is fine for our purposes. */ ++ goto done; ++ } ++ ++done: ++ *pinode = inode; ++ ++ return error; ++} ++ ++static gf_boolean_t ++pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode, ++ struct timespec *now, struct list_head *contend) ++{ ++ pl_dom_list_t *dom; ++ pl_inode_lock_t *lock; ++ gf_boolean_t has_owners = _gf_false; ++ ++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list) ++ { ++ list_for_each_entry(lock, &dom->inodelk_list, list) ++ { ++ /* If the lock belongs to the same client, we assume it's related ++ * to the same operation, so we allow the removal to continue. */ ++ if (lock->client == client) { ++ continue; ++ } ++ /* If the lock belongs to an internal process, we don't block the ++ * removal. */ ++ if (lock->client_pid < 0) { ++ continue; ++ } ++ if (contend == NULL) { ++ return _gf_true; ++ } ++ has_owners = _gf_true; ++ inodelk_contention_notify_check(xl, lock, now, contend); ++ } ++ } ++ ++ return has_owners; ++} ++ ++int32_t ++pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, ++ pl_inode_t **ppl_inode, struct list_head *contend) ++{ ++ struct timespec now; ++ inode_t *inode; ++ pl_inode_t *pl_inode; ++ int32_t error; ++ ++ pl_inode = NULL; ++ ++ error = pl_inode_from_loc(loc, &inode); ++ if ((error != 0) || (inode == NULL)) { ++ goto done; ++ } ++ ++ pl_inode = pl_inode_get(xl, inode, NULL); ++ if (pl_inode == NULL) { ++ inode_unref(inode); ++ error = ENOMEM; ++ goto done; ++ } ++ ++ /* pl_inode_from_loc() already increments ref count for inode, so ++ * we only assign here our reference. */ ++ pl_inode->inode = inode; ++ ++ timespec_now(&now); ++ ++ pthread_mutex_lock(&pl_inode->mutex); ++ ++ if (pl_inode->removed) { ++ error = ESTALE; ++ goto unlock; ++ } ++ ++ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { ++ error = -1; ++ /* We skip the unlock here because the caller must create a stub when ++ * we return -1 and do a call to pl_inode_remove_complete(), which ++ * assumes the lock is still acquired and will release it once ++ * everything else is prepared. 
*/ ++ goto done; ++ } ++ ++ pl_inode->is_locked = _gf_true; ++ pl_inode->remove_running++; ++ ++unlock: ++ pthread_mutex_unlock(&pl_inode->mutex); ++ ++done: ++ *ppl_inode = pl_inode; ++ ++ return error; ++} ++ ++int32_t ++pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, ++ struct list_head *contend) ++{ ++ pl_inode_lock_t *lock; ++ int32_t error = -1; ++ ++ if (stub != NULL) { ++ list_add_tail(&stub->list, &pl_inode->waiting); ++ pl_inode->is_locked = _gf_true; ++ } else { ++ error = ENOMEM; ++ ++ while (!list_empty(contend)) { ++ lock = list_first_entry(contend, pl_inode_lock_t, list); ++ list_del_init(&lock->list); ++ __pl_inodelk_unref(lock); ++ } ++ } ++ ++ pthread_mutex_unlock(&pl_inode->mutex); ++ ++ if (error < 0) { ++ inodelk_contention_notify(xl, contend); ++ } ++ ++ inode_unref(pl_inode->inode); ++ ++ return error; ++} ++ ++void ++pl_inode_remove_wake(struct list_head *list) ++{ ++ call_stub_t *stub; ++ ++ while (!list_empty(list)) { ++ stub = list_first_entry(list, call_stub_t, list); ++ list_del_init(&stub->list); ++ ++ call_resume(stub); ++ } ++} ++ ++void ++pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) ++{ ++ struct list_head contend, granted; ++ struct timespec now; ++ pl_dom_list_t *dom; ++ ++ if (pl_inode == NULL) { ++ return; ++ } ++ ++ INIT_LIST_HEAD(&contend); ++ INIT_LIST_HEAD(&granted); ++ timespec_now(&now); ++ ++ pthread_mutex_lock(&pl_inode->mutex); ++ ++ if (error == 0) { ++ if (pl_inode->links >= 0) { ++ pl_inode->links--; ++ } ++ if (pl_inode->links == 0) { ++ pl_inode->removed = _gf_true; ++ } ++ } ++ ++ pl_inode->remove_running--; ++ ++ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { ++ pl_inode->is_locked = _gf_false; ++ ++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list) ++ { ++ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, ++ &contend); ++ } ++ } ++ ++ pthread_mutex_unlock(&pl_inode->mutex); ++ ++ unwind_granted_inodes(xl, pl_inode, &granted); ++ ++ inodelk_contention_notify(xl, &contend); ++ ++ inode_unref(pl_inode->inode); ++} ++ ++void ++pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, ++ struct list_head *list) ++{ ++ call_stub_t *stub, *tmp; ++ ++ if (!pl_inode->is_locked) { ++ return; ++ } ++ ++ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list) ++ { ++ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL, ++ NULL)) { ++ list_move_tail(&stub->list, list); ++ } ++ } ++} ++ ++/* This function determines if an inodelk attempt can be done now or it needs ++ * to wait. ++ * ++ * Possible return values: ++ * < 0: An error occurred. Currently only -ESTALE can be returned if the ++ * inode has been deleted previously by unlink/rmdir/rename ++ * = 0: The lock can be attempted. ++ * > 0: The lock needs to wait because a conflicting remove operation is ++ * ongoing. ++ */ ++int32_t ++pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) ++{ ++ pl_dom_list_t *dom; ++ pl_inode_lock_t *ilock; ++ ++ /* If the inode has been deleted, we won't allow any lock. */ ++ if (pl_inode->removed) { ++ return -ESTALE; ++ } ++ ++ /* We only synchronize with locks made for regular operations coming from ++ * the user. Locks done for internal purposes are hard to control and could ++ * lead to long delays or deadlocks quite easily. 
*/ ++ if (lock->client_pid < 0) { ++ return 0; ++ } ++ if (!pl_inode->is_locked) { ++ return 0; ++ } ++ if (pl_inode->remove_running > 0) { ++ return 1; ++ } ++ ++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list) ++ { ++ list_for_each_entry(ilock, &dom->inodelk_list, list) ++ { ++ /* If a lock from the same client is already granted, we allow this ++ * one to continue. This is necessary to prevent deadlocks when ++ * multiple locks are taken for the same operation. ++ * ++ * On the other side it's unlikely that the same client sends ++ * completely unrelated locks for the same inode. ++ */ ++ if (ilock->client == lock->client) { ++ return 0; ++ } ++ } ++ } ++ ++ return 1; ++} +diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h +index ea86b96..6c81ac3 100644 +--- a/xlators/features/locks/src/common.h ++++ b/xlators/features/locks/src/common.h +@@ -105,6 +105,15 @@ void + __pl_inodelk_unref(pl_inode_lock_t *lock); + + void ++__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, ++ struct list_head *granted, pl_dom_list_t *dom, ++ struct timespec *now, struct list_head *contend); ++ ++void ++unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, ++ struct list_head *granted); ++ ++void + grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend); +@@ -204,6 +213,16 @@ pl_metalock_is_active(pl_inode_t *pl_inode); + void + __pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock); + ++void ++inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock, ++ struct timespec *now, ++ struct list_head *contend); ++ ++void ++entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock, ++ struct timespec *now, ++ struct list_head *contend); ++ + gf_boolean_t + pl_does_monkey_want_stuck_lock(); + +@@ -216,4 +235,28 @@ pl_clean_local(pl_local_t *local); + int + pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd); + ++gf_boolean_t ++pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client); ++ ++int32_t ++pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, ++ pl_inode_t **ppl_inode, struct list_head *contend); ++ ++int32_t ++pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, ++ struct list_head *contend); ++ ++void ++pl_inode_remove_wake(struct list_head *list); ++ ++void ++pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error); ++ ++void ++pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, ++ struct list_head *list); ++ ++int32_t ++pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock); ++ + #endif /* __COMMON_H__ */ +diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c +index 93c649c..b97836f 100644 +--- a/xlators/features/locks/src/entrylk.c ++++ b/xlators/features/locks/src/entrylk.c +@@ -197,9 +197,9 @@ out: + return revoke_lock; + } + +-static gf_boolean_t +-__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, +- struct timespec *now) ++void ++entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock, ++ struct timespec *now, struct list_head *contend) + { + posix_locks_private_t *priv; + int64_t elapsed; +@@ -209,7 +209,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, + /* If this lock is in a list, it means that we are about to send a + * notification for it, so no need to do anything else. 
*/ + if (!list_empty(&lock->contend)) { +- return _gf_false; ++ return; + } + + elapsed = now->tv_sec; +@@ -218,7 +218,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, + elapsed--; + } + if (elapsed < priv->notify_contention_delay) { +- return _gf_false; ++ return; + } + + /* All contention notifications will be sent outside of the locked +@@ -231,7 +231,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, + + lock->contention_time = *now; + +- return _gf_true; ++ list_add_tail(&lock->contend, contend); + } + + void +@@ -325,9 +325,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock, + break; + } + } +- if (__entrylk_needs_contention_notify(this, tmp, now)) { +- list_add_tail(&tmp->contend, contend); +- } ++ entrylk_contention_notify_check(this, tmp, now, contend); + } + } + +@@ -690,10 +688,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend); + + if (bl_ret == 0) { +- list_add(&bl->blocked_locks, granted); ++ list_add_tail(&bl->blocked_locks, granted); + } + } +- return; + } + + /* Grants locks if possible which are blocked on a lock */ +diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c +index 24dee49..1a07243 100644 +--- a/xlators/features/locks/src/inodelk.c ++++ b/xlators/features/locks/src/inodelk.c +@@ -231,9 +231,9 @@ out: + return revoke_lock; + } + +-static gf_boolean_t +-__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, +- struct timespec *now) ++void ++inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock, ++ struct timespec *now, struct list_head *contend) + { + posix_locks_private_t *priv; + int64_t elapsed; +@@ -243,7 +243,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, + /* If this lock is in a list, it means that we are about to send a + * notification for it, so no need to do anything else. 
*/ + if (!list_empty(&lock->contend)) { +- return _gf_false; ++ return; + } + + elapsed = now->tv_sec; +@@ -252,7 +252,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, + elapsed--; + } + if (elapsed < priv->notify_contention_delay) { +- return _gf_false; ++ return; + } + + /* All contention notifications will be sent outside of the locked +@@ -265,7 +265,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, + + lock->contention_time = *now; + +- return _gf_true; ++ list_add_tail(&lock->contend, contend); + } + + void +@@ -353,9 +353,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, + break; + } + } +- if (__inodelk_needs_contention_notify(this, l, now)) { +- list_add_tail(&l->contend, contend); +- } ++ inodelk_contention_notify_check(this, l, now, contend); + } + } + +@@ -435,12 +433,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + struct list_head *contend) + { + pl_inode_lock_t *conf = NULL; +- int ret = -EINVAL; ++ int ret; + +- conf = __inodelk_grantable(this, dom, lock, now, contend); +- if (conf) { +- ret = __lock_blocked_add(this, dom, lock, can_block); +- goto out; ++ ret = pl_inode_remove_inodelk(pl_inode, lock); ++ if (ret < 0) { ++ return ret; ++ } ++ if (ret == 0) { ++ conf = __inodelk_grantable(this, dom, lock, now, contend); ++ } ++ if ((ret > 0) || (conf != NULL)) { ++ return __lock_blocked_add(this, dom, lock, can_block); + } + + /* To prevent blocked locks starvation, check if there are any blocked +@@ -462,17 +465,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + "starvation"); + } + +- ret = __lock_blocked_add(this, dom, lock, can_block); +- goto out; ++ return __lock_blocked_add(this, dom, lock, can_block); + } + __pl_inodelk_ref(lock); + gettimeofday(&lock->granted_time, NULL); + list_add(&lock->list, &dom->inodelk_list); + +- ret = 0; +- +-out: +- return ret; ++ return 0; + } + + /* Return true if the two inodelks have exactly same lock boundaries */ +@@ -529,12 +528,11 @@ out: + return conf; + } + +-static void ++void + __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend) + { +- int bl_ret = 0; + pl_inode_lock_t *bl = NULL; + pl_inode_lock_t *tmp = NULL; + +@@ -547,52 +545,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + { + list_del_init(&bl->blocked_locks); + +- bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); ++ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); + +- if (bl_ret == 0) { +- list_add(&bl->blocked_locks, granted); ++ if (bl->status != -EAGAIN) { ++ list_add_tail(&bl->blocked_locks, granted); + } + } +- return; + } + +-/* Grant all inodelks blocked on a lock */ + void +-grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, +- pl_dom_list_t *dom, struct timespec *now, +- struct list_head *contend) ++unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, ++ struct list_head *granted) + { +- struct list_head granted; + pl_inode_lock_t *lock; + pl_inode_lock_t *tmp; ++ int32_t op_ret; ++ int32_t op_errno; + +- INIT_LIST_HEAD(&granted); +- +- pthread_mutex_lock(&pl_inode->mutex); +- { +- __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, +- contend); +- } +- pthread_mutex_unlock(&pl_inode->mutex); +- +- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) ++ list_for_each_entry_safe(lock, tmp, 
granted, blocked_locks) + { +- gf_log(this->name, GF_LOG_TRACE, +- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted", +- lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, +- lkowner_utoa(&lock->owner), lock->user_flock.l_start, +- lock->user_flock.l_len); +- ++ if (lock->status == 0) { ++ op_ret = 0; ++ op_errno = 0; ++ gf_log(this->name, GF_LOG_TRACE, ++ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 ++ " => Granted", ++ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", ++ lock->client_pid, lkowner_utoa(&lock->owner), ++ lock->user_flock.l_start, lock->user_flock.l_len); ++ } else { ++ op_ret = -1; ++ op_errno = -lock->status; ++ } + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, +- 0, 0, lock->volume); ++ op_ret, op_errno, lock->volume); + +- STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL); ++ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL); + lock->frame = NULL; + } + + pthread_mutex_lock(&pl_inode->mutex); + { +- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) ++ list_for_each_entry_safe(lock, tmp, granted, blocked_locks) + { + list_del_init(&lock->blocked_locks); + __pl_inodelk_unref(lock); +@@ -601,6 +595,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pthread_mutex_unlock(&pl_inode->mutex); + } + ++/* Grant all inodelks blocked on a lock */ ++void ++grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, ++ pl_dom_list_t *dom, struct timespec *now, ++ struct list_head *contend) ++{ ++ struct list_head granted; ++ ++ INIT_LIST_HEAD(&granted); ++ ++ pthread_mutex_lock(&pl_inode->mutex); ++ { ++ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, ++ contend); ++ } ++ pthread_mutex_unlock(&pl_inode->mutex); ++ ++ unwind_granted_inodes(this, pl_inode, &granted); ++} ++ + static void + pl_inodelk_log_cleanup(pl_inode_lock_t *lock) + { +@@ -662,7 +676,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) + * and blocked lists, then this means that a parallel + * unlock on another inodelk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in +- * __grant_blocked_node_locks() (although using the ++ * __grant_blocked_inode_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked inodelks from other clients, now that L1 is +@@ -747,6 +761,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + gf_boolean_t need_inode_unref = _gf_false; + struct list_head *pcontend = NULL; + struct list_head contend; ++ struct list_head wake; + struct timespec now = {}; + short fl_type; + +@@ -798,6 +813,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + timespec_now(&now); + } + ++ INIT_LIST_HEAD(&wake); ++ + if (ctx) + pthread_mutex_lock(&ctx->lock); + pthread_mutex_lock(&pl_inode->mutex); +@@ -820,18 +837,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); +- if (can_block) ++ if (can_block) { + unref = _gf_false; +- /* For all but the case where a non-blocking +- * lock attempt fails, the extra ref taken at +- * the start of this function must be negated. 
+- */ +- else +- need_inode_unref = _gf_true; ++ } + } +- +- if (ctx && (!ret || can_block)) ++ /* For all but the case where a non-blocking lock attempt fails ++ * with -EAGAIN, the extra ref taken at the start of this function ++ * must be negated. */ ++ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block); ++ if (ctx && !need_inode_unref) { + list_add_tail(&lock->client_list, &ctx->inodelk_lockers); ++ } + } else { + /* Irrespective of whether unlock succeeds or not, + * the extra inode ref that was done at the start of +@@ -849,6 +865,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + list_del_init(&retlock->client_list); + __pl_inodelk_unref(retlock); + ++ pl_inode_remove_unlocked(this, pl_inode, &wake); ++ + ret = 0; + } + out: +@@ -859,6 +877,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + if (ctx) + pthread_mutex_unlock(&ctx->lock); + ++ pl_inode_remove_wake(&wake); ++ + /* The following (extra) unref corresponds to the ref that + * was done at the time the lock was granted. + */ +@@ -1033,10 +1053,14 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + inode); + + if (ret < 0) { +- if ((can_block) && (F_UNLCK != lock_type)) { +- goto out; ++ if (ret == -EAGAIN) { ++ if (can_block && (F_UNLCK != lock_type)) { ++ goto out; ++ } ++ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); ++ } else { ++ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret); + } +- gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); + op_errno = -ret; + goto unwind; + } +diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h +index aa267de..6666feb 100644 +--- a/xlators/features/locks/src/locks.h ++++ b/xlators/features/locks/src/locks.h +@@ -102,6 +102,9 @@ struct __pl_inode_lock { + + struct list_head client_list; /* list of all locks from a client */ + short fl_type; ++ ++ int32_t status; /* Error code when we try to grant a lock in blocked ++ state */ + }; + typedef struct __pl_inode_lock pl_inode_lock_t; + +@@ -164,13 +167,14 @@ struct __pl_inode { + struct list_head rw_list; /* list of waiting r/w requests */ + struct list_head reservelk_list; /* list of reservelks */ + struct list_head blocked_reservelks; /* list of blocked reservelks */ +- struct list_head +- blocked_calls; /* List of blocked lock calls while a reserve is held*/ +- struct list_head metalk_list; /* Meta lock list */ +- /* This is to store the incoming lock +- requests while meta lock is enabled */ +- struct list_head queued_locks; +- int mandatory; /* if mandatory locking is enabled */ ++ struct list_head blocked_calls; /* List of blocked lock calls while a ++ reserve is held*/ ++ struct list_head metalk_list; /* Meta lock list */ ++ struct list_head queued_locks; /* This is to store the incoming lock ++ requests while meta lock is enabled */ ++ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir ++ the inode. */ ++ int mandatory; /* if mandatory locking is enabled */ + + inode_t *refkeeper; /* hold refs on an inode while locks are + held to prevent pruning */ +@@ -197,6 +201,11 @@ struct __pl_inode { + */ + int fop_wind_count; + pthread_cond_t check_fop_wind_count; ++ ++ int32_t links; /* Number of hard links the inode has. */ ++ uint32_t remove_running; /* Number of remove operations running. */ ++ gf_boolean_t is_locked; /* Regular locks will be blocked. */ ++ gf_boolean_t removed; /* The inode has been deleted. 
*/ + }; + typedef struct __pl_inode pl_inode_t; + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 7887b82..5ae0125 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -147,6 +147,29 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + } \ + } while (0) + ++#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \ ++ _args...) \ ++ ({ \ ++ struct list_head contend; \ ++ pl_inode_t *__pl_inode; \ ++ call_stub_t *__stub; \ ++ int32_t __error; \ ++ INIT_LIST_HEAD(&contend); \ ++ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \ ++ &__pl_inode, &contend); \ ++ if (__error < 0) { \ ++ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \ ++ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \ ++ &contend); \ ++ } else if (__error == 0) { \ ++ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \ ++ _loc2); \ ++ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \ ++ FIRST_CHILD(_xl)->fops->_fop, ##_args); \ ++ } \ ++ __error; \ ++ }) ++ + gf_boolean_t + pl_has_xdata_requests(dict_t *xdata) + { +@@ -2969,11 +2992,85 @@ out: + return ret; + } + ++static int32_t ++pl_request_link_count(dict_t **pxdata) ++{ ++ dict_t *xdata; ++ ++ xdata = *pxdata; ++ if (xdata == NULL) { ++ xdata = dict_new(); ++ if (xdata == NULL) { ++ return ENOMEM; ++ } ++ } else { ++ dict_ref(xdata); ++ } ++ ++ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) { ++ dict_unref(xdata); ++ return ENOMEM; ++ } ++ ++ *pxdata = xdata; ++ ++ return 0; ++} ++ ++static int32_t ++pl_check_link_count(dict_t *xdata) ++{ ++ int32_t count; ++ ++ /* In case we are unable to read the link count from xdata, we take a ++ * conservative approach and return -2, which will prevent the inode from ++ * being considered deleted. In fact it will cause link tracking for this ++ * inode to be disabled completely to avoid races. */ ++ ++ if (xdata == NULL) { ++ return -2; ++ } ++ ++ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) { ++ return -2; ++ } ++ ++ return count; ++} ++ + int32_t + pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) + { ++ pl_inode_t *pl_inode; ++ ++ if (op_ret >= 0) { ++ pl_inode = pl_inode_get(this, inode, NULL); ++ if (pl_inode == NULL) { ++ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL, ++ NULL); ++ return 0; ++ } ++ ++ pthread_mutex_lock(&pl_inode->mutex); ++ ++ /* We only update the link count if we previously didn't know it. ++ * Doing it always can lead to races since lookup is not executed ++ * atomically most of the times. */ ++ if (pl_inode->links == -2) { ++ pl_inode->links = pl_check_link_count(xdata); ++ if (buf->ia_type == IA_IFDIR) { ++ /* Directories have at least 2 links. To avoid special handling ++ * for directories, we simply decrement the value here to make ++ * them equivalent to regular files. 
*/ ++ pl_inode->links--; ++ } ++ } ++ ++ pthread_mutex_unlock(&pl_inode->mutex); ++ } ++ + PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; +@@ -2982,9 +3079,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t + pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); +- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xdata); ++ int32_t error; ++ ++ error = pl_request_link_count(&xdata); ++ if (error == 0) { ++ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); ++ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xdata); ++ dict_unref(xdata); ++ } else { ++ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL); ++ } + return 0; + } + +@@ -3792,6 +3897,10 @@ unlock: + gf_proc_dump_write("posixlk-count", "%d", count); + __dump_posixlks(pl_inode); + } ++ ++ gf_proc_dump_write("links", "%d", pl_inode->links); ++ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running); ++ gf_proc_dump_write("removed", "%u", pl_inode->removed); + } + pthread_mutex_unlock(&pl_inode->mutex); + +@@ -4137,8 +4246,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + struct iatt *postoldparent, struct iatt *prenewparent, + struct iatt *postnewparent, dict_t *xdata) + { ++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); ++ + PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); ++ + return 0; + } + +@@ -4146,10 +4258,15 @@ int32_t + pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); ++ int32_t error; ++ ++ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename, ++ pl_rename_cbk, oldloc, newloc, xdata); ++ if (error > 0) { ++ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ } + +- STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + +@@ -4273,8 +4390,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) + { ++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); ++ + PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent, + postparent, xdata); ++ + return 0; + } + +@@ -4282,9 +4402,14 @@ int32_t + pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) + { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); +- STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ int32_t error; ++ ++ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink, ++ pl_unlink_cbk, loc, xflag, xdata); ++ if (error > 0) { ++ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL); ++ } ++ + return 0; + } + +@@ -4351,8 +4476,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) + { ++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); ++ + PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent, + postparent, xdata); ++ + return 0; + } + +@@ -4360,9 +4488,14 @@ int + pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) + { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); +- STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); ++ int32_t error; ++ ++ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir, ++ pl_rmdir_cbk, loc, xflags, xdata); ++ if (error > 0) { ++ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL); ++ } ++ + return 0; + } + +@@ -4392,6 +4525,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) + { ++ pl_inode_t *pl_inode = (pl_inode_t *)cookie; ++ ++ if (op_ret >= 0) { ++ pthread_mutex_lock(&pl_inode->mutex); ++ ++ /* TODO: can happen pl_inode->links == 0 ? */ ++ if (pl_inode->links >= 0) { ++ pl_inode->links++; ++ } ++ ++ pthread_mutex_unlock(&pl_inode->mutex); ++ } ++ + PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +@@ -4401,9 +4547,18 @@ int + pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { ++ pl_inode_t *pl_inode; ++ ++ pl_inode = pl_inode_get(this, oldloc->inode, NULL); ++ if (pl_inode == NULL) { ++ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, ++ NULL); ++ return 0; ++ } ++ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); +- STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); ++ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0456-add-clean-local-after-grant-lock.patch b/SOURCES/0456-add-clean-local-after-grant-lock.patch new file mode 100644 index 0000000..6b8210b --- /dev/null +++ b/SOURCES/0456-add-clean-local-after-grant-lock.patch @@ -0,0 +1,74 @@ +From c38b38249fdf951565f6501ce8e9a4d01142d43e Mon Sep 17 00:00:00 2001 +From: l17zhou <cynthia.zhou@nokia-sbell.com> +Date: Tue, 3 Dec 2019 07:43:35 +0200 +Subject: [PATCH 456/456] add clean local after grant lock + +found by flock test, without correct ref number of fd, +lock will not be correctly released. 
+ +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23794 +> Fixes: bz#1779089 +> Change-Id: I3e466b17c852eb219c8778e43af8ad670a8449cc +> Signed-off-by: l17zhou <cynthia.zhou@nokia-sbell.com> + +BUG: 1854165 +Change-Id: I3e466b17c852eb219c8778e43af8ad670a8449cc +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/206673 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/locks/src/common.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c +index 0c52853..cddbfa6 100644 +--- a/xlators/features/locks/src/common.c ++++ b/xlators/features/locks/src/common.c +@@ -961,7 +961,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; +- ++ pl_local_t *local = NULL; + INIT_LIST_HEAD(&granted_list); + + pthread_mutex_lock(&pl_inode->mutex); +@@ -976,9 +976,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) + + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, NULL); +- +- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); +- ++ local = lock->frame->local; ++ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, ++ &lock->user_flock, NULL); + __destroy_lock(lock); + } + +@@ -997,6 +997,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; ++ pl_local_t *local = NULL; + + int ret = -1; + +@@ -1024,9 +1025,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, + + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, NULL); +- +- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); +- ++ local = lock->frame->local; ++ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, ++ &lock->user_flock, NULL); + __destroy_lock(lock); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch b/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch new file mode 100644 index 0000000..be9202a --- /dev/null +++ b/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch @@ -0,0 +1,409 @@ +From 3e8b3a2c2c6f83635486035fc8040c87d89813d2 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Thu, 2 Jul 2020 18:08:52 +0200 +Subject: [PATCH 457/465] cluster/ec: Improve detection of new heals + +When EC successfully healed a directory it assumed that maybe other +entries inside that directory could have been created, which could +require additional heal cycles. For this reason, when the heal happened +as part of one index heal iteration, it triggered a new iteration. + +The problem happened when the directory was healthy, so no new entries +were added, but its index entry was not removed for some reason. In +this case self-heal started and endless loop healing the same directory +continuously, cause high CPU utilization. + +This patch improves detection of new files added to the heal index so +that a new index heal iteration is only triggered if there is new work +to do. 
+ +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24665/ +>Fixes: #1354 + +Change-Id: I2355742b85fbfa6de758bccc5d2e1a283c82b53f +BUG: 1852736 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208041 +Tested-by: Ashish Pandey <aspandey@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 2 +- + xlators/cluster/ec/src/ec-heal.c | 58 +++++++++++++++++++++++----------- + xlators/cluster/ec/src/ec-heald.c | 24 ++++++++++---- + xlators/cluster/ec/src/ec-inode-read.c | 27 ++++++++++++++-- + xlators/cluster/ec/src/ec-types.h | 4 +-- + xlators/cluster/ec/src/ec.h | 1 + + 6 files changed, 86 insertions(+), 30 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index e580bfb..e3f8769 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx) + int32_t + ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good, +- uintptr_t bad, dict_t *xdata) ++ uintptr_t bad, uint32_t pending, dict_t *xdata) + { + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL, +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 06a7016..e2de879 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -72,6 +72,7 @@ struct ec_name_data { + char *name; + inode_t *parent; + default_args_cbk_t *replies; ++ uint32_t heal_pending; + }; + + static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL}; +@@ -996,6 +997,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia, + ret = -ENOTCONN; + goto out; + } ++ + out: + if (xattr) + dict_unref(xattr); +@@ -1164,6 +1166,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + dict_t *xdata = NULL; + char *linkname = NULL; + ec_config_t config; ++ + /* There should be just one gfid key */ + EC_REPLIES_ALLOC(replies, ec->nodes); + if (gfid_db->count != 1) { +@@ -1408,6 +1411,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + + ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent, + participants); ++ if (ret >= 0) { ++ /* If ec_create_name() succeeded we return 1 to indicate that a new ++ * file has been created and it will need to be healed. 
*/ ++ ret = 1; ++ } + out: + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); +@@ -1485,18 +1493,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name, + name_on); + +- if (ret < 0) ++ if (ret < 0) { + memset(name_on, 0, ec->nodes); ++ } else { ++ name_data->heal_pending += ret; ++ } + + for (i = 0; i < ec->nodes; i++) + if (name_data->participants[i] && !name_on[i]) + name_data->failed_on[i] = 1; ++ + return 0; + } + + int + ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, +- unsigned char *participants) ++ unsigned char *participants, uint32_t *pending) + { + int i = 0; + int j = 0; +@@ -1509,7 +1521,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + name_data.frame = frame; + name_data.participants = participants; + name_data.failed_on = alloca0(ec->nodes); +- ; ++ name_data.heal_pending = 0; + + for (i = 0; i < ec->nodes; i++) { + if (!participants[i]) +@@ -1528,6 +1540,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + break; + } + } ++ *pending += name_data.heal_pending; ++ + loc_wipe(&loc); + return ret; + } +@@ -1535,7 +1549,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + int + __ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *heal_on, unsigned char *sources, +- unsigned char *healed_sinks) ++ unsigned char *healed_sinks, uint32_t *pending) + { + unsigned char *locked_on = NULL; + unsigned char *output = NULL; +@@ -1580,7 +1594,7 @@ unlock: + if (sources[i] || healed_sinks[i]) + participants[i] = 1; + } +- ret = ec_heal_names(frame, ec, inode, participants); ++ ret = ec_heal_names(frame, ec, inode, participants, pending); + + if (EC_COUNT(participants, ec->nodes) <= ec->fragments) + goto out; +@@ -1601,7 +1615,8 @@ out: + + int + ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, +- unsigned char *sources, unsigned char *healed_sinks) ++ unsigned char *sources, unsigned char *healed_sinks, ++ uint32_t *pending) + { + unsigned char *locked_on = NULL; + unsigned char *up_subvols = NULL; +@@ -1632,7 +1647,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, + goto unlock; + } + ret = __ec_heal_entry(frame, ec, inode, locked_on, sources, +- healed_sinks); ++ healed_sinks, pending); + } + unlock: + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, +@@ -1953,14 +1968,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0, + (heal->good | heal->bad), heal->good, heal->bad, +- NULL); ++ 0, NULL); + } + + return EC_STATE_END; + case -EC_STATE_REPORT: + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0, +- 0, 0, NULL); ++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1, ++ fop->error, 0, 0, 0, 0, NULL); + } + + return EC_STATE_END; +@@ -1997,14 +2012,15 @@ out: + if (fop != NULL) { + ec_manager(fop, error); + } else { +- func(frame, NULL, this, -1, error, 0, 0, 0, NULL); ++ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL); + } + } + + int32_t + ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uintptr_t mask, +- uintptr_t good, uintptr_t bad, dict_t *xdata) ++ uintptr_t good, uintptr_t bad, uint32_t pending, ++ dict_t *xdata) + { + ec_fop_data_t *fop = cookie; + ec_heal_t *heal = fop->data; +@@ -2489,6 +2505,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t 
*loc, int32_t partial) + intptr_t mbad = 0; + intptr_t good = 0; + intptr_t bad = 0; ++ uint32_t pending = 0; + ec_fop_data_t *fop = data; + gf_boolean_t blocking = _gf_false; + ec_heal_need_t need_heal = EC_HEAL_NONEED; +@@ -2524,7 +2541,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + if (loc->name && strlen(loc->name)) { + ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name, + participants); +- if (ret == 0) { ++ if (ret >= 0) { + gf_msg_debug(this->name, 0, + "%s: name heal " + "successful on %" PRIXPTR, +@@ -2542,7 +2559,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + + /* Mount triggers heal only when it detects that it must need heal, shd + * triggers heals periodically which need not be thorough*/ +- if (ec->shd.iamshd) { ++ if (ec->shd.iamshd && (ret <= 0)) { + ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, + &need_heal); + +@@ -2552,13 +2569,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + goto out; + } + } ++ + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + if (IA_ISREG(loc->inode->ia_type)) { + ret = ec_heal_data(frame, ec, blocking, loc->inode, sources, + healed_sinks); + } else if (IA_ISDIR(loc->inode->ia_type) && !partial) { +- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks); ++ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks, ++ &pending); + } else { + ret = 0; + memcpy(sources, participants, ec->nodes); +@@ -2588,10 +2607,11 @@ out: + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, + ec_char_array_to_mask(participants, ec->nodes), +- mgood & good, mbad & bad, NULL); ++ mgood & good, mbad & bad, pending, NULL); + } + if (frame) + STACK_DESTROY(frame->root); ++ + return; + } + +@@ -2638,8 +2658,8 @@ void + ec_heal_fail(ec_t *ec, ec_fop_data_t *fop) + { + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0, +- NULL); ++ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0, ++ 0, 0, NULL); + } + ec_fop_data_release(fop); + } +@@ -2826,7 +2846,7 @@ fail: + if (fop) + ec_fop_data_release(fop); + if (func) +- func(frame, NULL, this, -1, err, 0, 0, 0, NULL); ++ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL); + } + + int +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index cba111a..4f4b6aa 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -156,15 +156,27 @@ int + ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, + gf_boolean_t full) + { ++ dict_t *xdata = NULL; ++ uint32_t count; + int32_t ret; + +- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL); +- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) { ++ ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata); ++ if (!full && (loc->inode->ia_type == IA_IFDIR)) { + /* If we have just healed a directory, it's possible that +- * other index entries have appeared to be healed. We put a +- * mark so that we can check it later and restart a scan +- * without delay. */ +- healer->rerun = _gf_true; ++ * other index entries have appeared to be healed. */ ++ if ((xdata != NULL) && ++ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) && ++ (count > 0)) { ++ /* Force a rerun of the index healer. 
*/ ++ gf_msg_debug(healer->this->name, 0, "%d more entries to heal", ++ count); ++ ++ healer->rerun = _gf_true; ++ } ++ } ++ ++ if (xdata != NULL) { ++ dict_unref(xdata); + } + + return ret; +diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c +index f87a94a..e82e8f6 100644 +--- a/xlators/cluster/ec/src/ec-inode-read.c ++++ b/xlators/cluster/ec/src/ec-inode-read.c +@@ -393,7 +393,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state) + int32_t + ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + int32_t op_ret, int32_t op_errno, uintptr_t mask, +- uintptr_t good, uintptr_t bad, dict_t *xdata) ++ uintptr_t good, uintptr_t bad, uint32_t pending, ++ dict_t *xdata) + { + ec_fop_data_t *fop = cookie; + fop_getxattr_cbk_t func = fop->data; +@@ -402,6 +403,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + char *str; + char bin1[65], bin2[65]; + ++ /* We try to return the 'pending' information in xdata, but if this cannot ++ * be set, we will ignore it silently. We prefer to report the success or ++ * failure of the heal itself. */ ++ if (xdata == NULL) { ++ xdata = dict_new(); ++ } else { ++ dict_ref(xdata); ++ } ++ if (xdata != NULL) { ++ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) { ++ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc ++ * enforces to check the result in this case. However we don't ++ * really care if it succeeded or not. We'll just do the same. ++ * ++ * This empty 'if' avoids the warning, and it will be removed by ++ * the optimizer. */ ++ } ++ } ++ + if (op_ret >= 0) { + dict = dict_new(); + if (dict == NULL) { +@@ -435,11 +455,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + } + + out: +- func(frame, NULL, xl, op_ret, op_errno, dict, NULL); ++ func(frame, NULL, xl, op_ret, op_errno, dict, xdata); + + if (dict != NULL) { + dict_unref(dict); + } ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } + + return 0; + } +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 34a9768..f15429d 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -186,10 +186,10 @@ struct _ec_inode { + + typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, + int32_t, uintptr_t, uintptr_t, uintptr_t, +- dict_t *); ++ uint32_t, dict_t *); + typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, + int32_t, uintptr_t, uintptr_t, uintptr_t, +- dict_t *); ++ uint32_t, dict_t *); + + union _ec_cbk { + fop_access_cbk_t access; +diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h +index 1b210d9..6f6de6d 100644 +--- a/xlators/cluster/ec/src/ec.h ++++ b/xlators/cluster/ec/src/ec.h +@@ -18,6 +18,7 @@ + #define EC_XATTR_SIZE EC_XATTR_PREFIX "size" + #define EC_XATTR_VERSION EC_XATTR_PREFIX "version" + #define EC_XATTR_HEAL EC_XATTR_PREFIX "heal" ++#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new" + #define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty" + #define EC_STRIPE_CACHE_MAX_SIZE 10 + #define EC_VERSION_SIZE 2 +-- +1.8.3.1 + diff --git a/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch b/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch new file mode 100644 index 0000000..b7b9f04 --- /dev/null +++ b/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch @@ -0,0 +1,182 @@ +From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001 +From: 
Raghavendra Bhat <raghavendra@redhat.com> +Date: Mon, 15 Apr 2019 14:09:34 -0400 +Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after + cancelling the signer thread + +When bit-rot feature is disabled, the signer thread from the bit-rot-stub +xlator (the thread which performs the setxattr of the signature on to the +disk) is cancelled. But, if the cancelled signer thread had already held +the mutex (&priv->lock) which it uses to monitor the queue of files to +be signed, then the mutex is never released. This creates problems in +future when the feature is enabled again. Both the new instance of the +signer thread and the regular thread which enqueues the files to be +signed will be blocked on this mutex. + +So, as part of cancelling the signer thread, unlock the mutex associated +with it as well using pthread_cleanup_push and pthread_cleanup_pop. + +Upstream patch: + > patch: https://review.gluster.org/22572 + > fixes: #bz1700078 + > Change-Id: Ib761910caed90b268e69794ddeb108165487af40 + +Change-Id: Ib761910caed90b268e69794ddeb108165487af40 +BUG: 1851424 +Signed-off-by: Raghavendra M <raghavendra@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208304 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bit-rot/src/stub/bit-rot-stub-messages.h | 4 +- + xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 +++++++++++++++++++--- + 2 files changed, 59 insertions(+), 7 deletions(-) + +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +index 7f07f29..155802b 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, + BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL, + BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL, + BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED, +- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL); ++ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL, ++ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD, ++ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL); + + #endif /* !_BITROT_STUB_MESSAGES_H_ */ +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c +index 3f48a4b..c3f81bc 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c +@@ -26,6 +26,15 @@ + + #define BR_STUB_REQUEST_COOKIE 0x1 + ++void ++br_stub_lock_cleaner(void *arg) ++{ ++ pthread_mutex_t *clean_mutex = arg; ++ ++ pthread_mutex_unlock(clean_mutex); ++ return; ++} ++ + void * + br_stub_signth(void *); + +@@ -166,8 +175,11 @@ init(xlator_t *this) + + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); +- if (ret != 0) ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED, ++ "failed to create the new thread for signer"); + goto cleanup_lock; ++ } + + ret = br_stub_bad_object_container_init(this, priv); + if (ret) { +@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options) + priv = this->private; + + GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err); +- if (priv->do_versioning) { ++ if (priv->do_versioning && !priv->signth) { + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); +- if (ret != 0) ++ if (ret 
!= 0) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ BRS_MSG_SPAWN_SIGN_THRD_FAILED, ++ "failed to create the new thread for signer"); + goto err; ++ } + + ret = br_stub_bad_object_container_init(this, priv); + if (ret) { +@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options) + gf_msg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, + "Could not cancel sign serializer thread"); ++ } else { ++ gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD, ++ "killed the signer thread"); ++ priv->signth = 0; + } +- priv->signth = 0; + } + + if (priv->container.thread) { +@@ -902,6 +921,24 @@ br_stub_signth(void *arg) + + THIS = this; + while (1) { ++ /* ++ * Disabling bit-rot feature leads to this particular thread ++ * getting cleaned up by reconfigure via a call to the function ++ * gf_thread_cleanup_xint (which in turn calls pthread_cancel ++ * and pthread_join). But, if this thread had held the mutex ++ * &priv->lock at the time of cancellation, then it leads to ++ * deadlock in future when bit-rot feature is enabled (which ++ * again spawns this thread which cant hold the lock as the ++ * mutex is still held by the previous instance of the thread ++ * which got killed). Also, the br_stub_handle_object_signature ++ * function which is called whenever file has to be signed ++ * also gets blocked as it too attempts to acquire &priv->lock. ++ * ++ * So, arrange for the lock to be unlocked as part of the ++ * cleanup of this thread using pthread_cleanup_push and ++ * pthread_cleanup_pop. ++ */ ++ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock); + pthread_mutex_lock(&priv->lock); + { + while (list_empty(&priv->squeue)) +@@ -912,6 +949,7 @@ br_stub_signth(void *arg) + list_del_init(&sigstub->list); + } + pthread_mutex_unlock(&priv->lock); ++ pthread_cleanup_pop(0); + + call_resume(sigstub->stub); + +@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd, + + priv = this->private; + +- if (frame->root->pid != GF_CLIENT_PID_BITD) ++ if (frame->root->pid != GF_CLIENT_PID_BITD) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID, ++ "PID %d from where signature request" ++ "came, does not belong to bit-rot daemon." ++ "Unwinding the fop", ++ frame->root->pid); + goto dofop; ++ } + + ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess); +- if (ret) ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL, ++ "failed to prepare the signature for %s. 
Unwinding the fop", ++ uuid_utoa(fd->inode->gfid)); + goto dofop; ++ } + if (fakesuccess) { + op_ret = op_errno = 0; + goto dofop; +@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + /* object signature request */ + ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign); + if (!ret) { ++ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s", ++ uuid_utoa(fd->inode->gfid)); + br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata); + goto done; + } +-- +1.8.3.1 + diff --git a/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch b/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch new file mode 100644 index 0000000..2c9b66e --- /dev/null +++ b/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch @@ -0,0 +1,181 @@ +From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat <raghavendra@redhat.com> +Date: Mon, 11 Mar 2019 12:16:50 -0400 +Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files + during oneshot crawl + +Currently bit-rot feature has an issue with disabling and reenabling it +on the same volume. Consider enabling bit-rot detection which goes on to +crawl and sign all the files present in the volume. Then some files are +modified and the bit-rot daemon goes on to sign the modified files with +the correct signature. Now, disable bit-rot feature. While, signing and +scrubbing are not happening, previous checksums of the files continue to +exist as extended attributes. Now, if some files with checksum xattrs get +modified, they are not signed with new signature as the feature is off. + +At this point, if the feature is enabled again, the bit rot daemon will +go and sign those files which does not have any bit-rot specific xattrs +(i.e. those files which were created after bit-rot was disabled). Whereas +the files with bit-rot xattrs wont get signed with proper new checksum. +At this point if scrubber runs, it finds the on disk checksum and the actual +checksum of the file to be different (because the file got modified) and +marks the file as corrupted. + +FIX: + +The fix is to unconditionally sign the files when the bit-rot daemon +comes up (instead of skipping the files with bit-rot xattrs). + +upstream fix: + > patch: https://review.gluster.org/#/c/glusterfs/+/22360/ + > fixes: #bz1700078 + > Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5 + +Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5 +BUG: 1851424 +Signed-off-by: Raghavendra M <raghavendra@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208305 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bitrot/bug-1700078.t | 87 +++++++++++++++++++++++++++++ + xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++- + 2 files changed, 101 insertions(+), 1 deletion(-) + create mode 100644 tests/bitrot/bug-1700078.t + +diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t +new file mode 100644 +index 0000000..f273742 +--- /dev/null ++++ b/tests/bitrot/bug-1700078.t +@@ -0,0 +1,87 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++cleanup; ++ ++## Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++ ++## Lets create and start the volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}1 ++TEST $CLI volume start $V0 ++ ++## Enable bitrot for volume $V0 ++TEST $CLI volume bitrot $V0 enable ++ ++## Turn off quick-read so that it wont cache the contents ++# of the file in lookup. For corrupted files, it might ++# end up in reads being served from the cache instead of ++# an error. ++TEST $CLI volume set $V0 performance.quick-read off ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub' ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location' ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location' ++ ++## Set expiry-timeout to 1 sec ++TEST $CLI volume set $V0 features.expiry-time 1 ++ ++##Mount $V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++ ++## Turn off quick-read xlator so that, the contents are not served from the ++# quick-read cache. ++TEST $CLI volume set $V0 performance.quick-read off ++ ++#Create sample file ++TEST `echo "1234" > $M0/FILE1` ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1" ++ ++##disable bitrot ++TEST $CLI volume bitrot $V0 disable ++ ++## modify the file ++TEST `echo "write" >> $M0/FILE1` ++ ++# unmount and remount when the file has to be accessed. ++# This is to ensure that, when the remount happens, ++# and the file is read, its contents are served from the ++# brick instead of cache. ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++##enable bitrot ++TEST $CLI volume bitrot $V0 enable ++ ++# expiry time is set to 1 second. Hence sleep for 2 seconds for the ++# oneshot crawler to finish its crawling and sign the file properly. ++sleep 2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub' ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location' ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location' ++ ++## Ondemand scrub ++TEST $CLI volume bitrot $V0 scrub ondemand ++ ++# the scrub ondemand CLI command, just ensures that ++# the scrubber has received the ondemand scrub directive ++# and started. sleep for 2 seconds for scrubber to finish ++# crawling and marking file(s) as bad (if if finds that ++# corruption has happened) which are filesystem operations. ++sleep 2 ++ ++TEST ! 
getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1 ++ ++##Mount $V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++ ++TEST cat $M0/FILE1 ++ ++cleanup; +diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c +index b8feef7..424c0d5 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.c ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c +@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + int32_t ret = -1; + inode_t *linked_inode = NULL; + gf_boolean_t need_signing = _gf_false; ++ gf_boolean_t need_reopen = _gf_true; + + GF_VALIDATE_OR_GOTO("bit-rot", subvol, out); + GF_VALIDATE_OR_GOTO("bit-rot", data, out); +@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + uuid_utoa(linked_inode->gfid)); + } else { + need_signing = br_check_object_need_sign(this, xattr, child); ++ ++ /* ++ * If we are here means, bitrot daemon has started. Is it just ++ * a simple restart of the daemon or is it started because the ++ * feature is enabled is something hard to determine. Hence, ++ * if need_signing is false (because bit-rot version and signature ++ * are present), then still go ahead and sign it. ++ */ ++ if (!need_signing) { ++ need_signing = _gf_true; ++ need_reopen = _gf_true; ++ } + } + + if (!need_signing) +@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, + "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path, + uuid_utoa(linked_inode->gfid), child->brick_path); +- br_trigger_sign(this, child, linked_inode, &loc, _gf_true); ++ br_trigger_sign(this, child, linked_inode, &loc, need_reopen); + + ret = 0; + +-- +1.8.3.1 + diff --git a/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch b/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch new file mode 100644 index 0000000..e31349a --- /dev/null +++ b/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch @@ -0,0 +1,152 @@ +From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001 +From: Ashish Pandey <aspandey@redhat.com> +Date: Thu, 23 Jul 2020 11:07:32 +0530 +Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop + folder + +Problem: +If a gfid is present in indices/xattrop folder while +the file/dir is actaully healthy and all the xattrs are healthy, +it causes lot of lookups by shd on an entry which does not need +to be healed. +This whole process eats up lot of CPU usage without doing meaningful +work. + +Solution: +Set trusted.ec.dirty xattr of the entry so that actual heal process +happens and at the end of it, during unset of dirty, gfid enrty from +indices/xattrop will be removed. 
+ +>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/ +>Fixes: #1385 + +Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b +BUG: 1785714 +Signed-off-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208591 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 73 ++++++++++++++++++++++++++++++++++++++- + xlators/cluster/ec/src/ec-types.h | 7 +++- + 2 files changed, 78 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index e2de879..7d25853 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2488,6 +2488,59 @@ out: + return ret; + } + ++int ++ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) ++{ ++ int i = 0; ++ int ret = 0; ++ dict_t **xattr = NULL; ++ loc_t loc = {0}; ++ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0}; ++ unsigned char *on = NULL; ++ default_args_cbk_t *replies = NULL; ++ dict_t *dict = NULL; ++ ++ /* Allocate the required memory */ ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, inode->gfid); ++ on = alloca0(ec->nodes); ++ EC_REPLIES_ALLOC(replies, ec->nodes); ++ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer); ++ if (!xattr) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ dict = dict_new(); ++ if (!dict) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < ec->nodes; i++) { ++ xattr[i] = dict; ++ on[i] = 1; ++ } ++ dirty_xattr[EC_METADATA_TXN] = hton64(1); ++ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, ++ (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); ++ if (ret < 0) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame, ++ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64, ++ xattr, NULL); ++out: ++ if (dict) { ++ dict_unref(dict); ++ } ++ if (xattr) { ++ GF_FREE(xattr); ++ } ++ cluster_replies_wipe(replies, ec->nodes); ++ loc_wipe(&loc); ++ return ret; ++} ++ + void + ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + { +@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, + &need_heal); + +- if (need_heal == EC_HEAL_NONEED) { ++ if (need_heal == EC_HEAL_PURGE_INDEX) { ++ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, ++ "Index entry needs to be purged for: %s ", ++ uuid_utoa(loc->gfid)); ++ /* We need to send xattrop to set dirty flag so that it can be ++ * healed and index entry could be removed. We need not to take lock ++ * on this entry to do so as we are just setting dirty flag which ++ * actually increases the trusted.ec.dirty count and does not set ++ * the new value. ++ * This will make sure that it is not interfering in other fops.*/ ++ ec_heal_set_dirty_without_lock(frame, ec, loc->inode); ++ } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); + goto out; +@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, + goto out; + } + } ++ /* If lock count is 0, all dirty flags are 0 and all the ++ * versions are macthing then why are we here. It looks ++ * like something went wrong while removing the index entries ++ * after completing a successful heal or fop. 
In this case ++ * we need to remove this index entry to avoid triggering heal ++ * in a loop and causing lookups again and again*/ ++ *need_heal = EC_HEAL_PURGE_INDEX; + } else { + for (i = 0; i < ec->nodes; i++) { + /* Since each lock can only increment the dirty +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index f15429d..700dc39 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t); + + enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX }; + +-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST }; ++enum _ec_heal_need { ++ EC_HEAL_NONEED, ++ EC_HEAL_MAYBE, ++ EC_HEAL_MUST, ++ EC_HEAL_PURGE_INDEX ++}; + + enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL }; + +-- +1.8.3.1 + diff --git a/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch b/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch new file mode 100644 index 0000000..098be5f --- /dev/null +++ b/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch @@ -0,0 +1,127 @@ +From d425ed54261d5bc19aa853854cc3b64647e3c897 Mon Sep 17 00:00:00 2001 +From: Aravinda Vishwanathapura <aravinda@kadalu.io> +Date: Sun, 12 Jul 2020 12:42:36 +0530 +Subject: [PATCH 461/465] geo-replication: Fix IPv6 parsing + +Brick paths in Volinfo used `:` as delimiter, Geo-rep uses split +based on `:` char. This will go wrong with IPv6. + +This patch handles the IPv6 case and handles the split properly. +Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24706 + >Fixes: #1366 + >Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d + >Signed-off-by: Aravinda Vishwanathapura <aravinda@kadalu.io> + +BUG: 1855966 +Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208610 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/master.py | 5 ++-- + geo-replication/syncdaemon/syncdutils.py | 43 +++++++++++++++++++++++++++++--- + 2 files changed, 43 insertions(+), 5 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 3f98337..08e98f8 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -26,7 +26,8 @@ from rconf import rconf + from syncdutils import Thread, GsyncdError, escape_space_newline + from syncdutils import unescape_space_newline, gauxpfx, escape + from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid +-from syncdutils import NoStimeAvailable, PartialHistoryAvailable ++from syncdutils import NoStimeAvailable, PartialHistoryAvailable, host_brick_split ++ + + URXTIME = (-1, 0) + +@@ -1466,7 +1467,7 @@ class GMasterChangelogMixin(GMasterCommon): + node = rconf.args.resource_remote + node_data = node.split("@") + node = node_data[-1] +- remote_node_ip = node.split(":")[0] ++ remote_node_ip, _ = host_brick_split(node) + self.status.set_slave_node(remote_node_ip) + + def changelogs_batch_process(self, changes): +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index 7560fa1..f43e13b 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -883,6 +883,19 @@ class Popen(subprocess.Popen): + self.errfail() + + ++def host_brick_split(value): ++ 
""" ++ IPv6 compatible way to split and get the host ++ and brick information. Example inputs: ++ node1.example.com:/exports/bricks/brick1/brick ++ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick ++ """ ++ parts = value.split(":") ++ brick = parts[-1] ++ hostparts = parts[0:-1] ++ return (":".join(hostparts), brick) ++ ++ + class Volinfo(object): + + def __init__(self, vol, host='localhost', prelude=[], master=True): +@@ -925,7 +938,7 @@ class Volinfo(object): + @memoize + def bricks(self): + def bparse(b): +- host, dirp = b.find("name").text.split(':', 2) ++ host, dirp = host_brick_split(b.find("name").text) + return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text} + return [bparse(b) for b in self.get('brick')] + +@@ -1001,6 +1014,16 @@ class VolinfoFromGconf(object): + def is_hot(self, brickpath): + return False + ++ def is_uuid(self, value): ++ try: ++ uuid.UUID(value) ++ return True ++ except ValueError: ++ return False ++ ++ def possible_path(self, value): ++ return "/" in value ++ + @property + @memoize + def bricks(self): +@@ -1014,8 +1037,22 @@ class VolinfoFromGconf(object): + out = [] + for b in bricks_data: + parts = b.split(":") +- bpath = parts[2] if len(parts) == 3 else "" +- out.append({"host": parts[1], "dir": bpath, "uuid": parts[0]}) ++ b_uuid = None ++ if self.is_uuid(parts[0]): ++ b_uuid = parts[0] ++ # Set all parts except first ++ parts = parts[1:] ++ ++ if self.possible_path(parts[-1]): ++ bpath = parts[-1] ++ # Set all parts except last ++ parts = parts[0:-1] ++ ++ out.append({ ++ "host": ":".join(parts), # if remaining parts are IPv6 name ++ "dir": bpath, ++ "uuid": b_uuid ++ }) + + return out + +-- +1.8.3.1 + diff --git a/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch b/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch new file mode 100644 index 0000000..aa5fd21 --- /dev/null +++ b/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch @@ -0,0 +1,43 @@ +From f027734165374979bd0bff8ea059dfaadca85e07 Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Thu, 2 Jul 2020 02:07:56 +0530 +Subject: [PATCH 462/465] Issue with gf_fill_iatt_for_dirent + +In "gf_fill_iatt_for_dirent()", while calculating inode_path for loc, +the inode should be of parent's. Instead it is loc.inode which results in error + and eventually lookup/readdirp fails. + +This patch fixes the same. 
+ +This is backport of below mainstream fix : + +> Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5 +> Fixes: #1351 +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24661/ + +Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5 +BUG: 1853189 +Signed-off-by: Soumya Koduri <skoduri@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208691 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/gf-dirent.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c +index f289723..3fa67f2 100644 +--- a/libglusterfs/src/gf-dirent.c ++++ b/libglusterfs/src/gf-dirent.c +@@ -277,7 +277,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol) + gf_uuid_copy(loc.pargfid, parent->gfid); + loc.name = entry->d_name; + loc.parent = inode_ref(parent); +- ret = inode_path(loc.inode, entry->d_name, &path); ++ ret = inode_path(loc.parent, entry->d_name, &path); + loc.path = path; + if (ret < 0) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch b/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch new file mode 100644 index 0000000..b47cdd1 --- /dev/null +++ b/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch @@ -0,0 +1,87 @@ +From 7d87933f648092ae55d57a96fd06e3df975d764c Mon Sep 17 00:00:00 2001 +From: Ashish Pandey <aspandey@redhat.com> +Date: Tue, 18 Aug 2020 10:33:48 +0530 +Subject: [PATCH 463/465] cluster/ec: Change handling of heal failure to avoid + crash + +Problem: +ec_getxattr_heal_cbk was called with NULL as second argument +in case heal was failing. +This function was dereferencing "cookie" argument which caused crash. + +Solution: +Cookie is changed to carry the value that was supposed to be +stored in fop->data, so even in the case when fop is NULL in error +case, there won't be any NULL dereference. + +Thanks to Xavi for the suggestion about the fix. + +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23050/ +>fixes: bz#1729085 + +Change-Id: I0798000d5cadb17c3c2fbfa1baf77033ffc2bb8c +BUG: 1852736 +Reviewed-on: https://code.engineering.redhat.com/gerrit/209012 +Tested-by: Ashish Pandey <aspandey@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 11 ++++++----- + xlators/cluster/ec/src/ec-inode-read.c | 4 ++-- + 2 files changed, 8 insertions(+), 7 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 7d25853..6e6948b 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -1966,7 +1966,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) + + case EC_STATE_REPORT: + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0, ++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0, + (heal->good | heal->bad), heal->good, heal->bad, + 0, NULL); + } +@@ -2022,10 +2022,11 @@ ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this, + uintptr_t good, uintptr_t bad, uint32_t pending, + dict_t *xdata) + { +- ec_fop_data_t *fop = cookie; +- ec_heal_t *heal = fop->data; ++ ec_heal_t *heal = cookie; + +- fop->heal = NULL; ++ if (heal->fop) { ++ heal->fop->heal = NULL; ++ } + heal->fop = NULL; + heal->error = op_ret < 0 ? 
op_errno : 0; + syncbarrier_wake(heal->data); +@@ -2669,7 +2670,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + out: + ec_reset_entry_healing(fop); + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, ++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno, + ec_char_array_to_mask(participants, ec->nodes), + mgood & good, mbad & bad, pending, NULL); + } +diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c +index e82e8f6..c50d0ad 100644 +--- a/xlators/cluster/ec/src/ec-inode-read.c ++++ b/xlators/cluster/ec/src/ec-inode-read.c +@@ -396,8 +396,8 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + uintptr_t good, uintptr_t bad, uint32_t pending, + dict_t *xdata) + { +- ec_fop_data_t *fop = cookie; +- fop_getxattr_cbk_t func = fop->data; ++ fop_getxattr_cbk_t func = cookie; ++ + ec_t *ec = xl->private; + dict_t *dict = NULL; + char *str; +-- +1.8.3.1 + diff --git a/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch b/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch new file mode 100644 index 0000000..d98e33d --- /dev/null +++ b/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch @@ -0,0 +1,102 @@ +From 7c51addf7912a94320e6b148bd66f2dbf274c533 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 11 Mar 2019 14:04:39 +0530 +Subject: [PATCH 464/465] storage/posix: Remove nr_files usage + +nr_files is supposed to represent the number of files opened in posix. +Present logic doesn't seem to handle anon-fds because of which the +counts would always be wrong. + +I don't remember anyone using this value in debugging any problem probably +because we always have 'ls -l /proc/<pid>/fd' which not only prints the +fds that are active but also prints their paths. It also handles directories +and anon-fds which actually opened the file. So removing this code +instead of fixing the buggy logic to have the nr_files. 
+ +> fixes bz#1688106 +> Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +> (Cherry pick from commit f5987d38f216a3142dfe45f03bf66ff4827d9b55) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22333/) + +Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040 +BUG: 1851989 +Signed-off-by: Mohit Agrawal<moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/209468 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/storage/posix/src/posix-common.c | 2 -- + xlators/storage/posix/src/posix-entry-ops.c | 2 -- + xlators/storage/posix/src/posix-inode-fd-ops.c | 2 -- + xlators/storage/posix/src/posix.h | 1 - + 4 files changed, 7 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index ac53796..b317627 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -128,7 +128,6 @@ posix_priv(xlator_t *this) + gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value)); + gf_proc_dump_write("max_write", "%" PRId64, + GF_ATOMIC_GET(priv->write_value)); +- gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files)); + + return 0; + } +@@ -815,7 +814,6 @@ posix_init(xlator_t *this) + } + + LOCK_INIT(&_private->lock); +- GF_ATOMIC_INIT(_private->nr_files, 0); + GF_ATOMIC_INIT(_private->read_value, 0); + GF_ATOMIC_INIT(_private->write_value, 0); + +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index 65650b3..b3a5381 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -2243,8 +2243,6 @@ fill_stat: + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", real_path, fd); + +- GF_ATOMIC_INC(priv->nr_files); +- + op_ret = 0; + + out: +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index d135d8b..81f4a6b 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1605,7 +1605,6 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", real_path, fd); + +- GF_ATOMIC_INC(priv->nr_files); + op_ret = 0; + + out: +@@ -2526,7 +2525,6 @@ posix_release(xlator_t *this, fd_t *fd) + if (!priv) + goto out; + +- GF_ATOMIC_DEC(priv->nr_files); + out: + return 0; + } +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 61495a7..124dbb4 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -154,7 +154,6 @@ struct posix_private { + + gf_atomic_t read_value; /* Total read, from init */ + gf_atomic_t write_value; /* Total write, from init */ +- gf_atomic_t nr_files; + /* + In some cases, two exported volumes may reside on the same + partition on the server. 
Sending statvfs info for both +-- +1.8.3.1 + diff --git a/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch b/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch new file mode 100644 index 0000000..fc22456 --- /dev/null +++ b/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch @@ -0,0 +1,384 @@ +From 143b93b230b429cc712353243ed794b68494c040 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Mon, 27 Jul 2020 18:08:00 +0530 +Subject: [PATCH 465/465] posix: Implement a janitor thread to close fd + +Problem: In the commit fb20713b380e1df8d7f9e9df96563be2f9144fd6 we use + syntask to close fd but we have found the patch is reducing the + performance + +Solution: Use janitor thread to close fd's and save the pfd ctx into + ctx janitor list and also save the posix_xlator into pfd object to + avoid the race condition during cleanup in brick_mux environment + +> Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092 +> Fixes: #1396 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24755/) +> (Cherry pick from commit 41b9616435cbdf671805856e487e373060c9455b + +Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092 +BUG: 1851989 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/209448 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfsd/src/glusterfsd.c | 4 ++ + libglusterfs/src/glusterfs/glusterfs.h | 7 ++ + rpc/rpc-lib/src/rpcsvc.c | 6 -- + xlators/storage/posix/src/posix-common.c | 34 +++++++++- + xlators/storage/posix/src/posix-helpers.c | 93 ++++++++++++++++++++++++++ + xlators/storage/posix/src/posix-inode-fd-ops.c | 33 ++++----- + xlators/storage/posix/src/posix.h | 7 ++ + 7 files changed, 161 insertions(+), 23 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 9821180..955bf1d 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1839,6 +1839,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) + + INIT_LIST_HEAD(&cmd_args->xlator_options); + INIT_LIST_HEAD(&cmd_args->volfile_servers); ++ ctx->pxl_count = 0; ++ pthread_mutex_init(&ctx->fd_lock, NULL); ++ pthread_cond_init(&ctx->fd_cond, NULL); ++ INIT_LIST_HEAD(&ctx->janitor_fds); + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 495a4d7..bf6a987 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -733,6 +733,13 @@ struct _glusterfs_ctx { + } stats; + + struct list_head volfile_list; ++ /* Add members to manage janitor threads for cleanup fd */ ++ struct list_head janitor_fds; ++ pthread_cond_t fd_cond; ++ pthread_mutex_t fd_lock; ++ pthread_t janitor; ++ /* The variable is use to save total posix xlator count */ ++ uint32_t pxl_count; + + char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ + }; +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 23ca1fd..3f184bf 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -375,12 +375,6 @@ rpcsvc_program_actor(rpcsvc_request_t *req) + + req->ownthread = program->ownthread; + req->synctask = program->synctask; +- if (((req->procnum == GFS3_OP_RELEASE) || +- (req->procnum == GFS3_OP_RELEASEDIR)) && +- (program->prognum == 
GLUSTER_FOP_PROGRAM)) { +- req->ownthread = _gf_false; +- req->synctask = _gf_true; +- } + + err = SUCCESS; + gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s", +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index b317627..c5a43a1 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -150,6 +150,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + struct timespec sleep_till = { + 0, + }; ++ glusterfs_ctx_t *ctx = this->ctx; + + switch (event) { + case GF_EVENT_PARENT_UP: { +@@ -160,8 +161,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + case GF_EVENT_PARENT_DOWN: { + if (!victim->cleanup_starting) + break; +- gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", +- victim->name); + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); +@@ -187,6 +186,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + GF_FREE(priv->janitor); + } + priv->janitor = NULL; ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ while (priv->rel_fdcount > 0) { ++ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", ++ victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); + } break; + default: +@@ -1038,7 +1047,13 @@ posix_init(xlator_t *this) + pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); ++ pthread_cond_init(&_private->fd_cond, NULL); + INIT_LIST_HEAD(&_private->fsyncs); ++ _private->rel_fdcount = 0; ++ ret = posix_spawn_ctx_janitor_thread(this); ++ if (ret) ++ goto out; ++ + ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { +@@ -1133,6 +1148,8 @@ posix_fini(xlator_t *this) + { + struct posix_private *priv = this->private; + gf_boolean_t health_check = _gf_false; ++ glusterfs_ctx_t *ctx = this->ctx; ++ uint32_t count; + int ret = 0; + + if (!priv) +@@ -1166,6 +1183,19 @@ posix_fini(xlator_t *this) + priv->janitor = NULL; + } + ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ count = --ctx->pxl_count; ++ if (count == 0) { ++ pthread_cond_signal(&ctx->fd_cond); ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ if (count == 0) { ++ pthread_join(ctx->janitor, NULL); ++ } ++ + if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 39dbcce..73a44be 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1582,6 +1582,99 @@ unlock: + return; + } + ++static struct posix_fd * ++janitor_get_next_fd(glusterfs_ctx_t *ctx) ++{ ++ struct posix_fd *pfd = NULL; ++ ++ while (list_empty(&ctx->janitor_fds)) { ++ if (ctx->pxl_count == 0) { ++ return NULL; ++ } ++ ++ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); ++ } ++ ++ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); ++ list_del_init(&pfd->list); ++ ++ return pfd; ++} ++ ++static void ++posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) ++{ ++ THIS = xl; ++ ++ if (pfd->dir == NULL) { ++ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); ++ sys_close(pfd->fd); ++ } else { ++ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); ++ sys_closedir(pfd->dir); 
++ } ++ ++ GF_FREE(pfd); ++} ++ ++static void * ++posix_ctx_janitor_thread_proc(void *data) ++{ ++ xlator_t *xl; ++ struct posix_fd *pfd; ++ glusterfs_ctx_t *ctx = NULL; ++ struct posix_private *priv_fd; ++ ++ ctx = data; ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ ++ while ((pfd = janitor_get_next_fd(ctx)) != NULL) { ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ xl = pfd->xl; ++ posix_close_pfd(xl, pfd); ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ ++ priv_fd = xl->private; ++ priv_fd->rel_fdcount--; ++ if (!priv_fd->rel_fdcount) ++ pthread_cond_signal(&priv_fd->fd_cond); ++ } ++ ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ return NULL; ++} ++ ++int ++posix_spawn_ctx_janitor_thread(xlator_t *this) ++{ ++ int ret = 0; ++ glusterfs_ctx_t *ctx = NULL; ++ ++ ctx = this->ctx; ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ if (ctx->pxl_count++ == 0) { ++ ret = gf_thread_create(&ctx->janitor, NULL, ++ posix_ctx_janitor_thread_proc, ctx, ++ "posixctxjan"); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, ++ "spawning janitor thread failed"); ++ ctx->pxl_count--; ++ } ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ return ret; ++} ++ + static int + is_fresh_file(int64_t ctime_sec) + { +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 81f4a6b..21119ea 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1352,6 +1352,22 @@ out: + return 0; + } + ++static void ++posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) ++{ ++ glusterfs_ctx_t *ctx = this->ctx; ++ struct posix_private *priv = this->private; ++ ++ pfd->xl = this; ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ list_add_tail(&pfd->list, &ctx->janitor_fds); ++ priv->rel_fdcount++; ++ pthread_cond_signal(&ctx->fd_cond); ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++} ++ + int32_t + posix_releasedir(xlator_t *this, fd_t *fd) + { +@@ -1374,11 +1390,7 @@ posix_releasedir(xlator_t *this, fd_t *fd) + "pfd->dir is NULL for fd=%p", fd); + goto out; + } +- +- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); +- +- sys_closedir(pfd->dir); +- GF_FREE(pfd); ++ posix_add_fd_to_cleanup(this, pfd); + + out: + return 0; +@@ -2494,7 +2506,6 @@ out: + int32_t + posix_release(xlator_t *this, fd_t *fd) + { +- struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; +@@ -2502,8 +2513,6 @@ posix_release(xlator_t *this, fd_t *fd) + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + +- priv = this->private; +- + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, +@@ -2517,13 +2526,7 @@ posix_release(xlator_t *this, fd_t *fd) + "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); + } + +- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); +- +- sys_close(pfd->fd); +- GF_FREE(pfd); +- +- if (!priv) +- goto out; ++ posix_add_fd_to_cleanup(this, pfd); + + out: + return 0; +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 124dbb4..07f367b 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -134,6 +134,8 @@ struct posix_fd { + off_t dir_eof; /* offset at dir EOF */ + int odirect; + struct list_head list; /* to add to the janitor list */ ++ xlator_t *xl; ++ char _pad[4]; /* manual padding */ + }; + + struct posix_private { +@@ -204,6 +206,7 @@ struct 
posix_private { + pthread_cond_t fsync_cond; + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; ++ pthread_cond_t fd_cond; + int fsync_queue_count; + + enum { +@@ -259,6 +262,7 @@ struct posix_private { + gf_boolean_t fips_mode_rchecksum; + gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; ++ uint32_t rel_fdcount; + }; + + typedef struct { +@@ -665,6 +669,9 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, + int + posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + ++int ++posix_spawn_ctx_janitor_thread(xlator_t *this); ++ + void + posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +-- +1.8.3.1 + diff --git a/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch b/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch new file mode 100644 index 0000000..1dc9f57 --- /dev/null +++ b/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch @@ -0,0 +1,68 @@ +From b603170ae5f583037b8177a9d19e56c7821edf0b Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Tue, 25 Aug 2020 04:19:54 +0530 +Subject: [PATCH 466/466] cluster/ec: Change stale index handling + +Problem: +Earlier approach is setting dirty bit which requires extra heal + +Fix: +Send zero-xattrop which deletes stale index without any need +for extra heal. + + > Fixes: #1385 + > Upstream-patch: https://review.gluster.org/c/glusterfs/+/24911/ + +BUG: 1785714 +Change-Id: I7e97a1d8b5516f7be47cae55d0e56b14332b6cae +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/209904 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Tested-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 6e6948b..06bafa5 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2490,7 +2490,7 @@ out: + } + + int +-ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) ++ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode) + { + int i = 0; + int ret = 0; +@@ -2520,7 +2520,6 @@ ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) + xattr[i] = dict; + on[i] = 1; + } +- dirty_xattr[EC_METADATA_TXN] = hton64(1); + ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, + (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); + if (ret < 0) { +@@ -2621,13 +2620,10 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, + "Index entry needs to be purged for: %s ", + uuid_utoa(loc->gfid)); +- /* We need to send xattrop to set dirty flag so that it can be +- * healed and index entry could be removed. We need not to take lock +- * on this entry to do so as we are just setting dirty flag which +- * actually increases the trusted.ec.dirty count and does not set +- * the new value. +- * This will make sure that it is not interfering in other fops.*/ +- ec_heal_set_dirty_without_lock(frame, ec, loc->inode); ++ /* We need to send zero-xattrop so that stale index entry could be ++ * removed. We need not take lock on this entry to do so as ++ * xattrop on a brick is atomic. 
*/ ++ ec_heal_purge_stale_index(frame, ec, loc->inode); + } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); +-- +1.8.3.1 + diff --git a/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch b/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch new file mode 100644 index 0000000..93bb140 --- /dev/null +++ b/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch @@ -0,0 +1,38 @@ +From 9176ee8f10c3c33f31d00261995ed27e8680934a Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Thu, 3 Sep 2020 11:46:38 +0000 +Subject: [PATCH 467/467] build: Added dependency for glusterfs-selinux + +> Fixes: #1442 +> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24876/ +> Change-Id: I7d79bceff329db4d525bc8a77ba7ffe41bf53c97 +> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> + +BUG: 1460657 + +Change-Id: I7d79bceff329db4d525bc8a77ba7ffe41bf53c97 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/210637 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 9def416..ed6bdf3 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -650,6 +650,9 @@ Summary: Clustered file-system server + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-cli%{?_isa} = %{version}-%{release} + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) ++Requires: glusterfs-selinux >= 0.1.0-2 ++%endif + # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse + Requires: %{name}-fuse%{?_isa} = %{version}-%{release} + # self-heal daemon, rebalance, nfs-server etc. are actually clients +-- +1.8.3.1 + diff --git a/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch b/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch new file mode 100644 index 0000000..b4b5ead --- /dev/null +++ b/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch @@ -0,0 +1,36 @@ +From 4b72f5e7704d480bac869f7a32ac891898bb994f Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Thu, 3 Sep 2020 14:56:27 +0000 +Subject: [PATCH 468/468] build: Update the glusterfs-selinux version + +Updated the glusterfs-selinux version according to +the downstream official version. 
+ +Label: DOWNSTREAM ONLY + +BUG: 1460657 + +Change-Id: I7b8bbf53f71f6f56103042950d8910f0cb63a685 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/210685 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + glusterfs.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ed6bdf3..30d7162 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -651,7 +651,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-cli%{?_isa} = %{version}-%{release} + Requires: %{name}-libs%{?_isa} = %{version}-%{release} + %if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) +-Requires: glusterfs-selinux >= 0.1.0-2 ++Requires: glusterfs-selinux >= 1.0-1 + %endif + # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse + Requires: %{name}-fuse%{?_isa} = %{version}-%{release} +-- +1.8.3.1 + diff --git a/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch b/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch new file mode 100644 index 0000000..0fadfc9 --- /dev/null +++ b/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch @@ -0,0 +1,33 @@ +From 6fed6cfcb26e6ed3c9640c5f889629315bbd83c2 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 31 Aug 2020 12:22:05 +0530 +Subject: [PATCH 469/469] cluster/ec: Don't trigger heal for stale index + + > Fixes: #1385 + > Upstream-patch: https://review.gluster.org/c/glusterfs/+/24930 + +BUG: 1785714 +Change-Id: I3609dd2e1f63c4bd6a19d528b935bf5b05443824 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/210731 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 06bafa5..f6376cd 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2624,6 +2624,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + * removed. We need not take lock on this entry to do so as + * xattrop on a brick is atomic. */ + ec_heal_purge_stale_index(frame, ec, loc->inode); ++ goto out; + } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); +-- +1.8.3.1 + diff --git a/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch b/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch new file mode 100644 index 0000000..e26d46a --- /dev/null +++ b/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch @@ -0,0 +1,63 @@ +From 8e427716f4e2855093b1a1a0e3a9ec79ebac7faf Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Thu, 10 Sep 2020 13:49:09 +0530 +Subject: [PATCH 470/473] extras/snap_scheduler: changes in + gluster-shared-storage mount path + +The patch https://review.gluster.org/#/c/glusterfs/+/24934/, changes mount point +of gluster_shared_storage from /var/run to /run to address the issue of symlink +at mount path in fstab. +NOTE: mount point /var/run is symlink to /run + +The required changes with respect to gluster_shared_storage mount path are +introduced with this patch in snap_scheduler. 
+ +>Fixes: #1476 +>Change-Id: I9ce88c2f624c6aa5982de04edfee2d0a9f160d62 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/24971/ +BUG: 1873469 +Change-Id: I9ce88c2f624c6aa5982de04edfee2d0a9f160d62 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/211391 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/snap_scheduler/gcron.py | 4 ++-- + extras/snap_scheduler/snap_scheduler.py | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py +index cc16310..0e4df77 100755 +--- a/extras/snap_scheduler/gcron.py ++++ b/extras/snap_scheduler/gcron.py +@@ -19,10 +19,10 @@ import logging.handlers + import fcntl + + +-GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" ++GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" + GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks" + GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag" +-LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/" ++LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/" + log = logging.getLogger("gcron-logger") + start_time = 0.0 + +diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py +index 5a29d41..e8fcc44 100755 +--- a/extras/snap_scheduler/snap_scheduler.py ++++ b/extras/snap_scheduler/snap_scheduler.py +@@ -67,7 +67,7 @@ except ImportError: + SCRIPT_NAME = "snap_scheduler" + scheduler_enabled = False + log = logging.getLogger(SCRIPT_NAME) +-SHARED_STORAGE_DIR="/var/run/gluster/shared_storage" ++SHARED_STORAGE_DIR="/run/gluster/shared_storage" + GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled" + GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled" + GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks" +-- +1.8.3.1 + diff --git a/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch b/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch new file mode 100644 index 0000000..0ebba37 --- /dev/null +++ b/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch @@ -0,0 +1,73 @@ +From d23ad767281af85cf07f5c3f63de482d40ee1953 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Thu, 10 Sep 2020 13:16:12 +0530 +Subject: [PATCH 471/473] nfs-ganesha: gluster_shared_storage fails to + automount on node reboot on rhel 8 + +The patch https://review.gluster.org/#/c/glusterfs/+/24934/, changes mount point +of gluster_shared_storage from /var/run to /run to address the issue of symlink +at mount path in fstab. +NOTE: mount point /var/run is symlink to /run + +The required changes with respect to gluster_shared_storage mount path are +introduced with this patch in nfs-ganesha. 
+ +>Fixes: #1475 +>Change-Id: I9c7677a053e1291f71476d47ba6fa2e729f59625 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/24970/ +BUG: 1873469 +Change-Id: I9c7677a053e1291f71476d47ba6fa2e729f59625 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/211392 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/ocf/ganesha_nfsd | 2 +- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + extras/hook-scripts/start/post/S31ganesha-start.sh | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd +index 93fc8be..f91e8b6 100644 +--- a/extras/ganesha/ocf/ganesha_nfsd ++++ b/extras/ganesha/ocf/ganesha_nfsd +@@ -36,7 +36,7 @@ else + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + fi + +-OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage" ++OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage" + : ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} + + ganesha_meta_data() { +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index a6814b1..9790a71 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -24,7 +24,7 @@ GANESHA_HA_SH=$(realpath $0) + HA_NUM_SERVERS=0 + HA_SERVERS="" + HA_VOL_NAME="gluster_shared_storage" +-HA_VOL_MNT="/var/run/gluster/shared_storage" ++HA_VOL_MNT="/run/gluster/shared_storage" + HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" + SERVICE_MAN="DISTRO_NOT_FOUND" + +diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh +index 90ba6bc..7ad6f23 100755 +--- a/extras/hook-scripts/start/post/S31ganesha-start.sh ++++ b/extras/hook-scripts/start/post/S31ganesha-start.sh +@@ -4,7 +4,7 @@ OPTSPEC="volname:,gd-workdir:" + VOL= + declare -i EXPORT_ID + ganesha_key="ganesha.enable" +-GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha" ++GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha" + CONF1="$GANESHA_DIR/ganesha.conf" + GLUSTERD_WORKDIR= + +-- +1.8.3.1 + diff --git a/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch b/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch new file mode 100644 index 0000000..79d4d0e --- /dev/null +++ b/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch @@ -0,0 +1,98 @@ +From ccd45222c46b91b4d0cd57db9ea8b1515c97ada0 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Mon, 31 Aug 2020 20:08:39 +0530 +Subject: [PATCH 472/473] geo-rep: gluster_shared_storage fails to automount on + node reboot on rhel 8. + +Issue: On reboot, all the mounts get wiped out. + Only the mounts mentioned in /etc/fstab automatically gets mounted + during boot/reboot. + + But /etc/fstab complains on not getting a canonical path + (it gets path containing a symlink) + This is because the gluster_shared_storage, is mounted to + /var/run which is symlink to /run. This is a general practice + followed by most operating systems. + + [root@ ~]# ls -lsah /var/run + 0 lrwxrwxrwx. 1 root root 6 Jul 22 19:39 /var/run -> ../run + +Fix: Mount gluster_shared_storage on /run. 
+ (Also It is seen that /var/run is mostly + used by old or legacy systems, thus it is a good practice to + update /var/run to /run) + +>fixes: #1459 +>Change-Id: I8c16653be8cd746c84f01abf0eea19284fb97c77 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/24934/ +BUG: 1873469 +Change-Id: I8c16653be8cd746c84f01abf0eea19284fb97c77 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/211387 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../set/post/S32gluster_enable_shared_storage.sh | 18 +++++++++--------- + geo-replication/gsyncd.conf.in | 2 +- + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +index 885ed03..3bae37c 100755 +--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh ++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +@@ -79,9 +79,9 @@ done + + if [ "$option" == "disable" ]; then + # Unmount the volume on all the nodes +- umount /var/run/gluster/shared_storage +- cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp +- mv /var/run/gluster/fstab.tmp /etc/fstab ++ umount /run/gluster/shared_storage ++ cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp ++ mv /run/gluster/fstab.tmp /etc/fstab + fi + + if [ "$is_originator" == 1 ]; then +@@ -105,7 +105,7 @@ function check_volume_status() + } + + mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ +- /var/run/gluster/shared_storage" ++ /run/gluster/shared_storage" + + if [ "$option" == "enable" ]; then + retry=0; +@@ -120,10 +120,10 @@ if [ "$option" == "enable" ]; then + status=$(check_volume_status) + done + # Mount the volume on all the nodes +- umount /var/run/gluster/shared_storage +- mkdir -p /var/run/gluster/shared_storage ++ umount /run/gluster/shared_storage ++ mkdir -p /run/gluster/shared_storage + $mount_cmd +- cp /etc/fstab /var/run/gluster/fstab.tmp +- echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp +- mv /var/run/gluster/fstab.tmp /etc/fstab ++ cp /etc/fstab /run/gluster/fstab.tmp ++ echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp ++ mv /run/gluster/fstab.tmp /etc/fstab + fi +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index 11e57fd..9688c79 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -123,7 +123,7 @@ type=bool + help=Use this to set Active Passive mode to meta-volume. 
+ + [meta-volume-mnt] +-value=/var/run/gluster/shared_storage ++value=/run/gluster/shared_storage + help=Meta Volume or Shared Volume mount path + + [allow-network] +-- +1.8.3.1 + diff --git a/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch b/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch new file mode 100644 index 0000000..0629fa7 --- /dev/null +++ b/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch @@ -0,0 +1,75 @@ +From 80f1b3aedcde02ae25b341519857ba9a5b2fa722 Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Thu, 24 Sep 2020 19:43:29 +0530 +Subject: [PATCH 473/473] glusterd: Fix Add-brick with increasing replica count + failure + +Problem: add-brick operation fails with multiple bricks on same +server error when replica count is increased. + +This was happening because of extra runs in a loop to compare +hostnames and if bricks supplied were less than "replica" count, +the bricks will get compared to itself resulting in above error. + +>Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/25029 +>Fixes: #1508 + +BUG: 1881823 +Change-Id: I8668e964340b7bf59728bb838525d2db062197ed +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/213064 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/brick-order-check-add-brick.t | 21 +++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 4 ++++ + 2 files changed, 25 insertions(+) + +diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t +index 29f0ed1..0be31da 100644 +--- a/tests/bugs/glusterd/brick-order-check-add-brick.t ++++ b/tests/bugs/glusterd/brick-order-check-add-brick.t +@@ -37,4 +37,25 @@ EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks' + TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force + EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks' + ++TEST $CLI_1 volume stop $V0 ++TEST $CLI_1 volume delete $V0 ++ ++TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1 ++EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks' ++EXPECT 'Created' volinfo_field $V0 'Status' ++ ++TEST $CLI_1 volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++#Add-brick with Increasing replica count ++TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1 ++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' ++ ++#Add-brick with Increasing replica count from same host should fail ++TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3 ++ ++#adding multiple bricks from same host should fail the brick order check ++TEST ! 
$CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9} ++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' ++ + cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 545e688..d25fc8a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -14908,6 +14908,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + i = 0; + ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); + ++ if (brick_count < sub_count) { ++ sub_count = brick_count; ++ } ++ + /* Check for bad brick order */ + while (i < brick_count) { + ++i; +-- +1.8.3.1 + diff --git a/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch b/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch new file mode 100644 index 0000000..034a2a2 --- /dev/null +++ b/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch @@ -0,0 +1,49 @@ +From 3612b3a46c33d19bb7d4aee6eb6625d8d903d459 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Wed, 17 Jun 2020 10:44:37 +0530 +Subject: [PATCH 474/478] features/locks: posixlk-clear-lock should set error + as EINTR + +Problem: +fuse on receiving interrupt for setlk sends clear-lock "fop" +using virtual-getxattr. At the moment blocked locks which are +cleared return EAGAIN errno as opposed to EINTR errno + +Fix: +Return EINTR errno. + +Upstream: +> Reviewed-on: https://review.gluster.org/24587 +> Updates: #1310 +> Change-Id: I47de0fcaec370b267f2f5f89deeb37e1b9c0ee9b +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1821743 +Change-Id: Id8301ce6e21c009949e88db5904d8b6ecc278f66 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/216157 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/locks/src/clear.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c +index 116aed6..ab1eac6 100644 +--- a/xlators/features/locks/src/clear.c ++++ b/xlators/features/locks/src/clear.c +@@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + if (plock->blocked) { + bcount++; + pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW, +- &plock->user_flock, -1, EAGAIN, NULL); ++ &plock->user_flock, -1, EINTR, NULL); + +- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN, ++ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR, + &plock->user_flock, NULL); + + } else { +-- +1.8.3.1 + diff --git a/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch b/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch new file mode 100644 index 0000000..24a62b3 --- /dev/null +++ b/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch @@ -0,0 +1,46 @@ +From 47d8c316f622850d060af90d1d939528ace5607a Mon Sep 17 00:00:00 2001 +From: Csaba Henk <csaba@redhat.com> +Date: Thu, 14 Feb 2019 02:01:38 +0100 +Subject: [PATCH 475/478] fuse lock interrupt: fix flock_interrupt.t + +Upstream: +> Reviewed-on: https://review.gluster.org/22213 +> updates: bz#1193929 +> Change-Id: I347de62755100cd69e3cf341434767ae23fd1ba4 +> Signed-off-by: Csaba Henk <csaba@redhat.com> + +BUG: 1821743 +Change-Id: I0088f804bca215152e7ca2c490402c11f7b5333a +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: 
https://code.engineering.redhat.com/gerrit/216158 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/features/flock_interrupt.t | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t +index 8603b65..964a4bc 100644 +--- a/tests/features/flock_interrupt.t ++++ b/tests/features/flock_interrupt.t +@@ -22,12 +22,12 @@ EXPECT 'Started' volinfo_field $V0 'Status'; + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + TEST touch $M0/testfile; + +-function flock_interrupt { +- flock $MO/testfile sleep 3 & flock -w 1 $M0/testfile true; +- echo ok; +-} ++echo > got_lock ++flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok > got_lock; } & + +-EXPECT_WITHIN 2 ok flock_interrupt; ++EXPECT_WITHIN 4 ok cat got_lock; + + ## Finish up ++sleep 7; ++rm -f got_lock; + cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch b/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch new file mode 100644 index 0000000..6c9d736 --- /dev/null +++ b/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch @@ -0,0 +1,114 @@ +From 40519185067d891f06818c574301ea1af4b36479 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Wed, 17 Jun 2020 10:45:19 +0530 +Subject: [PATCH 476/478] mount/fuse: use cookies to get fuse-interrupt-record + instead of xdata + +Problem: +On executing tests/features/flock_interrupt.t the following error log +appears +[2020-06-16 11:51:54.631072 +0000] E +[fuse-bridge.c:4791:fuse_setlk_interrupt_handler_cbk] 0-glusterfs-fuse: +interrupt record not found + +This happens because fuse-interrupt-record is never sent on the wire by +getxattr fop and there is no guarantee that in the cbk it will be +available in case of failures. 
+ +Fix: +wind getxattr fop with fuse-interrupt-record as cookie and recover it +in the cbk + +Upstream: +> Reviewed-on: https://review.gluster.org/24588 +> Fixes: #1310 +> Change-Id: I4cfff154321a449114fc26e9440db0f08e5c7daa +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1821743 +Change-Id: If9576801654d4d743bd66ae90ca259c4d34746a7 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/216159 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/features/flock_interrupt.t | 1 - + xlators/mount/fuse/src/fuse-bridge.c | 28 +++++++--------------------- + 2 files changed, 7 insertions(+), 22 deletions(-) + +diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t +index 964a4bc..b8717e3 100644 +--- a/tests/features/flock_interrupt.t ++++ b/tests/features/flock_interrupt.t +@@ -28,6 +28,5 @@ flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok + EXPECT_WITHIN 4 ok cat got_lock; + + ## Finish up +-sleep 7; + rm -f got_lock; + cleanup; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index f61fa39..1bddac2 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4768,16 +4768,8 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie, + int32_t op_errno, dict_t *dict, dict_t *xdata) + { + fuse_interrupt_state_t intstat = INTERRUPT_NONE; +- fuse_interrupt_record_t *fir; ++ fuse_interrupt_record_t *fir = cookie; + fuse_state_t *state = NULL; +- int ret = 0; +- +- ret = dict_get_bin(xdata, "fuse-interrupt-record", (void **)&fir); +- if (ret < 0) { +- gf_log("glusterfs-fuse", GF_LOG_ERROR, "interrupt record not found"); +- +- goto out; +- } + + intstat = op_ret >= 0 ? INTERRUPT_HANDLED : INTERRUPT_SQUELCHED; + +@@ -4789,7 +4781,6 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie, + GF_FREE(state); + } + +-out: + STACK_DESTROY(frame->root); + + return 0; +@@ -4827,9 +4818,10 @@ fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir) + frame->op = GF_FOP_GETXATTR; + state->name = xattr_name; + +- STACK_WIND(frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol, +- state->active_subvol->fops->fgetxattr, state->fd, xattr_name, +- state->xdata); ++ STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir, ++ state->active_subvol, ++ state->active_subvol->fops->fgetxattr, state->fd, ++ xattr_name, state->xdata); + + return; + +@@ -4852,15 +4844,9 @@ fuse_setlk_resume(fuse_state_t *state) + fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler); + state_clone = gf_memdup(state, sizeof(*state)); + if (state_clone) { +- /* +- * Calling this allocator with fir casted to (char *) seems like +- * an abuse of this API, but in fact the API is stupid to assume +- * a (char *) argument (in the funcion it's casted to (void *) +- * anyway). 
+- */ +- state_clone->xdata = dict_for_key_value( +- "fuse-interrupt-record", (char *)fir, sizeof(*fir), _gf_true); ++ state_clone->xdata = dict_new(); + } ++ + if (!fir || !state_clone || !state_clone->xdata) { + if (fir) { + GF_FREE(fir); +-- +1.8.3.1 + diff --git a/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch b/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch new file mode 100644 index 0000000..c604ccd --- /dev/null +++ b/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch @@ -0,0 +1,51 @@ +From 3d50207b346cb5d95af94aa010ebd1ec3e795554 Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar <ssivakum@redhat.com> +Date: Wed, 4 Nov 2020 11:44:51 +0530 +Subject: [PATCH 477/478] glusterd/snapshot: Snapshot prevalidation failure not + failing. + +The value of `ret` is to be set to `-1` to indicate failure +or else the prevalidation which is supposed to be a failure +as the snapshot isn't even activated for cloning will move +to next stage. + +Label: DOWNSTREAM ONLY +BUG: 1837926 + +Change-Id: I95122c3a261332630efa00033a1892a8f95fc00b +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/216920 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Shwetha Acharya <sacharya@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index 5b8ae97..ee3cea0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -2298,8 +2298,8 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, + goto out; + } + +- + if (!glusterd_is_volume_started(snap_vol)) { ++ ret = -1; + snprintf(err_str, sizeof(err_str), + "Snapshot %s is " + "not activated", +@@ -9361,7 +9361,8 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req) + "for a snapshot"); + op_errno = EG_OPNOTSUP; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, +- "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_RHS_3_0); ++ "%s (%d < %d)", err_str, conf->op_version, ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch b/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch new file mode 100644 index 0000000..596fe2b --- /dev/null +++ b/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch @@ -0,0 +1,119 @@ +From e772bef5631017145cd0270d72a9ada1378e022a Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Fri, 30 Oct 2020 08:27:47 +0200 +Subject: [PATCH 478/478] DHT - Fixing rebalance failure on issuing stop + command + +Issuing a stop command for an ongoing rebalance process results in an error. +This issue was brought up in https://bugzilla.redhat.com/1286171 and a patch +(https://review.gluster.org/24103/) was submitted to resolve the issue. + +However the submitted patch resolved only part of the +problem by reducing the number of log messages that were printed (since +rebalnace is currently a recursive process, an error message was printed +for every directory) but didn't fully resolve the root cause for the +failure. 
+ +This patch fixes the issue by modifying the code-path which handles the +termination of the rebalance process by issuing a stop command. + +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1628 +> fixes: #1627 +> Change-Id: I604f2b0f8b1ccb1026b8425a14200bbd1dc5bd03 +> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com + +BUG: 1286171 +Change-Id: I604f2b0f8b1ccb1026b8425a14200bbd1dc5bd03 +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/216896 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-rebalance.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index abc10fc..d49a719 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -3113,12 +3113,10 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + struct dht_container *tmp_container = NULL; + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = -1; + goto out; + } + + if (dir_dfmeta->offset_var[i].readdir_done == 1) { +- ret = 0; + goto out; + } + +@@ -3135,7 +3133,6 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + &(dir_dfmeta->equeue[i]), xattr_req, NULL); + if (ret == 0) { + dir_dfmeta->offset_var[i].readdir_done = 1; +- ret = 0; + goto out; + } + +@@ -3161,7 +3158,6 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + + while (1) { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = -1; + goto out; + } + +@@ -3273,12 +3269,14 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + } + + out: +- if (ret == 0) { +- *container = tmp_container; +- } else { +- if (tmp_container) { ++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { ++ if (ret == 0) { ++ *container = tmp_container; ++ } else { + gf_defrag_free_container(tmp_container); + } ++ } else { ++ gf_defrag_free_container(tmp_container); + } + + return ret; +@@ -3487,7 +3485,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + migrate_data, dir_dfmeta, xattr_req, + &should_commit_hash, perrno); + +- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { ++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + goto out; + } + +@@ -3947,7 +3945,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout, + migrate_data); + +- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { ++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + goto out; + } + +@@ -4015,6 +4013,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); + ++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ++ goto out; ++ } ++ + if (ret && (ret != 2)) { + if (perrno == ENOENT || perrno == ESTALE) { + ret = 0; +-- +1.8.3.1 + diff --git a/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch b/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch new file mode 100644 index 0000000..8bbdf9d --- /dev/null +++ b/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch @@ -0,0 +1,53 @@ +From 9036c9f0fd081c83c5c4fcd1ecba858421442777 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. 
KEITHLEY" <kkeithle@redhat.com> +Date: Tue, 10 Nov 2020 07:39:14 -0500 +Subject: [PATCH 479/479] ganesha-ha: revised regex exprs for --status + +better whitespace in regex + +This has worked for years, but somehow no longer works on rhel8 + +> Updates: #1000 +> Change-Id: I2c1a3537573d125608334772ba1a263c55407dd4 +> Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +> https://github.com/gluster/glusterfs/commit/4026fe9a956238d8e4785cf39c3b7290eae90f03 + +BUG: 1895301 +Change-Id: I2c1a3537573d125608334772ba1a263c55407dd4 +Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/217480 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 9790a71..491c61d 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -948,18 +948,18 @@ status() + # check if the VIP and port block/unblock RAs are on the expected nodes + for n in ${nodes[*]}; do + +- grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ grep -E -x "${n}-nfs_block +\(ocf::heartbeat:portblock\): +Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) +- grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ grep -E -x "${n}-cluster_ip-1 +\(ocf::heartbeat:IPaddr\): +Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) +- grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ grep -E -x "${n}-nfs_unblock +\(ocf::heartbeat:portblock\): +Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + done + +- grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} ++ grep -E "\): +Stopped|FAILED" > /dev/null 2>&1 ${scratch} + result=$? + + if [ ${result} -eq 0 ]; then +-- +1.8.3.1 + diff --git a/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch b/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch new file mode 100644 index 0000000..31c404f --- /dev/null +++ b/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch @@ -0,0 +1,255 @@ +From 759c12fc016a6399bb179aa0f930602c87d1e0f8 Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Tue, 24 Nov 2020 12:56:10 +0200 +Subject: [PATCH 480/480] DHT/Rebalance - Ensure Rebalance reports status only + once upon stopping + +Upon issuing rebalance stop command, the status of rebalance is being +logged twice to the log file, which can sometime result in an +inconsistent reports (one report states status stopped, while the other +may report something else). + +This fix ensures rebalance reports it's status only once and that the +correct status is being reported. 
+ +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1783 +> fixes: #1782 +> Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37 +> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com + +BUG: 1286171 +Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37 +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/218953 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Csaba Henk <chenk@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/distribute/bug-1286171.t | 75 +++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 2 +- + xlators/cluster/dht/src/dht-common.h | 2 +- + xlators/cluster/dht/src/dht-rebalance.c | 63 ++++++++++++++------------- + 4 files changed, 108 insertions(+), 34 deletions(-) + create mode 100644 tests/bugs/distribute/bug-1286171.t + +diff --git a/tests/bugs/distribute/bug-1286171.t b/tests/bugs/distribute/bug-1286171.t +new file mode 100644 +index 0000000..a2ca36f +--- /dev/null ++++ b/tests/bugs/distribute/bug-1286171.t +@@ -0,0 +1,75 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../cluster.rc ++. $(dirname $0)/../../volume.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++volname=bug-1286171 ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $volname $H0:$B0/${volname}{1,2} ++ ++# Verify volume creation ++EXPECT "$volname" volinfo_field $volname 'Volume Name'; ++EXPECT 'Created' volinfo_field $volname 'Status'; ++ ++# Start volume and verify successful start ++TEST $CLI volume start $volname; ++EXPECT 'Started' volinfo_field $volname 'Status'; ++TEST glusterfs --volfile-id=$volname --volfile-server=$H0 --entry-timeout=0 $M0; ++#------------------------------------------------------------ ++ ++# Create a nested dir structure and some file under MP ++cd $M0; ++for i in {1..5} ++do ++ mkdir dir$i ++ cd dir$i ++ for j in {1..5} ++ do ++ mkdir dir$i$j ++ cd dir$i$j ++ for k in {1..5} ++ do ++ mkdir dir$i$j$k ++ cd dir$i$j$k ++ touch {1..300} ++ cd .. ++ done ++ touch {1..300} ++ cd .. ++ done ++ touch {1..300} ++ cd .. ++done ++touch {1..300} ++ ++# Add-brick and start rebalance ++TEST $CLI volume add-brick $volname $H0:$B0/${volname}4; ++TEST $CLI volume rebalance $volname start; ++ ++# Let rebalance run for a while ++sleep 5 ++ ++# Stop rebalance ++TEST $CLI volume rebalance $volname stop; ++ ++# Allow rebalance to stop ++sleep 5 ++ ++# Examine the logfile for errors ++cd /var/log/glusterfs; ++failures=`grep "failures:" ${volname}-rebalance.log | tail -1 | sed 's/.*failures: //; s/,.*//'`; ++ ++TEST [ $failures == 0 ]; ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 23cc80c..4db89df 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -10969,7 +10969,7 @@ dht_notify(xlator_t *this, int event, void *data, ...) 
+ if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) +- gf_defrag_status_get(conf, output); ++ gf_defrag_status_get(conf, output, _gf_false); + else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) + gf_defrag_start_detach_tier(defrag); + else if (cmd == GF_DEFRAG_CMD_DETACH_START) +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 9ec5b51..92f1b89 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -1252,7 +1252,7 @@ dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); + + int +-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict); ++gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status); + + void + gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state); +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index d49a719..16ac16c 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -2720,7 +2720,6 @@ gf_defrag_migrate_single_file(void *opaque) + iatt_ptr = &entry->d_stat; + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = -1; + goto out; + } + +@@ -3833,7 +3832,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + list_for_each_entry_safe(entry, tmp, &entries.list, list) + { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = 1; + goto out; + } + +@@ -4863,7 +4861,7 @@ out: + LOCK(&defrag->lock); + { + status = dict_new(); +- gf_defrag_status_get(conf, status); ++ gf_defrag_status_get(conf, status, _gf_true); + if (ctx && ctx->notify) + ctx->notify(GF_EN_DEFRAG_STATUS, status); + if (status) +@@ -4998,7 +4996,7 @@ out: + } + + int +-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) ++gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status) + { + int ret = 0; + uint64_t files = 0; +@@ -5095,34 +5093,35 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left"); + + log: +- switch (defrag->defrag_status) { +- case GF_DEFRAG_STATUS_NOT_STARTED: +- status = "not started"; +- break; +- case GF_DEFRAG_STATUS_STARTED: +- status = "in progress"; +- break; +- case GF_DEFRAG_STATUS_STOPPED: +- status = "stopped"; +- break; +- case GF_DEFRAG_STATUS_COMPLETE: +- status = "completed"; +- break; +- case GF_DEFRAG_STATUS_FAILED: +- status = "failed"; +- break; +- default: +- break; +- } ++ if (log_status) { ++ switch (defrag->defrag_status) { ++ case GF_DEFRAG_STATUS_NOT_STARTED: ++ status = "not started"; ++ break; ++ case GF_DEFRAG_STATUS_STARTED: ++ status = "in progress"; ++ break; ++ case GF_DEFRAG_STATUS_STOPPED: ++ status = "stopped"; ++ break; ++ case GF_DEFRAG_STATUS_COMPLETE: ++ status = "completed"; ++ break; ++ case GF_DEFRAG_STATUS_FAILED: ++ status = "failed"; ++ break; ++ default: ++ break; ++ } + +- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, +- "Rebalance is %s. Time taken is %.2f secs", status, elapsed); +- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, +- "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64 +- ", failures: %" PRIu64 +- ", skipped: " +- "%" PRIu64, +- files, size, lookup, failures, skipped); ++ gf_msg("DHT", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, ++ "Rebalance is %s. 
Time taken is %.2f secs " ++ "Files migrated: %" PRIu64 ", size: %" PRIu64 ++ ", lookups: %" PRIu64 ", failures: %" PRIu64 ++ ", skipped: " ++ "%" PRIu64, ++ status, elapsed, files, size, lookup, failures, skipped); ++ } + out: + return 0; + } +@@ -5299,7 +5298,7 @@ gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output) + defrag->defrag_status = status; + + if (output) +- gf_defrag_status_get(conf, output); ++ gf_defrag_status_get(conf, output, _gf_false); + ret = 0; + out: + gf_msg_debug("", 0, "Returning %d", ret); +-- +1.8.3.1 + diff --git a/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch b/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch new file mode 100644 index 0000000..dd9b0ab --- /dev/null +++ b/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch @@ -0,0 +1,33 @@ +From 346aa7cbc34b9bbbaca45180215a4d9ffd5055df Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Fri, 19 Feb 2021 06:19:07 +0000 +Subject: [PATCH 481/481] RHGS-3.5.3 rebuild to ship with RHEL. + +Label: DOWNSTREAM ONLY +BUG: 1930561 + +Change-Id: I9c7f30cc6bc616344b27072bfde056c7bba1e143 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/228413 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 30d7162..52f9b40 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1983,6 +1983,8 @@ fi + %endif + + %changelog ++* Fri Feb 19 2021 Rinku Kothiya <rkothiya@redhat.com> ++- Build RGHS clients for RHEL (#1930561) + + * Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com> + - added requires policycoreutils-python-utils on rhel8 for geo-replication +-- +1.8.3.1 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec index 73b0bba..905084f 100644 --- a/SPECS/glusterfs.spec +++ b/SPECS/glusterfs.spec @@ -79,6 +79,11 @@ # rpmbuild -ta glusterfs-6.0.tar.gz --without rdma %{?_without_rdma:%global _without_rdma --disable-ibverbs} +# No RDMA Support on 32-bit ARM +%ifarch armv7hl +%global _without_rdma --disable-ibverbs +%endif + # server # if you wish to build rpms without server components, compile like this # rpmbuild -ta glusterfs-6.0.tar.gz --without server @@ -232,7 +237,8 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 37%{?dist} +Release: 49.1%{?dist} +ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ URL: http://docs.gluster.org/ @@ -692,6 +698,104 @@ Patch0380: 0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch Patch0381: 0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch Patch0382: 0382-features-shard-Aggregate-file-size-block-count-befor.patch Patch0383: 0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch +Patch0384: 0384-Update-rfc.sh-to-rhgs-3.5.3.patch +Patch0385: 0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch +Patch0386: 0386-glusterd-increase-the-StartLimitBurst.patch +Patch0387: 0387-To-fix-readdir-ahead-memory-leak.patch +Patch0388: 0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch +Patch0389: 0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch +Patch0390: 0390-glusterd-deafult-options-after-volume-reset.patch +Patch0391: 0391-glusterd-unlink-the-file-after-killing-the-process.patch +Patch0392: 0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch +Patch0393: 
0393-afr-restore-timestamp-of-files-during-metadata-heal.patch +Patch0394: 0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch +Patch0395: 0395-Cli-Removing-old-log-rotate-command.patch +Patch0396: 0396-Updating-gluster-manual.patch +Patch0397: 0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch +Patch0398: 0398-ec-change-error-message-for-heal-commands-for-disper.patch +Patch0399: 0399-glusterd-coverity-fixes.patch +Patch0400: 0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch +Patch0401: 0401-cli-change-the-warning-message.patch +Patch0402: 0402-afr-wake-up-index-healer-threads.patch +Patch0403: 0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch +Patch0404: 0404-tests-Fix-spurious-failure.patch +Patch0405: 0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch +Patch0406: 0406-afr-support-split-brain-CLI-for-replica-3.patch +Patch0407: 0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch +Patch0408: 0408-geo-rep-Fix-ssh-port-validation.patch +Patch0409: 0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch +Patch0410: 0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch +Patch0411: 0411-tools-glusterfind-validate-session-name.patch +Patch0412: 0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch +Patch0413: 0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch +Patch0414: 0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch +Patch0415: 0415-dht-Fix-stale-layout-and-create-issue.patch +Patch0416: 0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch +Patch0417: 0417-events-fix-IPv6-memory-corruption.patch +Patch0418: 0418-md-cache-avoid-clearing-cache-when-not-necessary.patch +Patch0419: 0419-cluster-afr-fix-race-when-bricks-come-up.patch +Patch0420: 0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch +Patch0421: 0421-Improve-logging-in-EC-client-and-lock-translator.patch +Patch0422: 0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch +Patch0423: 0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch +Patch0424: 0424-afr-make-heal-info-lockless.patch +Patch0425: 0425-tests-Fix-spurious-self-heald.t-failure.patch +Patch0426: 0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch +Patch0427: 0427-storage-posix-Fixing-a-coverity-issue.patch +Patch0428: 0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch +Patch0429: 0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch +Patch0430: 0430-Fix-some-Null-pointer-dereference-coverity-issues.patch +Patch0431: 0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch +Patch0432: 0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch +Patch0433: 0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch +Patch0434: 0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch +Patch0435: 0435-glusterd-coverity-fix.patch +Patch0436: 0436-glusterd-coverity-fixes.patch +Patch0437: 0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch +Patch0438: 0438-dht-sparse-files-rebalance-enhancements.patch +Patch0439: 0439-cluster-afr-Delay-post-op-for-fsync.patch +Patch0440: 0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch +Patch0441: 0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch +Patch0442: 0442-fuse-correctly-handle-setxattr-values.patch +Patch0443: 0443-fuse-fix-high-sev-coverity-issue.patch +Patch0444: 0444-mount-fuse-Fixing-a-coverity-issue.patch +Patch0445: 0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch +Patch0446: 
0446-bitrot-Make-number-of-signer-threads-configurable.patch +Patch0447: 0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch +Patch0448: 0448-Posix-Use-simple-approach-to-close-fd.patch +Patch0449: 0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch +Patch0450: 0450-tests-basic-ctime-enable-ctime-before-testing.patch +Patch0451: 0451-extras-Modify-group-virt-to-include-network-related-.patch +Patch0452: 0452-Tier-DHT-Handle-the-pause-case-missed-out.patch +Patch0453: 0453-glusterd-add-brick-command-failure.patch +Patch0454: 0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch +Patch0455: 0455-locks-prevent-deletion-of-locked-entries.patch +Patch0456: 0456-add-clean-local-after-grant-lock.patch +Patch0457: 0457-cluster-ec-Improve-detection-of-new-heals.patch +Patch0458: 0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch +Patch0459: 0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch +Patch0460: 0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch +Patch0461: 0461-geo-replication-Fix-IPv6-parsing.patch +Patch0462: 0462-Issue-with-gf_fill_iatt_for_dirent.patch +Patch0463: 0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch +Patch0464: 0464-storage-posix-Remove-nr_files-usage.patch +Patch0465: 0465-posix-Implement-a-janitor-thread-to-close-fd.patch +Patch0466: 0466-cluster-ec-Change-stale-index-handling.patch +Patch0467: 0467-build-Added-dependency-for-glusterfs-selinux.patch +Patch0468: 0468-build-Update-the-glusterfs-selinux-version.patch +Patch0469: 0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch +Patch0470: 0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch +Patch0471: 0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch +Patch0472: 0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch +Patch0473: 0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch +Patch0474: 0474-features-locks-posixlk-clear-lock-should-set-error-a.patch +Patch0475: 0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch +Patch0476: 0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch +Patch0477: 0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch +Patch0478: 0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch +Patch0479: 0479-ganesha-ha-revised-regex-exprs-for-status.patch +Patch0480: 0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch +Patch0481: 0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -1029,6 +1133,9 @@ Summary: Clustered file-system server Requires: %{name}%{?_isa} = %{version}-%{release} Requires: %{name}-cli%{?_isa} = %{version}-%{release} Requires: %{name}-libs%{?_isa} = %{version}-%{release} +%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) +Requires: glusterfs-selinux >= 1.0-1 +%endif # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse Requires: %{name}-fuse%{?_isa} = %{version}-%{release} # self-heal daemon, rebalance, nfs-server etc. 
are actually clients @@ -1148,7 +1255,7 @@ do DEST_FILES=( $(egrep '^\+\+\+ b/' $p | cut -f 2- -d '/') ) EXCLUDE_DOCS=() for idx in ${!SOURCE_FILES[@]}; do - # skip the doc + # skip the doc source_file=${SOURCE_FILES[$idx]} dest_file=${DEST_FILES[$idx]} if [[ "$dest_file" =~ ^doc/.+ ]]; then @@ -2432,8 +2539,50 @@ fi %endif %changelog -* Tue Sep 29 2020 CentOS Sources <bugs@centos.org> - 6.0-37.el7.centos -- remove vendor and/or packager lines +* Fri Feb 19 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-49.1 +- fixes bugs bz#1930561 + +* Wed Nov 25 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-49 +- fixes bugs bz#1286171 + +* Tue Nov 10 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-48 +- fixes bugs bz#1895301 + +* Thu Nov 05 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-47 +- fixes bugs bz#1286171 bz#1821743 bz#1837926 + +* Wed Oct 21 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-46 +- fixes bugs bz#1873469 bz#1881823 + +* Wed Sep 09 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-45 +- fixes bugs bz#1785714 + +* Thu Sep 03 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-44 +- fixes bugs bz#1460657 + +* Thu Sep 03 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-43 +- fixes bugs bz#1460657 + +* Wed Sep 02 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-42 +- fixes bugs bz#1785714 + +* Tue Aug 25 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-41 +- fixes bugs bz#1785714 bz#1851424 bz#1851989 bz#1852736 bz#1853189 bz#1855966 + +* Tue Jul 21 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-40 +- fixes bugs bz#1812789 bz#1844359 bz#1847081 bz#1854165 + +* Wed Jun 17 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-39 +- fixes bugs bz#1844359 bz#1845064 + +* Wed Jun 10 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-38 +- fixes bugs bz#1234220 bz#1286171 bz#1487177 bz#1524457 bz#1640573 + bz#1663557 bz#1667954 bz#1683602 bz#1686897 bz#1721355 bz#1748865 bz#1750211 + bz#1754391 bz#1759875 bz#1761531 bz#1761932 bz#1763124 bz#1763129 bz#1764091 + bz#1775637 bz#1776901 bz#1781550 bz#1781649 bz#1781710 bz#1783232 bz#1784211 + bz#1784415 bz#1786516 bz#1786681 bz#1787294 bz#1787310 bz#1787331 bz#1787994 + bz#1790336 bz#1792873 bz#1794663 bz#1796814 bz#1804164 bz#1810924 bz#1815434 + bz#1836099 bz#1837467 bz#1837926 bz#1838479 bz#1839137 bz#1844359 * Fri May 29 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-37 - fixes bugs bz#1840794