Blob Blame History Raw
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
From: Raghavendra Bhat <raghavendra@redhat.com>
Date: Mon, 15 Apr 2019 14:09:34 -0400
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
 cancelling the signer thread

When bit-rot feature is disabled, the signer thread from the bit-rot-stub
xlator (the thread which performs the setxattr of the signature on to the
disk) is cancelled. But, if the cancelled signer thread had already held
the mutex (&priv->lock) which it uses to monitor the queue of files to
be signed, then the mutex is never released. This creates problems in
future when the feature is enabled again. Both the new instance of the
signer thread and the regular thread which enqueues the files to be
signed will be blocked on this mutex.

So, as part of cancelling the signer thread, unlock the mutex associated
with it as well using pthread_cleanup_push and pthread_cleanup_pop.

Upstream patch:
	> patch: https://review.gluster.org/22572
	> fixes: #bz1700078
	> Change-Id: Ib761910caed90b268e69794ddeb108165487af40

Change-Id: Ib761910caed90b268e69794ddeb108165487af40
BUG: 1851424
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 .../bit-rot/src/stub/bit-rot-stub-messages.h       |  4 +-
 xlators/features/bit-rot/src/stub/bit-rot-stub.c   | 62 +++++++++++++++++++---
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
index 7f07f29..155802b 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
            BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
            BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
            BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
-           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
+           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
+           BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
+           BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
 
 #endif /* !_BITROT_STUB_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 3f48a4b..c3f81bc 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -26,6 +26,15 @@
 
 #define BR_STUB_REQUEST_COOKIE 0x1
 
+void
+br_stub_lock_cleaner(void *arg)
+{
+    pthread_mutex_t *clean_mutex = arg;
+
+    pthread_mutex_unlock(clean_mutex);
+    return;
+}
+
 void *
 br_stub_signth(void *);
 
@@ -166,8 +175,11 @@ init(xlator_t *this)
 
     ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
                            "brssign");
-    if (ret != 0)
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+               "failed to create the new thread for signer");
         goto cleanup_lock;
+    }
 
     ret = br_stub_bad_object_container_init(this, priv);
     if (ret) {
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
     priv = this->private;
 
     GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
-    if (priv->do_versioning) {
+    if (priv->do_versioning && !priv->signth) {
         ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
                                "brssign");
-        if (ret != 0)
+        if (ret != 0) {
+            gf_msg(this->name, GF_LOG_WARNING, 0,
+                   BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+                   "failed to create the new thread for signer");
             goto err;
+        }
 
         ret = br_stub_bad_object_container_init(this, priv);
         if (ret) {
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
                 gf_msg(this->name, GF_LOG_ERROR, 0,
                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
                        "Could not cancel sign serializer thread");
+            } else {
+                gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
+                       "killed the signer thread");
+                priv->signth = 0;
             }
-            priv->signth = 0;
         }
 
         if (priv->container.thread) {
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
 
     THIS = this;
     while (1) {
+        /*
+         * Disabling bit-rot feature leads to this particular thread
+         * getting cleaned up by reconfigure via a call to the function
+         * gf_thread_cleanup_xint (which in turn calls pthread_cancel
+         * and pthread_join). But, if this thread had held the mutex
+         * &priv->lock at the time of cancellation, then it leads to
+         * deadlock in future when bit-rot feature is enabled (which
+         * again spawns this thread which cant hold the lock as the
+         * mutex is still held by the previous instance of the thread
+         * which got killed). Also, the br_stub_handle_object_signature
+         * function which is called whenever file has to be signed
+         * also gets blocked as it too attempts to acquire &priv->lock.
+         *
+         * So, arrange for the lock to be unlocked as part of the
+         * cleanup of this thread using pthread_cleanup_push and
+         * pthread_cleanup_pop.
+         */
+        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
         pthread_mutex_lock(&priv->lock);
         {
             while (list_empty(&priv->squeue))
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
             list_del_init(&sigstub->list);
         }
         pthread_mutex_unlock(&priv->lock);
+        pthread_cleanup_pop(0);
 
         call_resume(sigstub->stub);
 
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
 
     priv = this->private;
 
-    if (frame->root->pid != GF_CLIENT_PID_BITD)
+    if (frame->root->pid != GF_CLIENT_PID_BITD) {
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
+               "PID %d from where signature request"
+               "came, does not belong to bit-rot daemon."
+               "Unwinding the fop",
+               frame->root->pid);
         goto dofop;
+    }
 
     ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
-    if (ret)
+    if (ret) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+               "failed to prepare the signature for %s. Unwinding the fop",
+               uuid_utoa(fd->inode->gfid));
         goto dofop;
+    }
     if (fakesuccess) {
         op_ret = op_errno = 0;
         goto dofop;
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
     /* object signature request */
     ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
     if (!ret) {
+        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
+                     uuid_utoa(fd->inode->gfid));
         br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
         goto done;
     }
-- 
1.8.3.1