74096c
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
74096c
From: Raghavendra Bhat <raghavendra@redhat.com>
74096c
Date: Mon, 15 Apr 2019 14:09:34 -0400
74096c
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
74096c
 cancelling the signer thread
74096c
74096c
When bit-rot feature is disabled, the signer thread from the bit-rot-stub
74096c
xlator (the thread which performs the setxattr of the signature on to the
74096c
disk) is cancelled. But, if the cancelled signer thread had already held
74096c
the mutex (&priv->lock) which it uses to monitor the queue of files to
74096c
be signed, then the mutex is never released. This creates problems in
74096c
future when the feature is enabled again. Both the new instance of the
74096c
signer thread and the regular thread which enqueues the files to be
74096c
signed will be blocked on this mutex.
74096c
74096c
So, as part of cancelling the signer thread, unlock the mutex associated
74096c
with it as well using pthread_cleanup_push and pthread_cleanup_pop.
74096c
74096c
Upstream patch:
74096c
	> patch: https://review.gluster.org/22572
74096c
	> fixes: #bz1700078
74096c
	> Change-Id: Ib761910caed90b268e69794ddeb108165487af40
74096c
74096c
Change-Id: Ib761910caed90b268e69794ddeb108165487af40
74096c
BUG: 1851424
74096c
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 .../bit-rot/src/stub/bit-rot-stub-messages.h       |  4 +-
74096c
 xlators/features/bit-rot/src/stub/bit-rot-stub.c   | 62 +++++++++++++++++++---
74096c
 2 files changed, 59 insertions(+), 7 deletions(-)
74096c
74096c
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
74096c
index 7f07f29..155802b 100644
74096c
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
74096c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
74096c
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
74096c
            BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
74096c
            BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
74096c
            BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
74096c
-           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
74096c
+           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
74096c
+           BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
74096c
+           BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
74096c
 
74096c
 #endif /* !_BITROT_STUB_MESSAGES_H_ */
74096c
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
74096c
index 3f48a4b..c3f81bc 100644
74096c
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
74096c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
74096c
@@ -26,6 +26,15 @@
74096c
 
74096c
 #define BR_STUB_REQUEST_COOKIE 0x1
74096c
 
74096c
+void
74096c
+br_stub_lock_cleaner(void *arg)
74096c
+{
74096c
+    pthread_mutex_t *clean_mutex = arg;
74096c
+
74096c
+    pthread_mutex_unlock(clean_mutex);
74096c
+    return;
74096c
+}
74096c
+
74096c
 void *
74096c
 br_stub_signth(void *);
74096c
 
74096c
@@ -166,8 +175,11 @@ init(xlator_t *this)
74096c
 
74096c
     ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
74096c
                            "brssign");
74096c
-    if (ret != 0)
74096c
+    if (ret != 0) {
74096c
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
74096c
+               "failed to create the new thread for signer");
74096c
         goto cleanup_lock;
74096c
+    }
74096c
 
74096c
     ret = br_stub_bad_object_container_init(this, priv);
74096c
     if (ret) {
74096c
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
74096c
     priv = this->private;
74096c
 
74096c
     GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
74096c
-    if (priv->do_versioning) {
74096c
+    if (priv->do_versioning && !priv->signth) {
74096c
         ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
74096c
                                "brssign");
74096c
-        if (ret != 0)
74096c
+        if (ret != 0) {
74096c
+            gf_msg(this->name, GF_LOG_WARNING, 0,
74096c
+                   BRS_MSG_SPAWN_SIGN_THRD_FAILED,
74096c
+                   "failed to create the new thread for signer");
74096c
             goto err;
74096c
+        }
74096c
 
74096c
         ret = br_stub_bad_object_container_init(this, priv);
74096c
         if (ret) {
74096c
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
74096c
                 gf_msg(this->name, GF_LOG_ERROR, 0,
74096c
                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
74096c
                        "Could not cancel sign serializer thread");
74096c
+            } else {
74096c
+                gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
74096c
+                       "killed the signer thread");
74096c
+                priv->signth = 0;
74096c
             }
74096c
-            priv->signth = 0;
74096c
         }
74096c
 
74096c
         if (priv->container.thread) {
74096c
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
74096c
 
74096c
     THIS = this;
74096c
     while (1) {
74096c
+        /*
74096c
+         * Disabling bit-rot feature leads to this particular thread
74096c
+         * getting cleaned up by reconfigure via a call to the function
74096c
+         * gf_thread_cleanup_xint (which in turn calls pthread_cancel
74096c
+         * and pthread_join). But, if this thread had held the mutex
74096c
+         * &priv->lock at the time of cancellation, then it leads to
74096c
+         * deadlock in future when bit-rot feature is enabled (which
74096c
+         * again spawns this thread which cant hold the lock as the
74096c
+         * mutex is still held by the previous instance of the thread
74096c
+         * which got killed). Also, the br_stub_handle_object_signature
74096c
+         * function which is called whenever file has to be signed
74096c
+         * also gets blocked as it too attempts to acquire &priv->lock.
74096c
+         *
74096c
+         * So, arrange for the lock to be unlocked as part of the
74096c
+         * cleanup of this thread using pthread_cleanup_push and
74096c
+         * pthread_cleanup_pop.
74096c
+         */
74096c
+        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
74096c
         pthread_mutex_lock(&priv->lock);
74096c
         {
74096c
             while (list_empty(&priv->squeue))
74096c
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
74096c
             list_del_init(&sigstub->list);
74096c
         }
74096c
         pthread_mutex_unlock(&priv->lock);
74096c
+        pthread_cleanup_pop(0);
74096c
 
74096c
         call_resume(sigstub->stub);
74096c
 
74096c
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
74096c
 
74096c
     priv = this->private;
74096c
 
74096c
-    if (frame->root->pid != GF_CLIENT_PID_BITD)
74096c
+    if (frame->root->pid != GF_CLIENT_PID_BITD) {
74096c
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
74096c
+               "PID %d from where signature request"
74096c
+               "came, does not belong to bit-rot daemon."
74096c
+               "Unwinding the fop",
74096c
+               frame->root->pid);
74096c
         goto dofop;
74096c
+    }
74096c
 
74096c
     ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
74096c
-    if (ret)
74096c
+    if (ret) {
74096c
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
74096c
+               "failed to prepare the signature for %s. Unwinding the fop",
74096c
+               uuid_utoa(fd->inode->gfid));
74096c
         goto dofop;
74096c
+    }
74096c
     if (fakesuccess) {
74096c
         op_ret = op_errno = 0;
74096c
         goto dofop;
74096c
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
74096c
     /* object signature request */
74096c
     ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign;;
74096c
     if (!ret) {
74096c
+        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
74096c
+                     uuid_utoa(fd->inode->gfid));
74096c
         br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
74096c
         goto done;
74096c
     }
74096c
-- 
74096c
1.8.3.1
74096c