d84cf8
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
d84cf8
From: Raghavendra Bhat <raghavendra@redhat.com>
d84cf8
Date: Mon, 15 Apr 2019 14:09:34 -0400
d84cf8
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
d84cf8
 cancelling the signer thread
d84cf8
d84cf8
When bit-rot feature is disabled, the signer thread from the bit-rot-stub
d84cf8
xlator (the thread which performs the setxattr of the signature on to the
d84cf8
disk) is cancelled. But, if the cancelled signer thread had already held
d84cf8
the mutex (&priv->lock) which it uses to monitor the queue of files to
d84cf8
be signed, then the mutex is never released. This creates problems in
d84cf8
future when the feature is enabled again. Both the new instance of the
d84cf8
signer thread and the regular thread which enqueues the files to be
d84cf8
signed will be blocked on this mutex.
d84cf8
d84cf8
So, as part of cancelling the signer thread, unlock the mutex associated
d84cf8
with it as well using pthread_cleanup_push and pthread_cleanup_pop.
d84cf8
d84cf8
Upstream patch:
d84cf8
	> patch: https://review.gluster.org/22572
d84cf8
	> fixes: #bz1700078
d84cf8
	> Change-Id: Ib761910caed90b268e69794ddeb108165487af40
d84cf8
d84cf8
Change-Id: Ib761910caed90b268e69794ddeb108165487af40
d84cf8
BUG: 1851424
d84cf8
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
d84cf8
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
d84cf8
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d84cf8
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d84cf8
---
d84cf8
 .../bit-rot/src/stub/bit-rot-stub-messages.h       |  4 +-
d84cf8
 xlators/features/bit-rot/src/stub/bit-rot-stub.c   | 62 +++++++++++++++++++---
d84cf8
 2 files changed, 59 insertions(+), 7 deletions(-)
d84cf8
d84cf8
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
d84cf8
index 7f07f29..155802b 100644
d84cf8
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
d84cf8
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
d84cf8
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
d84cf8
            BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
d84cf8
            BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
d84cf8
            BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
d84cf8
-           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
d84cf8
+           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
d84cf8
+           BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
d84cf8
+           BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
d84cf8
 
d84cf8
 #endif /* !_BITROT_STUB_MESSAGES_H_ */
d84cf8
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
d84cf8
index 3f48a4b..c3f81bc 100644
d84cf8
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
d84cf8
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
d84cf8
@@ -26,6 +26,15 @@
d84cf8
 
d84cf8
 #define BR_STUB_REQUEST_COOKIE 0x1
d84cf8
 
d84cf8
+void
d84cf8
+br_stub_lock_cleaner(void *arg)
d84cf8
+{
d84cf8
+    pthread_mutex_t *clean_mutex = arg;
d84cf8
+
d84cf8
+    pthread_mutex_unlock(clean_mutex);
d84cf8
+    return;
d84cf8
+}
d84cf8
+
d84cf8
 void *
d84cf8
 br_stub_signth(void *);
d84cf8
 
d84cf8
@@ -166,8 +175,11 @@ init(xlator_t *this)
d84cf8
 
d84cf8
     ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
d84cf8
                            "brssign");
d84cf8
-    if (ret != 0)
d84cf8
+    if (ret != 0) {
d84cf8
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
d84cf8
+               "failed to create the new thread for signer");
d84cf8
         goto cleanup_lock;
d84cf8
+    }
d84cf8
 
d84cf8
     ret = br_stub_bad_object_container_init(this, priv);
d84cf8
     if (ret) {
d84cf8
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
d84cf8
     priv = this->private;
d84cf8
 
d84cf8
     GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
d84cf8
-    if (priv->do_versioning) {
d84cf8
+    if (priv->do_versioning && !priv->signth) {
d84cf8
         ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
d84cf8
                                "brssign");
d84cf8
-        if (ret != 0)
d84cf8
+        if (ret != 0) {
d84cf8
+            gf_msg(this->name, GF_LOG_WARNING, 0,
d84cf8
+                   BRS_MSG_SPAWN_SIGN_THRD_FAILED,
d84cf8
+                   "failed to create the new thread for signer");
d84cf8
             goto err;
d84cf8
+        }
d84cf8
 
d84cf8
         ret = br_stub_bad_object_container_init(this, priv);
d84cf8
         if (ret) {
d84cf8
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
d84cf8
                 gf_msg(this->name, GF_LOG_ERROR, 0,
d84cf8
                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
d84cf8
                        "Could not cancel sign serializer thread");
d84cf8
+            } else {
d84cf8
+                gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
d84cf8
+                       "killed the signer thread");
d84cf8
+                priv->signth = 0;
d84cf8
             }
d84cf8
-            priv->signth = 0;
d84cf8
         }
d84cf8
 
d84cf8
         if (priv->container.thread) {
d84cf8
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
d84cf8
 
d84cf8
     THIS = this;
d84cf8
     while (1) {
d84cf8
+        /*
d84cf8
+         * Disabling bit-rot feature leads to this particular thread
d84cf8
+         * getting cleaned up by reconfigure via a call to the function
d84cf8
+         * gf_thread_cleanup_xint (which in turn calls pthread_cancel
d84cf8
+         * and pthread_join). But, if this thread had held the mutex
d84cf8
+         * &priv->lock at the time of cancellation, then it leads to
d84cf8
+         * deadlock in future when bit-rot feature is enabled (which
d84cf8
+         * again spawns this thread which cant hold the lock as the
d84cf8
+         * mutex is still held by the previous instance of the thread
d84cf8
+         * which got killed). Also, the br_stub_handle_object_signature
d84cf8
+         * function which is called whenever file has to be signed
d84cf8
+         * also gets blocked as it too attempts to acquire &priv->lock.
d84cf8
+         *
d84cf8
+         * So, arrange for the lock to be unlocked as part of the
d84cf8
+         * cleanup of this thread using pthread_cleanup_push and
d84cf8
+         * pthread_cleanup_pop.
d84cf8
+         */
d84cf8
+        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
d84cf8
         pthread_mutex_lock(&priv->lock);
d84cf8
         {
d84cf8
             while (list_empty(&priv->squeue))
d84cf8
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
d84cf8
             list_del_init(&sigstub->list);
d84cf8
         }
d84cf8
         pthread_mutex_unlock(&priv->lock);
d84cf8
+        pthread_cleanup_pop(0);
d84cf8
 
d84cf8
         call_resume(sigstub->stub);
d84cf8
 
d84cf8
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
d84cf8
 
d84cf8
     priv = this->private;
d84cf8
 
d84cf8
-    if (frame->root->pid != GF_CLIENT_PID_BITD)
d84cf8
+    if (frame->root->pid != GF_CLIENT_PID_BITD) {
d84cf8
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
d84cf8
+               "PID %d from where signature request"
d84cf8
+               "came, does not belong to bit-rot daemon."
d84cf8
+               "Unwinding the fop",
d84cf8
+               frame->root->pid);
d84cf8
         goto dofop;
d84cf8
+    }
d84cf8
 
d84cf8
     ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
d84cf8
-    if (ret)
d84cf8
+    if (ret) {
d84cf8
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
d84cf8
+               "failed to prepare the signature for %s. Unwinding the fop",
d84cf8
+               uuid_utoa(fd->inode->gfid));
d84cf8
         goto dofop;
d84cf8
+    }
d84cf8
     if (fakesuccess) {
d84cf8
         op_ret = op_errno = 0;
d84cf8
         goto dofop;
d84cf8
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
d84cf8
     /* object signature request */
d84cf8
     ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign;;
d84cf8
     if (!ret) {
d84cf8
+        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
d84cf8
+                     uuid_utoa(fd->inode->gfid));
d84cf8
         br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
d84cf8
         goto done;
d84cf8
     }
d84cf8
-- 
d84cf8
1.8.3.1
d84cf8