190130
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
190130
From: Raghavendra Bhat <raghavendra@redhat.com>
190130
Date: Mon, 15 Apr 2019 14:09:34 -0400
190130
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
190130
 cancelling the signer thread
190130
190130
When bit-rot feature is disabled, the signer thread from the bit-rot-stub
190130
xlator (the thread which performs the setxattr of the signature on to the
190130
disk) is cancelled. But, if the cancelled signer thread had already held
190130
the mutex (&priv->lock) which it uses to monitor the queue of files to
190130
be signed, then the mutex is never released. This creates problems in
190130
future when the feature is enabled again. Both the new instance of the
190130
signer thread and the regular thread which enqueues the files to be
190130
signed will be blocked on this mutex.
190130
190130
So, as part of cancelling the signer thread, unlock the mutex associated
190130
with it as well using pthread_cleanup_push and pthread_cleanup_pop.
190130
190130
Upstream patch:
190130
	> patch: https://review.gluster.org/22572
190130
	> fixes: #bz1700078
190130
	> Change-Id: Ib761910caed90b268e69794ddeb108165487af40
190130
190130
Change-Id: Ib761910caed90b268e69794ddeb108165487af40
190130
BUG: 1851424
190130
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
190130
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
190130
Tested-by: RHGS Build Bot <nigelb@redhat.com>
190130
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
190130
---
190130
 .../bit-rot/src/stub/bit-rot-stub-messages.h       |  4 +-
190130
 xlators/features/bit-rot/src/stub/bit-rot-stub.c   | 62 +++++++++++++++++++---
190130
 2 files changed, 59 insertions(+), 7 deletions(-)
190130
190130
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
190130
index 7f07f29..155802b 100644
190130
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
190130
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
190130
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
190130
            BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
190130
            BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
190130
            BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
190130
-           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
190130
+           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
190130
+           BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
190130
+           BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
190130
 
190130
 #endif /* !_BITROT_STUB_MESSAGES_H_ */
190130
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
190130
index 3f48a4b..c3f81bc 100644
190130
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
190130
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
190130
@@ -26,6 +26,15 @@
190130
 
190130
 #define BR_STUB_REQUEST_COOKIE 0x1
190130
 
190130
+void
190130
+br_stub_lock_cleaner(void *arg)
190130
+{
190130
+    pthread_mutex_t *clean_mutex = arg;
190130
+
190130
+    pthread_mutex_unlock(clean_mutex);
190130
+    return;
190130
+}
190130
+
190130
 void *
190130
 br_stub_signth(void *);
190130
 
190130
@@ -166,8 +175,11 @@ init(xlator_t *this)
190130
 
190130
     ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
190130
                            "brssign");
190130
-    if (ret != 0)
190130
+    if (ret != 0) {
190130
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
190130
+               "failed to create the new thread for signer");
190130
         goto cleanup_lock;
190130
+    }
190130
 
190130
     ret = br_stub_bad_object_container_init(this, priv);
190130
     if (ret) {
190130
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
190130
     priv = this->private;
190130
 
190130
     GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
190130
-    if (priv->do_versioning) {
190130
+    if (priv->do_versioning && !priv->signth) {
190130
         ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
190130
                                "brssign");
190130
-        if (ret != 0)
190130
+        if (ret != 0) {
190130
+            gf_msg(this->name, GF_LOG_WARNING, 0,
190130
+                   BRS_MSG_SPAWN_SIGN_THRD_FAILED,
190130
+                   "failed to create the new thread for signer");
190130
             goto err;
190130
+        }
190130
 
190130
         ret = br_stub_bad_object_container_init(this, priv);
190130
         if (ret) {
190130
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
190130
                 gf_msg(this->name, GF_LOG_ERROR, 0,
190130
                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
190130
                        "Could not cancel sign serializer thread");
190130
+            } else {
190130
+                gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
190130
+                       "killed the signer thread");
190130
+                priv->signth = 0;
190130
             }
190130
-            priv->signth = 0;
190130
         }
190130
 
190130
         if (priv->container.thread) {
190130
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
190130
 
190130
     THIS = this;
190130
     while (1) {
190130
+        /*
190130
+         * Disabling bit-rot feature leads to this particular thread
190130
+         * getting cleaned up by reconfigure via a call to the function
190130
+         * gf_thread_cleanup_xint (which in turn calls pthread_cancel
190130
+         * and pthread_join). But, if this thread had held the mutex
190130
+         * &priv->lock at the time of cancellation, then it leads to
190130
+         * deadlock in future when bit-rot feature is enabled (which
190130
+         * again spawns this thread which cant hold the lock as the
190130
+         * mutex is still held by the previous instance of the thread
190130
+         * which got killed). Also, the br_stub_handle_object_signature
190130
+         * function which is called whenever file has to be signed
190130
+         * also gets blocked as it too attempts to acquire &priv->lock.
190130
+         *
190130
+         * So, arrange for the lock to be unlocked as part of the
190130
+         * cleanup of this thread using pthread_cleanup_push and
190130
+         * pthread_cleanup_pop.
190130
+         */
190130
+        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
190130
         pthread_mutex_lock(&priv->lock);
190130
         {
190130
             while (list_empty(&priv->squeue))
190130
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
190130
             list_del_init(&sigstub->list);
190130
         }
190130
         pthread_mutex_unlock(&priv->lock);
190130
+        pthread_cleanup_pop(0);
190130
 
190130
         call_resume(sigstub->stub);
190130
 
190130
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
190130
 
190130
     priv = this->private;
190130
 
190130
-    if (frame->root->pid != GF_CLIENT_PID_BITD)
190130
+    if (frame->root->pid != GF_CLIENT_PID_BITD) {
190130
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
190130
+               "PID %d from where signature request"
190130
+               "came, does not belong to bit-rot daemon."
190130
+               "Unwinding the fop",
190130
+               frame->root->pid);
190130
         goto dofop;
190130
+    }
190130
 
190130
     ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
190130
-    if (ret)
190130
+    if (ret) {
190130
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
190130
+               "failed to prepare the signature for %s. Unwinding the fop",
190130
+               uuid_utoa(fd->inode->gfid));
190130
         goto dofop;
190130
+    }
190130
     if (fakesuccess) {
190130
         op_ret = op_errno = 0;
190130
         goto dofop;
190130
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
190130
     /* object signature request */
190130
     ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign;;
190130
     if (!ret) {
190130
+        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
190130
+                     uuid_utoa(fd->inode->gfid));
190130
         br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
190130
         goto done;
190130
     }
190130
-- 
190130
1.8.3.1
190130