9ae3f9
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
9ae3f9
From: Raghavendra Bhat <raghavendra@redhat.com>
9ae3f9
Date: Mon, 15 Apr 2019 14:09:34 -0400
9ae3f9
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
9ae3f9
 cancelling the signer thread
9ae3f9
9ae3f9
When bit-rot feature is disabled, the signer thread from the bit-rot-stub
9ae3f9
xlator (the thread which performs the setxattr of the signature on to the
9ae3f9
disk) is cancelled. But, if the cancelled signer thread had already held
9ae3f9
the mutex (&priv->lock) which it uses to monitor the queue of files to
9ae3f9
be signed, then the mutex is never released. This creates problems in
9ae3f9
future when the feature is enabled again. Both the new instance of the
9ae3f9
signer thread and the regular thread which enqueues the files to be
9ae3f9
signed will be blocked on this mutex.
9ae3f9
9ae3f9
So, as part of cancelling the signer thread, unlock the mutex associated
9ae3f9
with it as well using pthread_cleanup_push and pthread_cleanup_pop.
9ae3f9
9ae3f9
Upstream patch:
9ae3f9
	> patch: https://review.gluster.org/22572
9ae3f9
	> fixes: #bz1700078
9ae3f9
	> Change-Id: Ib761910caed90b268e69794ddeb108165487af40
9ae3f9
9ae3f9
Change-Id: Ib761910caed90b268e69794ddeb108165487af40
9ae3f9
BUG: 1851424
9ae3f9
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
9ae3f9
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
9ae3f9
Tested-by: RHGS Build Bot <nigelb@redhat.com>
9ae3f9
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
9ae3f9
---
9ae3f9
 .../bit-rot/src/stub/bit-rot-stub-messages.h       |  4 +-
9ae3f9
 xlators/features/bit-rot/src/stub/bit-rot-stub.c   | 62 +++++++++++++++++++---
9ae3f9
 2 files changed, 59 insertions(+), 7 deletions(-)
9ae3f9
9ae3f9
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
9ae3f9
index 7f07f29..155802b 100644
9ae3f9
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
9ae3f9
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
9ae3f9
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
9ae3f9
            BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
9ae3f9
            BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
9ae3f9
            BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
9ae3f9
-           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
9ae3f9
+           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
9ae3f9
+           BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
9ae3f9
+           BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
9ae3f9
 
9ae3f9
 #endif /* !_BITROT_STUB_MESSAGES_H_ */
9ae3f9
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
9ae3f9
index 3f48a4b..c3f81bc 100644
9ae3f9
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
9ae3f9
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
9ae3f9
@@ -26,6 +26,15 @@
9ae3f9
 
9ae3f9
 #define BR_STUB_REQUEST_COOKIE 0x1
9ae3f9
 
9ae3f9
+void
9ae3f9
+br_stub_lock_cleaner(void *arg)
9ae3f9
+{
9ae3f9
+    pthread_mutex_t *clean_mutex = arg;
9ae3f9
+
9ae3f9
+    pthread_mutex_unlock(clean_mutex);
9ae3f9
+    return;
9ae3f9
+}
9ae3f9
+
9ae3f9
 void *
9ae3f9
 br_stub_signth(void *);
9ae3f9
 
9ae3f9
@@ -166,8 +175,11 @@ init(xlator_t *this)
9ae3f9
 
9ae3f9
     ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
9ae3f9
                            "brssign");
9ae3f9
-    if (ret != 0)
9ae3f9
+    if (ret != 0) {
9ae3f9
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
9ae3f9
+               "failed to create the new thread for signer");
9ae3f9
         goto cleanup_lock;
9ae3f9
+    }
9ae3f9
 
9ae3f9
     ret = br_stub_bad_object_container_init(this, priv);
9ae3f9
     if (ret) {
9ae3f9
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
9ae3f9
     priv = this->private;
9ae3f9
 
9ae3f9
     GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
9ae3f9
-    if (priv->do_versioning) {
9ae3f9
+    if (priv->do_versioning && !priv->signth) {
9ae3f9
         ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
9ae3f9
                                "brssign");
9ae3f9
-        if (ret != 0)
9ae3f9
+        if (ret != 0) {
9ae3f9
+            gf_msg(this->name, GF_LOG_WARNING, 0,
9ae3f9
+                   BRS_MSG_SPAWN_SIGN_THRD_FAILED,
9ae3f9
+                   "failed to create the new thread for signer");
9ae3f9
             goto err;
9ae3f9
+        }
9ae3f9
 
9ae3f9
         ret = br_stub_bad_object_container_init(this, priv);
9ae3f9
         if (ret) {
9ae3f9
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
9ae3f9
                 gf_msg(this->name, GF_LOG_ERROR, 0,
9ae3f9
                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
9ae3f9
                        "Could not cancel sign serializer thread");
9ae3f9
+            } else {
9ae3f9
+                gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
9ae3f9
+                       "killed the signer thread");
9ae3f9
+                priv->signth = 0;
9ae3f9
             }
9ae3f9
-            priv->signth = 0;
9ae3f9
         }
9ae3f9
 
9ae3f9
         if (priv->container.thread) {
9ae3f9
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
9ae3f9
 
9ae3f9
     THIS = this;
9ae3f9
     while (1) {
9ae3f9
+        /*
9ae3f9
+         * Disabling bit-rot feature leads to this particular thread
9ae3f9
+         * getting cleaned up by reconfigure via a call to the function
9ae3f9
+         * gf_thread_cleanup_xint (which in turn calls pthread_cancel
9ae3f9
+         * and pthread_join). But, if this thread had held the mutex
9ae3f9
+         * &priv->lock at the time of cancellation, then it leads to
9ae3f9
+         * deadlock in future when bit-rot feature is enabled (which
9ae3f9
+         * again spawns this thread which cant hold the lock as the
9ae3f9
+         * mutex is still held by the previous instance of the thread
9ae3f9
+         * which got killed). Also, the br_stub_handle_object_signature
9ae3f9
+         * function which is called whenever file has to be signed
9ae3f9
+         * also gets blocked as it too attempts to acquire &priv->lock.
9ae3f9
+         *
9ae3f9
+         * So, arrange for the lock to be unlocked as part of the
9ae3f9
+         * cleanup of this thread using pthread_cleanup_push and
9ae3f9
+         * pthread_cleanup_pop.
9ae3f9
+         */
9ae3f9
+        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
9ae3f9
         pthread_mutex_lock(&priv->lock);
9ae3f9
         {
9ae3f9
             while (list_empty(&priv->squeue))
9ae3f9
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
9ae3f9
             list_del_init(&sigstub->list);
9ae3f9
         }
9ae3f9
         pthread_mutex_unlock(&priv->lock);
9ae3f9
+        pthread_cleanup_pop(0);
9ae3f9
 
9ae3f9
         call_resume(sigstub->stub);
9ae3f9
 
9ae3f9
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
9ae3f9
 
9ae3f9
     priv = this->private;
9ae3f9
 
9ae3f9
-    if (frame->root->pid != GF_CLIENT_PID_BITD)
9ae3f9
+    if (frame->root->pid != GF_CLIENT_PID_BITD) {
9ae3f9
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
9ae3f9
+               "PID %d from where signature request"
9ae3f9
+               "came, does not belong to bit-rot daemon."
9ae3f9
+               "Unwinding the fop",
9ae3f9
+               frame->root->pid);
9ae3f9
         goto dofop;
9ae3f9
+    }
9ae3f9
 
9ae3f9
     ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
9ae3f9
-    if (ret)
9ae3f9
+    if (ret) {
9ae3f9
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
9ae3f9
+               "failed to prepare the signature for %s. Unwinding the fop",
9ae3f9
+               uuid_utoa(fd->inode->gfid));
9ae3f9
         goto dofop;
9ae3f9
+    }
9ae3f9
     if (fakesuccess) {
9ae3f9
         op_ret = op_errno = 0;
9ae3f9
         goto dofop;
9ae3f9
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
9ae3f9
     /* object signature request */
9ae3f9
     ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign;;
9ae3f9
     if (!ret) {
9ae3f9
+        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
9ae3f9
+                     uuid_utoa(fd->inode->gfid));
9ae3f9
         br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
9ae3f9
         goto done;
9ae3f9
     }
9ae3f9
-- 
9ae3f9
1.8.3.1
9ae3f9