14f8ab
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
14f8ab
From: Raghavendra Bhat <raghavendra@redhat.com>
14f8ab
Date: Mon, 15 Apr 2019 14:09:34 -0400
14f8ab
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
14f8ab
 cancelling the signer thread
14f8ab
14f8ab
When bit-rot feature is disabled, the signer thread from the bit-rot-stub
14f8ab
xlator (the thread which performs the setxattr of the signature on to the
14f8ab
disk) is cancelled. But, if the cancelled signer thread had already held
14f8ab
the mutex (&priv->lock) which it uses to monitor the queue of files to
14f8ab
be signed, then the mutex is never released. This creates problems in
14f8ab
future when the feature is enabled again. Both the new instance of the
14f8ab
signer thread and the regular thread which enqueues the files to be
14f8ab
signed will be blocked on this mutex.
14f8ab
14f8ab
So, as part of cancelling the signer thread, unlock the mutex associated
14f8ab
with it as well using pthread_cleanup_push and pthread_cleanup_pop.
14f8ab
14f8ab
Upstream patch:
14f8ab
	> patch: https://review.gluster.org/22572
14f8ab
	> fixes: #bz1700078
14f8ab
	> Change-Id: Ib761910caed90b268e69794ddeb108165487af40
14f8ab
14f8ab
Change-Id: Ib761910caed90b268e69794ddeb108165487af40
14f8ab
BUG: 1851424
14f8ab
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 .../bit-rot/src/stub/bit-rot-stub-messages.h       |  4 +-
14f8ab
 xlators/features/bit-rot/src/stub/bit-rot-stub.c   | 62 +++++++++++++++++++---
14f8ab
 2 files changed, 59 insertions(+), 7 deletions(-)
14f8ab
14f8ab
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
14f8ab
index 7f07f29..155802b 100644
14f8ab
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
14f8ab
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
14f8ab
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
14f8ab
            BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
14f8ab
            BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
14f8ab
            BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
14f8ab
-           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
14f8ab
+           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
14f8ab
+           BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
14f8ab
+           BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
14f8ab
 
14f8ab
 #endif /* !_BITROT_STUB_MESSAGES_H_ */
14f8ab
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
14f8ab
index 3f48a4b..c3f81bc 100644
14f8ab
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
14f8ab
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
14f8ab
@@ -26,6 +26,15 @@
14f8ab
 
14f8ab
 #define BR_STUB_REQUEST_COOKIE 0x1
14f8ab
 
14f8ab
+void
14f8ab
+br_stub_lock_cleaner(void *arg)
14f8ab
+{
14f8ab
+    pthread_mutex_t *clean_mutex = arg;
14f8ab
+
14f8ab
+    pthread_mutex_unlock(clean_mutex);
14f8ab
+    return;
14f8ab
+}
14f8ab
+
14f8ab
 void *
14f8ab
 br_stub_signth(void *);
14f8ab
 
14f8ab
@@ -166,8 +175,11 @@ init(xlator_t *this)
14f8ab
 
14f8ab
     ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
14f8ab
                            "brssign");
14f8ab
-    if (ret != 0)
14f8ab
+    if (ret != 0) {
14f8ab
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
14f8ab
+               "failed to create the new thread for signer");
14f8ab
         goto cleanup_lock;
14f8ab
+    }
14f8ab
 
14f8ab
     ret = br_stub_bad_object_container_init(this, priv);
14f8ab
     if (ret) {
14f8ab
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
14f8ab
     priv = this->private;
14f8ab
 
14f8ab
     GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
14f8ab
-    if (priv->do_versioning) {
14f8ab
+    if (priv->do_versioning && !priv->signth) {
14f8ab
         ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
14f8ab
                                "brssign");
14f8ab
-        if (ret != 0)
14f8ab
+        if (ret != 0) {
14f8ab
+            gf_msg(this->name, GF_LOG_WARNING, 0,
14f8ab
+                   BRS_MSG_SPAWN_SIGN_THRD_FAILED,
14f8ab
+                   "failed to create the new thread for signer");
14f8ab
             goto err;
14f8ab
+        }
14f8ab
 
14f8ab
         ret = br_stub_bad_object_container_init(this, priv);
14f8ab
         if (ret) {
14f8ab
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
14f8ab
                 gf_msg(this->name, GF_LOG_ERROR, 0,
14f8ab
                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
14f8ab
                        "Could not cancel sign serializer thread");
14f8ab
+            } else {
14f8ab
+                gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
14f8ab
+                       "killed the signer thread");
14f8ab
+                priv->signth = 0;
14f8ab
             }
14f8ab
-            priv->signth = 0;
14f8ab
         }
14f8ab
 
14f8ab
         if (priv->container.thread) {
14f8ab
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
14f8ab
 
14f8ab
     THIS = this;
14f8ab
     while (1) {
14f8ab
+        /*
14f8ab
+         * Disabling bit-rot feature leads to this particular thread
14f8ab
+         * getting cleaned up by reconfigure via a call to the function
14f8ab
+         * gf_thread_cleanup_xint (which in turn calls pthread_cancel
14f8ab
+         * and pthread_join). But, if this thread had held the mutex
14f8ab
+         * &priv->lock at the time of cancellation, then it leads to
14f8ab
+         * deadlock in future when bit-rot feature is enabled (which
14f8ab
+         * again spawns this thread which cant hold the lock as the
14f8ab
+         * mutex is still held by the previous instance of the thread
14f8ab
+         * which got killed). Also, the br_stub_handle_object_signature
14f8ab
+         * function which is called whenever file has to be signed
14f8ab
+         * also gets blocked as it too attempts to acquire &priv->lock.
14f8ab
+         *
14f8ab
+         * So, arrange for the lock to be unlocked as part of the
14f8ab
+         * cleanup of this thread using pthread_cleanup_push and
14f8ab
+         * pthread_cleanup_pop.
14f8ab
+         */
14f8ab
+        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
14f8ab
         pthread_mutex_lock(&priv->lock);
14f8ab
         {
14f8ab
             while (list_empty(&priv->squeue))
14f8ab
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
14f8ab
             list_del_init(&sigstub->list);
14f8ab
         }
14f8ab
         pthread_mutex_unlock(&priv->lock);
14f8ab
+        pthread_cleanup_pop(0);
14f8ab
 
14f8ab
         call_resume(sigstub->stub);
14f8ab
 
14f8ab
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
14f8ab
 
14f8ab
     priv = this->private;
14f8ab
 
14f8ab
-    if (frame->root->pid != GF_CLIENT_PID_BITD)
14f8ab
+    if (frame->root->pid != GF_CLIENT_PID_BITD) {
14f8ab
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
14f8ab
+               "PID %d from where signature request"
14f8ab
+               "came, does not belong to bit-rot daemon."
14f8ab
+               "Unwinding the fop",
14f8ab
+               frame->root->pid);
14f8ab
         goto dofop;
14f8ab
+    }
14f8ab
 
14f8ab
     ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
14f8ab
-    if (ret)
14f8ab
+    if (ret) {
14f8ab
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
14f8ab
+               "failed to prepare the signature for %s. Unwinding the fop",
14f8ab
+               uuid_utoa(fd->inode->gfid));
14f8ab
         goto dofop;
14f8ab
+    }
14f8ab
     if (fakesuccess) {
14f8ab
         op_ret = op_errno = 0;
14f8ab
         goto dofop;
14f8ab
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
14f8ab
     /* object signature request */
14f8ab
     ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign;;
14f8ab
     if (!ret) {
14f8ab
+        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
14f8ab
+                     uuid_utoa(fd->inode->gfid));
14f8ab
         br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
14f8ab
         goto done;
14f8ab
     }
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab