256ebe
From cca418b78ec976aa69eacd56b0e6127ea7e3dd26 Mon Sep 17 00:00:00 2001
256ebe
From: Pranith Kumar K <pkarampu@redhat.com>
256ebe
Date: Thu, 4 Apr 2019 15:31:56 +0530
256ebe
Subject: [PATCH 095/124] cluster/afr: Remove local from owners_list on failure
256ebe
 of lock-acquisition
256ebe
256ebe
	Backport of https://review.gluster.org/c/glusterfs/+/22515
256ebe
256ebe
When eager-lock lock acquisition fails because of say network failures, the
256ebe
local is not being removed from owners_list, this leads to accumulation of
256ebe
waiting frames and the application will hang because the waiting frames are
256ebe
under the assumption that another transaction is in the process of acquiring
256ebe
lock because owner-list is not empty. Handled this case as well in this patch.
256ebe
Added asserts to make it easier to find these problems in future.
256ebe
256ebe
Change-Id: I3101393265e9827755725b1f2d94a93d8709e923
256ebe
fixes: bz#1688395
256ebe
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
256ebe
Reviewed-on: https://code.engineering.redhat.com/gerrit/167859
256ebe
Tested-by: RHGS Build Bot <nigelb@redhat.com>
256ebe
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
256ebe
---
256ebe
 tests/bugs/replicate/bug-1696599-io-hang.t | 47 ++++++++++++++++++++++++++++++
256ebe
 xlators/cluster/afr/src/afr-common.c       |  8 ++---
256ebe
 xlators/cluster/afr/src/afr-lk-common.c    |  1 -
256ebe
 xlators/cluster/afr/src/afr-transaction.c  | 19 +++++-------
256ebe
 xlators/cluster/afr/src/afr.h              |  4 +--
256ebe
 5 files changed, 61 insertions(+), 18 deletions(-)
256ebe
 create mode 100755 tests/bugs/replicate/bug-1696599-io-hang.t
256ebe
256ebe
diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t
256ebe
new file mode 100755
256ebe
index 0000000..869cdb9
256ebe
--- /dev/null
256ebe
+++ b/tests/bugs/replicate/bug-1696599-io-hang.t
256ebe
@@ -0,0 +1,47 @@
256ebe
+#!/bin/bash
256ebe
+
256ebe
+. $(dirname $0)/../../include.rc
256ebe
+. $(dirname $0)/../../volume.rc
256ebe
+. $(dirname $0)/../../fileio.rc
256ebe
+
256ebe
+#Tests that local structures in afr are removed from granted/blocked list of
256ebe
+#locks when inodelk fails on all bricks
256ebe
+
256ebe
+cleanup;
256ebe
+
256ebe
+TEST glusterd
256ebe
+TEST pidof glusterd
256ebe
+
256ebe
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
256ebe
+TEST $CLI volume set $V0 performance.quick-read off
256ebe
+TEST $CLI volume set $V0 performance.write-behind off
256ebe
+TEST $CLI volume set $V0 performance.io-cache off
256ebe
+TEST $CLI volume set $V0 performance.stat-prefetch off
256ebe
+TEST $CLI volume set $V0 performance.client-io-threads off
256ebe
+TEST $CLI volume set $V0 delay-gen locks
256ebe
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
256ebe
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
256ebe
+TEST $CLI volume set $V0 delay-gen.enable finodelk
256ebe
+
256ebe
+TEST $CLI volume start $V0
256ebe
+EXPECT 'Started' volinfo_field $V0 'Status'
256ebe
+
256ebe
+TEST $GFS -s $H0 --volfile-id $V0 $M0
256ebe
+TEST touch $M0/file
256ebe
+#Trigger write and stop bricks so inodelks fail on all bricks leading to
256ebe
+#lock failure condition
256ebe
+echo abc >> $M0/file &
256ebe
+
256ebe
+TEST $CLI volume stop $V0
256ebe
+TEST $CLI volume reset $V0 delay-gen
256ebe
+wait
256ebe
+TEST $CLI volume start $V0
256ebe
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
256ebe
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
256ebe
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
256ebe
+#Test that only one write succeeded, this tests that delay-gen worked as
256ebe
+#expected
256ebe
+echo abc >> $M0/file
256ebe
+EXPECT "abc" cat $M0/file
256ebe
+
256ebe
+cleanup;
256ebe
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
256ebe
index 45b96e3..47a5d3a 100644
256ebe
--- a/xlators/cluster/afr/src/afr-common.c
256ebe
+++ b/xlators/cluster/afr/src/afr-common.c
256ebe
@@ -5763,6 +5763,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
256ebe
     afr_private_t *priv = NULL;
256ebe
 
256ebe
     priv = this->private;
256ebe
+    INIT_LIST_HEAD(&local->transaction.wait_list);
256ebe
+    INIT_LIST_HEAD(&local->transaction.owner_list);
256ebe
+    INIT_LIST_HEAD(&local->ta_waitq);
256ebe
+    INIT_LIST_HEAD(&local->ta_onwireq);
256ebe
     ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
256ebe
     if (ret < 0)
256ebe
         goto out;
256ebe
@@ -5800,10 +5804,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
256ebe
         goto out;
256ebe
 
256ebe
     ret = 0;
256ebe
-    INIT_LIST_HEAD(&local->transaction.wait_list);
256ebe
-    INIT_LIST_HEAD(&local->transaction.owner_list);
256ebe
-    INIT_LIST_HEAD(&local->ta_waitq);
256ebe
-    INIT_LIST_HEAD(&local->ta_onwireq);
256ebe
 out:
256ebe
     return ret;
256ebe
 }
256ebe
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
256ebe
index 4091671..bc8eabe 100644
256ebe
--- a/xlators/cluster/afr/src/afr-lk-common.c
256ebe
+++ b/xlators/cluster/afr/src/afr-lk-common.c
256ebe
@@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this)
256ebe
     int_lock->lk_call_count = call_count;
256ebe
 
256ebe
     if (!call_count) {
256ebe
-        GF_ASSERT(!local->transaction.do_eager_unlock);
256ebe
         gf_msg_trace(this->name, 0, "No internal locks unlocked");
256ebe
         int_lock->lock_cbk(frame, this);
256ebe
         goto out;
256ebe
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
256ebe
index 229820b..15f3a7e 100644
256ebe
--- a/xlators/cluster/afr/src/afr-transaction.c
256ebe
+++ b/xlators/cluster/afr/src/afr-transaction.c
256ebe
@@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this)
256ebe
     }
256ebe
     local->transaction.unwind(frame, this);
256ebe
 
256ebe
+    GF_ASSERT(list_empty(&local->transaction.owner_list));
256ebe
+    GF_ASSERT(list_empty(&local->transaction.wait_list));
256ebe
     AFR_STACK_DESTROY(frame);
256ebe
 
256ebe
     return 0;
256ebe
@@ -393,7 +395,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
256ebe
 }
256ebe
 
256ebe
 static void
256ebe
-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
256ebe
+afr_handle_lock_acquire_failure(afr_local_t *local)
256ebe
 {
256ebe
     struct list_head shared;
256ebe
     afr_lock_t *lock = NULL;
256ebe
@@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
256ebe
     afr_lock_fail_shared(local, &shared);
256ebe
     local->transaction.do_eager_unlock = _gf_true;
256ebe
 out:
256ebe
-    if (locked) {
256ebe
-        local->internal_lock.lock_cbk = afr_transaction_done;
256ebe
-        afr_unlock(local->transaction.frame, local->transaction.frame->this);
256ebe
-    } else {
256ebe
-        afr_transaction_done(local->transaction.frame,
256ebe
-                             local->transaction.frame->this);
256ebe
-    }
256ebe
+    local->internal_lock.lock_cbk = afr_transaction_done;
256ebe
+    afr_unlock(local->transaction.frame, local->transaction.frame->this);
256ebe
 }
256ebe
 
256ebe
 call_frame_t *
256ebe
@@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
256ebe
     failure_count = AFR_COUNT(local->transaction.failed_subvols,
256ebe
                               priv->child_count);
256ebe
     if (failure_count == priv->child_count) {
256ebe
-        afr_handle_lock_acquire_failure(local, _gf_true);
256ebe
+        afr_handle_lock_acquire_failure(local);
256ebe
         return 0;
256ebe
     } else {
256ebe
         lock = &local->inode_ctx->lock[local->transaction.type];
256ebe
@@ -2092,7 +2089,7 @@ err:
256ebe
     local->op_ret = -1;
256ebe
     local->op_errno = op_errno;
256ebe
 
256ebe
-    afr_handle_lock_acquire_failure(local, _gf_true);
256ebe
+    afr_handle_lock_acquire_failure(local);
256ebe
 
256ebe
     if (xdata_req)
256ebe
         dict_unref(xdata_req);
256ebe
@@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
256ebe
     } else {
256ebe
         lock = &local->inode_ctx->lock[local->transaction.type];
256ebe
         if (local->internal_lock.lock_op_ret < 0) {
256ebe
-            afr_handle_lock_acquire_failure(local, _gf_false);
256ebe
+            afr_handle_lock_acquire_failure(local);
256ebe
         } else {
256ebe
             lock->event_generation = local->event_generation;
256ebe
             afr_changelog_pre_op(frame, this);
256ebe
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
256ebe
index 2cc3797..e731cfa 100644
256ebe
--- a/xlators/cluster/afr/src/afr.h
256ebe
+++ b/xlators/cluster/afr/src/afr.h
256ebe
@@ -1091,8 +1091,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
256ebe
 #define AFR_FRAME_INIT(frame, op_errno)                                        \
256ebe
     ({                                                                         \
256ebe
         frame->local = mem_get0(THIS->local_pool);                             \
256ebe
-        if (afr_local_init(frame->local, THIS->private, &op_errno)) {          \
256ebe
-            afr_local_cleanup(frame->local, THIS);                             \
256ebe
+        if (afr_local_init(frame->local, frame->this->private, &op_errno)) {   \
256ebe
+            afr_local_cleanup(frame->local, frame->this);                      \
256ebe
             mem_put(frame->local);                                             \
256ebe
             frame->local = NULL;                                               \
256ebe
         };                                                                     \
256ebe
-- 
256ebe
1.8.3.1
256ebe