e3c68b
From cca418b78ec976aa69eacd56b0e6127ea7e3dd26 Mon Sep 17 00:00:00 2001
e3c68b
From: Pranith Kumar K <pkarampu@redhat.com>
e3c68b
Date: Thu, 4 Apr 2019 15:31:56 +0530
e3c68b
Subject: [PATCH 095/124] cluster/afr: Remove local from owners_list on failure
e3c68b
 of lock-acquisition
e3c68b
e3c68b
	Backport of https://review.gluster.org/c/glusterfs/+/22515
e3c68b
e3c68b
When eager-lock lock acquisition fails because of say network failures, the
e3c68b
local is not being removed from owners_list, this leads to accumulation of
e3c68b
waiting frames and the application will hang because the waiting frames are
e3c68b
under the assumption that another transaction is in the process of acquiring
e3c68b
lock because owner-list is not empty. Handled this case as well in this patch.
e3c68b
Added asserts to make it easier to find these problems in future.
e3c68b
e3c68b
Change-Id: I3101393265e9827755725b1f2d94a93d8709e923
e3c68b
fixes: bz#1688395
e3c68b
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
e3c68b
Reviewed-on: https://code.engineering.redhat.com/gerrit/167859
e3c68b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e3c68b
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
e3c68b
---
e3c68b
 tests/bugs/replicate/bug-1696599-io-hang.t | 47 ++++++++++++++++++++++++++++++
e3c68b
 xlators/cluster/afr/src/afr-common.c       |  8 ++---
e3c68b
 xlators/cluster/afr/src/afr-lk-common.c    |  1 -
e3c68b
 xlators/cluster/afr/src/afr-transaction.c  | 19 +++++-------
e3c68b
 xlators/cluster/afr/src/afr.h              |  4 +--
e3c68b
 5 files changed, 61 insertions(+), 18 deletions(-)
e3c68b
 create mode 100755 tests/bugs/replicate/bug-1696599-io-hang.t
e3c68b
e3c68b
diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t
e3c68b
new file mode 100755
e3c68b
index 0000000..869cdb9
e3c68b
--- /dev/null
e3c68b
+++ b/tests/bugs/replicate/bug-1696599-io-hang.t
e3c68b
@@ -0,0 +1,47 @@
e3c68b
+#!/bin/bash
e3c68b
+
e3c68b
+. $(dirname $0)/../../include.rc
e3c68b
+. $(dirname $0)/../../volume.rc
e3c68b
+. $(dirname $0)/../../fileio.rc
e3c68b
+
e3c68b
+#Tests that local structures in afr are removed from granted/blocked list of
e3c68b
+#locks when inodelk fails on all bricks
e3c68b
+
e3c68b
+cleanup;
e3c68b
+
e3c68b
+TEST glusterd
e3c68b
+TEST pidof glusterd
e3c68b
+
e3c68b
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
e3c68b
+TEST $CLI volume set $V0 performance.quick-read off
e3c68b
+TEST $CLI volume set $V0 performance.write-behind off
e3c68b
+TEST $CLI volume set $V0 performance.io-cache off
e3c68b
+TEST $CLI volume set $V0 performance.stat-prefetch off
e3c68b
+TEST $CLI volume set $V0 performance.client-io-threads off
e3c68b
+TEST $CLI volume set $V0 delay-gen locks
e3c68b
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
e3c68b
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
e3c68b
+TEST $CLI volume set $V0 delay-gen.enable finodelk
e3c68b
+
e3c68b
+TEST $CLI volume start $V0
e3c68b
+EXPECT 'Started' volinfo_field $V0 'Status'
e3c68b
+
e3c68b
+TEST $GFS -s $H0 --volfile-id $V0 $M0
e3c68b
+TEST touch $M0/file
e3c68b
+#Trigger write and stop bricks so inodelks fail on all bricks leading to
e3c68b
+#lock failure condition
e3c68b
+echo abc >> $M0/file &
e3c68b
+
e3c68b
+TEST $CLI volume stop $V0
e3c68b
+TEST $CLI volume reset $V0 delay-gen
e3c68b
+wait
e3c68b
+TEST $CLI volume start $V0
e3c68b
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
e3c68b
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
e3c68b
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
e3c68b
+#Test that only one write succeeded, this tests that delay-gen worked as
e3c68b
+#expected
e3c68b
+echo abc >> $M0/file
e3c68b
+EXPECT "abc" cat $M0/file
e3c68b
+
e3c68b
+cleanup;
e3c68b
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
e3c68b
index 45b96e3..47a5d3a 100644
e3c68b
--- a/xlators/cluster/afr/src/afr-common.c
e3c68b
+++ b/xlators/cluster/afr/src/afr-common.c
e3c68b
@@ -5763,6 +5763,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
e3c68b
     afr_private_t *priv = NULL;
e3c68b
 
e3c68b
     priv = this->private;
e3c68b
+    INIT_LIST_HEAD(&local->transaction.wait_list);
e3c68b
+    INIT_LIST_HEAD(&local->transaction.owner_list);
e3c68b
+    INIT_LIST_HEAD(&local->ta_waitq);
e3c68b
+    INIT_LIST_HEAD(&local->ta_onwireq);
e3c68b
     ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
e3c68b
     if (ret < 0)
e3c68b
         goto out;
e3c68b
@@ -5800,10 +5804,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
e3c68b
         goto out;
e3c68b
 
e3c68b
     ret = 0;
e3c68b
-    INIT_LIST_HEAD(&local->transaction.wait_list);
e3c68b
-    INIT_LIST_HEAD(&local->transaction.owner_list);
e3c68b
-    INIT_LIST_HEAD(&local->ta_waitq);
e3c68b
-    INIT_LIST_HEAD(&local->ta_onwireq);
e3c68b
 out:
e3c68b
     return ret;
e3c68b
 }
e3c68b
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
e3c68b
index 4091671..bc8eabe 100644
e3c68b
--- a/xlators/cluster/afr/src/afr-lk-common.c
e3c68b
+++ b/xlators/cluster/afr/src/afr-lk-common.c
e3c68b
@@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this)
e3c68b
     int_lock->lk_call_count = call_count;
e3c68b
 
e3c68b
     if (!call_count) {
e3c68b
-        GF_ASSERT(!local->transaction.do_eager_unlock);
e3c68b
         gf_msg_trace(this->name, 0, "No internal locks unlocked");
e3c68b
         int_lock->lock_cbk(frame, this);
e3c68b
         goto out;
e3c68b
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
e3c68b
index 229820b..15f3a7e 100644
e3c68b
--- a/xlators/cluster/afr/src/afr-transaction.c
e3c68b
+++ b/xlators/cluster/afr/src/afr-transaction.c
e3c68b
@@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this)
e3c68b
     }
e3c68b
     local->transaction.unwind(frame, this);
e3c68b
 
e3c68b
+    GF_ASSERT(list_empty(&local->transaction.owner_list));
e3c68b
+    GF_ASSERT(list_empty(&local->transaction.wait_list));
e3c68b
     AFR_STACK_DESTROY(frame);
e3c68b
 
e3c68b
     return 0;
e3c68b
@@ -393,7 +395,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
e3c68b
 }
e3c68b
 
e3c68b
 static void
e3c68b
-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
e3c68b
+afr_handle_lock_acquire_failure(afr_local_t *local)
e3c68b
 {
e3c68b
     struct list_head shared;
e3c68b
     afr_lock_t *lock = NULL;
e3c68b
@@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
e3c68b
     afr_lock_fail_shared(local, &shared);
e3c68b
     local->transaction.do_eager_unlock = _gf_true;
e3c68b
 out:
e3c68b
-    if (locked) {
e3c68b
-        local->internal_lock.lock_cbk = afr_transaction_done;
e3c68b
-        afr_unlock(local->transaction.frame, local->transaction.frame->this);
e3c68b
-    } else {
e3c68b
-        afr_transaction_done(local->transaction.frame,
e3c68b
-                             local->transaction.frame->this);
e3c68b
-    }
e3c68b
+    local->internal_lock.lock_cbk = afr_transaction_done;
e3c68b
+    afr_unlock(local->transaction.frame, local->transaction.frame->this);
e3c68b
 }
e3c68b
 
e3c68b
 call_frame_t *
e3c68b
@@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
e3c68b
     failure_count = AFR_COUNT(local->transaction.failed_subvols,
e3c68b
                               priv->child_count);
e3c68b
     if (failure_count == priv->child_count) {
e3c68b
-        afr_handle_lock_acquire_failure(local, _gf_true);
e3c68b
+        afr_handle_lock_acquire_failure(local);
e3c68b
         return 0;
e3c68b
     } else {
e3c68b
         lock = &local->inode_ctx->lock[local->transaction.type];
e3c68b
@@ -2092,7 +2089,7 @@ err:
e3c68b
     local->op_ret = -1;
e3c68b
     local->op_errno = op_errno;
e3c68b
 
e3c68b
-    afr_handle_lock_acquire_failure(local, _gf_true);
e3c68b
+    afr_handle_lock_acquire_failure(local);
e3c68b
 
e3c68b
     if (xdata_req)
e3c68b
         dict_unref(xdata_req);
e3c68b
@@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
e3c68b
     } else {
e3c68b
         lock = &local->inode_ctx->lock[local->transaction.type];
e3c68b
         if (local->internal_lock.lock_op_ret < 0) {
e3c68b
-            afr_handle_lock_acquire_failure(local, _gf_false);
e3c68b
+            afr_handle_lock_acquire_failure(local);
e3c68b
         } else {
e3c68b
             lock->event_generation = local->event_generation;
e3c68b
             afr_changelog_pre_op(frame, this);
e3c68b
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
e3c68b
index 2cc3797..e731cfa 100644
e3c68b
--- a/xlators/cluster/afr/src/afr.h
e3c68b
+++ b/xlators/cluster/afr/src/afr.h
e3c68b
@@ -1091,8 +1091,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
e3c68b
 #define AFR_FRAME_INIT(frame, op_errno)                                        \
e3c68b
     ({                                                                         \
e3c68b
         frame->local = mem_get0(THIS->local_pool);                             \
e3c68b
-        if (afr_local_init(frame->local, THIS->private, &op_errno)) {          \
e3c68b
-            afr_local_cleanup(frame->local, THIS);                             \
e3c68b
+        if (afr_local_init(frame->local, frame->this->private, &op_errno)) {   \
e3c68b
+            afr_local_cleanup(frame->local, frame->this);                      \
e3c68b
             mem_put(frame->local);                                             \
e3c68b
             frame->local = NULL;                                               \
e3c68b
         };                                                                     \
e3c68b
-- 
e3c68b
1.8.3.1
e3c68b