50dc83
From cca418b78ec976aa69eacd56b0e6127ea7e3dd26 Mon Sep 17 00:00:00 2001
50dc83
From: Pranith Kumar K <pkarampu@redhat.com>
50dc83
Date: Thu, 4 Apr 2019 15:31:56 +0530
50dc83
Subject: [PATCH 095/124] cluster/afr: Remove local from owners_list on failure
50dc83
 of lock-acquisition
50dc83
50dc83
	Backport of https://review.gluster.org/c/glusterfs/+/22515
50dc83
50dc83
When eager-lock lock acquisition fails because of say network failures, the
50dc83
local is not being removed from owners_list, this leads to accumulation of
50dc83
waiting frames and the application will hang because the waiting frames are
50dc83
under the assumption that another transaction is in the process of acquiring
50dc83
lock because owner-list is not empty. Handled this case as well in this patch.
50dc83
Added asserts to make it easier to find these problems in future.
50dc83
50dc83
Change-Id: I3101393265e9827755725b1f2d94a93d8709e923
50dc83
fixes: bz#1688395
50dc83
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
50dc83
Reviewed-on: https://code.engineering.redhat.com/gerrit/167859
50dc83
Tested-by: RHGS Build Bot <nigelb@redhat.com>
50dc83
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
50dc83
---
50dc83
 tests/bugs/replicate/bug-1696599-io-hang.t | 47 ++++++++++++++++++++++++++++++
50dc83
 xlators/cluster/afr/src/afr-common.c       |  8 ++---
50dc83
 xlators/cluster/afr/src/afr-lk-common.c    |  1 -
50dc83
 xlators/cluster/afr/src/afr-transaction.c  | 19 +++++-------
50dc83
 xlators/cluster/afr/src/afr.h              |  4 +--
50dc83
 5 files changed, 61 insertions(+), 18 deletions(-)
50dc83
 create mode 100755 tests/bugs/replicate/bug-1696599-io-hang.t
50dc83
50dc83
diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t
50dc83
new file mode 100755
50dc83
index 0000000..869cdb9
50dc83
--- /dev/null
50dc83
+++ b/tests/bugs/replicate/bug-1696599-io-hang.t
50dc83
@@ -0,0 +1,47 @@
50dc83
+#!/bin/bash
50dc83
+
50dc83
+. $(dirname $0)/../../include.rc
50dc83
+. $(dirname $0)/../../volume.rc
50dc83
+. $(dirname $0)/../../fileio.rc
50dc83
+
50dc83
+#Tests that local structures in afr are removed from granted/blocked list of
50dc83
+#locks when inodelk fails on all bricks
50dc83
+
50dc83
+cleanup;
50dc83
+
50dc83
+TEST glusterd
50dc83
+TEST pidof glusterd
50dc83
+
50dc83
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
50dc83
+TEST $CLI volume set $V0 performance.quick-read off
50dc83
+TEST $CLI volume set $V0 performance.write-behind off
50dc83
+TEST $CLI volume set $V0 performance.io-cache off
50dc83
+TEST $CLI volume set $V0 performance.stat-prefetch off
50dc83
+TEST $CLI volume set $V0 performance.client-io-threads off
50dc83
+TEST $CLI volume set $V0 delay-gen locks
50dc83
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
50dc83
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
50dc83
+TEST $CLI volume set $V0 delay-gen.enable finodelk
50dc83
+
50dc83
+TEST $CLI volume start $V0
50dc83
+EXPECT 'Started' volinfo_field $V0 'Status'
50dc83
+
50dc83
+TEST $GFS -s $H0 --volfile-id $V0 $M0
50dc83
+TEST touch $M0/file
50dc83
+#Trigger write and stop bricks so inodelks fail on all bricks leading to
50dc83
+#lock failure condition
50dc83
+echo abc >> $M0/file &
50dc83
+
50dc83
+TEST $CLI volume stop $V0
50dc83
+TEST $CLI volume reset $V0 delay-gen
50dc83
+wait
50dc83
+TEST $CLI volume start $V0
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
50dc83
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
50dc83
+#Test that only one write succeeded, this tests that delay-gen worked as
50dc83
+#expected
50dc83
+echo abc >> $M0/file
50dc83
+EXPECT "abc" cat $M0/file
50dc83
+
50dc83
+cleanup;
50dc83
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
50dc83
index 45b96e3..47a5d3a 100644
50dc83
--- a/xlators/cluster/afr/src/afr-common.c
50dc83
+++ b/xlators/cluster/afr/src/afr-common.c
50dc83
@@ -5763,6 +5763,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
50dc83
     afr_private_t *priv = NULL;
50dc83
 
50dc83
     priv = this->private;
50dc83
+    INIT_LIST_HEAD(&local->transaction.wait_list);
50dc83
+    INIT_LIST_HEAD(&local->transaction.owner_list);
50dc83
+    INIT_LIST_HEAD(&local->ta_waitq);
50dc83
+    INIT_LIST_HEAD(&local->ta_onwireq);
50dc83
     ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
50dc83
     if (ret < 0)
50dc83
         goto out;
50dc83
@@ -5800,10 +5804,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
50dc83
         goto out;
50dc83
 
50dc83
     ret = 0;
50dc83
-    INIT_LIST_HEAD(&local->transaction.wait_list);
50dc83
-    INIT_LIST_HEAD(&local->transaction.owner_list);
50dc83
-    INIT_LIST_HEAD(&local->ta_waitq);
50dc83
-    INIT_LIST_HEAD(&local->ta_onwireq);
50dc83
 out:
50dc83
     return ret;
50dc83
 }
50dc83
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
50dc83
index 4091671..bc8eabe 100644
50dc83
--- a/xlators/cluster/afr/src/afr-lk-common.c
50dc83
+++ b/xlators/cluster/afr/src/afr-lk-common.c
50dc83
@@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this)
50dc83
     int_lock->lk_call_count = call_count;
50dc83
 
50dc83
     if (!call_count) {
50dc83
-        GF_ASSERT(!local->transaction.do_eager_unlock);
50dc83
         gf_msg_trace(this->name, 0, "No internal locks unlocked");
50dc83
         int_lock->lock_cbk(frame, this);
50dc83
         goto out;
50dc83
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
50dc83
index 229820b..15f3a7e 100644
50dc83
--- a/xlators/cluster/afr/src/afr-transaction.c
50dc83
+++ b/xlators/cluster/afr/src/afr-transaction.c
50dc83
@@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this)
50dc83
     }
50dc83
     local->transaction.unwind(frame, this);
50dc83
 
50dc83
+    GF_ASSERT(list_empty(&local->transaction.owner_list));
50dc83
+    GF_ASSERT(list_empty(&local->transaction.wait_list));
50dc83
     AFR_STACK_DESTROY(frame);
50dc83
 
50dc83
     return 0;
50dc83
@@ -393,7 +395,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
50dc83
 }
50dc83
 
50dc83
 static void
50dc83
-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
50dc83
+afr_handle_lock_acquire_failure(afr_local_t *local)
50dc83
 {
50dc83
     struct list_head shared;
50dc83
     afr_lock_t *lock = NULL;
50dc83
@@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
50dc83
     afr_lock_fail_shared(local, &shared);
50dc83
     local->transaction.do_eager_unlock = _gf_true;
50dc83
 out:
50dc83
-    if (locked) {
50dc83
-        local->internal_lock.lock_cbk = afr_transaction_done;
50dc83
-        afr_unlock(local->transaction.frame, local->transaction.frame->this);
50dc83
-    } else {
50dc83
-        afr_transaction_done(local->transaction.frame,
50dc83
-                             local->transaction.frame->this);
50dc83
-    }
50dc83
+    local->internal_lock.lock_cbk = afr_transaction_done;
50dc83
+    afr_unlock(local->transaction.frame, local->transaction.frame->this);
50dc83
 }
50dc83
 
50dc83
 call_frame_t *
50dc83
@@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
50dc83
     failure_count = AFR_COUNT(local->transaction.failed_subvols,
50dc83
                               priv->child_count);
50dc83
     if (failure_count == priv->child_count) {
50dc83
-        afr_handle_lock_acquire_failure(local, _gf_true);
50dc83
+        afr_handle_lock_acquire_failure(local);
50dc83
         return 0;
50dc83
     } else {
50dc83
         lock = &local->inode_ctx->lock[local->transaction.type];
50dc83
@@ -2092,7 +2089,7 @@ err:
50dc83
     local->op_ret = -1;
50dc83
     local->op_errno = op_errno;
50dc83
 
50dc83
-    afr_handle_lock_acquire_failure(local, _gf_true);
50dc83
+    afr_handle_lock_acquire_failure(local);
50dc83
 
50dc83
     if (xdata_req)
50dc83
         dict_unref(xdata_req);
50dc83
@@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
50dc83
     } else {
50dc83
         lock = &local->inode_ctx->lock[local->transaction.type];
50dc83
         if (local->internal_lock.lock_op_ret < 0) {
50dc83
-            afr_handle_lock_acquire_failure(local, _gf_false);
50dc83
+            afr_handle_lock_acquire_failure(local);
50dc83
         } else {
50dc83
             lock->event_generation = local->event_generation;
50dc83
             afr_changelog_pre_op(frame, this);
50dc83
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
50dc83
index 2cc3797..e731cfa 100644
50dc83
--- a/xlators/cluster/afr/src/afr.h
50dc83
+++ b/xlators/cluster/afr/src/afr.h
50dc83
@@ -1091,8 +1091,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
50dc83
 #define AFR_FRAME_INIT(frame, op_errno)                                        \
50dc83
     ({                                                                         \
50dc83
         frame->local = mem_get0(THIS->local_pool);                             \
50dc83
-        if (afr_local_init(frame->local, THIS->private, &op_errno)) {          \
50dc83
-            afr_local_cleanup(frame->local, THIS);                             \
50dc83
+        if (afr_local_init(frame->local, frame->this->private, &op_errno)) {   \
50dc83
+            afr_local_cleanup(frame->local, frame->this);                      \
50dc83
             mem_put(frame->local);                                             \
50dc83
             frame->local = NULL;                                               \
50dc83
         };                                                                     \
50dc83
-- 
50dc83
1.8.3.1
50dc83