|
|
d1681e |
From 8f1a849968242aa3c53a05fbf1b0647a905e9cdd Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: Atin Mukherjee <amukherj@redhat.com>
|
|
|
d1681e |
Date: Thu, 23 Aug 2018 12:12:36 +0530
|
|
|
d1681e |
Subject: [PATCH 358/359] glusterd: glusterd_brick_start shouldn't try to bring
|
|
|
d1681e |
up brick if only_connect is true
|
|
|
d1681e |
|
|
|
d1681e |
With the latest refactoring in glusterd_brick_start () function in case
|
|
|
d1681e |
we run into a situation where gf_is_service_running () returns a valid
|
|
|
d1681e |
pid which is running but doesn't belong to a gluster process, even in
|
|
|
d1681e |
case of the only_connect flag passed as _gf_true we'd end up trying to start
|
|
|
d1681e |
a brick which would cause a deadlock in brick multiplexing as both
|
|
|
d1681e |
glusterd_restart_bricks () and glusterd_do_volume_quorum_action () would
|
|
|
d1681e |
cause context switching with each other for the same brick. The
|
|
|
d1681e |
following bt shows the same:
|
|
|
d1681e |
|
|
|
d1681e |
(gdb) t a a bt
|
|
|
d1681e |
|
|
|
d1681e |
Thread 8 (Thread 0x7fcced48a700 (LWP 11959)):
|
|
|
d1681e |
srch_vol=srch_vol@entry=0xbe0410, comp_vol=comp_vol@entry=0xc03680,
|
|
|
d1681e |
brickinfo=brickinfo@entry=0xc14ef0) at glusterd-utils.c:5834
|
|
|
d1681e |
brickinfo=0xc14ef0, volinfo=0xc03680, conf=<optimized out>)
|
|
|
d1681e |
at glusterd-utils.c:5902
|
|
|
d1681e |
brickinfo=brickinfo@entry=0xc14ef0, wait=wait@entry=_gf_false,
|
|
|
d1681e |
only_connect=only_connect@entry=_gf_true) at glusterd-utils.c:6251
|
|
|
d1681e |
volinfo=0xc03680, meets_quorum=_gf_true) at glusterd-server-quorum.c:402
|
|
|
d1681e |
at glusterd-server-quorum.c:443
|
|
|
d1681e |
iov=iov@entry=0x7fcce0004040, count=count@entry=1,
|
|
|
d1681e |
myframe=myframe@entry=0x7fcce00023a0) at glusterd-rpc-ops.c:542
|
|
|
d1681e |
iov=0x7fcce0004040, count=1, myframe=0x7fcce00023a0,
|
|
|
d1681e |
fn=0x7fccf12403d0 <__glusterd_friend_add_cbk>) at glusterd-rpc-ops.c:223
|
|
|
d1681e |
---Type <return> to continue, or q <return> to quit---
|
|
|
d1681e |
at rpc-transport.c:538
|
|
|
d1681e |
|
|
|
d1681e |
Thread 7 (Thread 0x7fccedc8b700 (LWP 11958)):
|
|
|
d1681e |
|
|
|
d1681e |
Thread 6 (Thread 0x7fccf1d67700 (LWP 11877)):
|
|
|
d1681e |
brickinfo=brickinfo@entry=0xc14ef0) at glusterd-utils.c:5834
|
|
|
d1681e |
at glusterd-utils.c:6251
|
|
|
d1681e |
|
|
|
d1681e |
Thread 5 (Thread 0x7fccf2568700 (LWP 11876)):
|
|
|
d1681e |
|
|
|
d1681e |
Thread 4 (Thread 0x7fccf2d69700 (LWP 11875)):
|
|
|
d1681e |
|
|
|
d1681e |
Thread 3 (Thread 0x7fccf356a700 (LWP 11874)):
|
|
|
d1681e |
|
|
|
d1681e |
Thread 2 (Thread 0x7fccf3d6b700 (LWP 11873)):
|
|
|
d1681e |
---Type <return> to continue, or q <return> to quit---
|
|
|
d1681e |
|
|
|
d1681e |
Thread 1 (Thread 0x7fccf68a8780 (LWP 11872)):
|
|
|
d1681e |
|
|
|
d1681e |
Fix:
|
|
|
d1681e |
|
|
|
d1681e |
The solution is to ensure we don't restart bricks if only_connect is
|
|
|
d1681e |
true and instead only attempt to connect to the brick.
|
|
|
d1681e |
|
|
|
d1681e |
Test:
|
|
|
d1681e |
|
|
|
d1681e |
Simulated a code change to ensure gf_is_service_running () always returns
|
|
|
d1681e |
true to hit the scenario.
|
|
|
d1681e |
|
|
|
d1681e |
>upstream patch : https://review.gluster.org/#/c/glusterfs/+/20935
|
|
|
d1681e |
|
|
|
d1681e |
>Change-Id: Iec184e6c9e8aabef931d310f931f4d7a580f0f48
|
|
|
d1681e |
>Fixes: bz#1620544
|
|
|
d1681e |
>Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
d1681e |
|
|
|
d1681e |
Change-Id: Iec184e6c9e8aabef931d310f931f4d7a580f0f48
|
|
|
d1681e |
BUG: 1620469
|
|
|
d1681e |
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/147888
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
xlators/mgmt/glusterd/src/glusterd-utils.c | 4 ++++
|
|
|
d1681e |
1 file changed, 4 insertions(+)
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
d1681e |
index 136a032..b9e8d8d 100644
|
|
|
d1681e |
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
d1681e |
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
d1681e |
@@ -6173,6 +6173,8 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
|
|
d1681e |
if (sys_access (pidfile , R_OK) == 0) {
|
|
|
d1681e |
sys_unlink (pidfile);
|
|
|
d1681e |
}
|
|
|
d1681e |
+ if (only_connect)
|
|
|
d1681e |
+ return 0;
|
|
|
d1681e |
goto run;
|
|
|
d1681e |
}
|
|
|
d1681e |
GF_FREE (brickpath);
|
|
|
d1681e |
@@ -6187,6 +6189,8 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
|
|
d1681e |
if (sys_access (pidfile , R_OK) == 0) {
|
|
|
d1681e |
sys_unlink (pidfile);
|
|
|
d1681e |
}
|
|
|
d1681e |
+ if (only_connect)
|
|
|
d1681e |
+ return 0;
|
|
|
d1681e |
goto run;
|
|
|
d1681e |
}
|
|
|
d1681e |
}
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|