|
|
21ab4e |
From cded8a819bee945041f86ba42fc531dcb075d521 Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Kotresh HR <khiremat@redhat.com>
|
|
|
21ab4e |
Date: Tue, 4 Apr 2017 15:39:46 -0400
|
|
|
21ab4e |
Subject: [PATCH 456/473] geo-rep: Improve worker log messages
|
|
|
21ab4e |
|
|
|
21ab4e |
Monitor process expects worker to establish SSH Tunnel to slave node
|
|
|
21ab4e |
and mount master volume locally with in 60 secs and acknowledge monitor
|
|
|
21ab4e |
process by closing feedback fd. If something goes wrong and worker
|
|
|
21ab4e |
does not close feedback fd with in 60 secs, monitor kills the worker.
|
|
|
21ab4e |
But there was no clue in log message about the actual issue. This patch
|
|
|
21ab4e |
adds log and indicates whether the worker is hung during SSH
|
|
|
21ab4e |
or master mount.
|
|
|
21ab4e |
|
|
|
21ab4e |
> Change-Id: Id08a12fa6f3bba1d4fe8036728dbc290e6c14c8c
|
|
|
21ab4e |
> BUG: 1261689
|
|
|
21ab4e |
> Signed-off-by: Kotresh HR <khiremat@redhat.com>
|
|
|
21ab4e |
> Reviewed-on: https://review.gluster.org/16997
|
|
|
21ab4e |
> Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Reviewed-by: Aravinda VK <avishwan@redhat.com>
|
|
|
21ab4e |
|
|
|
21ab4e |
Change-Id: Id08a12fa6f3bba1d4fe8036728dbc290e6c14c8c
|
|
|
21ab4e |
BUG: 1435587
|
|
|
21ab4e |
Signed-off-by: Kotresh HR <khiremat@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/106655
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
geo-replication/syncdaemon/gsyncd.py | 1 +
|
|
|
21ab4e |
geo-replication/syncdaemon/monitor.py | 9 +++++++--
|
|
|
21ab4e |
geo-replication/syncdaemon/resource.py | 8 ++++++++
|
|
|
21ab4e |
3 files changed, 16 insertions(+), 2 deletions(-)
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
|
|
|
21ab4e |
index 3092728..f9471e4 100644
|
|
|
21ab4e |
--- a/geo-replication/syncdaemon/gsyncd.py
|
|
|
21ab4e |
+++ b/geo-replication/syncdaemon/gsyncd.py
|
|
|
21ab4e |
@@ -775,6 +775,7 @@ def main_i():
|
|
|
21ab4e |
remote.connect_remote(go_daemon='done')
|
|
|
21ab4e |
local.connect()
|
|
|
21ab4e |
if ffd:
|
|
|
21ab4e |
+ logging.info ("Closing feedback fd, waking up the monitor")
|
|
|
21ab4e |
os.close(ffd)
|
|
|
21ab4e |
local.service_loop(*[r for r in [remote] if r])
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
|
|
|
21ab4e |
index d9a9c33..dc0211e 100644
|
|
|
21ab4e |
--- a/geo-replication/syncdaemon/monitor.py
|
|
|
21ab4e |
+++ b/geo-replication/syncdaemon/monitor.py
|
|
|
21ab4e |
@@ -369,8 +369,13 @@ class Monitor(object):
|
|
|
21ab4e |
|
|
|
21ab4e |
time.sleep(1)
|
|
|
21ab4e |
else:
|
|
|
21ab4e |
- logging.info("worker(%s) not confirmed in %d sec, "
|
|
|
21ab4e |
- "aborting it" % (w[0]['dir'], conn_timeout))
|
|
|
21ab4e |
+ logging.info("worker(%s) not confirmed in %d sec, aborting it. "
|
|
|
21ab4e |
+ "Gsyncd invocation on remote slave via SSH or "
|
|
|
21ab4e |
+ "gluster master mount might have hung. Please "
|
|
|
21ab4e |
+ "check the above logs for exact issue and check "
|
|
|
21ab4e |
+ "master or slave volume for errors. Restarting "
|
|
|
21ab4e |
+ "master/slave volume accordingly might help."
|
|
|
21ab4e |
+ % (w[0]['dir'], conn_timeout))
|
|
|
21ab4e |
errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
|
|
|
21ab4e |
nwait(apid) # wait for agent
|
|
|
21ab4e |
ret = nwait(cpid)
|
|
|
21ab4e |
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
|
|
|
21ab4e |
index e4b4e55..55bebe1 100644
|
|
|
21ab4e |
--- a/geo-replication/syncdaemon/resource.py
|
|
|
21ab4e |
+++ b/geo-replication/syncdaemon/resource.py
|
|
|
21ab4e |
@@ -1413,6 +1413,8 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
|
|
|
21ab4e |
with given backend
|
|
|
21ab4e |
"""
|
|
|
21ab4e |
|
|
|
21ab4e |
+ logging.info ("Mounting gluster volume locally...")
|
|
|
21ab4e |
+ t0 = time.time()
|
|
|
21ab4e |
label = getattr(gconf, 'mountbroker', None)
|
|
|
21ab4e |
if not label and not privileged():
|
|
|
21ab4e |
label = syncdutils.getusername()
|
|
|
21ab4e |
@@ -1423,6 +1425,8 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
|
|
|
21ab4e |
['log-file=' + gconf.gluster_log_file, 'volfile-server=' +
|
|
|
21ab4e |
self.host, 'volfile-id=' + self.volume, 'client-pid=-1']
|
|
|
21ab4e |
mounter(params).inhibit(*[l for l in [label] if l])
|
|
|
21ab4e |
+ logging.info ("Mounted gluster volume. Time taken: {0:.4f} "
|
|
|
21ab4e |
+ "secs".format((time.time() - t0)))
|
|
|
21ab4e |
|
|
|
21ab4e |
def connect_remote(self, *a, **kw):
|
|
|
21ab4e |
sup(self, *a, **kw)
|
|
|
21ab4e |
@@ -1669,10 +1673,14 @@ class SSH(AbstractUrl, SlaveRemote):
|
|
|
21ab4e |
self.inner_rsc.url)
|
|
|
21ab4e |
|
|
|
21ab4e |
deferred = go_daemon == 'postconn'
|
|
|
21ab4e |
+ logging.info ("Initializing SSH connection between master and slave...")
|
|
|
21ab4e |
+ t0 = time.time()
|
|
|
21ab4e |
ret = sup(self, gconf.ssh_command.split() +
|
|
|
21ab4e |
["-p", str(gconf.ssh_port)] +
|
|
|
21ab4e |
gconf.ssh_ctl_args + [self.remote_addr],
|
|
|
21ab4e |
slave=self.inner_rsc.url, deferred=deferred)
|
|
|
21ab4e |
+ logging.info ("SSH connection between master and slave established. "
|
|
|
21ab4e |
+ "Time taken: {0:.4f} secs".format((time.time() - t0)))
|
|
|
21ab4e |
|
|
|
21ab4e |
if deferred:
|
|
|
21ab4e |
# send a message to peer so that we can wait for
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|