From dc52f3950738c07b9b4e044346266d5f3efee5e2 Mon Sep 17 00:00:00 2001
From: Saravanakumar Arumugam <sarumuga@redhat.com>
Date: Wed, 24 Aug 2016 15:19:53 +0530
Subject: [PATCH 73/79] geo-rep: add geo-rep events for server side changes
Event Type defined in #15351 to avoid merge conflicts
Add geo-rep events applicable to changes in
geo-rep session in the server side.
> Reviewed-on: http://review.gluster.org/15328
> Tested-by: Aravinda VK <avishwan@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Aravinda VK <avishwan@redhat.com>
Change-Id: Ia66574d2abccad7fce6a96667efbc7c6c8903fc6
BUG: 1361118
Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/84879
Tested-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
configure.ac | 2 +-
geo-replication/syncdaemon/Makefile.am | 2 +-
geo-replication/syncdaemon/conf.py.in | 14 +++++++
.../{configinterface.py.in => configinterface.py} | 17 ++++----
geo-replication/syncdaemon/monitor.py | 41 ++++++++++++--------
geo-replication/syncdaemon/syncdutils.py | 9 ++++
7 files changed, 60 insertions(+), 27 deletions(-)
create mode 100644 geo-replication/syncdaemon/conf.py.in
rename geo-replication/syncdaemon/{configinterface.py.in => configinterface.py} (95%)
diff --git a/configure.ac b/configure.ac
index 011cf14..e4bf48e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -42,8 +42,8 @@ AC_CONFIG_FILES([Makefile
geo-replication/src/peer_gsec_create
geo-replication/src/peer_mountbroker
extras/peer_add_secret_pub
- geo-replication/syncdaemon/configinterface.py
extras/snap_scheduler/conf.py
+ geo-replication/syncdaemon/conf.py
glusterfsd/Makefile
glusterfsd/src/Makefile
rpc/Makefile
diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am
index ed0f5e4..7cdaf45 100644
--- a/geo-replication/syncdaemon/Makefile.am
+++ b/geo-replication/syncdaemon/Makefile.am
@@ -3,6 +3,6 @@ syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon
syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py \
resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \
$(top_builddir)/contrib/ipaddr-py/ipaddr.py libgfchangelog.py changelogagent.py \
- gsyncdstatus.py
+ gsyncdstatus.py conf.py
CLEANFILES =
diff --git a/geo-replication/syncdaemon/conf.py.in b/geo-replication/syncdaemon/conf.py.in
new file mode 100644
index 0000000..8807278
--- /dev/null
+++ b/geo-replication/syncdaemon/conf.py.in
@@ -0,0 +1,14 @@
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@'
+GLUSTERD_WORKDIR = "@GLUSTERD_WORKDIR@"
+
+LOCALSTATEDIR = "@localstatedir@"
diff --git a/geo-replication/syncdaemon/configinterface.py.in b/geo-replication/syncdaemon/configinterface.py
similarity index 95%
rename from geo-replication/syncdaemon/configinterface.py.in
rename to geo-replication/syncdaemon/configinterface.py
index e1cf007..adcefb8 100644
--- a/geo-replication/syncdaemon/configinterface.py.in
+++ b/geo-replication/syncdaemon/configinterface.py
@@ -23,6 +23,7 @@ import tempfile
import shutil
from syncdutils import escape, unescape, norm, update_file, GsyncdError
+from conf import GLUSTERD_WORKDIR, LOCALSTATEDIR
SECT_ORD = '__section_order__'
SECT_META = '__meta__'
@@ -30,14 +31,14 @@ config_version = 2.0
re_type = type(re.compile(''))
-TMPL_CONFIG_FILE = "@GLUSTERD_WORKDIR@/geo-replication/gsyncd_template.conf"
+TMPL_CONFIG_FILE = GLUSTERD_WORKDIR + "/geo-replication/gsyncd_template.conf"
# (SECTION, OPTION, OLD VALUE, NEW VALUE)
CONFIGS = (
("peersrx . .",
"georep_session_working_dir",
"",
- "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+ GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
"${slavevol}/"),
("peersrx .",
"gluster_params",
@@ -51,7 +52,7 @@ CONFIGS = (
"ssh_command_tar",
"",
"ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no "
- "-i @GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem"),
+ "-i " + GLUSTERD_WORKDIR + "/geo-replication/tar_ssh.pem"),
("peersrx . .",
"changelog_log_file",
"",
@@ -59,7 +60,7 @@ CONFIGS = (
"/${eSlave}${local_id}-changes.log"),
("peersrx . .",
"working_dir",
- "@localstatedir@/run/gluster/${mastervol}/${eSlave}",
+ LOCALSTATEDIR + "/run/gluster/${mastervol}/${eSlave}",
"${iprefix}/lib/misc/glusterfsd/${mastervol}/${eSlave}"),
("peersrx . .",
"ignore_deletes",
@@ -67,15 +68,15 @@ CONFIGS = (
"false"),
("peersrx . .",
"pid-file",
- "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+ GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
"${slavevol}/${eSlave}.pid",
- "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+ GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
"${slavevol}/monitor.pid"),
("peersrx . .",
"state-file",
- "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+ GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
"${slavevol}/${eSlave}.status",
- "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+ GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
"${slavevol}/monitor.status"),
)
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index a26de0c..a624fe4 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -25,6 +25,7 @@ from gconf import gconf
from syncdutils import select, waitpid, errno_wrap
from syncdutils import set_term_handler, is_host_local, GsyncdError
from syncdutils import escape, Thread, finalize, memoize
+from syncdutils import gf_event, eventtypes
from gsyncdstatus import GeorepStatus, set_monitor_status
@@ -209,11 +210,12 @@ class Monitor(object):
blown worker blows up on EPIPE if the net goes down,
due to the keep-alive thread)
"""
- if not self.status.get(w[0], None):
- self.status[w[0]] = GeorepStatus(gconf.state_file, w[0])
+ if not self.status.get(w[0]['dir'], None):
+ self.status[w[0]['dir']] = GeorepStatus(gconf.state_file,
+ w[0]['dir'])
set_monitor_status(gconf.state_file, self.ST_STARTED)
- self.status[w[0]].set_worker_status(self.ST_INIT)
+ self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
ret = 0
@@ -280,7 +282,7 @@ class Monitor(object):
if apid == 0:
os.close(rw)
os.close(ww)
- os.execv(sys.executable, argv + ['--local-path', w[0],
+ os.execv(sys.executable, argv + ['--local-path', w[0]['dir'],
'--agent',
'--rpc-fd',
','.join([str(ra), str(wa),
@@ -292,9 +294,9 @@ class Monitor(object):
os.close(ra)
os.close(wa)
os.execv(sys.executable, argv + ['--feedback-fd', str(pw),
- '--local-path', w[0],
+ '--local-path', w[0]['dir'],
'--local-id',
- '.' + escape(w[0]),
+ '.' + escape(w[0]['dir']),
'--rpc-fd',
','.join([str(rw), str(ww),
str(ra), str(wa)]),
@@ -324,31 +326,31 @@ class Monitor(object):
if ret_agent is not None:
# Agent is died Kill Worker
logging.info("Changelog Agent died, "
- "Aborting Worker(%s)" % w[0])
+ "Aborting Worker(%s)" % w[0]['dir'])
errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
nwait(cpid)
nwait(apid)
if ret is not None:
logging.info("worker(%s) died before establishing "
- "connection" % w[0])
+ "connection" % w[0]['dir'])
nwait(apid) # wait for agent
else:
- logging.debug("worker(%s) connected" % w[0])
+ logging.debug("worker(%s) connected" % w[0]['dir'])
while time.time() < t0 + conn_timeout:
ret = nwait(cpid, os.WNOHANG)
ret_agent = nwait(apid, os.WNOHANG)
if ret is not None:
logging.info("worker(%s) died in startup "
- "phase" % w[0])
+ "phase" % w[0]['dir'])
nwait(apid) # wait for agent
break
if ret_agent is not None:
# Agent is died Kill Worker
logging.info("Changelog Agent died, Aborting "
- "Worker(%s)" % w[0])
+ "Worker(%s)" % w[0]['dir'])
errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
nwait(cpid)
nwait(apid)
@@ -357,12 +359,12 @@ class Monitor(object):
time.sleep(1)
else:
logging.info("worker(%s) not confirmed in %d sec, "
- "aborting it" % (w[0], conn_timeout))
+ "aborting it" % (w[0]['dir'], conn_timeout))
errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
nwait(apid) # wait for agent
ret = nwait(cpid)
if ret is None:
- self.status[w[0]].set_worker_status(self.ST_STABLE)
+ self.status[w[0]['dir']].set_worker_status(self.ST_STABLE)
# If worker dies, agent terminates on EOF.
# So lets wait for agent first.
nwait(apid)
@@ -372,9 +374,16 @@ class Monitor(object):
else:
ret = exit_status(ret)
if ret in (0, 1):
- self.status[w[0]].set_worker_status(self.ST_FAULTY)
+ self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY)
+ gf_event(eventtypes.GEOREP_FAULTY,
+ master_volume=master.volume,
+ master_node=w[0]['host'],
+ slave_host=slave_host,
+ slave_volume=slave_vol,
+ current_slave_host=current_slave_host,
+ brick_path=w[0]['dir'])
time.sleep(10)
- self.status[w[0]].set_worker_status(self.ST_INCON)
+ self.status[w[0]['dir']].set_worker_status(self.ST_INCON)
return ret
def multiplex(self, wspx, suuid, slave_vol, slave_host, master):
@@ -461,7 +470,7 @@ def distribute(*resources):
for idx, brick in enumerate(mvol.bricks):
if is_host_local(brick['host']):
is_hot = mvol.is_hot(":".join([brick['host'], brick['dir']]))
- workerspex.append((brick['dir'],
+ workerspex.append((brick,
slaves[idx % len(slaves)],
get_subvol_num(idx, mvol, is_hot),
is_hot))
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 987e1bf..f5d3c51 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -23,6 +23,10 @@ from signal import signal, SIGTERM
import select as oselect
from os import waitpid as owaitpid
+from conf import GLUSTERFS_LIBEXECDIR
+sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
+from events import eventtypes
+
try:
from cPickle import PickleError
except ImportError:
@@ -523,3 +527,8 @@ class GlusterLogLevel(object):
def get_changelog_log_level(lvl):
return getattr(GlusterLogLevel, lvl, GlusterLogLevel.INFO)
+
+
+def gf_event(event_type, **kwargs):
+ from events.gf_event import gf_event as gfevent
+ gfevent(event_type, **kwargs)
--
1.7.1