From dc52f3950738c07b9b4e044346266d5f3efee5e2 Mon Sep 17 00:00:00 2001 From: Saravanakumar Arumugam Date: Wed, 24 Aug 2016 15:19:53 +0530 Subject: [PATCH 73/79] geo-rep: add geo-rep events for server side changes Event Type defined in #15351 to avoid merge conflicts Add geo-rep events applicable to changes in geo-rep session in the server side. > Reviewed-on: http://review.gluster.org/15328 > Tested-by: Aravinda VK > Smoke: Gluster Build System > NetBSD-regression: NetBSD Build System > CentOS-regression: Gluster Build System > Reviewed-by: Aravinda VK Change-Id: Ia66574d2abccad7fce6a96667efbc7c6c8903fc6 BUG: 1361118 Signed-off-by: Saravanakumar Arumugam Reviewed-on: https://code.engineering.redhat.com/gerrit/84879 Tested-by: Aravinda Vishwanathapura Krishna Murthy Reviewed-by: Atin Mukherjee --- configure.ac | 2 +- geo-replication/syncdaemon/Makefile.am | 2 +- geo-replication/syncdaemon/conf.py.in | 14 +++++++ .../{configinterface.py.in => configinterface.py} | 17 ++++---- geo-replication/syncdaemon/monitor.py | 41 ++++++++++++-------- geo-replication/syncdaemon/syncdutils.py | 9 ++++ 7 files changed, 60 insertions(+), 27 deletions(-) create mode 100644 geo-replication/syncdaemon/conf.py.in rename geo-replication/syncdaemon/{configinterface.py.in => configinterface.py} (95%) diff --git a/configure.ac b/configure.ac index 011cf14..e4bf48e 100644 --- a/configure.ac +++ b/configure.ac @@ -42,8 +42,8 @@ AC_CONFIG_FILES([Makefile geo-replication/src/peer_gsec_create geo-replication/src/peer_mountbroker extras/peer_add_secret_pub - geo-replication/syncdaemon/configinterface.py extras/snap_scheduler/conf.py + geo-replication/syncdaemon/conf.py glusterfsd/Makefile glusterfsd/src/Makefile rpc/Makefile diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am index ed0f5e4..7cdaf45 100644 --- a/geo-replication/syncdaemon/Makefile.am +++ b/geo-replication/syncdaemon/Makefile.am @@ -3,6 +3,6 @@ syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py \ resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \ $(top_builddir)/contrib/ipaddr-py/ipaddr.py libgfchangelog.py changelogagent.py \ - gsyncdstatus.py + gsyncdstatus.py conf.py CLEANFILES = diff --git a/geo-replication/syncdaemon/conf.py.in b/geo-replication/syncdaemon/conf.py.in new file mode 100644 index 0000000..8807278 --- /dev/null +++ b/geo-replication/syncdaemon/conf.py.in @@ -0,0 +1,14 @@ +# +# Copyright (c) 2016 Red Hat, Inc. +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@' +GLUSTERD_WORKDIR = "@GLUSTERD_WORKDIR@" + +LOCALSTATEDIR = "@localstatedir@" diff --git a/geo-replication/syncdaemon/configinterface.py.in b/geo-replication/syncdaemon/configinterface.py similarity index 95% rename from geo-replication/syncdaemon/configinterface.py.in rename to geo-replication/syncdaemon/configinterface.py index e1cf007..adcefb8 100644 --- a/geo-replication/syncdaemon/configinterface.py.in +++ b/geo-replication/syncdaemon/configinterface.py @@ -23,6 +23,7 @@ import tempfile import shutil from syncdutils import escape, unescape, norm, update_file, GsyncdError +from conf import GLUSTERD_WORKDIR, LOCALSTATEDIR SECT_ORD = '__section_order__' SECT_META = '__meta__' @@ -30,14 +31,14 @@ config_version = 2.0 re_type = type(re.compile('')) -TMPL_CONFIG_FILE = "@GLUSTERD_WORKDIR@/geo-replication/gsyncd_template.conf" +TMPL_CONFIG_FILE = GLUSTERD_WORKDIR + "/geo-replication/gsyncd_template.conf" # (SECTION, OPTION, OLD VALUE, NEW VALUE) CONFIGS = ( ("peersrx . .", "georep_session_working_dir", "", - "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_" + GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_" "${slavevol}/"), ("peersrx .", "gluster_params", @@ -51,7 +52,7 @@ CONFIGS = ( "ssh_command_tar", "", "ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no " - "-i @GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem"), + "-i " + GLUSTERD_WORKDIR + "/geo-replication/tar_ssh.pem"), ("peersrx . .", "changelog_log_file", "", @@ -59,7 +60,7 @@ CONFIGS = ( "/${eSlave}${local_id}-changes.log"), ("peersrx . .", "working_dir", - "@localstatedir@/run/gluster/${mastervol}/${eSlave}", + LOCALSTATEDIR + "/run/gluster/${mastervol}/${eSlave}", "${iprefix}/lib/misc/glusterfsd/${mastervol}/${eSlave}"), ("peersrx . .", "ignore_deletes", @@ -67,15 +68,15 @@ CONFIGS = ( "false"), ("peersrx . .", "pid-file", - "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_" + GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_" "${slavevol}/${eSlave}.pid", - "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_" + GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_" "${slavevol}/monitor.pid"), ("peersrx . .", "state-file", - "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_" + GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_" "${slavevol}/${eSlave}.status", - "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_" + GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_" "${slavevol}/monitor.status"), ) diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py index a26de0c..a624fe4 100644 --- a/geo-replication/syncdaemon/monitor.py +++ b/geo-replication/syncdaemon/monitor.py @@ -25,6 +25,7 @@ from gconf import gconf from syncdutils import select, waitpid, errno_wrap from syncdutils import set_term_handler, is_host_local, GsyncdError from syncdutils import escape, Thread, finalize, memoize +from syncdutils import gf_event, eventtypes from gsyncdstatus import GeorepStatus, set_monitor_status @@ -209,11 +210,12 @@ class Monitor(object): blown worker blows up on EPIPE if the net goes down, due to the keep-alive thread) """ - if not self.status.get(w[0], None): - self.status[w[0]] = GeorepStatus(gconf.state_file, w[0]) + if not self.status.get(w[0]['dir'], None): + self.status[w[0]['dir']] = GeorepStatus(gconf.state_file, + w[0]['dir']) set_monitor_status(gconf.state_file, self.ST_STARTED) - self.status[w[0]].set_worker_status(self.ST_INIT) + self.status[w[0]['dir']].set_worker_status(self.ST_INIT) ret = 0 @@ -280,7 +282,7 @@ class Monitor(object): if apid == 0: os.close(rw) os.close(ww) - os.execv(sys.executable, argv + ['--local-path', w[0], + os.execv(sys.executable, argv + ['--local-path', w[0]['dir'], '--agent', '--rpc-fd', ','.join([str(ra), str(wa), @@ -292,9 +294,9 @@ class Monitor(object): os.close(ra) os.close(wa) os.execv(sys.executable, argv + ['--feedback-fd', str(pw), - '--local-path', w[0], + '--local-path', w[0]['dir'], '--local-id', - '.' + escape(w[0]), + '.' + escape(w[0]['dir']), '--rpc-fd', ','.join([str(rw), str(ww), str(ra), str(wa)]), @@ -324,31 +326,31 @@ class Monitor(object): if ret_agent is not None: # Agent is died Kill Worker logging.info("Changelog Agent died, " - "Aborting Worker(%s)" % w[0]) + "Aborting Worker(%s)" % w[0]['dir']) errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH]) nwait(cpid) nwait(apid) if ret is not None: logging.info("worker(%s) died before establishing " - "connection" % w[0]) + "connection" % w[0]['dir']) nwait(apid) # wait for agent else: - logging.debug("worker(%s) connected" % w[0]) + logging.debug("worker(%s) connected" % w[0]['dir']) while time.time() < t0 + conn_timeout: ret = nwait(cpid, os.WNOHANG) ret_agent = nwait(apid, os.WNOHANG) if ret is not None: logging.info("worker(%s) died in startup " - "phase" % w[0]) + "phase" % w[0]['dir']) nwait(apid) # wait for agent break if ret_agent is not None: # Agent is died Kill Worker logging.info("Changelog Agent died, Aborting " - "Worker(%s)" % w[0]) + "Worker(%s)" % w[0]['dir']) errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH]) nwait(cpid) nwait(apid) @@ -357,12 +359,12 @@ class Monitor(object): time.sleep(1) else: logging.info("worker(%s) not confirmed in %d sec, " - "aborting it" % (w[0], conn_timeout)) + "aborting it" % (w[0]['dir'], conn_timeout)) errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH]) nwait(apid) # wait for agent ret = nwait(cpid) if ret is None: - self.status[w[0]].set_worker_status(self.ST_STABLE) + self.status[w[0]['dir']].set_worker_status(self.ST_STABLE) # If worker dies, agent terminates on EOF. # So lets wait for agent first. nwait(apid) @@ -372,9 +374,16 @@ class Monitor(object): else: ret = exit_status(ret) if ret in (0, 1): - self.status[w[0]].set_worker_status(self.ST_FAULTY) + self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY) + gf_event(eventtypes.GEOREP_FAULTY, + master_volume=master.volume, + master_node=w[0]['host'], + slave_host=slave_host, + slave_volume=slave_vol, + current_slave_host=current_slave_host, + brick_path=w[0]['dir']) time.sleep(10) - self.status[w[0]].set_worker_status(self.ST_INCON) + self.status[w[0]['dir']].set_worker_status(self.ST_INCON) return ret def multiplex(self, wspx, suuid, slave_vol, slave_host, master): @@ -461,7 +470,7 @@ def distribute(*resources): for idx, brick in enumerate(mvol.bricks): if is_host_local(brick['host']): is_hot = mvol.is_hot(":".join([brick['host'], brick['dir']])) - workerspex.append((brick['dir'], + workerspex.append((brick, slaves[idx % len(slaves)], get_subvol_num(idx, mvol, is_hot), is_hot)) diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py index 987e1bf..f5d3c51 100644 --- a/geo-replication/syncdaemon/syncdutils.py +++ b/geo-replication/syncdaemon/syncdutils.py @@ -23,6 +23,10 @@ from signal import signal, SIGTERM import select as oselect from os import waitpid as owaitpid +from conf import GLUSTERFS_LIBEXECDIR +sys.path.insert(1, GLUSTERFS_LIBEXECDIR) +from events import eventtypes + try: from cPickle import PickleError except ImportError: @@ -523,3 +527,8 @@ class GlusterLogLevel(object): def get_changelog_log_level(lvl): return getattr(GlusterLogLevel, lvl, GlusterLogLevel.INFO) + + +def gf_event(event_type, **kwargs): + from events.gf_event import gf_event as gfevent + gfevent(event_type, **kwargs) -- 1.7.1