Blob Blame History Raw
From dc52f3950738c07b9b4e044346266d5f3efee5e2 Mon Sep 17 00:00:00 2001
From: Saravanakumar Arumugam <sarumuga@redhat.com>
Date: Wed, 24 Aug 2016 15:19:53 +0530
Subject: [PATCH 73/79] geo-rep: add geo-rep events for server side changes

Event Type defined in #15351 to avoid merge conflicts

Add geo-rep events applicable to changes in
geo-rep session in the server side.

> Reviewed-on: http://review.gluster.org/15328
> Tested-by: Aravinda VK <avishwan@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Aravinda VK <avishwan@redhat.com>

Change-Id: Ia66574d2abccad7fce6a96667efbc7c6c8903fc6
BUG: 1361118
Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/84879
Tested-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 configure.ac                                       |    2 +-
 geo-replication/syncdaemon/Makefile.am             |    2 +-
 geo-replication/syncdaemon/conf.py.in              |   14 +++++++
 .../{configinterface.py.in => configinterface.py}  |   17 ++++----
 geo-replication/syncdaemon/monitor.py              |   41 ++++++++++++--------
 geo-replication/syncdaemon/syncdutils.py           |    9 ++++
 7 files changed, 60 insertions(+), 27 deletions(-)
 create mode 100644 geo-replication/syncdaemon/conf.py.in
 rename geo-replication/syncdaemon/{configinterface.py.in => configinterface.py} (95%)

diff --git a/configure.ac b/configure.ac
index 011cf14..e4bf48e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -42,8 +42,8 @@ AC_CONFIG_FILES([Makefile
                 geo-replication/src/peer_gsec_create
                 geo-replication/src/peer_mountbroker
                 extras/peer_add_secret_pub
-                geo-replication/syncdaemon/configinterface.py
                 extras/snap_scheduler/conf.py
+                geo-replication/syncdaemon/conf.py
                 glusterfsd/Makefile
                 glusterfsd/src/Makefile
                 rpc/Makefile
diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am
index ed0f5e4..7cdaf45 100644
--- a/geo-replication/syncdaemon/Makefile.am
+++ b/geo-replication/syncdaemon/Makefile.am
@@ -3,6 +3,6 @@ syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon
 syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py \
 	resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \
 	$(top_builddir)/contrib/ipaddr-py/ipaddr.py libgfchangelog.py changelogagent.py \
-	gsyncdstatus.py
+	gsyncdstatus.py conf.py
 
 CLEANFILES =
diff --git a/geo-replication/syncdaemon/conf.py.in b/geo-replication/syncdaemon/conf.py.in
new file mode 100644
index 0000000..8807278
--- /dev/null
+++ b/geo-replication/syncdaemon/conf.py.in
@@ -0,0 +1,14 @@
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@'
+GLUSTERD_WORKDIR = "@GLUSTERD_WORKDIR@"
+
+LOCALSTATEDIR = "@localstatedir@"
diff --git a/geo-replication/syncdaemon/configinterface.py.in b/geo-replication/syncdaemon/configinterface.py
similarity index 95%
rename from geo-replication/syncdaemon/configinterface.py.in
rename to geo-replication/syncdaemon/configinterface.py
index e1cf007..adcefb8 100644
--- a/geo-replication/syncdaemon/configinterface.py.in
+++ b/geo-replication/syncdaemon/configinterface.py
@@ -23,6 +23,7 @@ import tempfile
 import shutil
 
 from syncdutils import escape, unescape, norm, update_file, GsyncdError
+from conf import GLUSTERD_WORKDIR, LOCALSTATEDIR
 
 SECT_ORD = '__section_order__'
 SECT_META = '__meta__'
@@ -30,14 +31,14 @@ config_version = 2.0
 
 re_type = type(re.compile(''))
 
-TMPL_CONFIG_FILE = "@GLUSTERD_WORKDIR@/geo-replication/gsyncd_template.conf"
+TMPL_CONFIG_FILE = GLUSTERD_WORKDIR + "/geo-replication/gsyncd_template.conf"
 
 # (SECTION, OPTION, OLD VALUE, NEW VALUE)
 CONFIGS = (
     ("peersrx . .",
      "georep_session_working_dir",
      "",
-     "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+     GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
      "${slavevol}/"),
     ("peersrx .",
      "gluster_params",
@@ -51,7 +52,7 @@ CONFIGS = (
      "ssh_command_tar",
      "",
      "ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no "
-     "-i @GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem"),
+     "-i " + GLUSTERD_WORKDIR + "/geo-replication/tar_ssh.pem"),
     ("peersrx . .",
      "changelog_log_file",
      "",
@@ -59,7 +60,7 @@ CONFIGS = (
      "/${eSlave}${local_id}-changes.log"),
     ("peersrx . .",
      "working_dir",
-     "@localstatedir@/run/gluster/${mastervol}/${eSlave}",
+     LOCALSTATEDIR + "/run/gluster/${mastervol}/${eSlave}",
      "${iprefix}/lib/misc/glusterfsd/${mastervol}/${eSlave}"),
     ("peersrx . .",
      "ignore_deletes",
@@ -67,15 +68,15 @@ CONFIGS = (
      "false"),
     ("peersrx . .",
      "pid-file",
-     "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+     GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
      "${slavevol}/${eSlave}.pid",
-     "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+     GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
      "${slavevol}/monitor.pid"),
     ("peersrx . .",
      "state-file",
-     "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+     GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
      "${slavevol}/${eSlave}.status",
-     "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
+     GLUSTERD_WORKDIR + "/geo-replication/${mastervol}_${remotehost}_"
      "${slavevol}/monitor.status"),
 )
 
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index a26de0c..a624fe4 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -25,6 +25,7 @@ from gconf import gconf
 from syncdutils import select, waitpid, errno_wrap
 from syncdutils import set_term_handler, is_host_local, GsyncdError
 from syncdutils import escape, Thread, finalize, memoize
+from syncdutils import gf_event, eventtypes
 
 from gsyncdstatus import GeorepStatus, set_monitor_status
 
@@ -209,11 +210,12 @@ class Monitor(object):
         blown worker blows up on EPIPE if the net goes down,
         due to the keep-alive thread)
         """
-        if not self.status.get(w[0], None):
-            self.status[w[0]] = GeorepStatus(gconf.state_file, w[0])
+        if not self.status.get(w[0]['dir'], None):
+            self.status[w[0]['dir']] = GeorepStatus(gconf.state_file,
+                                                    w[0]['dir'])
 
         set_monitor_status(gconf.state_file, self.ST_STARTED)
-        self.status[w[0]].set_worker_status(self.ST_INIT)
+        self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
 
         ret = 0
 
@@ -280,7 +282,7 @@ class Monitor(object):
             if apid == 0:
                 os.close(rw)
                 os.close(ww)
-                os.execv(sys.executable, argv + ['--local-path', w[0],
+                os.execv(sys.executable, argv + ['--local-path', w[0]['dir'],
                                                  '--agent',
                                                  '--rpc-fd',
                                                  ','.join([str(ra), str(wa),
@@ -292,9 +294,9 @@ class Monitor(object):
                 os.close(ra)
                 os.close(wa)
                 os.execv(sys.executable, argv + ['--feedback-fd', str(pw),
-                                                 '--local-path', w[0],
+                                                 '--local-path', w[0]['dir'],
                                                  '--local-id',
-                                                 '.' + escape(w[0]),
+                                                 '.' + escape(w[0]['dir']),
                                                  '--rpc-fd',
                                                  ','.join([str(rw), str(ww),
                                                            str(ra), str(wa)]),
@@ -324,31 +326,31 @@ class Monitor(object):
                 if ret_agent is not None:
                     # Agent is died Kill Worker
                     logging.info("Changelog Agent died, "
-                                 "Aborting Worker(%s)" % w[0])
+                                 "Aborting Worker(%s)" % w[0]['dir'])
                     errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                     nwait(cpid)
                     nwait(apid)
 
                 if ret is not None:
                     logging.info("worker(%s) died before establishing "
-                                 "connection" % w[0])
+                                 "connection" % w[0]['dir'])
                     nwait(apid)  # wait for agent
                 else:
-                    logging.debug("worker(%s) connected" % w[0])
+                    logging.debug("worker(%s) connected" % w[0]['dir'])
                     while time.time() < t0 + conn_timeout:
                         ret = nwait(cpid, os.WNOHANG)
                         ret_agent = nwait(apid, os.WNOHANG)
 
                         if ret is not None:
                             logging.info("worker(%s) died in startup "
-                                         "phase" % w[0])
+                                         "phase" % w[0]['dir'])
                             nwait(apid)  # wait for agent
                             break
 
                         if ret_agent is not None:
                             # Agent is died Kill Worker
                             logging.info("Changelog Agent died, Aborting "
-                                         "Worker(%s)" % w[0])
+                                         "Worker(%s)" % w[0]['dir'])
                             errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                             nwait(cpid)
                             nwait(apid)
@@ -357,12 +359,12 @@ class Monitor(object):
                         time.sleep(1)
             else:
                 logging.info("worker(%s) not confirmed in %d sec, "
-                             "aborting it" % (w[0], conn_timeout))
+                             "aborting it" % (w[0]['dir'], conn_timeout))
                 errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                 nwait(apid)  # wait for agent
                 ret = nwait(cpid)
             if ret is None:
-                self.status[w[0]].set_worker_status(self.ST_STABLE)
+                self.status[w[0]['dir']].set_worker_status(self.ST_STABLE)
                 # If worker dies, agent terminates on EOF.
                 # So lets wait for agent first.
                 nwait(apid)
@@ -372,9 +374,16 @@ class Monitor(object):
             else:
                 ret = exit_status(ret)
                 if ret in (0, 1):
-                    self.status[w[0]].set_worker_status(self.ST_FAULTY)
+                    self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY)
+                    gf_event(eventtypes.GEOREP_FAULTY,
+                             master_volume=master.volume,
+                             master_node=w[0]['host'],
+                             slave_host=slave_host,
+                             slave_volume=slave_vol,
+                             current_slave_host=current_slave_host,
+                             brick_path=w[0]['dir'])
             time.sleep(10)
-        self.status[w[0]].set_worker_status(self.ST_INCON)
+        self.status[w[0]['dir']].set_worker_status(self.ST_INCON)
         return ret
 
     def multiplex(self, wspx, suuid, slave_vol, slave_host, master):
@@ -461,7 +470,7 @@ def distribute(*resources):
     for idx, brick in enumerate(mvol.bricks):
         if is_host_local(brick['host']):
             is_hot = mvol.is_hot(":".join([brick['host'], brick['dir']]))
-            workerspex.append((brick['dir'],
+            workerspex.append((brick,
                                slaves[idx % len(slaves)],
                                get_subvol_num(idx, mvol, is_hot),
                                is_hot))
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 987e1bf..f5d3c51 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -23,6 +23,10 @@ from signal import signal, SIGTERM
 import select as oselect
 from os import waitpid as owaitpid
 
+from conf import GLUSTERFS_LIBEXECDIR
+sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
+from events import eventtypes
+
 try:
     from cPickle import PickleError
 except ImportError:
@@ -523,3 +527,8 @@ class GlusterLogLevel(object):
 
 def get_changelog_log_level(lvl):
     return getattr(GlusterLogLevel, lvl, GlusterLogLevel.INFO)
+
+
+def gf_event(event_type, **kwargs):
+    from events.gf_event import gf_event as gfevent
+    gfevent(event_type, **kwargs)
-- 
1.7.1