21ab4e
From 2f93f8ae22acebe8312cc12f76b98a03b0dacc9f Mon Sep 17 00:00:00 2001
21ab4e
From: Aravinda VK <avishwan@redhat.com>
21ab4e
Date: Thu, 20 Oct 2016 15:05:38 +0530
21ab4e
Subject: [PATCH 344/361] geo-rep: Logging improvements
21ab4e
21ab4e
- Redundant log messages removed.
21ab4e
- Worker and connected slave node details added in "starting worker" log
21ab4e
- Added log for Monitor state change
21ab4e
- Added log for Worker status change(Initializing/Active/Passive/Faulty)
21ab4e
- Added log for Crawl status Change
21ab4e
- Added log for config set and reset
21ab4e
- Added log for checkpoint set, reset and completion
21ab4e
21ab4e
mainline:
21ab4e
> BUG: 1359612
21ab4e
> Reviewed-on: http://review.gluster.org/15684
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Kotresh HR <khiremat@redhat.com>
21ab4e
(cherry picked from commit f72ee92c343467db268140330ad3ce9bf5d7404d)
21ab4e
21ab4e
BUG: 1359613
21ab4e
Change-Id: Icc7173ff3c93de4b862bdb1a61760db7eaf14271
21ab4e
Signed-off-by: Aravinda VK <avishwan@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/101284
21ab4e
Tested-by: Milind Changire <mchangir@redhat.com>
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 geo-replication/syncdaemon/gsyncd.py       | 42 ++++++++++++++++++++++--------
21ab4e
 geo-replication/syncdaemon/gsyncdstatus.py | 15 ++++++++---
21ab4e
 geo-replication/syncdaemon/master.py       | 23 ++++++++--------
21ab4e
 geo-replication/syncdaemon/monitor.py      |  8 +++---
21ab4e
 4 files changed, 58 insertions(+), 30 deletions(-)
21ab4e
21ab4e
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
21ab4e
index eccc5ea..3092728 100644
21ab4e
--- a/geo-replication/syncdaemon/gsyncd.py
21ab4e
+++ b/geo-replication/syncdaemon/gsyncd.py
21ab4e
@@ -36,7 +36,7 @@ import xml.etree.ElementTree as XET
21ab4e
 from subprocess import PIPE
21ab4e
 import subprocess
21ab4e
 from changelogagent import agent, Changelog
21ab4e
-from gsyncdstatus import set_monitor_status, GeorepStatus
21ab4e
+from gsyncdstatus import set_monitor_status, GeorepStatus, human_time_utc
21ab4e
 from libcxattr import Xattr
21ab4e
 import struct
21ab4e
 from syncdutils import get_master_and_slave_data_from_args
21ab4e
@@ -554,7 +554,7 @@ def main_i():
21ab4e
         rconf['config_file'], canon_peers, confdata,
21ab4e
         defaults.__dict__, opts.__dict__, namedict)
21ab4e
 
21ab4e
-    checkpoint_change = False
21ab4e
+    conf_change = False
21ab4e
     if confdata:
21ab4e
         opt_ok = norm(confdata.opt) in tunables + [None]
21ab4e
         if confdata.op == 'check':
21ab4e
@@ -573,10 +573,10 @@ def main_i():
21ab4e
         # when modifying checkpoint, it's important to make a log
21ab4e
         # of that, so in that case we go on to set up logging even
21ab4e
         # if its just config invocation
21ab4e
-        if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \
21ab4e
-           not confdata.rx:
21ab4e
-            checkpoint_change = True
21ab4e
-        if not checkpoint_change:
21ab4e
+        if confdata.op in ('set', 'del') and not confdata.rx:
21ab4e
+            conf_change = True
21ab4e
+
21ab4e
+        if not conf_change:
21ab4e
             return
21ab4e
 
21ab4e
     gconf.__dict__.update(defaults.__dict__)
21ab4e
@@ -674,13 +674,23 @@ def main_i():
21ab4e
     if not privileged() and gconf.log_file_mbr:
21ab4e
         gconf.log_file = gconf.log_file_mbr
21ab4e
 
21ab4e
-    if checkpoint_change:
21ab4e
+    if conf_change:
21ab4e
         try:
21ab4e
             GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf')
21ab4e
+            gconf.log_exit = False
21ab4e
+
21ab4e
             if confdata.op == 'set':
21ab4e
-                logging.info('checkpoint %s set' % confdata.val)
21ab4e
+                if confdata.opt == 'checkpoint':
21ab4e
+                    logging.info("Checkpoint Set: %s" % (
21ab4e
+                        human_time_utc(confdata.val)))
21ab4e
+                else:
21ab4e
+                    logging.info("Config Set: %s = %s" % (
21ab4e
+                        confdata.opt, confdata.val))
21ab4e
             elif confdata.op == 'del':
21ab4e
-                logging.info('checkpoint info was reset')
21ab4e
+                if confdata.opt == 'checkpoint':
21ab4e
+                    logging.info("Checkpoint Reset")
21ab4e
+                else:
21ab4e
+                    logging.info("Config Reset: %s" % confdata.opt)
21ab4e
         except IOError:
21ab4e
             if sys.exc_info()[1].errno == ENOENT:
21ab4e
                 # directory of log path is not present,
21ab4e
@@ -697,6 +707,17 @@ def main_i():
21ab4e
     if create:
21ab4e
         if getattr(gconf, 'state_file', None):
21ab4e
             set_monitor_status(gconf.state_file, create)
21ab4e
+
21ab4e
+        try:
21ab4e
+            GLogger._gsyncd_loginit(log_file=gconf.log_file, label='monitor')
21ab4e
+            gconf.log_exit = False
21ab4e
+            logging.info("Monitor Status: %s" % create)
21ab4e
+        except IOError:
21ab4e
+            if sys.exc_info()[1].errno == ENOENT:
21ab4e
+                # If log dir not present
21ab4e
+                pass
21ab4e
+            else:
21ab4e
+                raise
21ab4e
         return
21ab4e
 
21ab4e
     go_daemon = rconf['go_daemon']
21ab4e
@@ -729,7 +750,7 @@ def main_i():
21ab4e
     if be_monitor:
21ab4e
         label = 'monitor'
21ab4e
     elif be_agent:
21ab4e
-        label = 'agent'
21ab4e
+        label = gconf.local_path
21ab4e
     elif remote:
21ab4e
         # master
21ab4e
         label = gconf.local_path
21ab4e
@@ -746,7 +767,6 @@ def main_i():
21ab4e
     if be_monitor:
21ab4e
         return monitor(*rscs)
21ab4e
 
21ab4e
-    logging.info("syncing: %s" % " -> ".join(r.url for r in rscs))
21ab4e
     if remote:
21ab4e
         go_daemon = remote.connect_remote(go_daemon=go_daemon)
21ab4e
         if go_daemon:
21ab4e
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
21ab4e
index 2c1a48d..266cf5a 100644
21ab4e
--- a/geo-replication/syncdaemon/gsyncdstatus.py
21ab4e
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
21ab4e
@@ -17,6 +17,8 @@ import json
21ab4e
 import time
21ab4e
 from datetime import datetime
21ab4e
 from errno import EACCES, EAGAIN, ENOENT
21ab4e
+import logging
21ab4e
+
21ab4e
 from syncdutils import EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event
21ab4e
 from syncdutils import EVENT_GEOREP_CHECKPOINT_COMPLETED
21ab4e
 
21ab4e
@@ -222,6 +224,10 @@ class GeorepStatus(object):
21ab4e
                     data["checkpoint_time"] = checkpoint_time
21ab4e
                     data["checkpoint_completion_time"] = curr_time
21ab4e
                     data["checkpoint_completed"] = "Yes"
21ab4e
+                    logging.info("Checkpoint completed. Checkpoint "
21ab4e
+                                 "Time: %s, Completion Time: %s" % (
21ab4e
+                                     human_time_utc(checkpoint_time),
21ab4e
+                                     human_time_utc(curr_time)))
21ab4e
                     self.trigger_gf_event_checkpoint_completion(
21ab4e
                         checkpoint_time, curr_time)
21ab4e
 
21ab4e
@@ -230,10 +236,12 @@ class GeorepStatus(object):
21ab4e
         self._update(merger)
21ab4e
 
21ab4e
     def set_worker_status(self, status):
21ab4e
-        self.set_field("worker_status", status)
21ab4e
+        if self.set_field("worker_status", status):
21ab4e
+            logging.info("Worker Status: %s" % status)
21ab4e
 
21ab4e
     def set_worker_crawl_status(self, status):
21ab4e
-        self.set_field("crawl_status", status)
21ab4e
+        if self.set_field("crawl_status", status):
21ab4e
+            logging.info("Crawl Status: %s" % status)
21ab4e
 
21ab4e
     def set_slave_node(self, slave_node):
21ab4e
         def merger(data):
21ab4e
@@ -260,11 +268,12 @@ class GeorepStatus(object):
21ab4e
 
21ab4e
     def set_active(self):
21ab4e
         if self.set_field("worker_status", "Active"):
21ab4e
+            logging.info("Worker Status: Active")
21ab4e
             self.send_event(EVENT_GEOREP_ACTIVE)
21ab4e
 
21ab4e
     def set_passive(self):
21ab4e
         if self.set_field("worker_status", "Passive"):
21ab4e
-            self.send_event(EVENT_GEOREP_PASSIVE)
21ab4e
+            logging.info("Worker Status: Passive")
21ab4e
 
21ab4e
     def get_monitor_status(self):
21ab4e
         data = ""
21ab4e
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
21ab4e
index 6437dcc..fb68525 100644
21ab4e
--- a/geo-replication/syncdaemon/master.py
21ab4e
+++ b/geo-replication/syncdaemon/master.py
21ab4e
@@ -80,7 +80,7 @@ def gmaster_builder(excrawl=None):
21ab4e
         modemixin = 'normal'
21ab4e
     changemixin = 'xsync' if gconf.change_detector == 'xsync' \
21ab4e
                   else excrawl or gconf.change_detector
21ab4e
-    logging.info('setting up %s change detection mode' % changemixin)
21ab4e
+    logging.debug('setting up %s change detection mode' % changemixin)
21ab4e
     modemixin = getattr(this, modemixin.capitalize() + 'Mixin')
21ab4e
     crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin')
21ab4e
     sendmarkmixin = boolify(
21ab4e
@@ -363,10 +363,8 @@ class GMasterCommon(object):
21ab4e
         self.slave = slave
21ab4e
         self.jobtab = {}
21ab4e
         if boolify(gconf.use_tarssh):
21ab4e
-            logging.info("using 'tar over ssh' as the sync engine")
21ab4e
             self.syncer = Syncer(slave, self.slave.tarssh, [2])
21ab4e
         else:
21ab4e
-            logging.info("using 'rsync' as the sync engine")
21ab4e
             # partial transfer (cf. rsync(1)), that's normal
21ab4e
             self.syncer = Syncer(slave, self.slave.rsync, [23, 24])
21ab4e
         # crawls vs. turns:
21ab4e
@@ -507,9 +505,9 @@ class GMasterCommon(object):
21ab4e
         volinfo_sys = self.volinfo_hook()
21ab4e
         self.volinfo = volinfo_sys[self.KNAT]
21ab4e
         inter_master = volinfo_sys[self.KFGN]
21ab4e
-        logging.info("%s master with volume id %s ..." %
21ab4e
-                     (inter_master and "intermediate" or "primary",
21ab4e
-                      self.uuid))
21ab4e
+        logging.debug("%s master with volume id %s ..." %
21ab4e
+                      (inter_master and "intermediate" or "primary",
21ab4e
+                       self.uuid))
21ab4e
         gconf.configinterface.set('volume_id', self.uuid)
21ab4e
         if self.volinfo:
21ab4e
             if self.volinfo['retval']:
21ab4e
@@ -518,7 +516,6 @@ class GMasterCommon(object):
21ab4e
         else:
21ab4e
             raise GsyncdError("master volinfo unavailable")
21ab4e
         self.lastreport['time'] = time.time()
21ab4e
-        logging.info('crawl interval: %d seconds' % self.sleep_interval)
21ab4e
 
21ab4e
         t0 = time.time()
21ab4e
         crawl = self.should_crawl()
21ab4e
@@ -529,9 +526,9 @@ class GMasterCommon(object):
21ab4e
             self.start = time.time()
21ab4e
             should_display_info = self.start - self.lastreport['time'] >= 60
21ab4e
             if should_display_info:
21ab4e
-                logging.info("%d crawls, %d turns",
21ab4e
-                             self.crawls - self.lastreport['crawls'],
21ab4e
-                             self.turns - self.lastreport['turns'])
21ab4e
+                logging.debug("%d crawls, %d turns",
21ab4e
+                              self.crawls - self.lastreport['crawls'],
21ab4e
+                              self.turns - self.lastreport['turns'])
21ab4e
                 self.lastreport.update(crawls=self.crawls,
21ab4e
                                        turns=self.turns,
21ab4e
                                        time=self.start)
21ab4e
@@ -1167,9 +1164,11 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
21ab4e
                      % (self.history_turns, repr(purge_time), repr(end_time)))
21ab4e
 
21ab4e
         if not purge_time or purge_time == URXTIME:
21ab4e
-            logging.info("stime not available, abandoning history crawl")
21ab4e
             raise NoPurgeTimeAvailable()
21ab4e
 
21ab4e
+        if not data_stime or data_stime == URXTIME:
21ab4e
+            raise NoStimeAvailable()
21ab4e
+
21ab4e
         # Changelogs backend path is hardcoded as
21ab4e
         # <BRICK_PATH>/.glusterfs/changelogs, if user configured to different
21ab4e
         # location then consuming history will not work(Known issue as of now)
21ab4e
@@ -1247,10 +1246,10 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
21ab4e
         self.stimes = []
21ab4e
         self.sleep_interval = 60
21ab4e
         self.tempdir = self.setup_working_dir()
21ab4e
+        logging.info('Working dir: %s' % self.tempdir)
21ab4e
         self.tempdir = os.path.join(self.tempdir, 'xsync')
21ab4e
         self.processed_changelogs_dir = self.tempdir
21ab4e
         self.name = "xsync"
21ab4e
-        logging.info('xsync temp directory: %s' % self.tempdir)
21ab4e
         try:
21ab4e
             os.makedirs(self.tempdir)
21ab4e
         except OSError:
21ab4e
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
21ab4e
index 8fdd68d..d9a9c33 100644
21ab4e
--- a/geo-replication/syncdaemon/monitor.py
21ab4e
+++ b/geo-replication/syncdaemon/monitor.py
21ab4e
@@ -271,8 +271,8 @@ class Monitor(object):
21ab4e
             # Spawn the worker and agent in lock to avoid fd leak
21ab4e
             self.lock.acquire()
21ab4e
 
21ab4e
-            logging.info('-' * conn_timeout)
21ab4e
-            logging.info('starting gsyncd worker')
21ab4e
+            logging.info('starting gsyncd worker(%s). Slave node: %s' %
21ab4e
+                         (w[0]['dir'], remote_host))
21ab4e
 
21ab4e
             # Couple of pipe pairs for RPC communication b/w
21ab4e
             # worker and changelog agent.
21ab4e
@@ -461,7 +461,7 @@ def distribute(*resources):
21ab4e
                                       new_stime_xattr_name)
21ab4e
     else:
21ab4e
         raise GsyncdError("unknown slave type " + slave.url)
21ab4e
-    logging.info('slave bricks: ' + repr(sbricks))
21ab4e
+    logging.debug('slave bricks: ' + repr(sbricks))
21ab4e
     if isinstance(si, FILE):
21ab4e
         slaves = [slave.url]
21ab4e
     else:
21ab4e
@@ -486,7 +486,7 @@ def distribute(*resources):
21ab4e
                                slaves[idx % len(slaves)],
21ab4e
                                get_subvol_num(idx, mvol, is_hot),
21ab4e
                                is_hot))
21ab4e
-    logging.info('worker specs: ' + repr(workerspex))
21ab4e
+    logging.debug('worker specs: ' + repr(workerspex))
21ab4e
     return workerspex, suuid, slave_vol, slave_host, master
21ab4e
 
21ab4e
 
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e