Blob Blame History Raw
From 2f93f8ae22acebe8312cc12f76b98a03b0dacc9f Mon Sep 17 00:00:00 2001
From: Aravinda VK <avishwan@redhat.com>
Date: Thu, 20 Oct 2016 15:05:38 +0530
Subject: [PATCH 344/361] geo-rep: Logging improvements

- Redundant log messages removed.
- Worker and connected slave node details added in "starting worker" log
- Added log for Monitor state change
- Added log for Worker status change(Initializing/Active/Passive/Faulty)
- Added log for Crawl status Change
- Added log for config set and reset
- Added log for checkpoint set, reset and completion

mainline:
> BUG: 1359612
> Reviewed-on: http://review.gluster.org/15684
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Kotresh HR <khiremat@redhat.com>
(cherry picked from commit f72ee92c343467db268140330ad3ce9bf5d7404d)

BUG: 1359613
Change-Id: Icc7173ff3c93de4b862bdb1a61760db7eaf14271
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/101284
Tested-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 geo-replication/syncdaemon/gsyncd.py       | 42 ++++++++++++++++++++++--------
 geo-replication/syncdaemon/gsyncdstatus.py | 15 ++++++++---
 geo-replication/syncdaemon/master.py       | 23 ++++++++--------
 geo-replication/syncdaemon/monitor.py      |  8 +++---
 4 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index eccc5ea..3092728 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -36,7 +36,7 @@ import xml.etree.ElementTree as XET
 from subprocess import PIPE
 import subprocess
 from changelogagent import agent, Changelog
-from gsyncdstatus import set_monitor_status, GeorepStatus
+from gsyncdstatus import set_monitor_status, GeorepStatus, human_time_utc
 from libcxattr import Xattr
 import struct
 from syncdutils import get_master_and_slave_data_from_args
@@ -554,7 +554,7 @@ def main_i():
         rconf['config_file'], canon_peers, confdata,
         defaults.__dict__, opts.__dict__, namedict)
 
-    checkpoint_change = False
+    conf_change = False
     if confdata:
         opt_ok = norm(confdata.opt) in tunables + [None]
         if confdata.op == 'check':
@@ -573,10 +573,10 @@ def main_i():
         # when modifying checkpoint, it's important to make a log
         # of that, so in that case we go on to set up logging even
         # if its just config invocation
-        if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \
-           not confdata.rx:
-            checkpoint_change = True
-        if not checkpoint_change:
+        if confdata.op in ('set', 'del') and not confdata.rx:
+            conf_change = True
+
+        if not conf_change:
             return
 
     gconf.__dict__.update(defaults.__dict__)
@@ -674,13 +674,23 @@ def main_i():
     if not privileged() and gconf.log_file_mbr:
         gconf.log_file = gconf.log_file_mbr
 
-    if checkpoint_change:
+    if conf_change:
         try:
             GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf')
+            gconf.log_exit = False
+
             if confdata.op == 'set':
-                logging.info('checkpoint %s set' % confdata.val)
+                if confdata.opt == 'checkpoint':
+                    logging.info("Checkpoint Set: %s" % (
+                        human_time_utc(confdata.val)))
+                else:
+                    logging.info("Config Set: %s = %s" % (
+                        confdata.opt, confdata.val))
             elif confdata.op == 'del':
-                logging.info('checkpoint info was reset')
+                if confdata.opt == 'checkpoint':
+                    logging.info("Checkpoint Reset")
+                else:
+                    logging.info("Config Reset: %s" % confdata.opt)
         except IOError:
             if sys.exc_info()[1].errno == ENOENT:
                 # directory of log path is not present,
@@ -697,6 +707,17 @@ def main_i():
     if create:
         if getattr(gconf, 'state_file', None):
             set_monitor_status(gconf.state_file, create)
+
+        try:
+            GLogger._gsyncd_loginit(log_file=gconf.log_file, label='monitor')
+            gconf.log_exit = False
+            logging.info("Monitor Status: %s" % create)
+        except IOError:
+            if sys.exc_info()[1].errno == ENOENT:
+                # If log dir not present
+                pass
+            else:
+                raise
         return
 
     go_daemon = rconf['go_daemon']
@@ -729,7 +750,7 @@ def main_i():
     if be_monitor:
         label = 'monitor'
     elif be_agent:
-        label = 'agent'
+        label = gconf.local_path
     elif remote:
         # master
         label = gconf.local_path
@@ -746,7 +767,6 @@ def main_i():
     if be_monitor:
         return monitor(*rscs)
 
-    logging.info("syncing: %s" % " -> ".join(r.url for r in rscs))
     if remote:
         go_daemon = remote.connect_remote(go_daemon=go_daemon)
         if go_daemon:
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
index 2c1a48d..266cf5a 100644
--- a/geo-replication/syncdaemon/gsyncdstatus.py
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
@@ -17,6 +17,8 @@ import json
 import time
 from datetime import datetime
 from errno import EACCES, EAGAIN, ENOENT
+import logging
+
 from syncdutils import EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event
 from syncdutils import EVENT_GEOREP_CHECKPOINT_COMPLETED
 
@@ -222,6 +224,10 @@ class GeorepStatus(object):
                     data["checkpoint_time"] = checkpoint_time
                     data["checkpoint_completion_time"] = curr_time
                     data["checkpoint_completed"] = "Yes"
+                    logging.info("Checkpoint completed. Checkpoint "
+                                 "Time: %s, Completion Time: %s" % (
+                                     human_time_utc(checkpoint_time),
+                                     human_time_utc(curr_time)))
                     self.trigger_gf_event_checkpoint_completion(
                         checkpoint_time, curr_time)
 
@@ -230,10 +236,12 @@ class GeorepStatus(object):
         self._update(merger)
 
     def set_worker_status(self, status):
-        self.set_field("worker_status", status)
+        if self.set_field("worker_status", status):
+            logging.info("Worker Status: %s" % status)
 
     def set_worker_crawl_status(self, status):
-        self.set_field("crawl_status", status)
+        if self.set_field("crawl_status", status):
+            logging.info("Crawl Status: %s" % status)
 
     def set_slave_node(self, slave_node):
         def merger(data):
@@ -260,11 +268,12 @@ class GeorepStatus(object):
 
     def set_active(self):
         if self.set_field("worker_status", "Active"):
+            logging.info("Worker Status: Active")
             self.send_event(EVENT_GEOREP_ACTIVE)
 
     def set_passive(self):
         if self.set_field("worker_status", "Passive"):
-            self.send_event(EVENT_GEOREP_PASSIVE)
+            logging.info("Worker Status: Passive")
 
     def get_monitor_status(self):
         data = ""
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 6437dcc..fb68525 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -80,7 +80,7 @@ def gmaster_builder(excrawl=None):
         modemixin = 'normal'
     changemixin = 'xsync' if gconf.change_detector == 'xsync' \
                   else excrawl or gconf.change_detector
-    logging.info('setting up %s change detection mode' % changemixin)
+    logging.debug('setting up %s change detection mode' % changemixin)
     modemixin = getattr(this, modemixin.capitalize() + 'Mixin')
     crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin')
     sendmarkmixin = boolify(
@@ -363,10 +363,8 @@ class GMasterCommon(object):
         self.slave = slave
         self.jobtab = {}
         if boolify(gconf.use_tarssh):
-            logging.info("using 'tar over ssh' as the sync engine")
             self.syncer = Syncer(slave, self.slave.tarssh, [2])
         else:
-            logging.info("using 'rsync' as the sync engine")
             # partial transfer (cf. rsync(1)), that's normal
             self.syncer = Syncer(slave, self.slave.rsync, [23, 24])
         # crawls vs. turns:
@@ -507,9 +505,9 @@ class GMasterCommon(object):
         volinfo_sys = self.volinfo_hook()
         self.volinfo = volinfo_sys[self.KNAT]
         inter_master = volinfo_sys[self.KFGN]
-        logging.info("%s master with volume id %s ..." %
-                     (inter_master and "intermediate" or "primary",
-                      self.uuid))
+        logging.debug("%s master with volume id %s ..." %
+                      (inter_master and "intermediate" or "primary",
+                       self.uuid))
         gconf.configinterface.set('volume_id', self.uuid)
         if self.volinfo:
             if self.volinfo['retval']:
@@ -518,7 +516,6 @@ class GMasterCommon(object):
         else:
             raise GsyncdError("master volinfo unavailable")
         self.lastreport['time'] = time.time()
-        logging.info('crawl interval: %d seconds' % self.sleep_interval)
 
         t0 = time.time()
         crawl = self.should_crawl()
@@ -529,9 +526,9 @@ class GMasterCommon(object):
             self.start = time.time()
             should_display_info = self.start - self.lastreport['time'] >= 60
             if should_display_info:
-                logging.info("%d crawls, %d turns",
-                             self.crawls - self.lastreport['crawls'],
-                             self.turns - self.lastreport['turns'])
+                logging.debug("%d crawls, %d turns",
+                              self.crawls - self.lastreport['crawls'],
+                              self.turns - self.lastreport['turns'])
                 self.lastreport.update(crawls=self.crawls,
                                        turns=self.turns,
                                        time=self.start)
@@ -1167,9 +1164,11 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
                      % (self.history_turns, repr(purge_time), repr(end_time)))
 
         if not purge_time or purge_time == URXTIME:
-            logging.info("stime not available, abandoning history crawl")
             raise NoPurgeTimeAvailable()
 
+        if not data_stime or data_stime == URXTIME:
+            raise NoStimeAvailable()
+
         # Changelogs backend path is hardcoded as
         # <BRICK_PATH>/.glusterfs/changelogs, if user configured to different
         # location then consuming history will not work(Known issue as of now)
@@ -1247,10 +1246,10 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
         self.stimes = []
         self.sleep_interval = 60
         self.tempdir = self.setup_working_dir()
+        logging.info('Working dir: %s' % self.tempdir)
         self.tempdir = os.path.join(self.tempdir, 'xsync')
         self.processed_changelogs_dir = self.tempdir
         self.name = "xsync"
-        logging.info('xsync temp directory: %s' % self.tempdir)
         try:
             os.makedirs(self.tempdir)
         except OSError:
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index 8fdd68d..d9a9c33 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -271,8 +271,8 @@ class Monitor(object):
             # Spawn the worker and agent in lock to avoid fd leak
             self.lock.acquire()
 
-            logging.info('-' * conn_timeout)
-            logging.info('starting gsyncd worker')
+            logging.info('starting gsyncd worker(%s). Slave node: %s' %
+                         (w[0]['dir'], remote_host))
 
             # Couple of pipe pairs for RPC communication b/w
             # worker and changelog agent.
@@ -461,7 +461,7 @@ def distribute(*resources):
                                       new_stime_xattr_name)
     else:
         raise GsyncdError("unknown slave type " + slave.url)
-    logging.info('slave bricks: ' + repr(sbricks))
+    logging.debug('slave bricks: ' + repr(sbricks))
     if isinstance(si, FILE):
         slaves = [slave.url]
     else:
@@ -486,7 +486,7 @@ def distribute(*resources):
                                slaves[idx % len(slaves)],
                                get_subvol_num(idx, mvol, is_hot),
                                is_hot))
-    logging.info('worker specs: ' + repr(workerspex))
+    logging.debug('worker specs: ' + repr(workerspex))
     return workerspex, suuid, slave_vol, slave_host, master
 
 
-- 
1.8.3.1