From 2f93f8ae22acebe8312cc12f76b98a03b0dacc9f Mon Sep 17 00:00:00 2001 From: Aravinda VK Date: Thu, 20 Oct 2016 15:05:38 +0530 Subject: [PATCH 344/361] geo-rep: Logging improvements - Redundant log messages removed. - Worker and connected slave node details added in "starting worker" log - Added log for Monitor state change - Added log for Worker status change(Initializing/Active/Passive/Faulty) - Added log for Crawl status Change - Added log for config set and reset - Added log for checkpoint set, reset and completion mainline: > BUG: 1359612 > Reviewed-on: http://review.gluster.org/15684 > Smoke: Gluster Build System > NetBSD-regression: NetBSD Build System > CentOS-regression: Gluster Build System > Reviewed-by: Kotresh HR (cherry picked from commit f72ee92c343467db268140330ad3ce9bf5d7404d) BUG: 1359613 Change-Id: Icc7173ff3c93de4b862bdb1a61760db7eaf14271 Signed-off-by: Aravinda VK Reviewed-on: https://code.engineering.redhat.com/gerrit/101284 Tested-by: Milind Changire Reviewed-by: Atin Mukherjee --- geo-replication/syncdaemon/gsyncd.py | 42 ++++++++++++++++++++++-------- geo-replication/syncdaemon/gsyncdstatus.py | 15 ++++++++--- geo-replication/syncdaemon/master.py | 23 ++++++++-------- geo-replication/syncdaemon/monitor.py | 8 +++--- 4 files changed, 58 insertions(+), 30 deletions(-) diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index eccc5ea..3092728 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -36,7 +36,7 @@ import xml.etree.ElementTree as XET from subprocess import PIPE import subprocess from changelogagent import agent, Changelog -from gsyncdstatus import set_monitor_status, GeorepStatus +from gsyncdstatus import set_monitor_status, GeorepStatus, human_time_utc from libcxattr import Xattr import struct from syncdutils import get_master_and_slave_data_from_args @@ -554,7 +554,7 @@ def main_i(): rconf['config_file'], canon_peers, confdata, defaults.__dict__, opts.__dict__, namedict) - checkpoint_change = False + conf_change = False if confdata: opt_ok = norm(confdata.opt) in tunables + [None] if confdata.op == 'check': @@ -573,10 +573,10 @@ def main_i(): # when modifying checkpoint, it's important to make a log # of that, so in that case we go on to set up logging even # if its just config invocation - if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \ - not confdata.rx: - checkpoint_change = True - if not checkpoint_change: + if confdata.op in ('set', 'del') and not confdata.rx: + conf_change = True + + if not conf_change: return gconf.__dict__.update(defaults.__dict__) @@ -674,13 +674,23 @@ def main_i(): if not privileged() and gconf.log_file_mbr: gconf.log_file = gconf.log_file_mbr - if checkpoint_change: + if conf_change: try: GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf') + gconf.log_exit = False + if confdata.op == 'set': - logging.info('checkpoint %s set' % confdata.val) + if confdata.opt == 'checkpoint': + logging.info("Checkpoint Set: %s" % ( + human_time_utc(confdata.val))) + else: + logging.info("Config Set: %s = %s" % ( + confdata.opt, confdata.val)) elif confdata.op == 'del': - logging.info('checkpoint info was reset') + if confdata.opt == 'checkpoint': + logging.info("Checkpoint Reset") + else: + logging.info("Config Reset: %s" % confdata.opt) except IOError: if sys.exc_info()[1].errno == ENOENT: # directory of log path is not present, @@ -697,6 +707,17 @@ def main_i(): if create: if getattr(gconf, 'state_file', None): set_monitor_status(gconf.state_file, create) + + try: + GLogger._gsyncd_loginit(log_file=gconf.log_file, label='monitor') + gconf.log_exit = False + logging.info("Monitor Status: %s" % create) + except IOError: + if sys.exc_info()[1].errno == ENOENT: + # If log dir not present + pass + else: + raise return go_daemon = rconf['go_daemon'] @@ -729,7 +750,7 @@ def main_i(): if be_monitor: label = 'monitor' elif be_agent: - label = 'agent' + label = gconf.local_path elif remote: # master label = gconf.local_path @@ -746,7 +767,6 @@ def main_i(): if be_monitor: return monitor(*rscs) - logging.info("syncing: %s" % " -> ".join(r.url for r in rscs)) if remote: go_daemon = remote.connect_remote(go_daemon=go_daemon) if go_daemon: diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py index 2c1a48d..266cf5a 100644 --- a/geo-replication/syncdaemon/gsyncdstatus.py +++ b/geo-replication/syncdaemon/gsyncdstatus.py @@ -17,6 +17,8 @@ import json import time from datetime import datetime from errno import EACCES, EAGAIN, ENOENT +import logging + from syncdutils import EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event from syncdutils import EVENT_GEOREP_CHECKPOINT_COMPLETED @@ -222,6 +224,10 @@ class GeorepStatus(object): data["checkpoint_time"] = checkpoint_time data["checkpoint_completion_time"] = curr_time data["checkpoint_completed"] = "Yes" + logging.info("Checkpoint completed. Checkpoint " + "Time: %s, Completion Time: %s" % ( + human_time_utc(checkpoint_time), + human_time_utc(curr_time))) self.trigger_gf_event_checkpoint_completion( checkpoint_time, curr_time) @@ -230,10 +236,12 @@ class GeorepStatus(object): self._update(merger) def set_worker_status(self, status): - self.set_field("worker_status", status) + if self.set_field("worker_status", status): + logging.info("Worker Status: %s" % status) def set_worker_crawl_status(self, status): - self.set_field("crawl_status", status) + if self.set_field("crawl_status", status): + logging.info("Crawl Status: %s" % status) def set_slave_node(self, slave_node): def merger(data): @@ -260,11 +268,12 @@ class GeorepStatus(object): def set_active(self): if self.set_field("worker_status", "Active"): + logging.info("Worker Status: Active") self.send_event(EVENT_GEOREP_ACTIVE) def set_passive(self): if self.set_field("worker_status", "Passive"): - self.send_event(EVENT_GEOREP_PASSIVE) + logging.info("Worker Status: Passive") def get_monitor_status(self): data = "" diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 6437dcc..fb68525 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -80,7 +80,7 @@ def gmaster_builder(excrawl=None): modemixin = 'normal' changemixin = 'xsync' if gconf.change_detector == 'xsync' \ else excrawl or gconf.change_detector - logging.info('setting up %s change detection mode' % changemixin) + logging.debug('setting up %s change detection mode' % changemixin) modemixin = getattr(this, modemixin.capitalize() + 'Mixin') crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin') sendmarkmixin = boolify( @@ -363,10 +363,8 @@ class GMasterCommon(object): self.slave = slave self.jobtab = {} if boolify(gconf.use_tarssh): - logging.info("using 'tar over ssh' as the sync engine") self.syncer = Syncer(slave, self.slave.tarssh, [2]) else: - logging.info("using 'rsync' as the sync engine") # partial transfer (cf. rsync(1)), that's normal self.syncer = Syncer(slave, self.slave.rsync, [23, 24]) # crawls vs. turns: @@ -507,9 +505,9 @@ class GMasterCommon(object): volinfo_sys = self.volinfo_hook() self.volinfo = volinfo_sys[self.KNAT] inter_master = volinfo_sys[self.KFGN] - logging.info("%s master with volume id %s ..." % - (inter_master and "intermediate" or "primary", - self.uuid)) + logging.debug("%s master with volume id %s ..." % + (inter_master and "intermediate" or "primary", + self.uuid)) gconf.configinterface.set('volume_id', self.uuid) if self.volinfo: if self.volinfo['retval']: @@ -518,7 +516,6 @@ class GMasterCommon(object): else: raise GsyncdError("master volinfo unavailable") self.lastreport['time'] = time.time() - logging.info('crawl interval: %d seconds' % self.sleep_interval) t0 = time.time() crawl = self.should_crawl() @@ -529,9 +526,9 @@ class GMasterCommon(object): self.start = time.time() should_display_info = self.start - self.lastreport['time'] >= 60 if should_display_info: - logging.info("%d crawls, %d turns", - self.crawls - self.lastreport['crawls'], - self.turns - self.lastreport['turns']) + logging.debug("%d crawls, %d turns", + self.crawls - self.lastreport['crawls'], + self.turns - self.lastreport['turns']) self.lastreport.update(crawls=self.crawls, turns=self.turns, time=self.start) @@ -1167,9 +1164,11 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin): % (self.history_turns, repr(purge_time), repr(end_time))) if not purge_time or purge_time == URXTIME: - logging.info("stime not available, abandoning history crawl") raise NoPurgeTimeAvailable() + if not data_stime or data_stime == URXTIME: + raise NoStimeAvailable() + # Changelogs backend path is hardcoded as # /.glusterfs/changelogs, if user configured to different # location then consuming history will not work(Known issue as of now) @@ -1247,10 +1246,10 @@ class GMasterXsyncMixin(GMasterChangelogMixin): self.stimes = [] self.sleep_interval = 60 self.tempdir = self.setup_working_dir() + logging.info('Working dir: %s' % self.tempdir) self.tempdir = os.path.join(self.tempdir, 'xsync') self.processed_changelogs_dir = self.tempdir self.name = "xsync" - logging.info('xsync temp directory: %s' % self.tempdir) try: os.makedirs(self.tempdir) except OSError: diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py index 8fdd68d..d9a9c33 100644 --- a/geo-replication/syncdaemon/monitor.py +++ b/geo-replication/syncdaemon/monitor.py @@ -271,8 +271,8 @@ class Monitor(object): # Spawn the worker and agent in lock to avoid fd leak self.lock.acquire() - logging.info('-' * conn_timeout) - logging.info('starting gsyncd worker') + logging.info('starting gsyncd worker(%s). Slave node: %s' % + (w[0]['dir'], remote_host)) # Couple of pipe pairs for RPC communication b/w # worker and changelog agent. @@ -461,7 +461,7 @@ def distribute(*resources): new_stime_xattr_name) else: raise GsyncdError("unknown slave type " + slave.url) - logging.info('slave bricks: ' + repr(sbricks)) + logging.debug('slave bricks: ' + repr(sbricks)) if isinstance(si, FILE): slaves = [slave.url] else: @@ -486,7 +486,7 @@ def distribute(*resources): slaves[idx % len(slaves)], get_subvol_num(idx, mvol, is_hot), is_hot)) - logging.info('worker specs: ' + repr(workerspex)) + logging.debug('worker specs: ' + repr(workerspex)) return workerspex, suuid, slave_vol, slave_host, master -- 1.8.3.1