From 2f93f8ae22acebe8312cc12f76b98a03b0dacc9f Mon Sep 17 00:00:00 2001
From: Aravinda VK <avishwan@redhat.com>
Date: Thu, 20 Oct 2016 15:05:38 +0530
Subject: [PATCH 344/361] geo-rep: Logging improvements
- Redundant log messages removed.
- Worker and connected slave node details added in "starting worker" log
- Added log for Monitor state change
- Added log for Worker status change(Initializing/Active/Passive/Faulty)
- Added log for Crawl status Change
- Added log for config set and reset
- Added log for checkpoint set, reset and completion
mainline:
> BUG: 1359612
> Reviewed-on: http://review.gluster.org/15684
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Kotresh HR <khiremat@redhat.com>
(cherry picked from commit f72ee92c343467db268140330ad3ce9bf5d7404d)
BUG: 1359613
Change-Id: Icc7173ff3c93de4b862bdb1a61760db7eaf14271
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/101284
Tested-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
geo-replication/syncdaemon/gsyncd.py | 42 ++++++++++++++++++++++--------
geo-replication/syncdaemon/gsyncdstatus.py | 15 ++++++++---
geo-replication/syncdaemon/master.py | 23 ++++++++--------
geo-replication/syncdaemon/monitor.py | 8 +++---
4 files changed, 58 insertions(+), 30 deletions(-)
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index eccc5ea..3092728 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -36,7 +36,7 @@ import xml.etree.ElementTree as XET
from subprocess import PIPE
import subprocess
from changelogagent import agent, Changelog
-from gsyncdstatus import set_monitor_status, GeorepStatus
+from gsyncdstatus import set_monitor_status, GeorepStatus, human_time_utc
from libcxattr import Xattr
import struct
from syncdutils import get_master_and_slave_data_from_args
@@ -554,7 +554,7 @@ def main_i():
rconf['config_file'], canon_peers, confdata,
defaults.__dict__, opts.__dict__, namedict)
- checkpoint_change = False
+ conf_change = False
if confdata:
opt_ok = norm(confdata.opt) in tunables + [None]
if confdata.op == 'check':
@@ -573,10 +573,10 @@ def main_i():
# when modifying checkpoint, it's important to make a log
# of that, so in that case we go on to set up logging even
# if its just config invocation
- if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \
- not confdata.rx:
- checkpoint_change = True
- if not checkpoint_change:
+ if confdata.op in ('set', 'del') and not confdata.rx:
+ conf_change = True
+
+ if not conf_change:
return
gconf.__dict__.update(defaults.__dict__)
@@ -674,13 +674,23 @@ def main_i():
if not privileged() and gconf.log_file_mbr:
gconf.log_file = gconf.log_file_mbr
- if checkpoint_change:
+ if conf_change:
try:
GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf')
+ gconf.log_exit = False
+
if confdata.op == 'set':
- logging.info('checkpoint %s set' % confdata.val)
+ if confdata.opt == 'checkpoint':
+ logging.info("Checkpoint Set: %s" % (
+ human_time_utc(confdata.val)))
+ else:
+ logging.info("Config Set: %s = %s" % (
+ confdata.opt, confdata.val))
elif confdata.op == 'del':
- logging.info('checkpoint info was reset')
+ if confdata.opt == 'checkpoint':
+ logging.info("Checkpoint Reset")
+ else:
+ logging.info("Config Reset: %s" % confdata.opt)
except IOError:
if sys.exc_info()[1].errno == ENOENT:
# directory of log path is not present,
@@ -697,6 +707,17 @@ def main_i():
if create:
if getattr(gconf, 'state_file', None):
set_monitor_status(gconf.state_file, create)
+
+ try:
+ GLogger._gsyncd_loginit(log_file=gconf.log_file, label='monitor')
+ gconf.log_exit = False
+ logging.info("Monitor Status: %s" % create)
+ except IOError:
+ if sys.exc_info()[1].errno == ENOENT:
+ # If log dir not present
+ pass
+ else:
+ raise
return
go_daemon = rconf['go_daemon']
@@ -729,7 +750,7 @@ def main_i():
if be_monitor:
label = 'monitor'
elif be_agent:
- label = 'agent'
+ label = gconf.local_path
elif remote:
# master
label = gconf.local_path
@@ -746,7 +767,6 @@ def main_i():
if be_monitor:
return monitor(*rscs)
- logging.info("syncing: %s" % " -> ".join(r.url for r in rscs))
if remote:
go_daemon = remote.connect_remote(go_daemon=go_daemon)
if go_daemon:
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
index 2c1a48d..266cf5a 100644
--- a/geo-replication/syncdaemon/gsyncdstatus.py
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
@@ -17,6 +17,8 @@ import json
import time
from datetime import datetime
from errno import EACCES, EAGAIN, ENOENT
+import logging
+
from syncdutils import EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event
from syncdutils import EVENT_GEOREP_CHECKPOINT_COMPLETED
@@ -222,6 +224,10 @@ class GeorepStatus(object):
data["checkpoint_time"] = checkpoint_time
data["checkpoint_completion_time"] = curr_time
data["checkpoint_completed"] = "Yes"
+ logging.info("Checkpoint completed. Checkpoint "
+ "Time: %s, Completion Time: %s" % (
+ human_time_utc(checkpoint_time),
+ human_time_utc(curr_time)))
self.trigger_gf_event_checkpoint_completion(
checkpoint_time, curr_time)
@@ -230,10 +236,12 @@ class GeorepStatus(object):
self._update(merger)
def set_worker_status(self, status):
- self.set_field("worker_status", status)
+ if self.set_field("worker_status", status):
+ logging.info("Worker Status: %s" % status)
def set_worker_crawl_status(self, status):
- self.set_field("crawl_status", status)
+ if self.set_field("crawl_status", status):
+ logging.info("Crawl Status: %s" % status)
def set_slave_node(self, slave_node):
def merger(data):
@@ -260,11 +268,12 @@ class GeorepStatus(object):
def set_active(self):
if self.set_field("worker_status", "Active"):
+ logging.info("Worker Status: Active")
self.send_event(EVENT_GEOREP_ACTIVE)
def set_passive(self):
if self.set_field("worker_status", "Passive"):
- self.send_event(EVENT_GEOREP_PASSIVE)
+ logging.info("Worker Status: Passive")
def get_monitor_status(self):
data = ""
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 6437dcc..fb68525 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -80,7 +80,7 @@ def gmaster_builder(excrawl=None):
modemixin = 'normal'
changemixin = 'xsync' if gconf.change_detector == 'xsync' \
else excrawl or gconf.change_detector
- logging.info('setting up %s change detection mode' % changemixin)
+ logging.debug('setting up %s change detection mode' % changemixin)
modemixin = getattr(this, modemixin.capitalize() + 'Mixin')
crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin')
sendmarkmixin = boolify(
@@ -363,10 +363,8 @@ class GMasterCommon(object):
self.slave = slave
self.jobtab = {}
if boolify(gconf.use_tarssh):
- logging.info("using 'tar over ssh' as the sync engine")
self.syncer = Syncer(slave, self.slave.tarssh, [2])
else:
- logging.info("using 'rsync' as the sync engine")
# partial transfer (cf. rsync(1)), that's normal
self.syncer = Syncer(slave, self.slave.rsync, [23, 24])
# crawls vs. turns:
@@ -507,9 +505,9 @@ class GMasterCommon(object):
volinfo_sys = self.volinfo_hook()
self.volinfo = volinfo_sys[self.KNAT]
inter_master = volinfo_sys[self.KFGN]
- logging.info("%s master with volume id %s ..." %
- (inter_master and "intermediate" or "primary",
- self.uuid))
+ logging.debug("%s master with volume id %s ..." %
+ (inter_master and "intermediate" or "primary",
+ self.uuid))
gconf.configinterface.set('volume_id', self.uuid)
if self.volinfo:
if self.volinfo['retval']:
@@ -518,7 +516,6 @@ class GMasterCommon(object):
else:
raise GsyncdError("master volinfo unavailable")
self.lastreport['time'] = time.time()
- logging.info('crawl interval: %d seconds' % self.sleep_interval)
t0 = time.time()
crawl = self.should_crawl()
@@ -529,9 +526,9 @@ class GMasterCommon(object):
self.start = time.time()
should_display_info = self.start - self.lastreport['time'] >= 60
if should_display_info:
- logging.info("%d crawls, %d turns",
- self.crawls - self.lastreport['crawls'],
- self.turns - self.lastreport['turns'])
+ logging.debug("%d crawls, %d turns",
+ self.crawls - self.lastreport['crawls'],
+ self.turns - self.lastreport['turns'])
self.lastreport.update(crawls=self.crawls,
turns=self.turns,
time=self.start)
@@ -1167,9 +1164,11 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
% (self.history_turns, repr(purge_time), repr(end_time)))
if not purge_time or purge_time == URXTIME:
- logging.info("stime not available, abandoning history crawl")
raise NoPurgeTimeAvailable()
+ if not data_stime or data_stime == URXTIME:
+ raise NoStimeAvailable()
+
# Changelogs backend path is hardcoded as
# <BRICK_PATH>/.glusterfs/changelogs, if user configured to different
# location then consuming history will not work(Known issue as of now)
@@ -1247,10 +1246,10 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
self.stimes = []
self.sleep_interval = 60
self.tempdir = self.setup_working_dir()
+ logging.info('Working dir: %s' % self.tempdir)
self.tempdir = os.path.join(self.tempdir, 'xsync')
self.processed_changelogs_dir = self.tempdir
self.name = "xsync"
- logging.info('xsync temp directory: %s' % self.tempdir)
try:
os.makedirs(self.tempdir)
except OSError:
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index 8fdd68d..d9a9c33 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -271,8 +271,8 @@ class Monitor(object):
# Spawn the worker and agent in lock to avoid fd leak
self.lock.acquire()
- logging.info('-' * conn_timeout)
- logging.info('starting gsyncd worker')
+ logging.info('starting gsyncd worker(%s). Slave node: %s' %
+ (w[0]['dir'], remote_host))
# Couple of pipe pairs for RPC communication b/w
# worker and changelog agent.
@@ -461,7 +461,7 @@ def distribute(*resources):
new_stime_xattr_name)
else:
raise GsyncdError("unknown slave type " + slave.url)
- logging.info('slave bricks: ' + repr(sbricks))
+ logging.debug('slave bricks: ' + repr(sbricks))
if isinstance(si, FILE):
slaves = [slave.url]
else:
@@ -486,7 +486,7 @@ def distribute(*resources):
slaves[idx % len(slaves)],
get_subvol_num(idx, mvol, is_hot),
is_hot))
- logging.info('worker specs: ' + repr(workerspex))
+ logging.debug('worker specs: ' + repr(workerspex))
return workerspex, suuid, slave_vol, slave_host, master
--
1.8.3.1