From 62ad86bf93d94212eddbb50b010f5bd2908e9600 Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Wed, 26 Oct 2016 15:28:14 +0530 Subject: [PATCH 145/157] tools/glusterfind: kill remote processes and separate run-time directories Problem #1: Hitting CTRL+C leaves stale processes on remote nodes if glusterfind pre has been initiated. Solution #1: Adding "-t -t" to the ssh command line forces a pseudo-terminal to be allocated for the remote process. When the local process receives a keyboard interrupt, SIGHUP is immediately conveyed to the remote terminal, causing the remote changelog.py process to terminate immediately. Problem #2: Concurrent glusterfind pre runs are not possible on the same glusterfind session in case of a runaway process. Solution #2: Each glusterfind pre run now adds a randomly named directory under the working directory in which to store and manage its temporary database and changelog processing. If KeyboardInterrupt is received, the function call run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename) cleans up the run-specific directory on the remote nodes. 
Patch: 7571380 cli/xml: Fix wrong XML format in volume get command broke "gluster volume get changelog.rollover-time --xml" Now fixed function utils.py::get_changelog_rollover_time() Fixed spurious trailing space getting written if second path is empty in main.py::write_output() Fixed repetitive changelog processing in changelog.py::get_changes() mainline: > > Reviewed-on: http://review.gluster.org/15609 > > Smoke: Gluster Build System > > CentOS-regression: Gluster Build System > > NetBSD-regression: NetBSD Build System > > Reviewed-by: Aravinda VK (cherry picked from commit feea851fad4f89b48bfe89fe3b75250cc7bd6501) release-3.9: > Reviewed-on: http://review.gluster.org/15729 > Smoke: Gluster Build System > NetBSD-regression: NetBSD Build System > CentOS-regression: Gluster Build System > Reviewed-by: Aravinda VK (cherry picked from commit 915ae56a65d5a96bfddf977193dca60535ac7c11) Change-Id: Ia8d96e2cd47bf2a64416bece312e67631a1dbf29 BUG: 1379790 Signed-off-by: Milind Changire Reviewed-on: https://code.engineering.redhat.com/gerrit/88272 Reviewed-by: Atin Mukherjee --- tools/glusterfind/src/changelog.py | 2 +- tools/glusterfind/src/main.py | 68 +++++++++++++++++++++++++++++------- tools/glusterfind/src/nodeagent.py | 4 ++- tools/glusterfind/src/utils.py | 2 +- 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py index 283a035..721b8d0 100644 --- a/tools/glusterfind/src/changelog.py +++ b/tools/glusterfind/src/changelog.py @@ -284,7 +284,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args): # history_getchanges() changes = [] while libgfchangelog.cl_history_scan() > 0: - changes += libgfchangelog.cl_history_getchanges() + changes = libgfchangelog.cl_history_getchanges() for change in changes: # Ignore if last processed changelog comes diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py index 37d6c38..0c993f5 100644 --- a/tools/glusterfind/src/main.py 
+++ b/tools/glusterfind/src/main.py @@ -18,6 +18,9 @@ import xml.etree.cElementTree as etree from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action import logging import shutil +import tempfile +import signal +from datetime import datetime from utils import execute, is_host_local, mkdirp, fail from utils import setup_logger, human_time, handle_rm_error @@ -34,6 +37,7 @@ ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError logger = logging.getLogger() node_outfiles = [] vol_statusStr = "" +gtmpfilename = None class StoreAbsPath(Action): @@ -71,6 +75,8 @@ def node_cmd(host, host_uuid, task, cmd, args, opts): cmd = ["ssh", "-oNumberOfPasswordPrompts=0", "-oStrictHostKeyChecking=no", + "-t", + "-t", "-i", pem_key_path, "root@%s" % host] + cmd @@ -98,8 +104,13 @@ def run_cmd_nodes(task, args, **kwargs): host_uuid = node[0] cmd = [] opts = {} + + # tmpfilename is valid only for tasks: pre, query and cleanup + tmpfilename = kwargs.get("tmpfilename", "BADNAME") + node_outfile = os.path.join(conf.get_opt("working_dir"), args.session, args.volume, + tmpfilename, "tmp_output_%s" % num) if task == "pre": @@ -117,6 +128,9 @@ def run_cmd_nodes(task, args, **kwargs): tag = '""' if not is_host_local(host_uuid) else "" node_outfiles.append(node_outfile) + # remote file will be copied into this directory + mkdirp(os.path.dirname(node_outfile), + exit_on_err=True, logger=logger) cmd = [change_detector, args.session, @@ -144,6 +158,9 @@ def run_cmd_nodes(task, args, **kwargs): tag = '""' if not is_host_local(host_uuid) else "" node_outfiles.append(node_outfile) + # remote file will be copied into this directory + mkdirp(os.path.dirname(node_outfile), + exit_on_err=True, logger=logger) cmd = [change_detector, args.session, @@ -162,8 +179,9 @@ def run_cmd_nodes(task, args, **kwargs): opts["node_outfile"] = node_outfile opts["copy_outfile"] = True elif task == "cleanup": - # After pre run, cleanup the working directory and other temp files - # 
Remove the copied node_outfile in main node + # After pre/query run, cleanup the working directory and other + # temp files. Remove the directory to which node_outfile has + # been copied in main node try: os.remove(node_outfile) except (OSError, IOError): @@ -174,7 +192,9 @@ def run_cmd_nodes(task, args, **kwargs): cmd = [conf.get_opt("nodeagent"), "cleanup", args.session, - args.volume] + (["--debug"] if args.debug else []) + args.volume, + os.path.dirname(node_outfile)] + \ + (["--debug"] if args.debug else []) elif task == "create": if vol_statusStr != "Started": fail("Volume %s is not online" % args.volume, @@ -422,8 +442,8 @@ def enable_volume_options(args): % args.volume) -def write_output(args, outfilemerger): - with codecs.open(args.outfile, "a", encoding="utf-8") as f: +def write_output(outfile, outfilemerger): + with codecs.open(outfile, "a", encoding="utf-8") as f: for row in outfilemerger.get(): # Multiple paths in case of Hardlinks paths = row[1].split(",") @@ -438,9 +458,10 @@ def write_output(args, outfilemerger): if p_rep == row_2_rep: continue - f.write(u"{0} {1} {2}\n".format(row[0], - p_rep, - row_2_rep)) + if row_2_rep and row_2_rep != "": + f.write(u"{0} {1} {2}\n".format(row[0], p_rep, row_2_rep)) + else: + f.write(u"{0} {1}\n".format(row[0], p_rep)) def mode_create(session_dir, args): @@ -490,6 +511,8 @@ def mode_create(session_dir, args): def mode_query(session_dir, args): + global gtmpfilename + # Verify volume status cmd = ["gluster", 'volume', 'info', args.volume, "--xml"] _, data, _ = execute(cmd, @@ -533,7 +556,10 @@ def mode_query(session_dir, args): "Start time: %s" % ("default", args.volume, start)) - run_cmd_nodes("query", args, start=start) + prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-") + gtmpfilename = prefix + next(tempfile._get_candidate_names()) + + run_cmd_nodes("query", args, start=start, tmpfilename=gtmpfilename) # Merger if args.full: @@ -545,7 +571,7 @@ def mode_query(session_dir, args): # Read each Changelogs db 
and generate finaldb create_file(args.outfile, exit_on_err=True, logger=logger) outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) - write_output(args, outfilemerger) + write_output(args.outfile, outfilemerger) try: os.remove(args.outfile + ".db") @@ -558,6 +584,8 @@ def mode_query(session_dir, args): def mode_pre(session_dir, args): + global gtmpfilename + """ Read from Session file and write to session.pre file """ @@ -587,7 +615,10 @@ def mode_pre(session_dir, args): "Start time: %s, End time: %s" % (args.session, args.volume, start, endtime_to_update)) - run_cmd_nodes("pre", args, start=start) + prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-") + gtmpfilename = prefix + next(tempfile._get_candidate_names()) + + run_cmd_nodes("pre", args, start=start, tmpfilename=gtmpfilename) # Merger if args.full: @@ -599,8 +630,7 @@ def mode_pre(session_dir, args): # Read each Changelogs db and generate finaldb create_file(args.outfile, exit_on_err=True, logger=logger) outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) - - write_output(args, outfilemerger) + write_output(args.outfile, outfilemerger) try: os.remove(args.outfile + ".db") @@ -713,6 +743,10 @@ def mode_list(session_dir, args): def main(): + global gtmpfilename + + args = None + try: args = _get_args() mkdirp(conf.get_opt("session_dir"), exit_on_err=True) @@ -756,5 +790,13 @@ def main(): # mode_ will be the function name to be called globals()["mode_" + args.mode](session_dir, args) except KeyboardInterrupt: + if args is not None: + if args.mode == "pre" or args.mode == "query": + # cleanup session + if gtmpfilename is not None: + # no more interrupts until we clean up + signal.signal(signal.SIGINT, signal.SIG_IGN) + run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename) + # Interrupted, exit with non zero error code sys.exit(2) diff --git a/tools/glusterfind/src/nodeagent.py b/tools/glusterfind/src/nodeagent.py index f707449..07d8282 100644 --- 
a/tools/glusterfind/src/nodeagent.py +++ b/tools/glusterfind/src/nodeagent.py @@ -26,7 +26,8 @@ logger = logging.getLogger() def mode_cleanup(args): working_dir = os.path.join(conf.get_opt("working_dir"), args.session, - args.volume) + args.volume, + args.tmpfilename) mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume), exit_on_err=True) @@ -98,6 +99,7 @@ def _get_args(): parser_cleanup = subparsers.add_parser('cleanup') parser_cleanup.add_argument("session", help="Session Name") parser_cleanup.add_argument("volume", help="Volume Name") + parser_cleanup.add_argument("tmpfilename", help="Temporary File Name") parser_cleanup.add_argument("--debug", help="Debug", action="store_true") parser_session_create = subparsers.add_parser('create') diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py index 598cc9e..70737be 100644 --- a/tools/glusterfind/src/utils.py +++ b/tools/glusterfind/src/utils.py @@ -227,7 +227,7 @@ def get_changelog_rollover_time(volumename): try: tree = etree.fromstring(out) - return int(tree.find('volGetopts/Value').text) + return int(tree.find('volGetopts/Opt/Value').text) except ParseError: return DEFAULT_CHANGELOG_INTERVAL -- 1.7.1