From 782bbf19d62d1a5fb0d799bca3180001ca853c70 Mon Sep 17 00:00:00 2001 From: Ken Dreyer Date: Mar 02 2021 21:15:23 +0000 Subject: downgrade to v15.2.9-46-g353d697036 The c8-sig-storage-ceph-octopus branch should use cephadm from upstream's "octopus" branch in GitHub. --- diff --git a/SOURCES/cephadm b/SOURCES/cephadm index 40da060..564028f 100644 --- a/SOURCES/cephadm +++ b/SOURCES/cephadm @@ -1,8 +1,8 @@ #!/usr/bin/python3 -DEFAULT_IMAGE = 'docker.io/ceph/daemon-base:latest-master-devel' -DEFAULT_IMAGE_IS_MASTER = True -LATEST_STABLE_RELEASE = 'pacific' +DEFAULT_IMAGE='docker.io/ceph/ceph:v15' +DEFAULT_IMAGE_IS_MASTER=False +LATEST_STABLE_RELEASE = 'octopus' DATA_DIR = '/var/lib/ceph' LOG_DIR = '/var/log/ceph' LOCK_DIR = '/run/cephadm' @@ -44,7 +44,6 @@ import ipaddress import json import logging from logging.config import dictConfig -from operator import truediv import os import platform import pwd @@ -59,23 +58,18 @@ import tempfile import time import errno import struct -from socketserver import ThreadingMixIn -from http.server import BaseHTTPRequestHandler, HTTPServer -import signal -import io -from contextlib import redirect_stdout -import ssl from enum import Enum - - -from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO +try: + from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO +except ImportError: + pass import re import uuid from functools import wraps from glob import glob -from threading import Thread, RLock +from threading import Thread if sys.version_info >= (3, 0): from io import StringIO @@ -96,81 +90,11 @@ else: if sys.version_info > (3, 0): unicode = str +container_path = '' cached_stdin = None DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ' - -logger: logging.Logger = None # type: ignore - -################################## - -class BaseConfig: - - def __init__(self): - self.image: str = "" - self.docker: bool = False - self.data_dir: str = DATA_DIR - self.log_dir: str = LOG_DIR - self.logrotate_dir: str = LOGROTATE_DIR - self.unit_dir: str = UNIT_DIR - self.verbose: bool = False - self.timeout: Optional[int] = DEFAULT_TIMEOUT - self.retry: int = DEFAULT_RETRY - self.env: List[str] = [] - - self.container_path: str = "" - - def set_from_args(self, args: argparse.Namespace): - argdict: Dict[str, Any] = vars(args) - for k, v in argdict.items(): - if hasattr(self, k): - setattr(self, k, v) - - -class CephadmContext: - - def __init__(self): - - self.__dict__["_args"] = None - self.__dict__["_conf"] = BaseConfig() - - - def set_args(self, args: argparse.Namespace) -> None: - self._conf.set_from_args(args) - self._args = args - - - def has_function(self) -> bool: - return "func" in self._args - - - def has(self, name: str) -> bool: - return hasattr(self, name) - - - def __getattr__(self, name: str) -> Any: - if "_conf" in self.__dict__ and \ - hasattr(self._conf, name): - return getattr(self._conf, name) - elif "_args" in self.__dict__ and \ - hasattr(self._args, name): - return getattr(self._args, name) - else: - return super().__getattribute__(name) - - def __setattr__(self, name: str, value: Any) -> None: - if hasattr(self._conf, name): - setattr(self._conf, name, value) - elif hasattr(self._args, name): - setattr(self._args, name, value) - else: - super().__setattr__(name, value) - - -################################## - - # Log and console output config logging_config = { 'version': 1, @@ -287,17 +211,7 @@ class Monitoring(object): } # type: ignore ################################## -def populate_files(config_dir, config_files, uid, gid): - # type: (str, Dict, int, int) -> None - """create config files for different services""" - for fname in config_files: - config_file = os.path.join(config_dir, fname) - config_content = dict_get_join(config_files, fname) - logger.info('Write file: %s' % (config_file)) - with open(config_file, 'w') as f: - os.fchown(f.fileno(), uid, gid) - os.fchmod(f.fileno(), 0o600) - f.write(config_content) + class NFSGanesha(object): """Defines a NFS-Ganesha container""" @@ -313,13 +227,11 @@ class NFSGanesha(object): } def __init__(self, - ctx, fsid, daemon_id, config_json, image=DEFAULT_IMAGE): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None - self.ctx = ctx + # type: (str, Union[int, str], Dict, str) -> None self.fsid = fsid self.daemon_id = daemon_id self.image = image @@ -336,10 +248,9 @@ class NFSGanesha(object): self.validate() @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha - return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), - ctx.image) + def init(cls, fsid, daemon_id): + # type: (str, Union[int, str]) -> NFSGanesha + return cls(fsid, daemon_id, get_parm(args.config_json), args.image) def get_container_mounts(self, data_dir): # type: (str) -> Dict[str, str] @@ -363,11 +274,11 @@ class NFSGanesha(object): return envs @staticmethod - def get_version(ctx, container_id): - # type: (CephadmContext, str) -> Optional[str] + def get_version(container_id): + # type: (str) -> Optional[str] version = None - out, err, code = call(ctx, - [ctx.container_path, 'exec', container_id, + out, err, code = call( + [container_path, 'exec', container_id, NFSGanesha.entrypoint, '-v']) if code == 0: match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out) @@ -425,7 +336,14 @@ class NFSGanesha(object): makedirs(config_dir, uid, gid, 0o755) # populate files from the config-json - populate_files(config_dir, self.files, uid, gid) + for fname in self.files: + config_file = os.path.join(config_dir, fname) + config_content = dict_get_join(self.files, fname) + logger.info('Write file: %s' % (config_file)) + with open(config_file, 'w') as f: + os.fchown(f.fileno(), uid, gid) + os.fchmod(f.fileno(), 0o600) + f.write(config_content) # write the RGW keyring if self.rgw: @@ -448,14 +366,12 @@ class NFSGanesha(object): args += ['--userid', self.userid] args += [action, self.get_daemon_name()] - data_dir = get_data_dir(self.fsid, self.ctx.data_dir, - self.daemon_type, self.daemon_id) + data_dir = get_data_dir(self.fsid, self.daemon_type, self.daemon_id) volume_mounts = self.get_container_mounts(data_dir) envs = self.get_container_envs() logger.info('Creating RADOS grace for action: %s' % action) c = CephContainer( - self.ctx, image=self.image, entrypoint=entrypoint, args=args, @@ -477,13 +393,11 @@ class CephIscsi(object): required_files = ['iscsi-gateway.cfg'] def __init__(self, - ctx, fsid, daemon_id, config_json, image=DEFAULT_IMAGE): - # type: (CephadmContext, str, Union[int, str], Dict, str) -> None - self.ctx = ctx + # type: (str, Union[int, str], Dict, str) -> None self.fsid = fsid self.daemon_id = daemon_id self.image = image @@ -495,10 +409,9 @@ class CephIscsi(object): self.validate() @classmethod - def init(cls, ctx, fsid, daemon_id): - # type: (CephadmContext, str, Union[int, str]) -> CephIscsi - return cls(ctx, fsid, daemon_id, - get_parm(ctx.config_json), ctx.image) + def init(cls, fsid, daemon_id): + # type: (str, Union[int, str]) -> CephIscsi + return cls(fsid, daemon_id, get_parm(args.config_json), args.image) @staticmethod def get_container_mounts(data_dir, log_dir): @@ -524,11 +437,11 @@ class CephIscsi(object): return binds @staticmethod - def get_version(ctx, container_id): - # type: (CephadmContext, str) -> Optional[str] + def get_version(container_id): + # type: (str) -> Optional[str] version = None - out, err, code = call(ctx, - [ctx.container_path, 'exec', container_id, + out, err, code = call( + [container_path, 'exec', container_id, '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"]) if code == 0: version = out.strip() @@ -571,7 +484,14 @@ class CephIscsi(object): makedirs(configfs_dir, uid, gid, 0o755) # populate files from the config-json - populate_files(data_dir, self.files, uid, gid) + for fname in self.files: + config_file = os.path.join(data_dir, fname) + config_content = dict_get_join(self.files, fname) + logger.info('Write file: %s' % (config_file)) + with open(config_file, 'w') as f: + os.fchown(f.fileno(), uid, gid) + os.fchmod(f.fileno(), 0o600) + f.write(config_content) @staticmethod def configfs_mount_umount(data_dir, mount=True): @@ -587,7 +507,7 @@ class CephIscsi(object): def get_tcmu_runner_container(self): # type: () -> CephContainer - tcmu_container = get_container(self.ctx, self.fsid, self.daemon_type, self.daemon_id) + tcmu_container = get_container(self.fsid, self.daemon_type, self.daemon_id) tcmu_container.entrypoint = "/usr/bin/tcmu-runner" tcmu_container.cname = self.get_container_name(desc='tcmu') # remove extra container args for tcmu container. @@ -597,180 +517,12 @@ class CephIscsi(object): ################################## -class HAproxy(object): - """Defines an HAproxy container""" - daemon_type = 'haproxy' - required_files = ['haproxy.cfg'] - default_image = 'haproxy' - - def __init__(self, - ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str], - config_json: Dict, image: str) -> None: - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - # config-json options - self.files = dict_get(config_json, 'files', {}) - - self.validate() - - @classmethod - def init(cls, ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str]) -> 'HAproxy': - return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), - ctx.image) - - def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: - """Create files under the container data dir""" - if not os.path.isdir(data_dir): - raise OSError('data_dir is not a directory: %s' % (data_dir)) - - # create additional directories in data dir for HAproxy to use - if not os.path.isdir(os.path.join(data_dir, 'haproxy')): - makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE) - - data_dir = os.path.join(data_dir, 'haproxy') - populate_files(data_dir, self.files, uid, gid) - - def get_daemon_args(self) -> List[str]: - return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg'] - - def validate(self): - # type: () -> None - if not is_fsid(self.fsid): - raise Error('not an fsid: %s' % self.fsid) - if not self.daemon_id: - raise Error('invalid daemon_id: %s' % self.daemon_id) - if not self.image: - raise Error('invalid image: %s' % self.image) - - # check for the required files - if self.required_files: - for fname in self.required_files: - if fname not in self.files: - raise Error('required file missing from config-json: %s' % fname) - - def get_daemon_name(self): - # type: () -> str - return '%s.%s' % (self.daemon_type, self.daemon_id) - - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str - cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) - if desc: - cname = '%s-%s' % (cname, desc) - return cname - - def extract_uid_gid_haproxy(self): - # better directory for this? - return extract_uid_gid(self.ctx, file_path='/var/lib') - - @staticmethod - def get_container_mounts(data_dir: str) -> Dict[str, str]: - mounts = dict() - mounts[os.path.join(data_dir,'haproxy')] = '/var/lib/haproxy' - return mounts - -################################## - - -class Keepalived(object): - """Defines an Keepalived container""" - daemon_type = 'keepalived' - required_files = ['keepalived.conf'] - default_image = 'arcts/keepalived' - - def __init__(self, - ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str], - config_json: Dict, image: str) -> None: - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image - - # config-json options - self.files = dict_get(config_json, 'files', {}) - - self.validate() - - @classmethod - def init(cls, ctx: CephadmContext, fsid: str, - daemon_id: Union[int, str]) -> 'Keepalived': - return cls(ctx, fsid, daemon_id, - get_parm(ctx.config_json), ctx.image) - - def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: - """Create files under the container data dir""" - if not os.path.isdir(data_dir): - raise OSError('data_dir is not a directory: %s' % (data_dir)) - - # create additional directories in data dir for keepalived to use - if not os.path.isdir(os.path.join(data_dir, 'keepalived')): - makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE) - - # populate files from the config-json - populate_files(data_dir, self.files, uid, gid) - - def validate(self): - # type: () -> None - if not is_fsid(self.fsid): - raise Error('not an fsid: %s' % self.fsid) - if not self.daemon_id: - raise Error('invalid daemon_id: %s' % self.daemon_id) - if not self.image: - raise Error('invalid image: %s' % self.image) - - # check for the required files - if self.required_files: - for fname in self.required_files: - if fname not in self.files: - raise Error('required file missing from config-json: %s' % fname) - - def get_daemon_name(self): - # type: () -> str - return '%s.%s' % (self.daemon_type, self.daemon_id) - - def get_container_name(self, desc=None): - # type: (Optional[str]) -> str - cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) - if desc: - cname = '%s-%s' % (cname, desc) - return cname - - @staticmethod - def get_container_envs(): - # type: () -> List[str] - envs = [ - 'KEEPALIVED_AUTOCONF=false', - 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf', - 'KEEPALIVED_CMD="/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf"', - 'KEEPALIVED_DEBUG=false' - ] - return envs - - def extract_uid_gid_keepalived(self): - # better directory for this? - return extract_uid_gid(self.ctx, file_path='/var/lib') - - @staticmethod - def get_container_mounts(data_dir: str) -> Dict[str, str]: - mounts = dict() - mounts[os.path.join(data_dir,'keepalived.conf')] = '/etc/keepalived/keepalived.conf' - return mounts - -################################## - class CustomContainer(object): """Defines a custom container""" daemon_type = 'container' - def __init__(self, - fsid: str, daemon_id: Union[int, str], + def __init__(self, fsid: str, daemon_id: Union[int, str], config_json: Dict, image: str) -> None: self.fsid = fsid self.daemon_id = daemon_id @@ -790,10 +542,8 @@ class CustomContainer(object): self.files = dict_get(config_json, 'files', {}) @classmethod - def init(cls, ctx: CephadmContext, - fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer': - return cls(fsid, daemon_id, - get_parm(ctx.config_json), ctx.image) + def init(cls, fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer': + return cls(fsid, daemon_id, get_parm(args.config_json), args.image) def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None: """ @@ -897,7 +647,7 @@ def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> A """ if require and key not in d.keys(): raise Error('{} missing from dict'.format(key)) - return d.get(key, default) # type: ignore + return d.get(key, default) ################################## @@ -927,17 +677,14 @@ def get_supported_daemons(): supported_daemons.append(NFSGanesha.daemon_type) supported_daemons.append(CephIscsi.daemon_type) supported_daemons.append(CustomContainer.daemon_type) - supported_daemons.append(CephadmDaemon.daemon_type) - supported_daemons.append(HAproxy.daemon_type) - supported_daemons.append(Keepalived.daemon_type) assert len(supported_daemons) == len(set(supported_daemons)) return supported_daemons ################################## -def attempt_bind(ctx, s, address, port): - # type: (CephadmContext, socket.socket, str, int) -> None +def attempt_bind(s, address, port): + # type: (socket.socket, str, int) -> None try: s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) s.bind((address, port)) @@ -952,25 +699,25 @@ def attempt_bind(ctx, s, address, port): s.close() -def port_in_use(ctx, port_num): - # type: (CephadmContext, int) -> bool +def port_in_use(port_num): + # type: (int) -> bool """Detect whether a port is in use on the local machine - IPv4 and IPv6""" logger.info('Verifying port %d ...' % port_num) try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - attempt_bind(ctx, s, '0.0.0.0', port_num) + attempt_bind(s, '0.0.0.0', port_num) s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) - attempt_bind(ctx, s, '::', port_num) + attempt_bind(s, '::', port_num) except OSError: return True else: return False -def check_ip_port(ctx, ip, port): - # type: (CephadmContext, str, int) -> None - if not ctx.skip_ping_check: +def check_ip_port(ip, port): + # type: (str, int) -> None + if not args.skip_ping_check: logger.info('Verifying IP %s port %d ...' % (ip, port)) if is_ipv6(ip): s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) @@ -978,7 +725,7 @@ def check_ip_port(ctx, ip, port): else: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: - attempt_bind(ctx, s, ip, port) + attempt_bind(s, ip, port) except OSError as e: raise Error(e) @@ -988,6 +735,12 @@ def check_ip_port(ctx, ip, port): # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py # that drops all of the compatibility (this is Unix/Linux only). +try: + TimeoutError +except NameError: + TimeoutError = OSError + + class Timeout(TimeoutError): """ Raised when the lock could not be acquired in *timeout* @@ -1021,17 +774,16 @@ class _Acquire_ReturnProxy(object): class FileLock(object): - def __init__(self, ctx: CephadmContext, name, timeout=-1): + def __init__(self, name, timeout=-1): if not os.path.exists(LOCK_DIR): os.mkdir(LOCK_DIR, 0o700) self._lock_file = os.path.join(LOCK_DIR, name + '.lock') - self.ctx = ctx # The file descriptor for the *_lock_file* as it is returned by the # os.open() function. # This file lock is only NOT None, if the object currently holds the # lock. - self._lock_file_fd: Optional[int] = None + self._lock_file_fd = None self.timeout = timeout # The lock counter is used for implementing the nested locking # mechanism. Whenever the lock is acquired, the counter is increased and @@ -1070,7 +822,6 @@ class FileLock(object): This method returns now a *proxy* object instead of *self*, so that it can be used in a with statement without side effects. """ - # Use the default timeout, if no timeout is provided. if timeout is None: timeout = self.timeout @@ -1184,8 +935,7 @@ class CallVerbosity(Enum): VERBOSE = 3 -def call(ctx: CephadmContext, - command: List[str], +def call(command: List[str], desc: Optional[str] = None, verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE, timeout: Optional[int] = DEFAULT_TIMEOUT, @@ -1199,12 +949,11 @@ def call(ctx: CephadmContext, :param timeout: timeout in seconds """ - if desc is None: desc = command[0] if desc: desc += ': ' - timeout = timeout or ctx.timeout + timeout = timeout or args.timeout logger.debug("Running command: %s" % ' '.join(command)) process = subprocess.Popen( @@ -1249,14 +998,11 @@ def call(ctx: CephadmContext, ) for fd in reads: try: - message = str() message_b = os.read(fd, 1024) if isinstance(message_b, bytes): message = message_b.decode('utf-8') - elif isinstance(message_b, str): + if isinstance(message_b, str): message = message_b - else: - assert False if stop and message: # process has terminated, but have more to read still, so not stopping yet # (os.read returns '' when it encounters EOF) @@ -1315,21 +1061,20 @@ def call(ctx: CephadmContext, return out, err, returncode -def call_throws( - ctx: CephadmContext, - command: List[str], +def call_throws(command: List[str], desc: Optional[str] = None, verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE, timeout: Optional[int] = DEFAULT_TIMEOUT, **kwargs) -> Tuple[str, str, int]: - out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs) + out, err, ret = call(command, desc, verbosity, timeout, **kwargs) if ret: raise RuntimeError('Failed command: %s' % ' '.join(command)) return out, err, ret -def call_timeout(ctx, command, timeout): - # type: (CephadmContext, List[str], int) -> int +def call_timeout(command, timeout): + # type: (List[str], int) -> int + logger.debug('Running command (timeout=%s): %s' % (timeout, ' '.join(command))) @@ -1369,15 +1114,15 @@ def call_timeout(ctx, command, timeout): ################################## -def is_available(ctx, what, func): - # type: (CephadmContext, str, Callable[[], bool]) -> None +def is_available(what, func): + # type: (str, Callable[[], bool]) -> None """ Wait for a service to become available :param what: the name of the service :param func: the callable object that determines availability """ - retry = ctx.retry + retry = args.retry logger.info('Waiting for %s...' % what) num = 1 while True: @@ -1477,11 +1222,11 @@ def try_convert_datetime(s): return None -def get_podman_version(ctx, container_path): - # type: (CephadmContext, str) -> Tuple[int, ...] +def get_podman_version(): + # type: () -> Tuple[int, ...] if 'podman' not in container_path: raise ValueError('not using podman') - out, _, _ = call_throws(ctx, [container_path, '--version']) + out, _, _ = call_throws([container_path, '--version']) return _parse_podman_version(out) @@ -1558,22 +1303,22 @@ def infer_fsid(func): If we only find a single fsid in /var/lib/ceph/*, use that """ @wraps(func) - def _infer_fsid(ctx: CephadmContext): - if ctx.fsid: - logger.debug('Using specified fsid: %s' % ctx.fsid) - return func(ctx) + def _infer_fsid(): + if args.fsid: + logger.debug('Using specified fsid: %s' % args.fsid) + return func() fsids_set = set() - daemon_list = list_daemons(ctx, detail=False) + daemon_list = list_daemons(detail=False) for daemon in daemon_list: if not is_fsid(daemon['fsid']): # 'unknown' fsid continue - elif ctx.has("name") or not ctx.name: - # ctx.name not specified + elif 'name' not in args or not args.name: + # args.name not specified fsids_set.add(daemon['fsid']) - elif daemon['name'] == ctx.name: - # ctx.name is a match + elif daemon['name'] == args.name: + # args.name is a match fsids_set.add(daemon['fsid']) fsids = sorted(fsids_set) @@ -1582,10 +1327,10 @@ def infer_fsid(func): pass elif len(fsids) == 1: logger.info('Inferring fsid %s' % fsids[0]) - ctx.fsid = fsids[0] + args.fsid = fsids[0] else: raise Error('Cannot infer an fsid, one must be specified: %s' % fsids) - return func(ctx) + return func() return _infer_fsid @@ -1595,34 +1340,33 @@ def infer_config(func): If we find a MON daemon, use the config from that container """ @wraps(func) - def _infer_config(ctx: CephadmContext): - if ctx.config: - logger.debug('Using specified config: %s' % ctx.config) - return func(ctx) + def _infer_config(): + if args.config: + logger.debug('Using specified config: %s' % args.config) + return func() config = None - if ctx.fsid: - name = ctx.name + if args.fsid: + name = args.name if not name: - daemon_list = list_daemons(ctx, detail=False) + daemon_list = list_daemons(detail=False) for daemon in daemon_list: if daemon['name'].startswith('mon.'): name = daemon['name'] break if name: - config = '/var/lib/ceph/{}/{}/config'.format(ctx.fsid, - name) + config = '/var/lib/ceph/{}/{}/config'.format(args.fsid, name) if config: logger.info('Inferring config %s' % config) - ctx.config = config + args.config = config elif os.path.exists(SHELL_DEFAULT_CONF): logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF) - ctx.config = SHELL_DEFAULT_CONF - return func(ctx) + args.config = SHELL_DEFAULT_CONF + return func() return _infer_config -def _get_default_image(ctx: CephadmContext): +def _get_default_image(): if DEFAULT_IMAGE_IS_MASTER: warn = '''This is a development version of cephadm. For information regarding the latest stable release: @@ -1638,45 +1382,41 @@ def infer_image(func): Use the most recent ceph image """ @wraps(func) - def _infer_image(ctx: CephadmContext): - if not ctx.image: - ctx.image = os.environ.get('CEPHADM_IMAGE') - if not ctx.image: - ctx.image = get_last_local_ceph_image(ctx, ctx.container_path) - if not ctx.image: - ctx.image = _get_default_image(ctx) - return func(ctx) + def _infer_image(): + if not args.image: + args.image = os.environ.get('CEPHADM_IMAGE') + if not args.image: + args.image = get_last_local_ceph_image() + if not args.image: + args.image = _get_default_image() + return func() return _infer_image def default_image(func): @wraps(func) - def _default_image(ctx: CephadmContext): - if not ctx.image: - if ctx.has("name") and ctx.name: - type_ = ctx.name.split('.', 1)[0] + def _default_image(): + if not args.image: + if 'name' in args and args.name: + type_ = args.name.split('.', 1)[0] if type_ in Monitoring.components: - ctx.image = Monitoring.components[type_]['image'] - if type_ == 'haproxy': - ctx.image = HAproxy.default_image - if type_ == 'keepalived': - ctx.image = Keepalived.default_image - if not ctx.image: - ctx.image = os.environ.get('CEPHADM_IMAGE') - if not ctx.image: - ctx.image = _get_default_image(ctx) - - return func(ctx) + args.image = Monitoring.components[type_]['image'] + if not args.image: + args.image = os.environ.get('CEPHADM_IMAGE') + if not args.image: + args.image = _get_default_image() + + return func() return _default_image -def get_last_local_ceph_image(ctx: CephadmContext, container_path: str): +def get_last_local_ceph_image(): """ :return: The most recent local ceph image (already pulled) """ - out, _, _ = call_throws(ctx, + out, _, _ = call_throws( [container_path, 'images', '--filter', 'label=ceph=True', '--filter', 'dangling=false', @@ -1685,7 +1425,7 @@ def get_last_local_ceph_image(ctx: CephadmContext, container_path: str): def _filter_last_local_ceph_image(out): - # type: (str) -> Optional[str] + # str -> Optional[str] for image in out.splitlines(): if image and not image.endswith('@'): logger.info('Using recent ceph image %s' % image) @@ -1694,7 +1434,7 @@ def _filter_last_local_ceph_image(out): def write_tmp(s, uid, gid): - # type: (str, int, int) -> IO[str] + # type: (str, int, int) -> Any tmp_f = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp') os.fchown(tmp_f.fileno(), uid, gid) @@ -1714,19 +1454,19 @@ def makedirs(dir, uid, gid, mode): os.chmod(dir, mode) # the above is masked by umask... -def get_data_dir(fsid, data_dir, t, n): - # type: (str, str, str, Union[int, str]) -> str - return os.path.join(data_dir, fsid, '%s.%s' % (t, n)) +def get_data_dir(fsid, t, n): + # type: (str, str, Union[int, str]) -> str + return os.path.join(args.data_dir, fsid, '%s.%s' % (t, n)) -def get_log_dir(fsid, log_dir): - # type: (str, str) -> str - return os.path.join(log_dir, fsid) +def get_log_dir(fsid): + # type: (str) -> str + return os.path.join(args.log_dir, fsid) -def make_data_dir_base(fsid, data_dir, uid, gid): - # type: (str, str, int, int) -> str - data_dir_base = os.path.join(data_dir, fsid) +def make_data_dir_base(fsid, uid, gid): + # type: (str, int, int) -> str + data_dir_base = os.path.join(args.data_dir, fsid) makedirs(data_dir_base, uid, gid, DATA_DIR_MODE) makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE) makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid, @@ -1734,38 +1474,38 @@ def make_data_dir_base(fsid, data_dir, uid, gid): return data_dir_base -def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None): - # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str +def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None): + # type: (str, str, Union[int, str], Optional[int], Optional[int]) -> str if uid is None or gid is None: - uid, gid = extract_uid_gid(ctx) - make_data_dir_base(fsid, ctx.data_dir, uid, gid) - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + uid, gid = extract_uid_gid() + make_data_dir_base(fsid, uid, gid) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) makedirs(data_dir, uid, gid, DATA_DIR_MODE) return data_dir -def make_log_dir(ctx, fsid, uid=None, gid=None): - # type: (CephadmContext, str, Optional[int], Optional[int]) -> str +def make_log_dir(fsid, uid=None, gid=None): + # type: (str, Optional[int], Optional[int]) -> str if uid is None or gid is None: - uid, gid = extract_uid_gid(ctx) - log_dir = get_log_dir(fsid, ctx.log_dir) + uid, gid = extract_uid_gid() + log_dir = get_log_dir(fsid) makedirs(log_dir, uid, gid, LOG_DIR_MODE) return log_dir -def make_var_run(ctx, fsid, uid, gid): - # type: (CephadmContext, str, int, int) -> None - call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid), +def make_var_run(fsid, uid, gid): + # type: (str, int, int) -> None + call_throws(['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid), '/var/run/ceph/%s' % fsid]) -def copy_tree(ctx, src, dst, uid=None, gid=None): - # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None +def copy_tree(src, dst, uid=None, gid=None): + # type: (List[str], str, Optional[int], Optional[int]) -> None """ Copy a directory tree from src to dst """ if uid is None or gid is None: - (uid, gid) = extract_uid_gid(ctx) + (uid, gid) = extract_uid_gid() for src_dir in src: dst_dir = dst @@ -1784,13 +1524,13 @@ def copy_tree(ctx, src, dst, uid=None, gid=None): os.chown(os.path.join(dirpath, filename), uid, gid) -def copy_files(ctx, src, dst, uid=None, gid=None): - # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None +def copy_files(src, dst, uid=None, gid=None): + # type: (List[str], str, Optional[int], Optional[int]) -> None """ Copy a files from src to dst """ if uid is None or gid is None: - (uid, gid) = extract_uid_gid(ctx) + (uid, gid) = extract_uid_gid() for src_file in src: dst_file = dst @@ -1804,13 +1544,13 @@ def copy_files(ctx, src, dst, uid=None, gid=None): os.chown(dst_file, uid, gid) -def move_files(ctx, src, dst, uid=None, gid=None): - # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None +def move_files(src, dst, uid=None, gid=None): + # type: (List[str], str, Optional[int], Optional[int]) -> None """ Move files from src to dst """ if uid is None or gid is None: - (uid, gid) = extract_uid_gid(ctx) + (uid, gid) = extract_uid_gid() for src_file in src: dst_file = dst @@ -1878,31 +1618,29 @@ def find_program(filename): def get_unit_name(fsid, daemon_type, daemon_id=None): # type: (str, str, Optional[Union[int, str]]) -> str # accept either name or type + id - if daemon_type == CephadmDaemon.daemon_type and daemon_id is not None: - return 'ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id) - elif daemon_id is not None: + if daemon_id is not None: return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id) else: return 'ceph-%s@%s' % (fsid, daemon_type) -def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid, name): - daemon = get_daemon_description(ctx, fsid, name) +def get_unit_name_by_daemon_name(fsid, name): + daemon = get_daemon_description(fsid, name) try: return daemon['systemd_unit'] except KeyError: raise Error('Failed to get unit name for {}'.format(daemon)) -def check_unit(ctx, unit_name): - # type: (CephadmContext, str) -> Tuple[bool, str, bool] +def check_unit(unit_name): + # type: (str) -> Tuple[bool, str, bool] # NOTE: we ignore the exit code here because systemctl outputs # various exit codes based on the state of the service, but the # string result is more explicit (and sufficient). enabled = False installed = False try: - out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name], + out, err, code = call(['systemctl', 'is-enabled', unit_name], verbosity=CallVerbosity.DEBUG) if code == 0: enabled = True @@ -1916,7 +1654,7 @@ def check_unit(ctx, unit_name): state = 'unknown' try: - out, err, code = call(ctx, ['systemctl', 'is-active', unit_name], + out, err, code = call(['systemctl', 'is-active', unit_name], verbosity=CallVerbosity.DEBUG) out = out.strip() if out in ['active']: @@ -1933,10 +1671,10 @@ def check_unit(ctx, unit_name): return (enabled, state, installed) -def check_units(ctx, units, enabler=None): - # type: (CephadmContext, List[str], Optional[Packager]) -> bool +def check_units(units, enabler=None): + # type: (List[str], Optional[Packager]) -> bool for u in units: - (enabled, state, installed) = check_unit(ctx, u) + (enabled, state, installed) = check_unit(u) if enabled and state == 'running': logger.info('Unit %s is enabled and running' % u) return True @@ -1960,13 +1698,12 @@ def get_legacy_config_fsid(cluster, legacy_dir=None): return None -def get_legacy_daemon_fsid(ctx, cluster, - daemon_type, daemon_id, legacy_dir=None): - # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str] +def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None): + # type: (str, str, Union[int, str], Optional[str]) -> Optional[str] fsid = None if daemon_type == 'osd': try: - fsid_file = os.path.join(ctx.data_dir, + fsid_file = os.path.join(args.data_dir, daemon_type, 'ceph-%s' % daemon_id, 'ceph_fsid') @@ -1981,8 +1718,8 @@ def get_legacy_daemon_fsid(ctx, cluster, return fsid -def get_daemon_args(ctx, fsid, daemon_type, daemon_id): - # type: (CephadmContext, str, str, Union[int, str]) -> List[str] +def get_daemon_args(fsid, daemon_type, daemon_id): + # type: (str, str, Union[int, str]) -> List[str] r = list() # type: List[str] if daemon_type in Ceph.daemons and daemon_type != 'crash': @@ -2002,30 +1739,27 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id): metadata = Monitoring.components[daemon_type] r += metadata.get('args', list()) if daemon_type == 'alertmanager': - config = get_parm(ctx.config_json) + config = get_parm(args.config_json) peers = config.get('peers', list()) # type: ignore for peer in peers: r += ["--cluster.peer={}".format(peer)] # some alertmanager, by default, look elsewhere for a config r += ["--config.file=/etc/alertmanager/alertmanager.yml"] elif daemon_type == NFSGanesha.daemon_type: - nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) + nfs_ganesha = NFSGanesha.init(fsid, daemon_id) r += nfs_ganesha.get_daemon_args() - elif daemon_type == HAproxy.daemon_type: - haproxy = HAproxy.init(ctx, fsid, daemon_id) - r += haproxy.get_daemon_args() elif daemon_type == CustomContainer.daemon_type: - cc = CustomContainer.init(ctx, fsid, daemon_id) + cc = CustomContainer.init(fsid, daemon_id) r.extend(cc.get_daemon_args()) return r -def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid, +def create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid, config=None, keyring=None): - # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None - data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid) - make_log_dir(ctx, fsid, uid=uid, gid=gid) + # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None + data_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid) + make_log_dir(fsid, uid=uid, gid=gid) if config: config_path = os.path.join(data_dir, 'config') @@ -2042,30 +1776,26 @@ def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid, f.write(keyring) if daemon_type in Monitoring.components.keys(): - config_json: Dict[str, Any] = get_parm(ctx.config_json) + config_json: Dict[str, Any] = get_parm(args.config_json) required_files = Monitoring.components[daemon_type].get('config-json-files', list()) # Set up directories specific to the monitoring component config_dir = '' - data_dir_root = '' if daemon_type == 'prometheus': - data_dir_root = get_data_dir(fsid, ctx.data_dir, - daemon_type, daemon_id) + data_dir_root = get_data_dir(fsid, daemon_type, daemon_id) config_dir = 'etc/prometheus' makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755) makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755) elif daemon_type == 'grafana': - data_dir_root = get_data_dir(fsid, ctx.data_dir, - daemon_type, daemon_id) + data_dir_root = get_data_dir(fsid, daemon_type, daemon_id) config_dir = 'etc/grafana' makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755) makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755) makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755) elif daemon_type == 'alertmanager': - data_dir_root = get_data_dir(fsid, ctx.data_dir, - daemon_type, daemon_id) + data_dir_root = get_data_dir(fsid, daemon_type, daemon_id) config_dir = 'etc/alertmanager' makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755) makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755) @@ -2080,23 +1810,15 @@ def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid, f.write(content) elif daemon_type == NFSGanesha.daemon_type: - nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) + nfs_ganesha = NFSGanesha.init(fsid, daemon_id) nfs_ganesha.create_daemon_dirs(data_dir, uid, gid) elif daemon_type == CephIscsi.daemon_type: - ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id) + ceph_iscsi = CephIscsi.init(fsid, daemon_id) ceph_iscsi.create_daemon_dirs(data_dir, uid, gid) - elif daemon_type == HAproxy.daemon_type: - haproxy = HAproxy.init(ctx, fsid, daemon_id) - haproxy.create_daemon_dirs(data_dir, uid, gid) - - elif daemon_type == Keepalived.daemon_type: - keepalived = Keepalived.init(ctx, fsid, daemon_id) - keepalived.create_daemon_dirs(data_dir, uid, gid) - elif daemon_type == CustomContainer.daemon_type: - cc = CustomContainer.init(ctx, fsid, daemon_id) + cc = CustomContainer.init(fsid, daemon_id) cc.create_daemon_dirs(data_dir, uid, gid) @@ -2135,47 +1857,47 @@ def get_parm(option): return js -def get_config_and_keyring(ctx): - # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]] +def get_config_and_keyring(): + # type: () -> Tuple[Optional[str], Optional[str]] config = None keyring = None - if ctx.has("config_json") and ctx.config_json: - d = get_parm(ctx.config_json) + if 'config_json' in args and args.config_json: + d = get_parm(args.config_json) config = d.get('config') keyring = d.get('keyring') - if ctx.has("config") and ctx.config: - with open(ctx.config, 'r') as f: + if 'config' in args and args.config: + with open(args.config, 'r') as f: config = f.read() - if ctx.has("key") and ctx.key: - keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key) - elif ctx.has("keyring") and ctx.keyring: - with open(ctx.keyring, 'r') as f: + if 'key' in args and args.key: + keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key) + elif 'keyring' in args and args.keyring: + with open(args.keyring, 'r') as f: keyring = f.read() return config, keyring -def get_container_binds(ctx, fsid, daemon_type, daemon_id): - # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]] +def get_container_binds(fsid, daemon_type, daemon_id): + # type: (str, str, Union[int, str, None]) -> List[List[str]] binds = list() if daemon_type == CephIscsi.daemon_type: binds.extend(CephIscsi.get_container_binds()) elif daemon_type == CustomContainer.daemon_type: assert daemon_id - cc = CustomContainer.init(ctx, fsid, daemon_id) - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + cc = CustomContainer.init(fsid, daemon_id) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) binds.extend(cc.get_container_binds(data_dir)) return binds -def get_container_mounts(ctx, fsid, daemon_type, daemon_id, +def get_container_mounts(fsid, daemon_type, daemon_id, no_config=False): - # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str] + # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str] mounts = dict() if daemon_type in Ceph.daemons: @@ -2183,14 +1905,14 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id, run_path = os.path.join('/var/run/ceph', fsid); if os.path.exists(run_path): mounts[run_path] = '/var/run/ceph:z' - log_dir = get_log_dir(fsid, ctx.log_dir) + log_dir = get_log_dir(fsid) mounts[log_dir] = '/var/log/ceph:z' crash_dir = '/var/lib/ceph/%s/crash' % fsid if os.path.exists(crash_dir): mounts[crash_dir] = '/var/lib/ceph/crash:z' if daemon_type in Ceph.daemons and daemon_id: - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) if daemon_type == 'rgw': cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id) else: @@ -2212,8 +1934,8 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id, mounts['/run/lock/lvm'] = '/run/lock/lvm' try: - if ctx.shared_ceph_folder: # make easy manager modules/ceph-volume development - ceph_folder = pathify(ctx.shared_ceph_folder) + if args.shared_ceph_folder: # make easy manager modules/ceph-volume development + ceph_folder = pathify(args.shared_ceph_folder) if os.path.exists(ceph_folder): mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume' mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr' @@ -2228,7 +1950,7 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id, pass if daemon_type in Monitoring.components and daemon_id: - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) if daemon_type == 'prometheus': mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z' mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z' @@ -2245,37 +1967,26 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id, if daemon_type == NFSGanesha.daemon_type: assert daemon_id - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) - nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) + nfs_ganesha = NFSGanesha.init(fsid, daemon_id) mounts.update(nfs_ganesha.get_container_mounts(data_dir)) - if daemon_type == HAproxy.daemon_type: - assert daemon_id - data_dir = get_data_dir(fsid, daemon_type, daemon_type, daemon_id) - mounts.update(HAproxy.get_container_mounts(data_dir)) - if daemon_type == CephIscsi.daemon_type: assert daemon_id - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) - log_dir = get_log_dir(fsid, ctx.log_dir) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) + log_dir = get_log_dir(fsid) mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir)) - if daemon_type == Keepalived.daemon_type: - assert daemon_id - data_dir = get_data_dir(fsid, daemon_type, daemon_type, daemon_id) - mounts.update(Keepalived.get_container_mounts(data_dir)) - if daemon_type == CustomContainer.daemon_type: assert daemon_id - cc = CustomContainer.init(ctx, fsid, daemon_id) - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + cc = CustomContainer.init(fsid, daemon_id) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) mounts.update(cc.get_container_mounts(data_dir)) return mounts -def get_container(ctx: CephadmContext, - fsid: str, daemon_type: str, daemon_id: Union[int, str], +def get_container(fsid: str, daemon_type: str, daemon_id: Union[int, str], privileged: bool = False, ptrace: bool = False, container_args: Optional[List[str]] = None) -> 'CephContainer': @@ -2308,12 +2019,6 @@ def get_container(ctx: CephadmContext, entrypoint = NFSGanesha.entrypoint name = '%s.%s' % (daemon_type, daemon_id) envs.extend(NFSGanesha.get_container_envs()) - elif daemon_type == HAproxy.daemon_type: - name = '%s.%s' % (daemon_type, daemon_id) - elif daemon_type == Keepalived.daemon_type: - name = '%s.%s' % (daemon_type, daemon_id) - envs.extend(Keepalived.get_container_envs()) - container_args.extend(['--cap-add NET_ADMIN']) elif daemon_type == CephIscsi.daemon_type: entrypoint = CephIscsi.entrypoint name = '%s.%s' % (daemon_type, daemon_id) @@ -2321,14 +2026,14 @@ def get_container(ctx: CephadmContext, # to configfs we need to make this a privileged container. privileged = True elif daemon_type == CustomContainer.daemon_type: - cc = CustomContainer.init(ctx, fsid, daemon_id) + cc = CustomContainer.init(fsid, daemon_id) entrypoint = cc.entrypoint host_network = False envs.extend(cc.get_container_envs()) container_args.extend(cc.get_container_args()) if daemon_type in Monitoring.components: - uid, gid = extract_uid_gid_monitoring(ctx, daemon_type) + uid, gid = extract_uid_gid_monitoring(daemon_type) monitoring_args = [ '--user', str(uid), @@ -2343,7 +2048,7 @@ def get_container(ctx: CephadmContext, # if using podman, set -d, --conmon-pidfile & --cidfile flags # so service can have Type=Forking - if 'podman' in ctx.container_path: + if 'podman' in container_path: runtime_dir = '/run' container_args.extend(['-d', '--conmon-pidfile', @@ -2352,27 +2057,26 @@ def get_container(ctx: CephadmContext, runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id)]) return CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint=entrypoint, - args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id), + args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id), container_args=container_args, - volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id), - bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id), + volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id), + bind_mounts=get_container_binds(fsid, daemon_type, daemon_id), cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id), envs=envs, privileged=privileged, ptrace=ptrace, - init=ctx.container_init, + init=args.container_init, host_network=host_network, ) -def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'): - # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int] +def extract_uid_gid(img='', file_path='/var/lib/ceph'): + # type: (str, Union[str, List[str]]) -> Tuple[int, int] if not img: - img = ctx.image + img = args.image if isinstance(file_path, str): paths = [file_path] @@ -2382,7 +2086,6 @@ def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'): for fp in paths: try: out = CephContainer( - ctx, image=img, entrypoint='stat', args=['-c', '%u %g', fp] @@ -2394,18 +2097,18 @@ def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'): raise RuntimeError('uid/gid not found') -def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid, +def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid, config=None, keyring=None, osd_fsid=None, reconfig=False, ports=None): - # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None + # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None ports = ports or [] - if any([port_in_use(ctx, port) for port in ports]): + if any([port_in_use(port) for port in ports]): raise Error("TCP Port(s) '{}' required for {} already in use".format(",".join(map(str, ports)), daemon_type)) - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) if reconfig and not os.path.exists(data_dir): raise Error('cannot reconfig, data path %s does not exist' % data_dir) if daemon_type == 'mon' and not os.path.exists(data_dir): @@ -2418,19 +2121,18 @@ def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid, tmp_config = write_tmp(config, uid, gid) # --mkfs - create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid) - mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id) - log_dir = get_log_dir(fsid, ctx.log_dir) + create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid) + mon_dir = get_data_dir(fsid, 'mon', daemon_id) + log_dir = get_log_dir(fsid) out = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='/usr/bin/ceph-mon', args=['--mkfs', '-i', str(daemon_id), '--fsid', fsid, '-c', '/tmp/config', '--keyring', '/tmp/keyring', - ] + get_daemon_args(ctx, fsid, 'mon', daemon_id), + ] + get_daemon_args(fsid, 'mon', daemon_id), volume_mounts={ log_dir: '/var/log/ceph:z', mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id), @@ -2447,29 +2149,13 @@ def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid, else: # dirs, conf, keyring create_daemon_dirs( - ctx, fsid, daemon_type, daemon_id, uid, gid, config, keyring) if not reconfig: - if daemon_type == CephadmDaemon.daemon_type: - port = next(iter(ports), None) # get first tcp port provided or None - - if ctx.config_json == '-': - config_js = get_parm('-') - else: - config_js = get_parm(ctx.config_json) - assert isinstance(config_js, dict) - - cephadm_exporter = CephadmDaemon(ctx, fsid, daemon_id, port) - cephadm_exporter.deploy_daemon_unit(config_js) - else: - if c: - deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, - c, osd_fsid=osd_fsid) - else: - raise RuntimeError("attempting to deploy a daemon without a container image") + deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c, + osd_fsid=osd_fsid) if not os.path.exists(data_dir + '/unit.created'): with open(data_dir + '/unit.created', 'w') as f: @@ -2482,24 +2168,24 @@ def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid, os.fchmod(f.fileno(), 0o600) os.fchown(f.fileno(), uid, gid) - update_firewalld(ctx, daemon_type) + update_firewalld(daemon_type) # Open ports explicitly required for the daemon if ports: - fw = Firewalld(ctx) + fw = Firewalld() fw.open_ports(ports) fw.apply_rules() if reconfig and daemon_type not in Ceph.daemons: # ceph daemons do not need a restart; others (presumably) do to pick # up the new config - call_throws(ctx, ['systemctl', 'reset-failed', + call_throws(['systemctl', 'reset-failed', get_unit_name(fsid, daemon_type, daemon_id)]) - call_throws(ctx, ['systemctl', 'restart', + call_throws(['systemctl', 'restart', get_unit_name(fsid, daemon_type, daemon_id)]) -def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False): - # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None +def _write_container_cmd_to_bash(file_obj, container, comment=None, background=False): + # type: (IO[str], CephContainer, Optional[str], Optional[bool]) -> None if comment: # Sometimes adding a comment, especially if there are multiple containers in one # unit file, makes it easier to read and grok. @@ -2507,19 +2193,19 @@ def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, backgro # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually file_obj.write('! '+ ' '.join(container.rm_cmd()) + ' 2> /dev/null\n') # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage` - if 'podman' in ctx.container_path: + if 'podman' in container_path: file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + ' 2> /dev/null\n') # container run command file_obj.write(' '.join(container.run_cmd()) + (' &' if background else '') + '\n') -def deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c, +def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c, enable=True, start=True, osd_fsid=None): - # type: (CephadmContext, str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None + # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None # cmd - data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) + data_dir = get_data_dir(fsid, daemon_type, daemon_id) with open(data_dir + '/unit.run.new', 'w') as f: f.write('set -e\n') @@ -2540,8 +2226,7 @@ def deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c, f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid)) else: prestart = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='/usr/sbin/ceph-volume', args=[ 'lvm', 'activate', @@ -2549,23 +2234,23 @@ def deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c, '--no-systemd' ], privileged=True, - volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id), - bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id), + volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id), + bind_mounts=get_container_binds(fsid, daemon_type, daemon_id), cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id), ) - _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate') + _write_container_cmd_to_bash(f, prestart, 'LVM OSDs use ceph-volume lvm activate') elif daemon_type == NFSGanesha.daemon_type: # add nfs to the rados grace db - nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) + nfs_ganesha = NFSGanesha.init(fsid, daemon_id) prestart = nfs_ganesha.get_rados_grace_container('add') - _write_container_cmd_to_bash(ctx, f, prestart, 'add daemon to rados grace') + _write_container_cmd_to_bash(f, prestart, 'add daemon to rados grace') elif daemon_type == CephIscsi.daemon_type: f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n') - ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id) + ceph_iscsi = CephIscsi.init(fsid, daemon_id) tcmu_container = ceph_iscsi.get_tcmu_runner_container() - _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runnter container', background=True) + _write_container_cmd_to_bash(f, tcmu_container, 'iscsi tcmu-runnter container', background=True) - _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id))) + _write_container_cmd_to_bash(f, c, '%s.%s' % (daemon_type, str(daemon_id))) os.fchmod(f.fileno(), 0o600) os.rename(data_dir + '/unit.run.new', data_dir + '/unit.run') @@ -2575,28 +2260,27 @@ def deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c, if daemon_type == 'osd': assert osd_fsid poststop = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='/usr/sbin/ceph-volume', args=[ 'lvm', 'deactivate', str(daemon_id), osd_fsid, ], privileged=True, - volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id), - bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id), + volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id), + bind_mounts=get_container_binds(fsid, daemon_type, daemon_id), cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type, daemon_id), ) - _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd') + _write_container_cmd_to_bash(f, poststop, 'deactivate osd') elif daemon_type == NFSGanesha.daemon_type: # remove nfs from the rados grace db - nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) + nfs_ganesha = NFSGanesha.init(fsid, daemon_id) poststop = nfs_ganesha.get_rados_grace_container('remove') - _write_container_cmd_to_bash(ctx, f, poststop, 'remove daemon from rados grace') + _write_container_cmd_to_bash(f, poststop, 'remove daemon from rados grace') elif daemon_type == CephIscsi.daemon_type: # make sure we also stop the tcmu container - ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id) + ceph_iscsi = CephIscsi.init(fsid, daemon_id) tcmu_container = ceph_iscsi.get_tcmu_runner_container() f.write('! '+ ' '.join(tcmu_container.stop_cmd()) + '\n') f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n') @@ -2604,39 +2288,37 @@ def deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c, os.rename(data_dir + '/unit.poststop.new', data_dir + '/unit.poststop') - if c: - with open(data_dir + '/unit.image.new', 'w') as f: - f.write(c.image + '\n') - os.fchmod(f.fileno(), 0o600) - os.rename(data_dir + '/unit.image.new', - data_dir + '/unit.image') + with open(data_dir + '/unit.image.new', 'w') as f: + f.write(c.image + '\n') + os.fchmod(f.fileno(), 0o600) + os.rename(data_dir + '/unit.image.new', + data_dir + '/unit.image') # systemd - install_base_units(ctx, fsid) - unit = get_unit_file(ctx, fsid) + install_base_units(fsid) + unit = get_unit_file(fsid) unit_file = 'ceph-%s@.service' % (fsid) - with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f: + with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f: f.write(unit) - os.rename(ctx.unit_dir + '/' + unit_file + '.new', - ctx.unit_dir + '/' + unit_file) - call_throws(ctx, ['systemctl', 'daemon-reload']) + os.rename(args.unit_dir + '/' + unit_file + '.new', + args.unit_dir + '/' + unit_file) + call_throws(['systemctl', 'daemon-reload']) unit_name = get_unit_name(fsid, daemon_type, daemon_id) - call(ctx, ['systemctl', 'stop', unit_name], + call(['systemctl', 'stop', unit_name], verbosity=CallVerbosity.DEBUG) - call(ctx, ['systemctl', 'reset-failed', unit_name], + call(['systemctl', 'reset-failed', unit_name], verbosity=CallVerbosity.DEBUG) if enable: - call_throws(ctx, ['systemctl', 'enable', unit_name]) + call_throws(['systemctl', 'enable', unit_name]) if start: - call_throws(ctx, ['systemctl', 'start', unit_name]) + call_throws(['systemctl', 'start', unit_name]) class Firewalld(object): - def __init__(self, ctx): - # type: (CephadmContext) -> None - self.ctx = ctx + def __init__(self): + # type: () -> None self.available = self.check() def check(self): @@ -2645,7 +2327,7 @@ class Firewalld(object): if not self.cmd: logger.debug('firewalld does not appear to be present') return False - (enabled, state, _) = check_unit(self.ctx, 'firewalld.service') + (enabled, state, _) = check_unit('firewalld.service') if not enabled: logger.debug('firewalld.service is not enabled') return False @@ -2671,13 +2353,10 @@ class Firewalld(object): else: return - if not self.cmd: - raise RuntimeError("command not defined") - - out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG) + out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG) if ret: logger.info('Enabling firewalld service %s in current zone...' % svc) - out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc]) + out, err, ret = call([self.cmd, '--permanent', '--add-service', svc]) if ret: raise RuntimeError( 'unable to add service %s to current zone: %s' % (svc, err)) @@ -2690,58 +2369,30 @@ class Firewalld(object): logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports) return - if not self.cmd: - raise RuntimeError("command not defined") - for port in fw_ports: tcp_port = str(port) + '/tcp' - out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG) + out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG) if ret: logger.info('Enabling firewalld port %s in current zone...' % tcp_port) - out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port]) + out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port]) if ret: raise RuntimeError('unable to add port %s to current zone: %s' % (tcp_port, err)) else: logger.debug('firewalld port %s is enabled in current zone' % tcp_port) - def close_ports(self, fw_ports): - # type: (List[int]) -> None - if not self.available: - logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports) - return - - if not self.cmd: - raise RuntimeError("command not defined") - - for port in fw_ports: - tcp_port = str(port) + '/tcp' - out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG) - if not ret: - logger.info('Disabling port %s in current zone...' % tcp_port) - out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port]) - if ret: - raise RuntimeError('unable to remove port %s from current zone: %s' % - (tcp_port, err)) - else: - logger.info(f"Port {tcp_port} disabled") - else: - logger.info(f"firewalld port {tcp_port} already closed") - + out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False) def apply_rules(self): # type: () -> None if not self.available: return - if not self.cmd: - raise RuntimeError("command not defined") - - call_throws(self.ctx, [self.cmd, '--reload']) + call_throws([self.cmd, '--reload']) -def update_firewalld(ctx, daemon_type): - # type: (CephadmContext, str) -> None - firewall = Firewalld(ctx) +def update_firewalld(daemon_type): + # type: (str) -> None + firewall = Firewalld() firewall.enable_service_for(daemon_type) @@ -2753,34 +2404,34 @@ def update_firewalld(ctx, daemon_type): firewall.open_ports(fw_ports) firewall.apply_rules() -def install_base_units(ctx, fsid): - # type: (CephadmContext, str) -> None +def install_base_units(fsid): + # type: (str) -> None """ Set up ceph.target and ceph-$fsid.target units. """ # global unit - existed = os.path.exists(ctx.unit_dir + '/ceph.target') - with open(ctx.unit_dir + '/ceph.target.new', 'w') as f: + existed = os.path.exists(args.unit_dir + '/ceph.target') + with open(args.unit_dir + '/ceph.target.new', 'w') as f: f.write('[Unit]\n' 'Description=All Ceph clusters and services\n' '\n' '[Install]\n' 'WantedBy=multi-user.target\n') - os.rename(ctx.unit_dir + '/ceph.target.new', - ctx.unit_dir + '/ceph.target') + os.rename(args.unit_dir + '/ceph.target.new', + args.unit_dir + '/ceph.target') if not existed: # we disable before enable in case a different ceph.target # (from the traditional package) is present; while newer # systemd is smart enough to disable the old # (/lib/systemd/...) and enable the new (/etc/systemd/...), # some older versions of systemd error out with EEXIST. - call_throws(ctx, ['systemctl', 'disable', 'ceph.target']) - call_throws(ctx, ['systemctl', 'enable', 'ceph.target']) - call_throws(ctx, ['systemctl', 'start', 'ceph.target']) + call_throws(['systemctl', 'disable', 'ceph.target']) + call_throws(['systemctl', 'enable', 'ceph.target']) + call_throws(['systemctl', 'start', 'ceph.target']) # cluster unit - existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid) - with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f: + existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid) + with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f: f.write('[Unit]\n' 'Description=Ceph cluster {fsid}\n' 'PartOf=ceph.target\n' @@ -2790,14 +2441,14 @@ def install_base_units(ctx, fsid): 'WantedBy=multi-user.target ceph.target\n'.format( fsid=fsid) ) - os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid, - ctx.unit_dir + '/ceph-%s.target' % fsid) + os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid, + args.unit_dir + '/ceph-%s.target' % fsid) if not existed: - call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid]) - call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid]) + call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid]) + call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid]) # logrotate for the cluster - with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f: + with open(args.logrotate_dir + '/ceph-%s' % fsid, 'w') as f: """ This is a bit sloppy in that the killall/pkill will touch all ceph daemons in all containers, but I don't see an elegant way to send SIGHUP *just* to @@ -2822,10 +2473,10 @@ def install_base_units(ctx, fsid): """ % fsid) -def get_unit_file(ctx, fsid): - # type: (CephadmContext, str) -> str +def get_unit_file(fsid): + # type: (str) -> str extra_args = '' - if 'podman' in ctx.container_path: + if 'podman' in container_path: extra_args = ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n' 'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n' 'Type=forking\n' @@ -2863,9 +2514,9 @@ StartLimitBurst=5 [Install] WantedBy=ceph-{fsid}.target """.format( - container_path=ctx.container_path, + container_path=container_path, fsid=fsid, - data_dir=ctx.data_dir, + data_dir=args.data_dir, extra_args=extra_args) return u @@ -2875,7 +2526,6 @@ WantedBy=ceph-{fsid}.target class CephContainer: def __init__(self, - ctx: CephadmContext, image: str, entrypoint: str, args: List[str] = [], @@ -2889,7 +2539,6 @@ class CephContainer: init: bool = False, host_network: bool = True, ) -> None: - self.ctx = ctx self.image = image self.entrypoint = entrypoint self.args = args @@ -2905,16 +2554,11 @@ class CephContainer: def run_cmd(self) -> List[str]: cmd_args: List[str] = [ - str(self.ctx.container_path), + str(container_path), 'run', '--rm', '--ipc=host', ] - - if 'podman' in self.ctx.container_path and \ - os.path.exists('/etc/ceph/podman-auth.json'): - cmd_args.append('--authfile=/etc/ceph/podman-auth.json') - envs: List[str] = [ '-e', 'CONTAINER_IMAGE=%s' % self.image, '-e', 'NODE_NAME=%s' % get_hostname(), @@ -2956,7 +2600,7 @@ class CephContainer: def shell_cmd(self, cmd: List[str]) -> List[str]: cmd_args: List[str] = [ - str(self.ctx.container_path), + str(container_path), 'run', '--rm', '--ipc=host', @@ -2994,7 +2638,7 @@ class CephContainer: def exec_cmd(self, cmd): # type: (List[str]) -> List[str] return [ - str(self.ctx.container_path), + str(container_path), 'exec', ] + self.container_args + [ self.cname, @@ -3003,7 +2647,7 @@ class CephContainer: def rm_cmd(self, storage=False): # type: (bool) -> List[str] ret = [ - str(self.ctx.container_path), + str(container_path), 'rm', '-f', ] if storage: @@ -3014,7 +2658,7 @@ class CephContainer: def stop_cmd(self): # type () -> List[str] ret = [ - str(self.ctx.container_path), + str(container_path), 'stop', self.cname, ] return ret @@ -3022,7 +2666,6 @@ class CephContainer: def run(self, timeout=DEFAULT_TIMEOUT): # type: (Optional[int]) -> str out, _, _ = call_throws( - self.ctx, self.run_cmd(), desc=self.entrypoint, timeout=timeout) return out @@ -3030,9 +2673,9 @@ class CephContainer: @infer_image -def command_version(ctx): - # type: (CephadmContext) -> int - out = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run() +def command_version(): + # type: () -> int + out = CephContainer(args.image, 'ceph', ['--version']).run() print(out.strip()) return 0 @@ -3040,15 +2683,15 @@ def command_version(ctx): @infer_image -def command_pull(ctx): - # type: (CephadmContext) -> int +def command_pull(): + # type: () -> int - _pull_image(ctx, ctx.image) - return command_inspect_image(ctx) + _pull_image(args.image) + return command_inspect_image() -def _pull_image(ctx, image): - # type: (CephadmContext, str) -> None +def _pull_image(image): + # type: (str) -> None logger.info('Pulling container image %s...' % image) ignorelist = [ @@ -3057,13 +2700,11 @@ def _pull_image(ctx, image): "Digest did not match, expected", ] - cmd = [ctx.container_path, 'pull', image] - if 'podman' in ctx.container_path and os.path.exists('/etc/ceph/podman-auth.json'): - cmd.append('--authfile=/etc/ceph/podman-auth.json') + cmd = [container_path, 'pull', image] cmd_str = ' '.join(cmd) for sleep_secs in [1, 4, 25]: - out, err, ret = call(ctx, cmd) + out, err, ret = call(cmd) if not ret: return @@ -3078,17 +2719,17 @@ def _pull_image(ctx, image): @infer_image -def command_inspect_image(ctx): - # type: (CephadmContext) -> int - out, err, ret = call_throws(ctx, [ - ctx.container_path, 'inspect', +def command_inspect_image(): + # type: () -> int + out, err, ret = call_throws([ + container_path, 'inspect', '--format', '{{.ID}},{{json .RepoDigests}}', - ctx.image]) + args.image]) if ret: return errno.ENOENT - info_from = get_image_info_from_inspect(out.strip(), ctx.image) + info_from = get_image_info_from_inspect(out.strip(), args.image) - ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip() + ver = CephContainer(args.image, 'ceph', ['--version']).run().strip() info_from['ceph_version'] = ver print(json.dumps(info_from, indent=4, sort_keys=True)) @@ -3145,37 +2786,80 @@ def is_ipv6(address): return False -def prepare_mon_addresses( - ctx: CephadmContext -) -> Tuple[str, bool, Optional[str]]: - r = re.compile(r':(\d+)$') - base_ip = "" +@default_image +def command_bootstrap(): + # type: () -> int + + if not args.output_config: + args.output_config = os.path.join(args.output_dir, 'ceph.conf') + if not args.output_keyring: + args.output_keyring = os.path.join(args.output_dir, + 'ceph.client.admin.keyring') + if not args.output_pub_ssh_key: + args.output_pub_ssh_key = os.path.join(args.output_dir, 'ceph.pub') + + # verify output files + for f in [args.output_config, args.output_keyring, args.output_pub_ssh_key]: + if not args.allow_overwrite: + if os.path.exists(f): + raise Error('%s already exists; delete or pass ' + '--allow-overwrite to overwrite' % f) + dirname = os.path.dirname(f) + if dirname and not os.path.exists(dirname): + fname = os.path.basename(f) + logger.info(f"Creating directory {dirname} for {fname}") + try: + # use makedirs to create intermediate missing dirs + os.makedirs(dirname, 0o755) + except PermissionError: + raise Error(f"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.") + + + if not args.skip_prepare_host: + command_prepare_host() + else: + logger.info('Skip prepare_host') + + # initial vars + fsid = args.fsid or make_fsid() + hostname = get_hostname() + if '.' in hostname and not args.allow_fqdn_hostname: + raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0])) + mon_id = args.mon_id or hostname + mgr_id = args.mgr_id or generate_service_id() + logger.info('Cluster fsid: %s' % fsid) ipv6 = False - if ctx.mon_ip: - ipv6 = is_ipv6(ctx.mon_ip) + l = FileLock(fsid) + l.acquire() + + # ip + r = re.compile(r':(\d+)$') + base_ip = '' + if args.mon_ip: + ipv6 = is_ipv6(args.mon_ip) if ipv6: - ctx.mon_ip = wrap_ipv6(ctx.mon_ip) - hasport = r.findall(ctx.mon_ip) + args.mon_ip = wrap_ipv6(args.mon_ip) + hasport = r.findall(args.mon_ip) if hasport: port = int(hasport[0]) if port == 6789: - addr_arg = '[v1:%s]' % ctx.mon_ip + addr_arg = '[v1:%s]' % args.mon_ip elif port == 3300: - addr_arg = '[v2:%s]' % ctx.mon_ip + addr_arg = '[v2:%s]' % args.mon_ip else: logger.warning('Using msgr2 protocol for unrecognized port %d' % port) - addr_arg = '[v2:%s]' % ctx.mon_ip - base_ip = ctx.mon_ip[0:-(len(str(port)))-1] - check_ip_port(ctx, base_ip, port) + addr_arg = '[v2:%s]' % args.mon_ip + base_ip = args.mon_ip[0:-(len(str(port)))-1] + check_ip_port(base_ip, port) else: - base_ip = ctx.mon_ip - addr_arg = '[v2:%s:3300,v1:%s:6789]' % (ctx.mon_ip, ctx.mon_ip) - check_ip_port(ctx, ctx.mon_ip, 3300) - check_ip_port(ctx, ctx.mon_ip, 6789) - elif ctx.mon_addrv: - addr_arg = ctx.mon_addrv + base_ip = args.mon_ip + addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip) + check_ip_port(args.mon_ip, 3300) + check_ip_port(args.mon_ip, 6789) + elif args.mon_addrv: + addr_arg = args.mon_addrv if addr_arg[0] != '[' or addr_arg[-1] != ']': raise Error('--mon-addrv value %s must use square backets' % addr_arg) @@ -3189,16 +2873,16 @@ def prepare_mon_addresses( # strip off v1: or v2: prefix addr = re.sub(r'^\w+:', '', addr) base_ip = addr[0:-(len(str(port)))-1] - check_ip_port(ctx, base_ip, port) + check_ip_port(base_ip, port) else: raise Error('must specify --mon-ip or --mon-addrv') logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg)) mon_network = None - if not ctx.skip_mon_network: + if not args.skip_mon_network: # make sure IP is configured locally, and then figure out the # CIDR network - for net, ips in list_networks(ctx).items(): + for net, ips in list_networks().items(): if ipaddress.ip_address(unicode(unwrap_ipv6(base_ip))) in \ [ipaddress.ip_address(unicode(ip)) for ip in ips]: mon_network = net @@ -3209,34 +2893,40 @@ def prepare_mon_addresses( raise Error('Failed to infer CIDR network for mon ip %s; pass ' '--skip-mon-network to configure it later' % base_ip) - return (addr_arg, ipv6, mon_network) + # config + cp = read_config(args.config) + if not cp.has_section('global'): + cp.add_section('global') + cp.set('global', 'fsid', fsid); + cp.set('global', 'mon host', addr_arg) + cp.set('global', 'container_image', args.image) + cpf = StringIO() + cp.write(cpf) + config = cpf.getvalue() + if args.registry_json or args.registry_url: + command_registry_login() -def create_initial_keys( - ctx: CephadmContext, - uid: int, gid: int, - mgr_id: str -) -> Tuple[str, str, str, Any, Any]: # type: ignore + if not args.skip_pull: + _pull_image(args.image) - _image = ctx.image + logger.info('Extracting ceph user uid/gid from container image...') + (uid, gid) = extract_uid_gid() # create some initial keys logger.info('Creating initial keys...') mon_key = CephContainer( - ctx, - image=_image, + image=args.image, entrypoint='/usr/bin/ceph-authtool', args=['--gen-print-key'], ).run().strip() admin_key = CephContainer( - ctx, - image=_image, + image=args.image, entrypoint='/usr/bin/ceph-authtool', args=['--gen-print-key'], ).run().strip() mgr_key = CephContainer( - ctx, - image=_image, + image=args.image, entrypoint='/usr/bin/ceph-authtool', args=['--gen-print-key'], ).run().strip() @@ -3257,59 +2947,36 @@ def create_initial_keys( '\tcaps osd = allow *\n' % (mon_key, admin_key, mgr_id, mgr_key)) - admin_keyring = write_tmp('[client.admin]\n' - '\tkey = ' + admin_key + '\n', - uid, gid) - # tmp keyring file - bootstrap_keyring = write_tmp(keyring, uid, gid) - return (mon_key, mgr_key, admin_key, - bootstrap_keyring, admin_keyring) + tmp_bootstrap_keyring = write_tmp(keyring, uid, gid) - -def create_initial_monmap( - ctx: CephadmContext, - uid: int, gid: int, - fsid: str, - mon_id: str, mon_addr: str -) -> Any: + # create initial monmap, tmp monmap file logger.info('Creating initial monmap...') - monmap = write_tmp('', 0, 0) + tmp_monmap = write_tmp('', 0, 0) out = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='/usr/bin/monmaptool', args=['--create', '--clobber', '--fsid', fsid, - '--addv', mon_id, mon_addr, + '--addv', mon_id, addr_arg, '/tmp/monmap' ], volume_mounts={ - monmap.name: '/tmp/monmap:z', + tmp_monmap.name: '/tmp/monmap:z', }, ).run() - logger.debug(f"monmaptool for {mon_id} {mon_addr} on {out}") # pass monmap file to ceph user for use by ceph-mon --mkfs below - os.fchown(monmap.fileno(), uid, gid) - return monmap - + os.fchown(tmp_monmap.fileno(), uid, gid) -def prepare_create_mon( - ctx: CephadmContext, - uid: int, gid: int, - fsid: str, mon_id: str, - bootstrap_keyring_path: str, - monmap_path: str -): + # create mon logger.info('Creating mon...') - create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid) - mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id) - log_dir = get_log_dir(fsid, ctx.log_dir) + create_daemon_dirs(fsid, 'mon', mon_id, uid, gid) + mon_dir = get_data_dir(fsid, 'mon', mon_id) + log_dir = get_log_dir(fsid) out = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='/usr/bin/ceph-mon', args=['--mkfs', '-i', mon_id, @@ -3317,263 +2984,74 @@ def prepare_create_mon( '-c', '/dev/null', '--monmap', '/tmp/monmap', '--keyring', '/tmp/keyring', - ] + get_daemon_args(ctx, fsid, 'mon', mon_id), + ] + get_daemon_args(fsid, 'mon', mon_id), volume_mounts={ log_dir: '/var/log/ceph:z', mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id), - bootstrap_keyring_path: '/tmp/keyring:z', - monmap_path: '/tmp/monmap:z', + tmp_bootstrap_keyring.name: '/tmp/keyring:z', + tmp_monmap.name: '/tmp/monmap:z', }, ).run() - logger.debug(f"create mon.{mon_id} on {out}") - return (mon_dir, log_dir) + with open(mon_dir + '/config', 'w') as f: + os.fchown(f.fileno(), uid, gid) + os.fchmod(f.fileno(), 0o600) + f.write(config) -def create_mon( - ctx: CephadmContext, - uid: int, gid: int, - fsid: str, mon_id: str -) -> None: - mon_c = get_container(ctx, fsid, 'mon', mon_id) - deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid, + make_var_run(fsid, uid, gid) + mon_c = get_container(fsid, 'mon', mon_id) + deploy_daemon(fsid, 'mon', mon_id, mon_c, uid, gid, config=None, keyring=None) + # client.admin key + config to issue various CLI commands + tmp_admin_keyring = write_tmp('[client.admin]\n' + '\tkey = ' + admin_key + '\n', + uid, gid) + tmp_config = write_tmp(config, uid, gid) + + # a CLI helper to reduce our typing + def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT): + # type: (List[str], Dict[str, str], Optional[int]) -> str + mounts = { + log_dir: '/var/log/ceph:z', + tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z', + tmp_config.name: '/etc/ceph/ceph.conf:z', + } + for k, v in extra_mounts.items(): + mounts[k] = v + timeout = timeout or args.timeout + return CephContainer( + image=args.image, + entrypoint='/usr/bin/ceph', + args=cmd, + volume_mounts=mounts, + ).run(timeout=timeout) -def wait_for_mon( - ctx: CephadmContext, - mon_id: str, mon_dir: str, - admin_keyring_path: str, config_path: str -): logger.info('Waiting for mon to start...') c = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='/usr/bin/ceph', args=[ 'status'], volume_mounts={ mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id), - admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z', - config_path: '/etc/ceph/ceph.conf:z', + tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z', + tmp_config.name: '/etc/ceph/ceph.conf:z', }, ) # wait for the service to become available def is_mon_available(): # type: () -> bool - timeout=ctx.timeout if ctx.timeout else 60 # seconds - out, err, ret = call(ctx, c.run_cmd(), + timeout=args.timeout if args.timeout else 60 # seconds + out, err, ret = call(c.run_cmd(), desc=c.entrypoint, timeout=timeout) return ret == 0 + is_available('mon', is_mon_available) - is_available(ctx, 'mon', is_mon_available) - - -def create_mgr( - ctx: CephadmContext, - uid: int, gid: int, - fsid: str, mgr_id: str, mgr_key: str, - config: str, clifunc: Callable -) -> None: - logger.info('Creating mgr...') - mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key) - mgr_c = get_container(ctx, fsid, 'mgr', mgr_id) - # Note:the default port used by the Prometheus node exporter is opened in fw - deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid, - config=config, keyring=mgr_keyring, ports=[9283]) - - # wait for the service to become available - logger.info('Waiting for mgr to start...') - def is_mgr_available(): - # type: () -> bool - timeout=ctx.timeout if ctx.timeout else 60 # seconds - try: - out = clifunc(['status', '-f', 'json-pretty'], timeout=timeout) - j = json.loads(out) - return j.get('mgrmap', {}).get('available', False) - except Exception as e: - logger.debug('status failed: %s' % e) - return False - is_available(ctx, 'mgr', is_mgr_available) - - -def prepare_ssh( - ctx: CephadmContext, - cli: Callable, wait_for_mgr_restart: Callable -) -> None: - - cli(['config-key', 'set', 'mgr/cephadm/ssh_user', ctx.ssh_user]) - - logger.info('Enabling cephadm module...') - cli(['mgr', 'module', 'enable', 'cephadm']) - wait_for_mgr_restart() - - logger.info('Setting orchestrator backend to cephadm...') - cli(['orch', 'set', 'backend', 'cephadm']) - - if ctx.ssh_config: - logger.info('Using provided ssh config...') - mounts = { - pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z', - } - cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts) - - if ctx.ssh_private_key and ctx.ssh_public_key: - logger.info('Using provided ssh keys...') - mounts = { - pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z', - pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z' - } - cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts) - cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts) - else: - logger.info('Generating ssh key...') - cli(['cephadm', 'generate-key']) - ssh_pub = cli(['cephadm', 'get-pub-key']) - - with open(ctx.output_pub_ssh_key, 'w') as f: - f.write(ssh_pub) - logger.info('Wrote public SSH key to to %s' % ctx.output_pub_ssh_key) - - logger.info('Adding key to %s@localhost\'s authorized_keys...' % ctx.ssh_user) - try: - s_pwd = pwd.getpwnam(ctx.ssh_user) - except KeyError as e: - raise Error('Cannot find uid/gid for ssh-user: %s' % (ctx.ssh_user)) - ssh_uid = s_pwd.pw_uid - ssh_gid = s_pwd.pw_gid - ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh') - - if not os.path.exists(ssh_dir): - makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700) - - auth_keys_file = '%s/authorized_keys' % ssh_dir - add_newline = False - - if os.path.exists(auth_keys_file): - with open(auth_keys_file, 'r') as f: - f.seek(0, os.SEEK_END) - if f.tell() > 0: - f.seek(f.tell()-1, os.SEEK_SET) # go to last char - if f.read() != '\n': - add_newline = True - - with open(auth_keys_file, 'a') as f: - os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it - os.fchmod(f.fileno(), 0o600) # just in case we created it - if add_newline: - f.write('\n') - f.write(ssh_pub.strip() + '\n') - - host = get_hostname() - logger.info('Adding host %s...' % host) - try: - cli(['orch', 'host', 'add', host]) - except RuntimeError as e: - raise Error('Failed to add host <%s>: %s' % (host, e)) - - if not ctx.orphan_initial_daemons: - for t in ['mon', 'mgr', 'crash']: - logger.info('Deploying %s service with default placement...' % t) - cli(['orch', 'apply', t]) - - if not ctx.skip_monitoring_stack: - logger.info('Enabling mgr prometheus module...') - cli(['mgr', 'module', 'enable', 'prometheus']) - for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']: - logger.info('Deploying %s service with default placement...' % t) - cli(['orch', 'apply', t]) - - -def prepare_dashboard( - ctx: CephadmContext, - uid: int, gid: int, - cli: Callable, wait_for_mgr_restart: Callable -) -> None: - - # Configure SSL port (cephadm only allows to configure dashboard SSL port) - # if the user does not want to use SSL he can change this setting once the cluster is up - cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port" , str(ctx.ssl_dashboard_port)]) - - # configuring dashboard parameters - logger.info('Enabling the dashboard module...') - cli(['mgr', 'module', 'enable', 'dashboard']) - wait_for_mgr_restart() - - # dashboard crt and key - if ctx.dashboard_key and ctx.dashboard_crt: - logger.info('Using provided dashboard certificate...') - mounts = { - pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z', - pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z' - } - cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts) - cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts) - else: - logger.info('Generating a dashboard self-signed certificate...') - cli(['dashboard', 'create-self-signed-cert']) - - logger.info('Creating initial admin user...') - password = ctx.initial_dashboard_password or generate_password() - tmp_password_file = write_tmp(password, uid, gid) - cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password'] - if not ctx.dashboard_password_noupdate: - cmd.append('--pwd-update-required') - cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'}) - logger.info('Fetching dashboard port number...') - out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port']) - port = int(out) - - # Open dashboard port - fw = Firewalld(ctx) - fw.open_ports([port]) - fw.apply_rules() - - logger.info('Ceph Dashboard is now available at:\n\n' - '\t URL: https://%s:%s/\n' - '\t User: %s\n' - '\tPassword: %s\n' % ( - get_fqdn(), port, - ctx.initial_dashboard_user, - password)) - - -def prepare_bootstrap_config( - ctx: CephadmContext, - fsid: str, mon_addr: str, image: str - -) -> str: - - cp = read_config(ctx.config) - if not cp.has_section('global'): - cp.add_section('global') - cp.set('global', 'fsid', fsid) - cp.set('global', 'mon host', mon_addr) - cp.set('global', 'container_image', image) - cpf = StringIO() - cp.write(cpf) - config = cpf.getvalue() - - if ctx.registry_json or ctx.registry_url: - command_registry_login(ctx) - - if not ctx.skip_pull: - _pull_image(ctx, image) - - return config - - -def finish_bootstrap_config( - ctx: CephadmContext, - fsid: str, - config: str, - mon_id: str, mon_dir: str, - mon_network: Optional[str], ipv6: bool, - cli: Callable - -) -> None: - if not ctx.no_minimize_config: + # assimilate and minimize config + if not args.no_minimize_config: logger.info('Assimilating anything we can from ceph.conf...') cli([ 'config', 'assimilate-conf', @@ -3592,7 +3070,7 @@ def finish_bootstrap_config( with open(mon_dir + '/config', 'r') as f: config = f.read() logger.info('Restarting the monitor...') - call_throws(ctx, [ + call_throws([ 'systemctl', 'restart', get_unit_name(fsid, 'mon', mon_id) @@ -3606,123 +3084,38 @@ def finish_bootstrap_config( logger.info('Enabling IPv6 (ms_bind_ipv6)') cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true']) - - with open(ctx.output_config, 'w') as f: - f.write(config) - logger.info('Wrote config to %s' % ctx.output_config) - pass - - -@default_image -def command_bootstrap(ctx): - # type: (CephadmContext) -> int - - host: Optional[str] = None - - if not ctx.output_config: - ctx.output_config = os.path.join(ctx.output_dir, 'ceph.conf') - if not ctx.output_keyring: - ctx.output_keyring = os.path.join(ctx.output_dir, - 'ceph.client.admin.keyring') - if not ctx.output_pub_ssh_key: - ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, 'ceph.pub') - - # verify output files - for f in [ctx.output_config, ctx.output_keyring, - ctx.output_pub_ssh_key]: - if not ctx.allow_overwrite: - if os.path.exists(f): - raise Error('%s already exists; delete or pass ' - '--allow-overwrite to overwrite' % f) - dirname = os.path.dirname(f) - if dirname and not os.path.exists(dirname): - fname = os.path.basename(f) - logger.info(f"Creating directory {dirname} for {fname}") - try: - # use makedirs to create intermediate missing dirs - os.makedirs(dirname, 0o755) - except PermissionError: - raise Error(f"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.") - - - if not ctx.skip_prepare_host: - command_prepare_host(ctx) - else: - logger.info('Skip prepare_host') - - # initial vars - fsid = ctx.fsid or make_fsid() - hostname = get_hostname() - if '.' in hostname and not ctx.allow_fqdn_hostname: - raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0])) - mon_id = ctx.mon_id or hostname - mgr_id = ctx.mgr_id or generate_service_id() - logger.info('Cluster fsid: %s' % fsid) - - l = FileLock(ctx, fsid) - l.acquire() - - (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx) - config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image) - - logger.info('Extracting ceph user uid/gid from container image...') - (uid, gid) = extract_uid_gid(ctx) - - # create some initial keys - (mon_key, mgr_key, admin_key, - bootstrap_keyring, admin_keyring - ) = \ - create_initial_keys(ctx, uid, gid, mgr_id) - - monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg) - (mon_dir, log_dir) = \ - prepare_create_mon(ctx, uid, gid, fsid, mon_id, - bootstrap_keyring.name, monmap.name) - - with open(mon_dir + '/config', 'w') as f: - os.fchown(f.fileno(), uid, gid) - os.fchmod(f.fileno(), 0o600) - f.write(config) - - make_var_run(ctx, fsid, uid, gid) - create_mon(ctx, uid, gid, fsid, mon_id) - - # config to issue various CLI commands - tmp_config = write_tmp(config, uid, gid) - - # a CLI helper to reduce our typing - def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT): - # type: (List[str], Dict[str, str], Optional[int]) -> str - mounts = { - log_dir: '/var/log/ceph:z', - admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z', - tmp_config.name: '/etc/ceph/ceph.conf:z', - } - for k, v in extra_mounts.items(): - mounts[k] = v - timeout = timeout or ctx.timeout - return CephContainer( - ctx, - image=ctx.image, - entrypoint='/usr/bin/ceph', - args=cmd, - volume_mounts=mounts, - ).run(timeout=timeout) - - wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name) - - finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir, - mon_network, ipv6, cli) + # create mgr + logger.info('Creating mgr...') + mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key) + mgr_c = get_container(fsid, 'mgr', mgr_id) + # Note:the default port used by the Prometheus node exporter is opened in fw + deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid, + config=config, keyring=mgr_keyring, ports=[9283]) # output files - with open(ctx.output_keyring, 'w') as f: + with open(args.output_keyring, 'w') as f: os.fchmod(f.fileno(), 0o600) f.write('[client.admin]\n' '\tkey = ' + admin_key + '\n') - logger.info('Wrote keyring to %s' % ctx.output_keyring) + logger.info('Wrote keyring to %s' % args.output_keyring) - # create mgr - create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli) + with open(args.output_config, 'w') as f: + f.write(config) + logger.info('Wrote config to %s' % args.output_config) + + # wait for the service to become available + logger.info('Waiting for mgr to start...') + def is_mgr_available(): + # type: () -> bool + timeout=args.timeout if args.timeout else 60 # seconds + try: + out = cli(['status', '-f', 'json-pretty'], timeout=timeout) + j = json.loads(out) + return j.get('mgrmap', {}).get('available', False) + except Exception as e: + logger.debug('status failed: %s' % e) + return False + is_available('mgr', is_mgr_available) # wait for mgr to restart (after enabling a module) def wait_for_mgr_restart(): @@ -3741,50 +3134,150 @@ def command_bootstrap(ctx): except Exception as e: logger.debug('tell mgr mgr_status failed: %s' % e) return False - is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch) + is_available('Mgr epoch %d' % epoch, mgr_has_latest_epoch) # ssh - host = None - if not ctx.skip_ssh: - prepare_ssh(ctx, cli, wait_for_mgr_restart) - - if ctx.registry_url and ctx.registry_username and ctx.registry_password: - cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', ctx.registry_url, '--force']) - cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', ctx.registry_username, '--force']) - cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', ctx.registry_password, '--force']) - - if ctx.container_init: - cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force']) - - if ctx.with_exporter: - cli(['config-key', 'set', 'mgr/cephadm/exporter_enabled', 'true']) - if ctx.exporter_config: - logger.info("Applying custom cephadm exporter settings") - # validated within the parser, so we can just apply to the store - with tempfile.NamedTemporaryFile(buffering=0) as tmp: - tmp.write(json.dumps(ctx.exporter_config).encode('utf-8')) - mounts = { - tmp.name: "/tmp/exporter-config.json:z" - } - cli(["cephadm", "set-exporter-config", "-i", "/tmp/exporter-config.json"], extra_mounts=mounts) - logger.info("-> Use ceph orch apply cephadm-exporter to deploy") + if not args.skip_ssh: + cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args.ssh_user]) + + logger.info('Enabling cephadm module...') + cli(['mgr', 'module', 'enable', 'cephadm']) + wait_for_mgr_restart() + + logger.info('Setting orchestrator backend to cephadm...') + cli(['orch', 'set', 'backend', 'cephadm']) + + if args.ssh_config: + logger.info('Using provided ssh config...') + mounts = { + pathify(args.ssh_config.name): '/tmp/cephadm-ssh-config:z', + } + cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts) + + if args.ssh_private_key and args.ssh_public_key: + logger.info('Using provided ssh keys...') + mounts = { + pathify(args.ssh_private_key.name): '/tmp/cephadm-ssh-key:z', + pathify(args.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z' + } + cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts) + cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts) else: - # generate a default SSL configuration for the exporter(s) - logger.info("Generating a default cephadm exporter configuration (self-signed)") - cli(['cephadm', 'generate-exporter-config']) - # - # deploy the service (commented out until the cephadm changes are in the ceph container build) - logger.info('Deploying cephadm exporter service with default placement...') - cli(['orch', 'apply', 'cephadm-exporter']) + logger.info('Generating ssh key...') + cli(['cephadm', 'generate-key']) + ssh_pub = cli(['cephadm', 'get-pub-key']) + with open(args.output_pub_ssh_key, 'w') as f: + f.write(ssh_pub) + logger.info('Wrote public SSH key to to %s' % args.output_pub_ssh_key) - if not ctx.skip_dashboard: - prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart) + logger.info('Adding key to %s@localhost\'s authorized_keys...' % args.ssh_user) + try: + s_pwd = pwd.getpwnam(args.ssh_user) + except KeyError as e: + raise Error('Cannot find uid/gid for ssh-user: %s' % (args.ssh_user)) + ssh_uid = s_pwd.pw_uid + ssh_gid = s_pwd.pw_gid + ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh') + + if not os.path.exists(ssh_dir): + makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700) + + auth_keys_file = '%s/authorized_keys' % ssh_dir + add_newline = False + + if os.path.exists(auth_keys_file): + with open(auth_keys_file, 'r') as f: + f.seek(0, os.SEEK_END) + if f.tell() > 0: + f.seek(f.tell()-1, os.SEEK_SET) # go to last char + if f.read() != '\n': + add_newline = True + + with open(auth_keys_file, 'a') as f: + os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it + os.fchmod(f.fileno(), 0o600) # just in case we created it + if add_newline: + f.write('\n') + f.write(ssh_pub.strip() + '\n') + + host = get_hostname() + logger.info('Adding host %s...' % host) + try: + cli(['orch', 'host', 'add', host]) + except RuntimeError as e: + raise Error('Failed to add host <%s>: %s' % (host, e)) + + if not args.orphan_initial_daemons: + for t in ['mon', 'mgr', 'crash']: + logger.info('Deploying %s service with default placement...' % t) + cli(['orch', 'apply', t]) + + if not args.skip_monitoring_stack: + logger.info('Enabling mgr prometheus module...') + cli(['mgr', 'module', 'enable', 'prometheus']) + for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']: + logger.info('Deploying %s service with default placement...' % t) + cli(['orch', 'apply', t]) + + if args.registry_url and args.registry_username and args.registry_password: + cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', args.registry_url, '--force']) + cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', args.registry_username, '--force']) + cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', args.registry_password, '--force']) + + if args.container_init: + cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(args.container_init), '--force']) + + if not args.skip_dashboard: + # Configure SSL port (cephadm only allows to configure dashboard SSL port) + # if the user does not want to use SSL he can change this setting once the cluster is up + cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port" , str(args.ssl_dashboard_port)]) + + # configuring dashboard parameters + logger.info('Enabling the dashboard module...') + cli(['mgr', 'module', 'enable', 'dashboard']) + wait_for_mgr_restart() + + # dashboard crt and key + if args.dashboard_key and args.dashboard_crt: + logger.info('Using provided dashboard certificate...') + mounts = { + pathify(args.dashboard_crt.name): '/tmp/dashboard.crt:z', + pathify(args.dashboard_key.name): '/tmp/dashboard.key:z' + } + cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts) + cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts) + else: + logger.info('Generating a dashboard self-signed certificate...') + cli(['dashboard', 'create-self-signed-cert']) + + logger.info('Creating initial admin user...') + password = args.initial_dashboard_password or generate_password() + cmd = ['dashboard', 'ac-user-create', args.initial_dashboard_user, password, 'administrator', '--force-password'] + if not args.dashboard_password_noupdate: + cmd.append('--pwd-update-required') + cli(cmd) + logger.info('Fetching dashboard port number...') + out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port']) + port = int(out) + + # Open dashboard port + fw = Firewalld() + fw.open_ports([port]) + fw.apply_rules() + + logger.info('Ceph Dashboard is now available at:\n\n' + '\t URL: https://%s:%s/\n' + '\t User: %s\n' + '\tPassword: %s\n' % ( + get_fqdn(), port, + args.initial_dashboard_user, + password)) - if ctx.apply_spec: - logger.info('Applying %s to cluster' % ctx.apply_spec) + if args.apply_spec: + logger.info('Applying %s to cluster' % args.apply_spec) - with open(ctx.apply_spec) as f: + with open(args.apply_spec) as f: for line in f: if 'hostname:' in line: line = line.replace('\n', '') @@ -3793,12 +3286,12 @@ def command_bootstrap(ctx): logger.info('Adding ssh key to %s' % split[1]) ssh_key = '/etc/ceph/ceph.pub' - if ctx.ssh_public_key: - ssh_key = ctx.ssh_public_key.name - out, err, code = call_throws(ctx, ['ssh-copy-id', '-f', '-i', ssh_key, '%s@%s' % (ctx.ssh_user, split[1])]) + if args.ssh_public_key: + ssh_key = args.ssh_public_key.name + out, err, code = call_throws(['ssh-copy-id', '-f', '-i', ssh_key, '%s@%s' % (args.ssh_user, split[1])]) mounts = {} - mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:z' + mounts[pathify(args.apply_spec)] = '/tmp/spec.yml:z' out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts) logger.info(out) @@ -3807,8 +3300,8 @@ def command_bootstrap(ctx): '\tsudo %s shell --fsid %s -c %s -k %s\n' % ( sys.argv[0], fsid, - ctx.output_config, - ctx.output_keyring)) + args.output_config, + args.output_keyring)) logger.info('Please consider enabling telemetry to help improve Ceph:\n\n' '\tceph telemetry on\n\n' 'For more information see:\n\n' @@ -3818,15 +3311,15 @@ def command_bootstrap(ctx): ################################## -def command_registry_login(ctx: CephadmContext): - if ctx.registry_json: - logger.info("Pulling custom registry login info from %s." % ctx.registry_json) - d = get_parm(ctx.registry_json) +def command_registry_login(): + if args.registry_json: + logger.info("Pulling custom registry login info from %s." % args.registry_json) + d = get_parm(args.registry_json) if d.get('url') and d.get('username') and d.get('password'): - ctx.registry_url = d.get('url') - ctx.registry_username = d.get('username') - ctx.registry_password = d.get('password') - registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password) + args.registry_url = d.get('url') + args.registry_username = d.get('username') + args.registry_password = d.get('password') + registry_login(args.registry_url, args.registry_username, args.registry_password) else: raise Error("json provided for custom registry login did not include all necessary fields. " "Please setup json file as\n" @@ -3835,98 +3328,93 @@ def command_registry_login(ctx: CephadmContext): " \"username\": \"REGISTRY_USERNAME\",\n" " \"password\": \"REGISTRY_PASSWORD\"\n" "}\n") - elif ctx.registry_url and ctx.registry_username and ctx.registry_password: - registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password) + elif args.registry_url and args.registry_username and args.registry_password: + registry_login(args.registry_url, args.registry_username, args.registry_password) else: raise Error("Invalid custom registry arguments received. To login to a custom registry include " "--registry-url, --registry-username and --registry-password " "options or --registry-json option") return 0 -def registry_login(ctx: CephadmContext, url, username, password): +def registry_login(url, username, password): logger.info("Logging into custom registry.") try: - container_path = ctx.container_path - cmd = [container_path, 'login', - '-u', username, '-p', password, - url] - if 'podman' in container_path: - cmd.append('--authfile=/etc/ceph/podman-auth.json') - out, _, _ = call_throws(ctx, cmd) - if 'podman' in container_path: - os.chmod('/etc/ceph/podman-auth.json', 0o600) + out, _, _ = call_throws([container_path, 'login', + '-u', username, + '-p', password, + url]) except: - raise Error("Failed to login to custom registry @ %s as %s with given password" % (ctx.registry_url, ctx.registry_username)) + raise Error("Failed to login to custom registry @ %s as %s with given password" % (args.registry_url, args.registry_username)) ################################## -def extract_uid_gid_monitoring(ctx, daemon_type): - # type: (CephadmContext, str) -> Tuple[int, int] +def extract_uid_gid_monitoring(daemon_type): + # type: (str) -> Tuple[int, int] if daemon_type == 'prometheus': - uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus') + uid, gid = extract_uid_gid(file_path='/etc/prometheus') elif daemon_type == 'node-exporter': uid, gid = 65534, 65534 elif daemon_type == 'grafana': - uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana') + uid, gid = extract_uid_gid(file_path='/var/lib/grafana') elif daemon_type == 'alertmanager': - uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus']) + uid, gid = extract_uid_gid(file_path=['/etc/alertmanager', '/etc/prometheus']) else: raise Error("{} not implemented yet".format(daemon_type)) return uid, gid @default_image -def command_deploy(ctx): - # type: (CephadmContext) -> None - daemon_type, daemon_id = ctx.name.split('.', 1) +def command_deploy(): + # type: () -> None + daemon_type, daemon_id = args.name.split('.', 1) - l = FileLock(ctx, ctx.fsid) + l = FileLock(args.fsid) l.acquire() if daemon_type not in get_supported_daemons(): raise Error('daemon type %s not recognized' % daemon_type) redeploy = False - unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id) - (_, state, _) = check_unit(ctx, unit_name) + unit_name = get_unit_name(args.fsid, daemon_type, daemon_id) + (_, state, _) = check_unit(unit_name) if state == 'running': redeploy = True - if ctx.reconfig: - logger.info('%s daemon %s ...' % ('Reconfig', ctx.name)) + if args.reconfig: + logger.info('%s daemon %s ...' % ('Reconfig', args.name)) elif redeploy: - logger.info('%s daemon %s ...' % ('Redeploy', ctx.name)) + logger.info('%s daemon %s ...' % ('Redeploy', args.name)) else: - logger.info('%s daemon %s ...' % ('Deploy', ctx.name)) + logger.info('%s daemon %s ...' % ('Deploy', args.name)) # Get and check ports explicitly required to be opened daemon_ports = [] # type: List[int] - if ctx.tcp_ports: - daemon_ports = list(map(int, ctx.tcp_ports.split())) + if args.tcp_ports: + daemon_ports = list(map(int, args.tcp_ports.split())) if daemon_type in Ceph.daemons: - config, keyring = get_config_and_keyring(ctx) - uid, gid = extract_uid_gid(ctx) - make_var_run(ctx, ctx.fsid, uid, gid) + config, keyring = get_config_and_keyring() + uid, gid = extract_uid_gid() + make_var_run(args.fsid, uid, gid) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id, - ptrace=ctx.allow_ptrace) - deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + c = get_container(args.fsid, daemon_type, daemon_id, + ptrace=args.allow_ptrace) + deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid, config=config, keyring=keyring, - osd_fsid=ctx.osd_fsid, - reconfig=ctx.reconfig, + osd_fsid=args.osd_fsid, + reconfig=args.reconfig, ports=daemon_ports) elif daemon_type in Monitoring.components: # monitoring daemon - prometheus, grafana, alertmanager, node-exporter # Default Checks - if not ctx.reconfig and not redeploy: + if not args.reconfig and not redeploy: daemon_ports.extend(Monitoring.port_map[daemon_type]) # make sure provided config-json is sufficient - config = get_parm(ctx.config_json) # type: ignore + config = get_parm(args.config_json) # type: ignore required_files = Monitoring.components[daemon_type].get('config-json-files', list()) required_args = Monitoring.components[daemon_type].get('config-json-args', list()) if required_files: @@ -3938,76 +3426,46 @@ def command_deploy(ctx): raise Error("{} deployment requires config-json which must " "contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args))) - uid, gid = extract_uid_gid_monitoring(ctx, daemon_type) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) - deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, - reconfig=ctx.reconfig, + uid, gid = extract_uid_gid_monitoring(daemon_type) + c = get_container(args.fsid, daemon_type, daemon_id) + deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid, + reconfig=args.reconfig, ports=daemon_ports) elif daemon_type == NFSGanesha.daemon_type: - if not ctx.reconfig and not redeploy: + if not args.reconfig and not redeploy: daemon_ports.extend(NFSGanesha.port_map.values()) - config, keyring = get_config_and_keyring(ctx) + config, keyring = get_config_and_keyring() # TODO: extract ganesha uid/gid (997, 994) ? - uid, gid = extract_uid_gid(ctx) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) - deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + uid, gid = extract_uid_gid() + c = get_container(args.fsid, daemon_type, daemon_id) + deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid, config=config, keyring=keyring, - reconfig=ctx.reconfig, + reconfig=args.reconfig, ports=daemon_ports) elif daemon_type == CephIscsi.daemon_type: - config, keyring = get_config_and_keyring(ctx) - uid, gid = extract_uid_gid(ctx) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) - deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, + config, keyring = get_config_and_keyring() + uid, gid = extract_uid_gid() + c = get_container(args.fsid, daemon_type, daemon_id) + deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid, config=config, keyring=keyring, - reconfig=ctx.reconfig, - ports=daemon_ports) - - elif daemon_type == HAproxy.daemon_type: - haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id) - uid, gid = haproxy.extract_uid_gid_haproxy() - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) - deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, - reconfig=ctx.reconfig, - ports=daemon_ports) - - elif daemon_type == Keepalived.daemon_type: - keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id) - uid, gid = keepalived.extract_uid_gid_keepalived() - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) - deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, - reconfig=ctx.reconfig, + reconfig=args.reconfig, ports=daemon_ports) elif daemon_type == CustomContainer.daemon_type: - cc = CustomContainer.init(ctx, ctx.fsid, daemon_id) - if not ctx.reconfig and not redeploy: + cc = CustomContainer.init(args.fsid, daemon_id) + if not args.reconfig and not redeploy: daemon_ports.extend(cc.ports) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id, + c = get_container(args.fsid, daemon_type, daemon_id, privileged=cc.privileged, - ptrace=ctx.allow_ptrace) - deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, + ptrace=args.allow_ptrace) + deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid=cc.uid, gid=cc.gid, config=None, - keyring=None, reconfig=ctx.reconfig, + keyring=None, reconfig=args.reconfig, ports=daemon_ports) - elif daemon_type == CephadmDaemon.daemon_type: - # get current user gid and uid - uid = os.getuid() - gid = os.getgid() - config_js = get_parm(ctx.config_json) # type: Dict[str, str] - if not daemon_ports: - logger.info("cephadm-exporter will use default port ({})".format(CephadmDaemon.default_port)) - daemon_ports =[CephadmDaemon.default_port] - - CephadmDaemon.validate_config(config_js) - - deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None, - uid, gid, ports=daemon_ports) - else: raise Error('daemon type {} not implemented in command_deploy function' .format(daemon_type)) @@ -4016,12 +3474,12 @@ def command_deploy(ctx): @infer_image -def command_run(ctx): - # type: (CephadmContext) -> int - (daemon_type, daemon_id) = ctx.name.split('.', 1) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) +def command_run(): + # type: () -> int + (daemon_type, daemon_id) = args.name.split('.', 1) + c = get_container(args.fsid, daemon_type, daemon_id) command = c.run_cmd() - return call_timeout(ctx, command, ctx.timeout) + return call_timeout(command, args.timeout) ################################## @@ -4029,39 +3487,39 @@ def command_run(ctx): @infer_fsid @infer_config @infer_image -def command_shell(ctx): - # type: (CephadmContext) -> int - if ctx.fsid: - make_log_dir(ctx, ctx.fsid) - if ctx.name: - if '.' in ctx.name: - (daemon_type, daemon_id) = ctx.name.split('.', 1) +def command_shell(): + # type: () -> int + if args.fsid: + make_log_dir(args.fsid) + if args.name: + if '.' in args.name: + (daemon_type, daemon_id) = args.name.split('.', 1) else: - daemon_type = ctx.name + daemon_type = args.name daemon_id = None else: daemon_type = 'osd' # get the most mounts daemon_id = None - if daemon_id and not ctx.fsid: + if daemon_id and not args.fsid: raise Error('must pass --fsid to specify cluster') # use /etc/ceph files by default, if present. we do this instead of # making these defaults in the arg parser because we don't want an error # if they don't exist. - if not ctx.keyring and os.path.exists(SHELL_DEFAULT_KEYRING): - ctx.keyring = SHELL_DEFAULT_KEYRING + if not args.keyring and os.path.exists(SHELL_DEFAULT_KEYRING): + args.keyring = SHELL_DEFAULT_KEYRING container_args = [] # type: List[str] - mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id, - no_config=True if ctx.config else False) - binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id) - if ctx.config: - mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z' - if ctx.keyring: - mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z' - if ctx.mount: - for _mount in ctx.mount: + mounts = get_container_mounts(args.fsid, daemon_type, daemon_id, + no_config=True if args.config else False) + binds = get_container_binds(args.fsid, daemon_type, daemon_id) + if args.config: + mounts[pathify(args.config)] = '/etc/ceph/ceph.conf:z' + if args.keyring: + mounts[pathify(args.keyring)] = '/etc/ceph/ceph.keyring:z' + if args.mount: + for _mount in args.mount: split_src_dst = _mount.split(':') mount = pathify(split_src_dst[0]) filename = os.path.basename(split_src_dst[0]) @@ -4070,8 +3528,8 @@ def command_shell(ctx): mounts[mount] = dst else: mounts[mount] = '/mnt/{}:z'.format(filename) - if ctx.command: - command = ctx.command + if args.command: + command = args.command else: command = ['bash'] container_args += [ @@ -4079,8 +3537,8 @@ def command_shell(ctx): '-e', 'LANG=C', '-e', "PS1=%s" % CUSTOM_PS1, ] - if ctx.fsid: - home = os.path.join(ctx.data_dir, ctx.fsid, 'home') + if args.fsid: + home = os.path.join(args.data_dir, args.fsid, 'home') if not os.path.exists(home): logger.debug('Creating root home at %s' % home) makedirs(home, 0, 0, 0o660) @@ -4092,31 +3550,30 @@ def command_shell(ctx): mounts[home] = '/root' c = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='doesnotmatter', args=[], container_args=container_args, volume_mounts=mounts, bind_mounts=binds, - envs=ctx.env, + envs=args.env, privileged=True) command = c.shell_cmd(command) - return call_timeout(ctx, command, ctx.timeout) + return call_timeout(command, args.timeout) ################################## @infer_fsid -def command_enter(ctx): - # type: (CephadmContext) -> int - if not ctx.fsid: +def command_enter(): + # type: () -> int + if not args.fsid: raise Error('must pass --fsid to specify cluster') - (daemon_type, daemon_id) = ctx.name.split('.', 1) + (daemon_type, daemon_id) = args.name.split('.', 1) container_args = [] # type: List[str] - if ctx.command: - command = ctx.command + if args.command: + command = args.command else: command = ['sh'] container_args += [ @@ -4125,35 +3582,34 @@ def command_enter(ctx): '-e', "PS1=%s" % CUSTOM_PS1, ] c = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='doesnotmatter', container_args=container_args, - cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id), + cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id), ) command = c.exec_cmd(command) - return call_timeout(ctx, command, ctx.timeout) + return call_timeout(command, args.timeout) ################################## @infer_fsid @infer_image -def command_ceph_volume(ctx): - # type: (CephadmContext) -> None - if ctx.fsid: - make_log_dir(ctx, ctx.fsid) +def command_ceph_volume(): + # type: () -> None + if args.fsid: + make_log_dir(args.fsid) - l = FileLock(ctx, ctx.fsid) + l = FileLock(args.fsid) l.acquire() (uid, gid) = (0, 0) # ceph-volume runs as root - mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None) + mounts = get_container_mounts(args.fsid, 'osd', None) tmp_config = None tmp_keyring = None - (config, keyring) = get_config_and_keyring(ctx) + (config, keyring) = get_config_and_keyring() if config: # tmp config file @@ -4166,16 +3622,14 @@ def command_ceph_volume(ctx): mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z' c = CephContainer( - ctx, - image=ctx.image, + image=args.image, entrypoint='/usr/sbin/ceph-volume', - envs=ctx.env, - args=ctx.command, + envs=args.env, + args=args.command, privileged=True, volume_mounts=mounts, ) - verbosity = CallVerbosity.VERBOSE if ctx.log_output else CallVerbosity.VERBOSE_ON_FAILURE - out, err, code = call_throws(ctx, c.run_cmd(), verbosity=verbosity) + out, err, code = call_throws(c.run_cmd(), verbosity=CallVerbosity.VERBOSE) if not code: print(out) @@ -4183,16 +3637,16 @@ def command_ceph_volume(ctx): @infer_fsid -def command_unit(ctx): - # type: (CephadmContext) -> None - if not ctx.fsid: +def command_unit(): + # type: () -> None + if not args.fsid: raise Error('must pass --fsid to specify cluster') - unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name) + unit_name = get_unit_name_by_daemon_name(args.fsid, args.name) - call_throws(ctx, [ + call_throws([ 'systemctl', - ctx.command, + args.command, unit_name], verbosity=CallVerbosity.VERBOSE, desc='' @@ -4202,17 +3656,17 @@ def command_unit(ctx): @infer_fsid -def command_logs(ctx): - # type: (CephadmContext) -> None - if not ctx.fsid: +def command_logs(): + # type: () -> None + if not args.fsid: raise Error('must pass --fsid to specify cluster') - unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name) + unit_name = get_unit_name_by_daemon_name(args.fsid, args.name) cmd = [find_program('journalctl')] cmd.extend(['-u', unit_name]) - if ctx.command: - cmd.extend(ctx.command) + if args.command: + cmd.extend(args.command) # call this directly, without our wrapper, so that we get an unmolested # stdout with logger prefixing. @@ -4222,8 +3676,8 @@ def command_logs(ctx): ################################## -def list_networks(ctx): - # type: (CephadmContext) -> Dict[str,List[str]] +def list_networks(): + # type: () -> Dict[str,List[str]] ## sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag, ## so we'll need to use a regex to parse 'ip' command output. @@ -4231,16 +3685,13 @@ def list_networks(ctx): #j = json.loads(out) #for x in j: - res = _list_ipv4_networks(ctx) - res.update(_list_ipv6_networks(ctx)) + res = _list_ipv4_networks() + res.update(_list_ipv6_networks()) return res -def _list_ipv4_networks(ctx: CephadmContext): - execstr: Optional[str] = find_executable('ip') - if not execstr: - raise FileNotFoundError("unable to find 'ip' command") - out, _, _ = call_throws(ctx, [execstr, 'route', 'ls']) +def _list_ipv4_networks(): + out, _, _ = call_throws([find_executable('ip'), 'route', 'ls']) return _parse_ipv4_route(out) @@ -4259,12 +3710,9 @@ def _parse_ipv4_route(out): return r -def _list_ipv6_networks(ctx: CephadmContext): - execstr: Optional[str] = find_executable('ip') - if not execstr: - raise FileNotFoundError("unable to find 'ip' command") - routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls']) - ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls']) +def _list_ipv6_networks(): + routes, _, _ = call_throws([find_executable('ip'), '-6', 'route', 'ls']) + ips, _, _ = call_throws([find_executable('ip'), '-6', 'addr', 'ls']) return _parse_ipv6_route(routes, ips) @@ -4294,28 +3742,28 @@ def _parse_ipv6_route(routes, ips): return r -def command_list_networks(ctx): - # type: (CephadmContext) -> None - r = list_networks(ctx) +def command_list_networks(): + # type: () -> None + r = list_networks() print(json.dumps(r, indent=4)) ################################## -def command_ls(ctx): - # type: (CephadmContext) -> None - ls = list_daemons(ctx, detail=not ctx.no_detail, - legacy_dir=ctx.legacy_dir) +def command_ls(): + # type: () -> None + + ls = list_daemons(detail=not args.no_detail, + legacy_dir=args.legacy_dir) print(json.dumps(ls, indent=4)) -def list_daemons(ctx, detail=True, legacy_dir=None): - # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]] - host_version: Optional[str] = None +def list_daemons(detail=True, legacy_dir=None): + # type: (bool, Optional[str]) -> List[Dict[str, str]] + host_version = None ls = [] - container_path = ctx.container_path - data_dir = ctx.data_dir + data_dir = args.data_dir if legacy_dir is not None: data_dir = os.path.abspath(legacy_dir + data_dir) @@ -4332,32 +3780,30 @@ def list_daemons(ctx, detail=True, legacy_dir=None): continue (cluster, daemon_id) = j.split('-', 1) fsid = get_legacy_daemon_fsid( - ctx, cluster, daemon_type, daemon_id, legacy_dir=legacy_dir) legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id) - val: Dict[str, Any] = { + i = { 'style': 'legacy', 'name': '%s.%s' % (daemon_type, daemon_id), 'fsid': fsid if fsid is not None else 'unknown', 'systemd_unit': legacy_unit_name, } if detail: - (val['enabled'], val['state'], _) = \ - check_unit(ctx, legacy_unit_name) + (i['enabled'], i['state'], _) = check_unit(legacy_unit_name) if not host_version: try: - out, err, code = call(ctx, ['ceph', '-v']) + out, err, code = call(['ceph', '-v']) if not code and out.startswith('ceph version '): host_version = out.split(' ')[2] except Exception: pass - val['host_version'] = host_version - ls.append(val) + i['host_version'] = host_version + ls.append(i) elif is_fsid(i): fsid = str(i) # convince mypy that fsid is a str here for j in os.listdir(os.path.join(data_dir, i)): - if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)): + if '.' in j: name = j (daemon_type, daemon_id) = j.split('.', 1) unit_name = get_unit_name(fsid, @@ -4365,7 +3811,7 @@ def list_daemons(ctx, detail=True, legacy_dir=None): daemon_id) else: continue - val = { + i = { 'style': 'cephadm:v1', 'name': name, 'fsid': fsid, @@ -4373,21 +3819,19 @@ def list_daemons(ctx, detail=True, legacy_dir=None): } if detail: # get container id - (val['enabled'], val['state'], _) = \ - check_unit(ctx, unit_name) + (i['enabled'], i['state'], _) = check_unit(unit_name) container_id = None image_name = None image_id = None version = None start_stamp = None - if 'podman' in container_path and \ - get_podman_version(ctx, container_path) < (1, 6, 2): + if 'podman' in container_path and get_podman_version() < (1, 6, 2): image_field = '.ImageID' else: image_field = '.Image' - out, err, code = call(ctx, + out, err, code = call( [ container_path, 'inspect', '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field, @@ -4403,12 +3847,12 @@ def list_daemons(ctx, detail=True, legacy_dir=None): if not version or '.' not in version: version = seen_versions.get(image_id, None) if daemon_type == NFSGanesha.daemon_type: - version = NFSGanesha.get_version(ctx,container_id) + version = NFSGanesha.get_version(container_id) if daemon_type == CephIscsi.daemon_type: - version = CephIscsi.get_version(ctx,container_id) + version = CephIscsi.get_version(container_id) elif not version: if daemon_type in Ceph.daemons: - out, err, code = call(ctx, + out, err, code = call( [container_path, 'exec', container_id, 'ceph', '-v']) if not code and \ @@ -4416,7 +3860,7 @@ def list_daemons(ctx, detail=True, legacy_dir=None): version = out.split(' ')[2] seen_versions[image_id] = version elif daemon_type == 'grafana': - out, err, code = call(ctx, + out, err, code = call( [container_path, 'exec', container_id, 'grafana-server', '-v']) if not code and \ @@ -4427,29 +3871,13 @@ def list_daemons(ctx, detail=True, legacy_dir=None): 'alertmanager', 'node-exporter']: cmd = daemon_type.replace('-', '_') - out, err, code = call(ctx, + out, err, code = call( [container_path, 'exec', container_id, cmd, '--version']) if not code and \ err.startswith('%s, version ' % cmd): version = err.split(' ')[2] seen_versions[image_id] = version - elif daemon_type == 'haproxy': - out, err, code = call(ctx, - [container_path, 'exec', container_id, - 'haproxy', '-v']) - if not code and \ - out.startswith('HA-Proxy version '): - version = out.split(' ')[2] - seen_versions[image_id] = version - elif daemon_type == 'keepalived': - out, err, code = call(ctx, - [container_path, 'exec', container_id, - 'keepalived', '--version']) - if not code and \ - err.startswith('Keepalived '): - version = err.split(' ')[1] - seen_versions[image_id] = version elif daemon_type == CustomContainer.daemon_type: # Because a custom container can contain # everything, we do not know which command @@ -4464,28 +3892,28 @@ def list_daemons(ctx, detail=True, legacy_dir=None): image_name = f.read().strip() or None except IOError: pass - val['container_id'] = container_id - val['container_image_name'] = image_name - val['container_image_id'] = image_id - val['version'] = version - val['started'] = start_stamp - val['created'] = get_file_timestamp( + i['container_id'] = container_id + i['container_image_name'] = image_name + i['container_image_id'] = image_id + i['version'] = version + i['started'] = start_stamp + i['created'] = get_file_timestamp( os.path.join(data_dir, fsid, j, 'unit.created') ) - val['deployed'] = get_file_timestamp( + i['deployed'] = get_file_timestamp( os.path.join(data_dir, fsid, j, 'unit.image')) - val['configured'] = get_file_timestamp( + i['configured'] = get_file_timestamp( os.path.join(data_dir, fsid, j, 'unit.configured')) - ls.append(val) + ls.append(i) return ls -def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None): - # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str] +def get_daemon_description(fsid, name, detail=False, legacy_dir=None): + # type: (str, str, bool, Optional[str]) -> Dict[str, str] - for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir): + for d in list_daemons(detail=detail, legacy_dir=legacy_dir): if d['fsid'] != fsid: continue if d['name'] != name: @@ -4497,48 +3925,46 @@ def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None): ################################## @default_image -def command_adopt(ctx): - # type: (CephadmContext) -> None +def command_adopt(): + # type: () -> None - if not ctx.skip_pull: - _pull_image(ctx, ctx.image) + if not args.skip_pull: + _pull_image(args.image) - (daemon_type, daemon_id) = ctx.name.split('.', 1) + (daemon_type, daemon_id) = args.name.split('.', 1) # legacy check - if ctx.style != 'legacy': - raise Error('adoption of style %s not implemented' % ctx.style) + if args.style != 'legacy': + raise Error('adoption of style %s not implemented' % args.style) # lock - fsid = get_legacy_daemon_fsid(ctx, - ctx.cluster, + fsid = get_legacy_daemon_fsid(args.cluster, daemon_type, daemon_id, - legacy_dir=ctx.legacy_dir) + legacy_dir=args.legacy_dir) if not fsid: raise Error('could not detect legacy fsid; set fsid in ceph.conf') - l = FileLock(ctx, fsid) + l = FileLock(fsid) l.acquire() # call correct adoption if daemon_type in Ceph.daemons: - command_adopt_ceph(ctx, daemon_type, daemon_id, fsid); + command_adopt_ceph(daemon_type, daemon_id, fsid); elif daemon_type == 'prometheus': - command_adopt_prometheus(ctx, daemon_id, fsid) + command_adopt_prometheus(daemon_id, fsid) elif daemon_type == 'grafana': - command_adopt_grafana(ctx, daemon_id, fsid) + command_adopt_grafana(daemon_id, fsid) elif daemon_type == 'node-exporter': raise Error('adoption of node-exporter not implemented') elif daemon_type == 'alertmanager': - command_adopt_alertmanager(ctx, daemon_id, fsid) + command_adopt_alertmanager(daemon_id, fsid) else: raise Error('daemon type %s not recognized' % daemon_type) class AdoptOsd(object): - def __init__(self, ctx, osd_data_dir, osd_id): - # type: (CephadmContext, str, str) -> None - self.ctx = ctx + def __init__(self, osd_data_dir, osd_id): + # type: (str, str) -> None self.osd_data_dir = osd_data_dir self.osd_id = osd_id @@ -4564,16 +3990,16 @@ class AdoptOsd(object): def check_offline_lvm_osd(self): # type: () -> Tuple[Optional[str], Optional[str]] + osd_fsid, osd_type = None, None c = CephContainer( - self.ctx, - image=self.ctx.image, + image=args.image, entrypoint='/usr/sbin/ceph-volume', args=['lvm', 'list', '--format=json'], privileged=True ) - out, err, code = call_throws(self.ctx, c.run_cmd()) + out, err, code = call_throws(c.run_cmd()) if not code: try: js = json.loads(out) @@ -4594,6 +4020,7 @@ class AdoptOsd(object): def check_offline_simple_osd(self): # type: () -> Tuple[Optional[str], Optional[str]] + osd_fsid, osd_type = None, None osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id)) @@ -4607,21 +4034,21 @@ class AdoptOsd(object): if osd_type != "filestore": # need this to be mounted for the adopt to work, as it # needs to move files from this directory - call_throws(self.ctx, ['mount', js["data"]["path"], self.osd_data_dir]) + call_throws(['mount', js["data"]["path"], self.osd_data_dir]) except ValueError as e: logger.info("Invalid JSON in {}: {}".format(osd_file, e)) return osd_fsid, osd_type -def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid): - # type: (CephadmContext, str, str, str) -> None +def command_adopt_ceph(daemon_type, daemon_id, fsid): + # type: (str, str, str) -> None - (uid, gid) = extract_uid_gid(ctx) + (uid, gid) = extract_uid_gid() data_dir_src = ('/var/lib/ceph/%s/%s-%s' % - (daemon_type, ctx.cluster, daemon_id)) - data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src) + (daemon_type, args.cluster, daemon_id)) + data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src) if not os.path.exists(data_dir_src): raise Error("{}.{} data directory '{}' does not exist. " @@ -4630,7 +4057,7 @@ def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid): osd_fsid = None if daemon_type == 'osd': - adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id) + adopt_osd = AdoptOsd(data_dir_src, daemon_id) osd_fsid, osd_type = adopt_osd.check_online_osd() if not osd_fsid: osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd() @@ -4647,28 +4074,28 @@ def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid): # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph # CLUSTER field. unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id) - (enabled, state, _) = check_unit(ctx, unit_name) + (enabled, state, _) = check_unit(unit_name) if state == 'running': logger.info('Stopping old systemd unit %s...' % unit_name) - call_throws(ctx, ['systemctl', 'stop', unit_name]) + call_throws(['systemctl', 'stop', unit_name]) if enabled: logger.info('Disabling old systemd unit %s...' % unit_name) - call_throws(ctx, ['systemctl', 'disable', unit_name]) + call_throws(['systemctl', 'disable', unit_name]) # data logger.info('Moving data...') - data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id, + data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid) - move_files(ctx, glob(os.path.join(data_dir_src, '*')), + move_files(glob(os.path.join(data_dir_src, '*')), data_dir_dst, uid=uid, gid=gid) logger.debug('Remove dir \'%s\'' % (data_dir_src)) if os.path.ismount(data_dir_src): - call_throws(ctx, ['umount', data_dir_src]) + call_throws(['umount', data_dir_src]) os.rmdir(data_dir_src) logger.info('Chowning content...') - call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst]) + call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst]) if daemon_type == 'mon': # rename *.ldb -> *.sst, in case they are coming from ubuntu @@ -4699,107 +4126,108 @@ def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid): logger.info('Renaming %s -> %s', simple_fn, new_fn) os.rename(simple_fn, new_fn) logger.info('Disabling host unit ceph-volume@ simple unit...') - call(ctx, ['systemctl', 'disable', + call(['systemctl', 'disable', 'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)]) else: # assume this is an 'lvm' c-v for now, but don't error # out if it's not. logger.info('Disabling host unit ceph-volume@ lvm unit...') - call(ctx, ['systemctl', 'disable', + call(['systemctl', 'disable', 'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)]) # config - config_src = '/etc/ceph/%s.conf' % (ctx.cluster) - config_src = os.path.abspath(ctx.legacy_dir + config_src) + config_src = '/etc/ceph/%s.conf' % (args.cluster) + config_src = os.path.abspath(args.legacy_dir + config_src) config_dst = os.path.join(data_dir_dst, 'config') - copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid) + copy_files([config_src], config_dst, uid=uid, gid=gid) # logs logger.info('Moving logs...') log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' % - (ctx.cluster, daemon_type, daemon_id)) - log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src) - log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid) - move_files(ctx, glob(log_dir_src), + (args.cluster, daemon_type, daemon_id)) + log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src) + log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid) + move_files(glob(log_dir_src), log_dir_dst, uid=uid, gid=gid) logger.info('Creating new units...') - make_var_run(ctx, fsid, uid, gid) - c = get_container(ctx, fsid, daemon_type, daemon_id) - deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c, + make_var_run(fsid, uid, gid) + c = get_container(fsid, daemon_type, daemon_id) + deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c, enable=True, # unconditionally enable the new unit - start=(state == 'running' or ctx.force_start), + start=(state == 'running' or args.force_start), osd_fsid=osd_fsid) - update_firewalld(ctx, daemon_type) + update_firewalld(daemon_type) + +def command_adopt_prometheus(daemon_id, fsid): + # type: (str, str) -> None -def command_adopt_prometheus(ctx, daemon_id, fsid): - # type: (CephadmContext, str, str) -> None daemon_type = 'prometheus' - (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type) + (uid, gid) = extract_uid_gid_monitoring(daemon_type) - _stop_and_disable(ctx, 'prometheus') + _stop_and_disable('prometheus') - data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id, + data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid) # config config_src = '/etc/prometheus/prometheus.yml' - config_src = os.path.abspath(ctx.legacy_dir + config_src) + config_src = os.path.abspath(args.legacy_dir + config_src) config_dst = os.path.join(data_dir_dst, 'etc/prometheus') makedirs(config_dst, uid, gid, 0o755) - copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid) + copy_files([config_src], config_dst, uid=uid, gid=gid) # data data_src = '/var/lib/prometheus/metrics/' - data_src = os.path.abspath(ctx.legacy_dir + data_src) + data_src = os.path.abspath(args.legacy_dir + data_src) data_dst = os.path.join(data_dir_dst, 'data') - copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid) + copy_tree([data_src], data_dst, uid=uid, gid=gid) - make_var_run(ctx, fsid, uid, gid) - c = get_container(ctx, fsid, daemon_type, daemon_id) - deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid) - update_firewalld(ctx, daemon_type) + make_var_run(fsid, uid, gid) + c = get_container(fsid, daemon_type, daemon_id) + deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid) + update_firewalld(daemon_type) -def command_adopt_grafana(ctx, daemon_id, fsid): - # type: (CephadmContext, str, str) -> None +def command_adopt_grafana(daemon_id, fsid): + # type: (str, str) -> None daemon_type = 'grafana' - (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type) + (uid, gid) = extract_uid_gid_monitoring(daemon_type) - _stop_and_disable(ctx, 'grafana-server') + _stop_and_disable('grafana-server') - data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id, + data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid) # config config_src = '/etc/grafana/grafana.ini' - config_src = os.path.abspath(ctx.legacy_dir + config_src) + config_src = os.path.abspath(args.legacy_dir + config_src) config_dst = os.path.join(data_dir_dst, 'etc/grafana') makedirs(config_dst, uid, gid, 0o755) - copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid) + copy_files([config_src], config_dst, uid=uid, gid=gid) prov_src = '/etc/grafana/provisioning/' - prov_src = os.path.abspath(ctx.legacy_dir + prov_src) + prov_src = os.path.abspath(args.legacy_dir + prov_src) prov_dst = os.path.join(data_dir_dst, 'etc/grafana') - copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid) + copy_tree([prov_src], prov_dst, uid=uid, gid=gid) # cert cert = '/etc/grafana/grafana.crt' key = '/etc/grafana/grafana.key' if os.path.exists(cert) and os.path.exists(key): cert_src = '/etc/grafana/grafana.crt' - cert_src = os.path.abspath(ctx.legacy_dir + cert_src) + cert_src = os.path.abspath(args.legacy_dir + cert_src) makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755) cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file') - copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid) + copy_files([cert_src], cert_dst, uid=uid, gid=gid) key_src = '/etc/grafana/grafana.key' - key_src = os.path.abspath(ctx.legacy_dir + key_src) + key_src = os.path.abspath(args.legacy_dir + key_src) key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key') - copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid) + copy_files([key_src], key_dst, uid=uid, gid=gid) _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini')) else: @@ -4807,44 +4235,44 @@ def command_adopt_grafana(ctx, daemon_id, fsid): # data - possible custom dashboards/plugins data_src = '/var/lib/grafana/' - data_src = os.path.abspath(ctx.legacy_dir + data_src) + data_src = os.path.abspath(args.legacy_dir + data_src) data_dst = os.path.join(data_dir_dst, 'data') - copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid) + copy_tree([data_src], data_dst, uid=uid, gid=gid) - make_var_run(ctx, fsid, uid, gid) - c = get_container(ctx, fsid, daemon_type, daemon_id) - deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid) - update_firewalld(ctx, daemon_type) + make_var_run(fsid, uid, gid) + c = get_container(fsid, daemon_type, daemon_id) + deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid) + update_firewalld(daemon_type) -def command_adopt_alertmanager(ctx, daemon_id, fsid): - # type: (CephadmContext, str, str) -> None +def command_adopt_alertmanager(daemon_id, fsid): + # type: (str, str) -> None daemon_type = 'alertmanager' - (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type) + (uid, gid) = extract_uid_gid_monitoring(daemon_type) - _stop_and_disable(ctx, 'prometheus-alertmanager') + _stop_and_disable('prometheus-alertmanager') - data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id, + data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid) # config config_src = '/etc/prometheus/alertmanager.yml' - config_src = os.path.abspath(ctx.legacy_dir + config_src) + config_src = os.path.abspath(args.legacy_dir + config_src) config_dst = os.path.join(data_dir_dst, 'etc/alertmanager') makedirs(config_dst, uid, gid, 0o755) - copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid) + copy_files([config_src], config_dst, uid=uid, gid=gid) # data data_src = '/var/lib/prometheus/alertmanager/' - data_src = os.path.abspath(ctx.legacy_dir + data_src) + data_src = os.path.abspath(args.legacy_dir + data_src) data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data') - copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid) + copy_tree([data_src], data_dst, uid=uid, gid=gid) - make_var_run(ctx, fsid, uid, gid) - c = get_container(ctx, fsid, daemon_type, daemon_id) - deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid) - update_firewalld(ctx, daemon_type) + make_var_run(fsid, uid, gid) + c = get_container(fsid, daemon_type, daemon_id) + deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid) + update_firewalld(daemon_type) def _adjust_grafana_ini(filename): @@ -4873,42 +4301,44 @@ def _adjust_grafana_ini(filename): raise Error("Cannot update {}: {}".format(filename, err)) -def _stop_and_disable(ctx, unit_name): - # type: (CephadmContext, str) -> None +def _stop_and_disable(unit_name): + # type: (str) -> None - (enabled, state, _) = check_unit(ctx, unit_name) + (enabled, state, _) = check_unit(unit_name) if state == 'running': logger.info('Stopping old systemd unit %s...' % unit_name) - call_throws(ctx, ['systemctl', 'stop', unit_name]) + call_throws(['systemctl', 'stop', unit_name]) if enabled: logger.info('Disabling old systemd unit %s...' % unit_name) - call_throws(ctx, ['systemctl', 'disable', unit_name]) + call_throws(['systemctl', 'disable', unit_name]) ################################## -def command_rm_daemon(ctx): - # type: (CephadmContext) -> None - l = FileLock(ctx, ctx.fsid) +def command_rm_daemon(): + # type: () -> None + + l = FileLock(args.fsid) l.acquire() - (daemon_type, daemon_id) = ctx.name.split('.', 1) - unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name) - if daemon_type in ['mon', 'osd'] and not ctx.force: + unit_name = get_unit_name_by_daemon_name(args.fsid, args.name) + + (daemon_type, daemon_id) = args.name.split('.', 1) + if daemon_type in ['mon', 'osd'] and not args.force: raise Error('must pass --force to proceed: ' 'this command may destroy precious data!') - call(ctx, ['systemctl', 'stop', unit_name], + call(['systemctl', 'stop', unit_name], verbosity=CallVerbosity.DEBUG) - call(ctx, ['systemctl', 'reset-failed', unit_name], + call(['systemctl', 'reset-failed', unit_name], verbosity=CallVerbosity.DEBUG) - call(ctx, ['systemctl', 'disable', unit_name], + call(['systemctl', 'disable', unit_name], verbosity=CallVerbosity.DEBUG) - data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id) + data_dir = get_data_dir(args.fsid, daemon_type, daemon_id) if daemon_type in ['mon', 'osd', 'prometheus'] and \ - not ctx.force_delete_data: + not args.force_delete_data: # rename it out of the way -- do not delete - backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed') + backup_dir = os.path.join(args.data_dir, args.fsid, 'removed') if not os.path.exists(backup_dir): makedirs(backup_dir, 0, 0, DATA_DIR_MODE) dirname = '%s.%s_%s' % (daemon_type, daemon_id, @@ -4916,65 +4346,63 @@ def command_rm_daemon(ctx): os.rename(data_dir, os.path.join(backup_dir, dirname)) else: - if daemon_type == CephadmDaemon.daemon_type: - CephadmDaemon.uninstall(ctx, ctx.fsid, daemon_type, daemon_id) - call_throws(ctx, ['rm', '-rf', data_dir]) + call_throws(['rm', '-rf', data_dir]) ################################## -def command_rm_cluster(ctx): - # type: (CephadmContext) -> None - if not ctx.force: +def command_rm_cluster(): + # type: () -> None + if not args.force: raise Error('must pass --force to proceed: ' 'this command may destroy precious data!') - l = FileLock(ctx, ctx.fsid) + l = FileLock(args.fsid) l.acquire() # stop + disable individual daemon units - for d in list_daemons(ctx, detail=False): - if d['fsid'] != ctx.fsid: + for d in list_daemons(detail=False): + if d['fsid'] != args.fsid: continue if d['style'] != 'cephadm:v1': continue - unit_name = get_unit_name(ctx.fsid, d['name']) - call(ctx, ['systemctl', 'stop', unit_name], + unit_name = get_unit_name(args.fsid, d['name']) + call(['systemctl', 'stop', unit_name], verbosity=CallVerbosity.DEBUG) - call(ctx, ['systemctl', 'reset-failed', unit_name], + call(['systemctl', 'reset-failed', unit_name], verbosity=CallVerbosity.DEBUG) - call(ctx, ['systemctl', 'disable', unit_name], + call(['systemctl', 'disable', unit_name], verbosity=CallVerbosity.DEBUG) # cluster units - for unit_name in ['ceph-%s.target' % ctx.fsid]: - call(ctx, ['systemctl', 'stop', unit_name], + for unit_name in ['ceph-%s.target' % args.fsid]: + call(['systemctl', 'stop', unit_name], verbosity=CallVerbosity.DEBUG) - call(ctx, ['systemctl', 'reset-failed', unit_name], + call(['systemctl', 'reset-failed', unit_name], verbosity=CallVerbosity.DEBUG) - call(ctx, ['systemctl', 'disable', unit_name], + call(['systemctl', 'disable', unit_name], verbosity=CallVerbosity.DEBUG) - slice_name = 'system-%s.slice' % (('ceph-%s' % ctx.fsid).replace('-', + slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-', '\\x2d')) - call(ctx, ['systemctl', 'stop', slice_name], + call(['systemctl', 'stop', slice_name], verbosity=CallVerbosity.DEBUG) # rm units - call_throws(ctx, ['rm', '-f', ctx.unit_dir + - '/ceph-%s@.service' % ctx.fsid]) - call_throws(ctx, ['rm', '-f', ctx.unit_dir + - '/ceph-%s.target' % ctx.fsid]) - call_throws(ctx, ['rm', '-rf', - ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid]) + call_throws(['rm', '-f', args.unit_dir + + '/ceph-%s@.service' % args.fsid]) + call_throws(['rm', '-f', args.unit_dir + + '/ceph-%s.target' % args.fsid]) + call_throws(['rm', '-rf', + args.unit_dir + '/ceph-%s.target.wants' % args.fsid]) # rm data - call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid]) + call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid]) # rm logs - call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid]) - call_throws(ctx, ['rm', '-rf', ctx.log_dir + - '/*.wants/ceph-%s@*' % ctx.fsid]) + call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid]) + call_throws(['rm', '-rf', args.log_dir + + '/*.wants/ceph-%s@*' % args.fsid]) # rm logrotate config - call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid]) + call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid]) # clean up config, keyring, and pub key files files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring'] @@ -4982,7 +4410,7 @@ def command_rm_cluster(ctx): if os.path.exists(files[0]): valid_fsid = False with open(files[0]) as f: - if ctx.fsid in f.read(): + if args.fsid in f.read(): valid_fsid = True if valid_fsid: for n in range(0, len(files)): @@ -4992,8 +4420,8 @@ def command_rm_cluster(ctx): ################################## -def check_time_sync(ctx, enabler=None): - # type: (CephadmContext, Optional[Packager]) -> bool +def check_time_sync(enabler=None): + # type: (Optional[Packager]) -> bool units = [ 'chrony.service', # 18.04 (at least) 'chronyd.service', # el / opensuse @@ -5002,19 +4430,20 @@ def check_time_sync(ctx, enabler=None): 'ntp.service', # 18.04 (at least) 'ntpsec.service', # 20.04 (at least) / buster ] - if not check_units(ctx, units, enabler): + if not check_units(units, enabler): logger.warning('No time sync service is running; checked for %s' % units) return False return True -def command_check_host(ctx: CephadmContext) -> None: - container_path = ctx.container_path +def command_check_host(): + # type: () -> None + global container_path errors = [] commands = ['systemctl', 'lvcreate'] - if ctx.docker: + if args.docker: container_path = find_program('docker') else: for i in CONTAINER_PREFERENCE: @@ -5024,7 +4453,7 @@ def command_check_host(ctx: CephadmContext) -> None: except Exception as e: logger.debug('Could not locate %s: %s' % (i, e)) if not container_path: - errors.append('ERROR: Unable to locate a supported container engine ({})'.format(' or '.join(CONTAINER_PREFERENCE))) + errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE) else: logger.info('podman|docker (%s) is present' % container_path) @@ -5033,18 +4462,18 @@ def command_check_host(ctx: CephadmContext) -> None: find_program(command) logger.info('%s is present' % command) except ValueError: - errors.append('ERROR: %s binary does not appear to be installed' % command) + errors.append('%s binary does not appear to be installed' % command) # check for configured+running chronyd or ntp - if not check_time_sync(ctx): - errors.append('ERROR: No time synchronization is active') + if not check_time_sync(): + errors.append('No time synchronization is active') - if ctx.has("expect_hostname") and ctx.expect_hostname: - if get_hostname().lower() != ctx.expect_hostname.lower(): - errors.append('ERROR: hostname "%s" does not match expected hostname "%s"' % ( - get_hostname(), ctx.expect_hostname)) + if 'expect_hostname' in args and args.expect_hostname: + if get_hostname().lower() != args.expect_hostname.lower(): + errors.append('hostname "%s" does not match expected hostname "%s"' % ( + get_hostname(), args.expect_hostname)) logger.info('Hostname "%s" matches what is expected.', - ctx.expect_hostname) + args.expect_hostname) if errors: raise Error('\n'.join(errors)) @@ -5054,39 +4483,38 @@ def command_check_host(ctx: CephadmContext) -> None: ################################## -def command_prepare_host(ctx: CephadmContext) -> None: - container_path = ctx.container_path - +def command_prepare_host(): + # type: () -> None logger.info('Verifying podman|docker is present...') pkg = None if not container_path: if not pkg: - pkg = create_packager(ctx) + pkg = create_packager() pkg.install_podman() logger.info('Verifying lvm2 is present...') if not find_executable('lvcreate'): if not pkg: - pkg = create_packager(ctx) + pkg = create_packager() pkg.install(['lvm2']) logger.info('Verifying time synchronization is in place...') - if not check_time_sync(ctx): + if not check_time_sync(): if not pkg: - pkg = create_packager(ctx) + pkg = create_packager() pkg.install(['chrony']) # check again, and this time try to enable # the service - check_time_sync(ctx, enabler=pkg) + check_time_sync(enabler=pkg) - if ctx.has("expect_hostname") and ctx.expect_hostname and ctx.expect_hostname != get_hostname(): - logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname)) - call_throws(ctx, ['hostname', ctx.expect_hostname]) + if 'expect_hostname' in args and args.expect_hostname and args.expect_hostname != get_hostname(): + logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args.expect_hostname)) + call_throws(['hostname', args.expect_hostname]) with open('/etc/hostname', 'w') as f: - f.write(ctx.expect_hostname + '\n') + f.write(args.expect_hostname + '\n') logger.info('Repeating the final host check...') - command_check_host(ctx) + command_check_host() ################################## @@ -5110,16 +4538,6 @@ class CustomValidation(argparse.Action): if self.dest == "name": self._check_name(values) setattr(namespace, self.dest, values) - elif self.dest == 'exporter_config': - cfg = get_parm(values) - # run the class' validate method, and convert to an argparse error - # if problems are found - try: - CephadmDaemon.validate_config(cfg) - except Error as e: - raise argparse.ArgumentError(self, - str(e)) - setattr(namespace, self.dest, cfg) ################################## @@ -5147,14 +4565,12 @@ def get_distro(): class Packager(object): - def __init__(self, ctx: CephadmContext, - stable=None, version=None, branch=None, commit=None): + def __init__(self, stable=None, version=None, branch=None, commit=None): assert \ (stable and not version and not branch and not commit) or \ (not stable and version and not branch and not commit) or \ (not stable and not version and branch) or \ (not stable and not version and not branch and not commit) - self.ctx = ctx self.stable = stable self.version = version self.branch = branch @@ -5181,7 +4597,6 @@ class Packager(object): except HTTPError as err: logger.error('repository not found in shaman (might not be available yet)') raise Error('%s, failed to fetch %s' % (err, shaman_url)) - chacra_url = '' try: chacra_url = shaman_response.geturl() chacra_response = urlopen(chacra_url) @@ -5191,8 +4606,8 @@ class Packager(object): return chacra_response.read().decode('utf-8') def repo_gpgkey(self): - if self.ctx.gpg_url: - return self.ctx.gpg_url + if args.gpg_url: + return args.gpg_url if self.stable or self.version: return 'https://download.ceph.com/keys/release.asc', 'release' else: @@ -5202,7 +4617,7 @@ class Packager(object): """ Start and enable the service (typically using systemd). """ - call_throws(self.ctx, ['systemctl', 'enable', '--now', service]) + call_throws(['systemctl', 'enable', '--now', service]) class Apt(Packager): @@ -5211,12 +4626,10 @@ class Apt(Packager): 'debian': 'debian', } - def __init__(self, ctx: CephadmContext, - stable, version, branch, commit, + def __init__(self, stable, version, branch, commit, distro, distro_version, distro_codename): - super(Apt, self).__init__(ctx, stable=stable, version=version, + super(Apt, self).__init__(stable=stable, version=version, branch=branch, commit=commit) - self.ctx = ctx self.distro = self.DISTRO_NAMES[distro] self.distro_codename = distro_codename self.distro_version = distro_version @@ -5225,7 +4638,6 @@ class Apt(Packager): return '/etc/apt/sources.list.d/ceph.list' def add_repo(self): - url, name = self.repo_gpgkey() logger.info('Installing repo GPG key from %s...' % url) try: @@ -5240,10 +4652,10 @@ class Apt(Packager): if self.version: content = 'deb %s/debian-%s/ %s main\n' % ( - self.ctx.repo_url, self.version, self.distro_codename) + args.repo_url, self.version, self.distro_codename) elif self.stable: content = 'deb %s/debian-%s/ %s main\n' % ( - self.ctx.repo_url, self.stable, self.distro_codename) + args.repo_url, self.stable, self.distro_codename) else: content = self.query_shaman(self.distro, self.distro_codename, self.branch, self.commit) @@ -5267,13 +4679,13 @@ class Apt(Packager): def install(self, ls): logger.info('Installing packages %s...' % ls) - call_throws(self.ctx, ['apt', 'install', '-y'] + ls) + call_throws(['apt-get', 'install', '-y'] + ls) def install_podman(self): if self.distro == 'ubuntu': logger.info('Setting up repo for podman...') self.add_kubic_repo() - call_throws(self.ctx, ['apt', 'update']) + call_throws(['apt-get', 'update']) logger.info('Attempting podman install...') try: @@ -5307,7 +4719,7 @@ class Apt(Packager): key = response.read().decode('utf-8') tmp_key = write_tmp(key, 0, 0) keyring = self.kubric_repo_gpgkey_path() - call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name]) + call_throws(['apt-key', '--keyring', keyring, 'add', tmp_key.name]) logger.info('Installing repo file at %s...' % self.kubic_repo_path()) content = 'deb %s /\n' % self.kubic_repo_url() @@ -5334,12 +4746,10 @@ class YumDnf(Packager): 'fedora': ('fedora', 'fc'), } - def __init__(self, ctx: CephadmContext, - stable, version, branch, commit, + def __init__(self, stable, version, branch, commit, distro, distro_version): - super(YumDnf, self).__init__(ctx, stable=stable, version=version, + super(YumDnf, self).__init__(stable=stable, version=version, branch=branch, commit=commit) - self.ctx = ctx self.major = int(distro_version.split('.')[0]) self.distro_normalized = self.DISTRO_NAMES[distro][0] self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major) @@ -5411,10 +4821,10 @@ class YumDnf(Packager): def repo_baseurl(self): assert self.stable or self.version if self.version: - return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version, + return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro_code) else: - return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable, + return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro_code) def add_repo(self): @@ -5444,7 +4854,7 @@ class YumDnf(Packager): if self.distro_code.startswith('el'): logger.info('Enabling EPEL...') - call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release']) + call_throws([self.tool, 'install', '-y', 'epel-release']) def rm_repo(self): if os.path.exists(self.repo_path()): @@ -5452,7 +4862,7 @@ class YumDnf(Packager): def install(self, ls): logger.info('Installing packages %s...' % ls) - call_throws(self.ctx, [self.tool, 'install', '-y'] + ls) + call_throws([self.tool, 'install', '-y'] + ls) def install_podman(self): self.install(['podman']) @@ -5465,12 +4875,10 @@ class Zypper(Packager): 'opensuse-leap' ] - def __init__(self, ctx: CephadmContext, - stable, version, branch, commit, + def __init__(self, stable, version, branch, commit, distro, distro_version): - super(Zypper, self).__init__(ctx, stable=stable, version=version, + super(Zypper, self).__init__(stable=stable, version=version, branch=branch, commit=commit) - self.ctx = ctx self.tool = 'zypper' self.distro = 'opensuse' self.distro_version = '15.1' @@ -5512,11 +4920,9 @@ class Zypper(Packager): def repo_baseurl(self): assert self.stable or self.version if self.version: - return '%s/rpm-%s/%s' % (self.ctx.repo_url, - self.stable, self.distro) + return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro) else: - return '%s/rpm-%s/%s' % (self.ctx.repo_url, - self.stable, self.distro) + return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro) def add_repo(self): if self.stable or self.version: @@ -5549,57 +4955,56 @@ class Zypper(Packager): def install(self, ls): logger.info('Installing packages %s...' % ls) - call_throws(self.ctx, [self.tool, 'in', '-y'] + ls) + call_throws([self.tool, 'in', '-y'] + ls) def install_podman(self): self.install(['podman']) -def create_packager(ctx: CephadmContext, - stable=None, version=None, branch=None, commit=None): +def create_packager(stable=None, version=None, branch=None, commit=None): distro, distro_version, distro_codename = get_distro() if distro in YumDnf.DISTRO_NAMES: - return YumDnf(ctx, stable=stable, version=version, + return YumDnf(stable=stable, version=version, branch=branch, commit=commit, distro=distro, distro_version=distro_version) elif distro in Apt.DISTRO_NAMES: - return Apt(ctx, stable=stable, version=version, + return Apt(stable=stable, version=version, branch=branch, commit=commit, distro=distro, distro_version=distro_version, distro_codename=distro_codename) elif distro in Zypper.DISTRO_NAMES: - return Zypper(ctx, stable=stable, version=version, + return Zypper(stable=stable, version=version, branch=branch, commit=commit, distro=distro, distro_version=distro_version) raise Error('Distro %s version %s not supported' % (distro, distro_version)) -def command_add_repo(ctx: CephadmContext): - if ctx.version and ctx.release: +def command_add_repo(): + if args.version and args.release: raise Error('you can specify either --release or --version but not both') - if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit: + if not args.version and not args.release and not args.dev and not args.dev_commit: raise Error('please supply a --release, --version, --dev or --dev-commit argument') - if ctx.version: + if args.version: try: - (x, y, z) = ctx.version.split('.') + (x, y, z) = args.version.split('.') except Exception as e: raise Error('version must be in the form x.y.z (e.g., 15.2.0)') - pkg = create_packager(ctx, stable=ctx.release, - version=ctx.version, - branch=ctx.dev, - commit=ctx.dev_commit) + pkg = create_packager(stable=args.release, + version=args.version, + branch=args.dev, + commit=args.dev_commit) pkg.add_repo() -def command_rm_repo(ctx: CephadmContext): - pkg = create_packager(ctx) +def command_rm_repo(): + pkg = create_packager() pkg.rm_repo() -def command_install(ctx: CephadmContext): - pkg = create_packager(ctx) - pkg.install(ctx.packages) +def command_install(): + pkg = create_packager() + pkg.install(args.packages) ################################## @@ -5707,19 +5112,18 @@ class HostFacts(): "0x1af4": "Virtio Block Device" } - def __init__(self, ctx: CephadmContext): - self.ctx: CephadmContext = ctx - self.cpu_model: str = 'Unknown' - self.cpu_count: int = 0 - self.cpu_cores: int = 0 - self.cpu_threads: int = 0 - self.interfaces: Dict[str, Any] = {} + def __init__(self): + self.cpu_model = 'Unknown' + self.cpu_count = 0 + self.cpu_cores = 0 + self.cpu_threads = 0 + self.interfaces = {} - self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines() + self._meminfo = read_file(['/proc/meminfo']).splitlines() self._get_cpuinfo() self._process_nics() - self.arch: str = platform.processor() - self.kernel: str = platform.release() + self.arch = platform.processor() + self.kernel = platform.release() def _get_cpuinfo(self): # type: () -> None @@ -6057,9 +5461,9 @@ class HostFacts(): return float(up_secs) def kernel_security(self): - # type: () -> Optional[Dict[str, str]] + # type: () -> Dict[str, str] """Determine the security features enabled in the kernel - SELinux, AppArmor""" - def _fetch_selinux() -> Optional[Dict[str, str]]: + def _fetch_selinux(): """Read the selinux config file to determine state""" security = {} for selinux_path in HostFacts._selinux_path_list: @@ -6076,9 +5480,8 @@ class HostFacts(): else: security['description'] = "SELinux: Enabled({}, {})".format(security['SELINUX'], security['SELINUXTYPE']) return security - return None - def _fetch_apparmor() -> Optional[Dict[str, str]]: + def _fetch_apparmor(): """Read the apparmor profiles directly, returning an overview of AppArmor status""" security = {} for apparmor_path in HostFacts._apparmor_path_list: @@ -6103,24 +5506,19 @@ class HostFacts(): security['description'] += "({})".format(summary_str) return security - return None - ret = None if os.path.exists('/sys/kernel/security/lsm'): lsm = read_file(['/sys/kernel/security/lsm']).strip() if 'selinux' in lsm: - ret = _fetch_selinux() + return _fetch_selinux() elif 'apparmor' in lsm: - ret = _fetch_apparmor() + return _fetch_apparmor() else: return { "type": "Unknown", "description": "Linux Security Module framework is active, but is not using SELinux or AppArmor" } - if ret is not None: - return ret - return { "type": "None", "description": "Linux Security Module framework is not available" @@ -6132,7 +5530,7 @@ class HostFacts(): """Get kernel parameters required/used in Ceph clusters""" k_param = {} - out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT) + out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT) if out: param_list = out.split('\n') param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list} @@ -6155,759 +5553,14 @@ class HostFacts(): ################################## -def command_gather_facts(ctx: CephadmContext): +def command_gather_facts(): """gather_facts is intended to provide host releated metadata to the caller""" - host = HostFacts(ctx) + host = HostFacts() print(host.dump()) ################################## -def command_verify_prereqs(ctx: CephadmContext): - if ctx.service_type == 'haproxy' or ctx.service_type == 'keepalived': - out, err, code = call( - ctx, ['sysctl', '-n', 'net.ipv4.ip_nonlocal_bind'] - ) - if out.strip() != "1": - raise Error('net.ipv4.ip_nonlocal_bind not set to 1') - -################################## - - -class CephadmCache: - task_types = ['disks', 'daemons', 'host', 'http_server'] - - def __init__(self): - self.started_epoch_secs = time.time() - self.tasks = { - "daemons": "inactive", - "disks": "inactive", - "host": "inactive", - "http_server": "inactive", - } - self.errors = [] - self.disks = {} - self.daemons = {} - self.host = {} - self.lock = RLock() - - @property - def health(self): - return { - "started_epoch_secs": self.started_epoch_secs, - "tasks": self.tasks, - "errors": self.errors, - } - - def to_json(self): - return { - "health": self.health, - "host": self.host, - "daemons": self.daemons, - "disks": self.disks, - } - - def update_health(self, task_type, task_status, error_msg=None): - assert task_type in CephadmCache.task_types - with self.lock: - self.tasks[task_type] = task_status - if error_msg: - self.errors.append(error_msg) - - def update_task(self, task_type, content): - assert task_type in CephadmCache.task_types - assert isinstance(content, dict) - with self.lock: - current = getattr(self, task_type) - for k in content: - current[k] = content[k] - - setattr(self, task_type, current) - - -class CephadmHTTPServer(ThreadingMixIn, HTTPServer): - allow_reuse_address = True - daemon_threads = True - cephadm_cache: CephadmCache - token: str - -class CephadmDaemonHandler(BaseHTTPRequestHandler): - server: CephadmHTTPServer - api_version = 'v1' - valid_routes = [ - f'/{api_version}/metadata', - f'/{api_version}/metadata/health', - f'/{api_version}/metadata/disks', - f'/{api_version}/metadata/daemons', - f'/{api_version}/metadata/host', - ] - - class Decorators: - @classmethod - def authorize(cls, f): - """Implement a basic token check. - - The token is installed at deployment time and must be provided to - ensure we only respond to callers who know our token i.e. mgr - """ - def wrapper(self, *args, **kwargs): - auth = self.headers.get("Authorization", None) - if auth != "Bearer " + self.server.token: - self.send_error(401) - return - f(self, *args, **kwargs) - return wrapper - - def _help_page(self): - return """ - -cephadm metadata exporter - - -

cephadm metadata exporter {api_version}

- - - - - - - - - -
EndpointMethodsResponseDescription
{api_version}/metadataGETJSONReturn all metadata for the host
{api_version}/metadata/daemonsGETJSONReturn daemon and systemd states for ceph daemons (ls)
{api_version}/metadata/disksGETJSONshow disk inventory (ceph-volume)
{api_version}/metadata/healthGETJSONShow current health of the exporter sub-tasks
{api_version}/metadata/hostGETJSONShow host metadata (gather-facts)
- -""".format(api_version=CephadmDaemonHandler.api_version) - - def _fetch_root(self): - self.send_response(200) - self.send_header('Content-type', 'text/html; charset=utf-8') - self.end_headers() - self.wfile.write(self._help_page().encode('utf-8')) - - @Decorators.authorize - def do_GET(self): - """Handle *all* GET requests""" - - if self.path == '/': - # provide a html response if someone hits the root url, to document the - # available api endpoints - return self._fetch_root() - elif self.path in CephadmDaemonHandler.valid_routes: - u = self.path.split('/')[-1] - data = json.dumps({}) - status_code = 200 - - tasks = self.server.cephadm_cache.health.get('tasks', {}) - assert tasks - - # We're using the http status code to help indicate thread health - # - 200 (OK): request successful - # - 204 (No Content): access to a cache relating to a dead thread - # - 206 (Partial content): one or more theads are inactive - # - 500 (Server Error): all threads inactive - if u == 'metadata': - data = json.dumps(self.server.cephadm_cache.to_json()) - if all([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']): - # All the subtasks are dead! - status_code = 500 - elif any([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']): - status_code = 206 - - # Individual GETs against the a tasks endpoint will also return a 503 if the corresponding thread is inactive - elif u == 'daemons': - data = json.dumps(self.server.cephadm_cache.daemons) - if tasks['daemons'] == 'inactive': - status_code = 204 - elif u == 'disks': - data = json.dumps(self.server.cephadm_cache.disks) - if tasks['disks'] == 'inactive': - status_code = 204 - elif u == 'host': - data = json.dumps(self.server.cephadm_cache.host) - if tasks['host'] == 'inactive': - status_code = 204 - - # a GET against health will always return a 200, since the op is always successful - elif u == 'health': - data = json.dumps(self.server.cephadm_cache.health) - - self.send_response(status_code) - self.send_header('Content-type','application/json') - self.end_headers() - self.wfile.write(data.encode('utf-8')) - else: - # Invalid GET URL - bad_request_msg = "Valid URLs are: {}".format(', '.join(CephadmDaemonHandler.valid_routes)) - self.send_response(404, message=bad_request_msg) # reason - self.send_header('Content-type','application/json') - self.end_headers() - self.wfile.write(json.dumps({"message": bad_request_msg}).encode('utf-8')) - - def log_message(self, format, *args): - rqst = " ".join(str(a) for a in args) - logger.info(f"client:{self.address_string()} [{self.log_date_time_string()}] {rqst}") - - -class CephadmDaemon(): - - daemon_type = "cephadm-exporter" - default_port = 9443 - bin_name = 'cephadm' - key_name = "key" - crt_name = "crt" - token_name = "token" - config_requirements = [ - key_name, - crt_name, - token_name, - ] - loop_delay = 1 - thread_check_interval = 5 - - def __init__(self, ctx: CephadmContext, fsid, daemon_id=None, port=None): - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - if not port: - self.port = CephadmDaemon.default_port - else: - self.port = port - self.workers: List[Thread] = [] - self.http_server: CephadmHTTPServer - self.stop = False - self.cephadm_cache = CephadmCache() - self.errors: List[str] = [] - self.token = read_file([os.path.join(self.daemon_path, CephadmDaemon.token_name)]) - - @classmethod - def validate_config(cls, config): - reqs = ", ".join(CephadmDaemon.config_requirements) - errors = [] - - if not config or not all([k_name in config for k_name in CephadmDaemon.config_requirements]): - raise Error(f"config must contain the following fields : {reqs}") - - if not all([isinstance(config[k_name], str) for k_name in CephadmDaemon.config_requirements]): - errors.append(f"the following fields must be strings: {reqs}") - - crt = config[CephadmDaemon.crt_name] - key = config[CephadmDaemon.key_name] - token = config[CephadmDaemon.token_name] - - if not crt.startswith('-----BEGIN CERTIFICATE-----') or not crt.endswith('-----END CERTIFICATE-----\n'): - errors.append("crt field is not a valid SSL certificate") - if not key.startswith('-----BEGIN PRIVATE KEY-----') or not key.endswith('-----END PRIVATE KEY-----\n'): - errors.append("key is not a valid SSL private key") - if len(token) < 8: - errors.append("'token' must be more than 8 characters long") - - if 'port' in config: - try: - p = int(config['port']) - if p <= 1024: - raise ValueError - except (TypeError, ValueError): - errors.append("port must be an integer > 1024") - - if errors: - raise Error("Parameter errors : {}".format(", ".join(errors))) - - @property - def port_active(self): - return port_in_use(self.ctx, self.port) - - @property - def can_run(self): - # if port is in use - if self.port_active: - self.errors.append(f"TCP port {self.port} already in use, unable to bind") - if not os.path.exists(os.path.join(self.daemon_path, CephadmDaemon.key_name)): - self.errors.append(f"Key file '{CephadmDaemon.key_name}' is missing from {self.daemon_path}") - if not os.path.exists(os.path.join(self.daemon_path, CephadmDaemon.crt_name)): - self.errors.append(f"Certificate file '{CephadmDaemon.crt_name}' is missing from {self.daemon_path}") - if self.token == "Unknown": - self.errors.append(f"Authentication token '{CephadmDaemon.token_name}' is missing from {self.daemon_path}") - return len(self.errors) == 0 - - @staticmethod - def _unit_name(fsid, daemon_id): - return "{}.service".format(get_unit_name(fsid, CephadmDaemon.daemon_type, daemon_id)) - - @property - def unit_name(self): - return CephadmDaemon._unit_name(self.fsid, self.daemon_id) - - @property - def daemon_path(self): - return os.path.join( - self.ctx.data_dir, - self.fsid, - f'{self.daemon_type}.{self.daemon_id}' - ) - - @property - def binary_path(self): - return os.path.join( - self.ctx.data_dir, - self.fsid, - CephadmDaemon.bin_name - ) - - def _handle_thread_exception(self, exc, thread_type): - e_msg = f"{exc.__class__.__name__} exception: {str(exc)}" - thread_info = getattr(self.cephadm_cache, thread_type) - errors = thread_info.get('scrape_errors', []) - errors.append(e_msg) - logger.error(e_msg) - logger.exception(exc) - self.cephadm_cache.update_task( - thread_type, - { - "scrape_errors": errors, - "data": None, - } - ) - - def _scrape_host_facts(self, refresh_interval=10): - ctr = 0 - exception_encountered = False - - while True: - - if self.stop or exception_encountered: - break - - if ctr >= refresh_interval: - ctr = 0 - logger.debug("executing host-facts scrape") - errors = [] - s_time = time.time() - - try: - facts = HostFacts(self.ctx) - except Exception as e: - self._handle_thread_exception(e, 'host') - exception_encountered = True - else: - elapsed = time.time() - s_time - try: - data = json.loads(facts.dump()) - except json.decoder.JSONDecodeError: - errors.append("host-facts provided invalid JSON") - logger.warning(errors[-1]) - data = {} - self.cephadm_cache.update_task( - 'host', - { - "scrape_timestamp": s_time, - "scrape_duration_secs": elapsed, - "scrape_errors": errors, - "data": data, - } - ) - logger.debug(f"completed host-facts scrape - {elapsed}s") - - time.sleep(CephadmDaemon.loop_delay) - ctr += CephadmDaemon.loop_delay - logger.info("host-facts thread stopped") - - def _scrape_ceph_volume(self, refresh_interval=15): - # we're invoking the ceph_volume command, so we need to set the args that it - # expects to use - self.ctx.command = "inventory --format=json".split() - self.ctx.fsid = self.fsid - self.ctx.log_output = False - - ctr = 0 - exception_encountered = False - - while True: - if self.stop or exception_encountered: - break - - if ctr >= refresh_interval: - ctr = 0 - logger.debug("executing ceph-volume scrape") - errors = [] - s_time = time.time() - stream = io.StringIO() - try: - with redirect_stdout(stream): - command_ceph_volume(self.ctx) - except Exception as e: - self._handle_thread_exception(e, 'disks') - exception_encountered = True - else: - elapsed = time.time() - s_time - - # if the call to ceph-volume returns junk with the - # json, it won't parse - stdout = stream.getvalue() - - data = [] - if stdout: - try: - data = json.loads(stdout) - except json.decoder.JSONDecodeError: - errors.append("ceph-volume thread provided bad json data") - logger.warning(errors[-1]) - else: - errors.append("ceph-volume didn't return any data") - logger.warning(errors[-1]) - - self.cephadm_cache.update_task( - 'disks', - { - "scrape_timestamp": s_time, - "scrape_duration_secs": elapsed, - "scrape_errors": errors, - "data": data, - } - ) - - logger.debug(f"completed ceph-volume scrape - {elapsed}s") - time.sleep(CephadmDaemon.loop_delay) - ctr += CephadmDaemon.loop_delay - - logger.info("ceph-volume thread stopped") - - def _scrape_list_daemons(self, refresh_interval=20): - ctr = 0 - exception_encountered = False - while True: - if self.stop or exception_encountered: - break - - if ctr >= refresh_interval: - ctr = 0 - logger.debug("executing list-daemons scrape") - errors = [] - s_time = time.time() - - try: - # list daemons should ideally be invoked with a fsid - data = list_daemons(self.ctx) - except Exception as e: - self._handle_thread_exception(e, 'daemons') - exception_encountered = True - else: - if not isinstance(data, list): - errors.append("list-daemons didn't supply a list?") - logger.warning(errors[-1]) - data = [] - elapsed = time.time() - s_time - self.cephadm_cache.update_task( - 'daemons', - { - "scrape_timestamp": s_time, - "scrape_duration_secs": elapsed, - "scrape_errors": errors, - "data": data, - } - ) - logger.debug(f"completed list-daemons scrape - {elapsed}s") - - time.sleep(CephadmDaemon.loop_delay) - ctr += CephadmDaemon.loop_delay - logger.info("list-daemons thread stopped") - - def _create_thread(self, target, name, refresh_interval=None): - if refresh_interval: - t = Thread(target=target, args=(refresh_interval,)) - else: - t = Thread(target=target) - t.daemon = True - t.name = name - self.cephadm_cache.update_health(name, "active") - t.start() - - start_msg = f"Started {name} thread" - if refresh_interval: - logger.info(f"{start_msg}, with a refresh interval of {refresh_interval}s") - else: - logger.info(f"{start_msg}") - return t - - def reload(self, *args): - """reload -HUP received - - This is a placeholder function only, and serves to provide the hook that could - be exploited later if the exporter evolves to incorporate a config file - """ - logger.info("Reload request received - ignoring, no action needed") - - def shutdown(self, *args): - logger.info("Shutdown request received") - self.stop = True - self.http_server.shutdown() - - def run(self): - logger.info(f"cephadm exporter starting for FSID '{self.fsid}'") - if not self.can_run: - logger.error("Unable to start the exporter daemon") - for e in self.errors: - logger.error(e) - return - - # register signal handlers for running under systemd control - signal.signal(signal.SIGTERM, self.shutdown) - signal.signal(signal.SIGINT, self.shutdown) - signal.signal(signal.SIGHUP, self.reload) - logger.debug("Signal handlers attached") - - host_facts = self._create_thread(self._scrape_host_facts, 'host', 5) - self.workers.append(host_facts) - - daemons = self._create_thread(self._scrape_list_daemons, 'daemons', 20) - self.workers.append(daemons) - - disks = self._create_thread(self._scrape_ceph_volume, 'disks', 20) - self.workers.append(disks) - - self.http_server = CephadmHTTPServer(('0.0.0.0', self.port), CephadmDaemonHandler) # IPv4 only - self.http_server.socket = ssl.wrap_socket(self.http_server.socket, - keyfile=os.path.join(self.daemon_path, CephadmDaemon.key_name), - certfile=os.path.join(self.daemon_path, CephadmDaemon.crt_name), - server_side=True) - - self.http_server.cephadm_cache = self.cephadm_cache - self.http_server.token = self.token - server_thread = self._create_thread(self.http_server.serve_forever, 'http_server') - logger.info(f"https server listening on {self.http_server.server_address[0]}:{self.http_server.server_port}") - - ctr = 0 - while server_thread.is_alive(): - if self.stop: - break - - if ctr >= CephadmDaemon.thread_check_interval: - ctr = 0 - for worker in self.workers: - if self.cephadm_cache.tasks[worker.name] == 'inactive': - continue - if not worker.is_alive(): - logger.warning(f"{worker.name} thread not running") - stop_time = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S") - self.cephadm_cache.update_health(worker.name, "inactive", f"{worker.name} stopped at {stop_time}") - - time.sleep(CephadmDaemon.loop_delay) - ctr += CephadmDaemon.loop_delay - - logger.info("Main http server thread stopped") - - @property - def unit_run(self): - - return """set -e -{py3} {bin_path} exporter --fsid {fsid} --id {daemon_id} --port {port} &""".format( - py3 = shutil.which('python3'), - bin_path=self.binary_path, - fsid=self.fsid, - daemon_id=self.daemon_id, - port=self.port - ) - - @property - def unit_file(self): - return """#generated by cephadm -[Unit] -Description=cephadm exporter service for cluster {fsid} -After=network-online.target -Wants=network-online.target - -PartOf=ceph-{fsid}.target -Before=ceph-{fsid}.target - -[Service] -Type=forking -ExecStart=/bin/bash {daemon_path}/unit.run -ExecReload=/bin/kill -HUP $MAINPID -Restart=on-failure -RestartSec=10s - -[Install] -WantedBy=ceph-{fsid}.target -""".format( - fsid=self.fsid, - daemon_path=self.daemon_path -) - - def deploy_daemon_unit(self, config=None): - """deploy a specific unit file for cephadm - - The normal deploy_daemon_units doesn't apply for this - daemon since it's not a container, so we just create a - simple service definition and add it to the fsid's target - """ - if not config: - raise Error("Attempting to deploy cephadm daemon without a config") - assert isinstance(config, dict) - - # Create the required config files in the daemons dir, with restricted permissions - for filename in config: - with open(os.open(os.path.join(self.daemon_path, filename), os.O_CREAT | os.O_WRONLY, mode=0o600), "w") as f: - f.write(config[filename]) - - # When __file__ is we're being invoked over remoto via the orchestrator, so - # we pick up the file from where the orchestrator placed it - otherwise we'll - # copy it to the binary location for this cluster - if not __file__ == '': - shutil.copy(__file__, - self.binary_path) - - with open(os.path.join(self.daemon_path, 'unit.run'), "w") as f: - f.write(self.unit_run) - - with open(os.path.join(self.ctx.unit_dir, - f"{self.unit_name}.new"), - "w" - ) as f: - f.write(self.unit_file) - os.rename( - os.path.join(self.ctx.unit_dir, f"{self.unit_name}.new"), - os.path.join(self.ctx.unit_dir, self.unit_name)) - - call_throws(self.ctx, ['systemctl', 'daemon-reload']) - call(self.ctx, ['systemctl', 'stop', self.unit_name], - verbosity=CallVerbosity.DEBUG) - call(self.ctx, ['systemctl', 'reset-failed', self.unit_name], - verbosity=CallVerbosity.DEBUG) - call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name]) - - @classmethod - def uninstall(cls, ctx: CephadmContext, fsid, daemon_type, daemon_id): - unit_name = CephadmDaemon._unit_name(fsid, daemon_id) - unit_path = os.path.join(ctx.unit_dir, unit_name) - unit_run = os.path.join(ctx.data_dir, fsid, f"{daemon_type}.{daemon_id}", "unit.run") - port = None - try: - with open(unit_run, "r") as u: - contents = u.read().strip(" &") - except OSError: - logger.warning(f"Unable to access the unit.run file @ {unit_run}") - return - - port = None - for line in contents.split('\n'): - if '--port ' in line: - try: - port = int(line.split('--port ')[-1]) - except ValueError: - logger.warning("Unexpected format in unit.run file: port is not numeric") - logger.warning("Unable to remove the systemd file and close the port") - return - break - - if port: - fw = Firewalld(ctx) - try: - fw.close_ports([port]) - except RuntimeError: - logger.error(f"Unable to close port {port}") - - stdout, stderr, rc = call(ctx, ["rm", "-f", unit_path]) - if rc: - logger.error(f"Unable to remove the systemd file @ {unit_path}") - else: - logger.info(f"removed systemd unit file @ {unit_path}") - stdout, stderr, rc = call(ctx, ["systemctl", "daemon-reload"]) - - -def command_exporter(ctx: CephadmContext): - exporter = CephadmDaemon(ctx, ctx.fsid, daemon_id=ctx.id, port=ctx.port) - - if ctx.fsid not in os.listdir(ctx.data_dir): - raise Error(f"cluster fsid '{ctx.fsid}' not found in '{ctx.data_dir}'") - - exporter.run() - - -################################## - -def systemd_target_state(target_name: str, subsystem: str = 'ceph') -> bool: - # TODO: UNITTEST - return os.path.exists( - os.path.join( - UNIT_DIR, - f"{subsystem}.target.wants", - target_name - ) - ) - - -@infer_fsid -def command_maintenance(ctx: CephadmContext): - if not ctx.fsid: - raise Error('must pass --fsid to specify cluster') - - target = f"ceph-{ctx.fsid}.target" - - if ctx.maintenance_action.lower() == 'enter': - logger.info("Requested to place host into maintenance") - if systemd_target_state(target): - _out, _err, code = call(ctx, - ['systemctl', 'disable', target], - verbosity=CallVerbosity.DEBUG - ) - if code: - logger.error(f"Failed to disable the {target} target") - return "failed - to disable the target" - else: - # stopping a target waits by default - _out, _err, code = call(ctx, - ['systemctl', 'stop', target], - verbosity=CallVerbosity.DEBUG - ) - if code: - logger.error(f"Failed to stop the {target} target") - return "failed - to disable the target" - else: - return f"success - systemd target {target} disabled" - - else: - return "skipped - target already disabled" - - else: - logger.info("Requested to exit maintenance state") - # exit maintenance request - if not systemd_target_state(target): - _out, _err, code = call(ctx, - ['systemctl', 'enable', target], - verbosity=CallVerbosity.DEBUG - ) - if code: - logger.error(f"Failed to enable the {target} target") - return "failed - unable to enable the target" - else: - # starting a target waits by default - _out, _err, code = call(ctx, - ['systemctl', 'start', target], - verbosity=CallVerbosity.DEBUG - ) - if code: - logger.error(f"Failed to start the {target} target") - return "failed - unable to start the target" - else: - return f"success - systemd target {target} enabled and started" - - -################################## def _get_parser(): # type: () -> argparse.ArgumentParser @@ -7133,11 +5786,6 @@ def _get_parser(): '--keyring', '-k', help='ceph.keyring to pass through to the container') parser_ceph_volume.add_argument( - '--log-output', - action='store_true', - default=True, - help='suppress ceph volume output from the log') - parser_ceph_volume.add_argument( 'command', nargs=argparse.REMAINDER, help='command') @@ -7320,14 +5968,6 @@ def _get_parser(): '--container-init', action='store_true', help='Run podman/docker with `--init`') - parser_bootstrap.add_argument( - '--with-exporter', - action='store_true', - help='Automatically deploy cephadm metadata exporter to each node') - parser_bootstrap.add_argument( - '--exporter-config', - action=CustomValidation, - help=f'Exporter configuration information in JSON format (providing: {", ".join(CephadmDaemon.config_requirements)}, port information)') parser_deploy = subparsers.add_parser( 'deploy', help='deploy a daemon') @@ -7449,46 +6089,6 @@ def _get_parser(): 'gather-facts', help='gather and return host related information (JSON format)') parser_gather_facts.set_defaults(func=command_gather_facts) - parser_exporter = subparsers.add_parser( - 'exporter', help='Start cephadm in exporter mode (web service), providing host/daemon/disk metadata') - parser_exporter.add_argument( - '--fsid', - required=True, - type=str, - help='fsid of the cephadm exporter to run against') - parser_exporter.add_argument( - '--port', - type=int, - default=int(CephadmDaemon.default_port), - help='port number for the cephadm exporter service') - parser_exporter.add_argument( - '--id', - type=str, - default=get_hostname().split('.')[0], - help='daemon identifer for the exporter') - parser_exporter.set_defaults(func=command_exporter) - - parser_maintenance = subparsers.add_parser( - 'host-maintenance', help='Manage the maintenance state of a host') - parser_maintenance.add_argument( - '--fsid', - help='cluster FSID') - parser_maintenance.add_argument( - "maintenance_action", - type=str, - choices=['enter', 'exit'], - help="Maintenance action - enter maintenance, or exit maintenance") - parser_maintenance.set_defaults(func=command_maintenance) - - parser_verify_prereqs = subparsers.add_parser( - 'verify-prereqs', - help='verify system prerequisites for a given service are met on this host') - parser_verify_prereqs.set_defaults(func=command_verify_prereqs) - parser_verify_prereqs.add_argument( - '--daemon-type', - required=True, - help='service type of service to whose prereqs will be checked') - return parser @@ -7500,18 +6100,12 @@ def _parse_args(av): return args -def cephadm_init_ctx(args: List[str]) -> Optional[CephadmContext]: - - ctx = CephadmContext() - ctx.set_args(_parse_args(args)) - return ctx - - -def cephadm_init(args: List[str]) -> Optional[CephadmContext]: +if __name__ == "__main__": - global logger - ctx = cephadm_init_ctx(args) - assert ctx is not None + # root? + if os.geteuid() != 0: + sys.stderr.write('ERROR: cephadm should be run as root\n') + sys.exit(1) # Logger configuration if not os.path.exists(LOG_DIR): @@ -7519,62 +6113,47 @@ def cephadm_init(args: List[str]) -> Optional[CephadmContext]: dictConfig(logging_config) logger = logging.getLogger() - if ctx.verbose: + # allow argv to be injected + try: + av = injected_argv # type: ignore + except NameError: + av = sys.argv[1:] + logger.debug("%s\ncephadm %s" % ("-" * 80, av)) + args = _parse_args(av) + + # More verbose console output + if args.verbose: for handler in logger.handlers: - if handler.name == "console": - handler.setLevel(logging.DEBUG) + if handler.name == "console": + handler.setLevel(logging.DEBUG) - if not ctx.has_function(): - sys.stderr.write("No command specified; pass -h or --help for usage\n") - return None + if 'func' not in args: + sys.stderr.write('No command specified; pass -h or --help for usage\n') + sys.exit(1) - ctx.container_path = "" - if ctx.func != command_check_host: - if ctx.docker: - ctx.container_path = find_program("docker") + # podman or docker? + if args.func != command_check_host: + if args.docker: + container_path = find_program('docker') else: for i in CONTAINER_PREFERENCE: try: - ctx.container_path = find_program(i) + container_path = find_program(i) break except Exception as e: - logger.debug("Could not locate %s: %s" % (i, e)) - if not ctx.container_path and ctx.func != command_prepare_host\ - and ctx.func != command_add_repo: - sys.stderr.write("Unable to locate any of %s\n" % - CONTAINER_PREFERENCE) - return None - - return ctx - - -def main(): - - # root? - if os.geteuid() != 0: - sys.stderr.write('ERROR: cephadm should be run as root\n') - sys.exit(1) - - av: List[str] = [] - try: - av = injected_argv # type: ignore - except NameError: - av = sys.argv[1:] - - ctx = cephadm_init(av) - if not ctx: # error, exit - sys.exit(1) + logger.debug('Could not locate %s: %s' % (i, e)) + if not container_path and args.func != command_prepare_host\ + and args.func != command_add_repo: + sys.stderr.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE) + sys.exit(1) try: - r = ctx.func(ctx) + r = args.func() except Error as e: - if ctx.verbose: + if args.verbose: raise sys.stderr.write('ERROR: %s\n' % e) sys.exit(1) if not r: r = 0 sys.exit(r) - -if __name__ == "__main__": - main() diff --git a/SPECS/cephadm.spec b/SPECS/cephadm.spec index 6db8521..e50a9c7 100644 --- a/SPECS/cephadm.spec +++ b/SPECS/cephadm.spec @@ -1,14 +1,14 @@ # Upstream ceph commit upon which this package is based: -# patches_base=74275226ac79999bfd40e683dc9a1309e76033bf +# patches_base=353d6970361a26381539b74a94bba8ac32666dc1 Name: cephadm Epoch: 2 -Version: 16.1.0 -Release: 100%{?dist} +Version: 15.2.9 +Release: 46%{?dist} Summary: Utility to bootstrap Ceph clusters License: LGPL-2.1 URL: https://ceph.io -Source0: https://github.com/ceph/ceph/raw/74275226ac79999bfd40e683dc9a1309e76033bf/src/cephadm/cephadm +Source0: https://github.com/ceph/ceph/raw/353d6970361a26381539b74a94bba8ac32666dc1/src/cephadm/cephadm Source1: COPYING-LGPL2.1 BuildArch: noarch @@ -53,6 +53,5 @@ exit 0 %attr(0600,cephadm,cephadm) %{_sharedstatedir}/cephadm/.ssh/authorized_keys %changelog -* Wed Feb 10 2021 Ken Dreyer - 16.1.0-100 -- initial package - +* Tue Mar 02 2021 Ken Dreyer - 2:15.2.9-46 +- initial octopus package