diff --git a/SOURCES/cephadm b/SOURCES/cephadm
index 524c038..a7b664c 100644
--- a/SOURCES/cephadm
+++ b/SOURCES/cephadm
@@ -61,6 +61,7 @@ DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
+SYSCTL_DIR = '/usr/lib/sysctl.d'
UNIT_DIR = '/etc/systemd/system'
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
@@ -109,6 +110,7 @@ class BaseConfig:
self.data_dir: str = DATA_DIR
self.log_dir: str = LOG_DIR
self.logrotate_dir: str = LOGROTATE_DIR
+ self.sysctl_dir: str = SYSCTL_DIR
self.unit_dir: str = UNIT_DIR
self.verbose: bool = False
self.timeout: Optional[int] = DEFAULT_TIMEOUT
@@ -210,11 +212,9 @@ logging_config = {
},
'log_file': {
'level': 'DEBUG',
- 'class': 'logging.handlers.RotatingFileHandler',
+ 'class': 'logging.handlers.WatchedFileHandler',
'formatter': 'cephadm',
'filename': '%s/cephadm.log' % LOG_DIR,
- 'maxBytes': 1024000,
- 'backupCount': 1,
}
},
'loggers': {
@@ -249,6 +249,18 @@ class Ceph(object):
##################################
+class OSD(object):  # holder for OSD-specific host settings
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:  # lines install_sysctl() writes for the 'osd' daemon type
+ return [
+ '# allow a large number of OSDs',
+ 'fs.aio-max-nr = 1048576',  # system-wide limit on concurrent async I/O requests
+ 'kernel.pid_max = 4194304',  # PID allocation limit
+ ]
+
+##################################
+
+
class Monitoring(object):
"""Define the configs for the monitoring containers"""
@@ -349,7 +361,7 @@ def populate_files(config_dir, config_files, uid, gid):
config_file = os.path.join(config_dir, fname)
config_content = dict_get_join(config_files, fname)
logger.info('Write file: %s' % (config_file))
- with open(config_file, 'w') as f:
+ with open(config_file, 'w', encoding='utf-8') as f:
os.fchown(f.fileno(), uid, gid)
os.fchmod(f.fileno(), 0o600)
f.write(config_content)
@@ -491,36 +503,6 @@ class NFSGanesha(object):
os.fchown(f.fileno(), uid, gid)
f.write(self.rgw.get('keyring', ''))
- def get_rados_grace_container(self, action):
- # type: (str) -> CephContainer
- """Container for a ganesha action on the grace db"""
- entrypoint = '/usr/bin/ganesha-rados-grace'
-
- assert self.pool
- args = ['--pool', self.pool]
- if self.namespace:
- args += ['--ns', self.namespace]
- if self.userid:
- args += ['--userid', self.userid]
- args += [action, self.get_daemon_name()]
-
- data_dir = get_data_dir(self.fsid, self.ctx.data_dir,
- self.daemon_type, self.daemon_id)
- volume_mounts = self.get_container_mounts(data_dir)
- envs = self.get_container_envs()
-
- logger.info('Creating RADOS grace for action: %s' % action)
- c = CephContainer(
- self.ctx,
- image=self.image,
- entrypoint=entrypoint,
- args=args,
- volume_mounts=volume_mounts,
- cname=self.get_container_name(desc='grace-%s' % action),
- envs=envs
- )
- return c
-
##################################
@@ -732,6 +714,13 @@ class HAproxy(object):
mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
return mounts
+ @staticmethod
+ def get_sysctl_settings() -> List[str]:  # lines install_sysctl() writes for the 'haproxy' daemon type
+ return [
+ '# IP forwarding',
+ 'net.ipv4.ip_forward = 1',
+ ]
+
##################################
@@ -811,12 +800,12 @@ class Keepalived(object):
return envs
@staticmethod
- def get_prestart():
- return (
- '# keepalived needs IP forwarding and non-local bind\n'
- 'sysctl net.ipv4.ip_forward=1\n'
- 'sysctl net.ipv4.ip_nonlocal_bind=1\n'
- )
+ def get_sysctl_settings() -> List[str]:  # lines install_sysctl() writes for the 'keepalived' daemon type
+ return [
+ '# IP forwarding and non-local bind',
+ 'net.ipv4.ip_forward = 1',
+ 'net.ipv4.ip_nonlocal_bind = 1',
+ ]
def extract_uid_gid_keepalived(self):
# better directory for this?
@@ -1190,7 +1179,7 @@ class FileLock(object):
lock_id, lock_filename, poll_intervall
)
time.sleep(poll_intervall)
- except: # noqa
+ except Exception:
# Something did go wrong, so decrement the counter.
self._lock_counter = max(0, self._lock_counter - 1)
@@ -2061,10 +2050,11 @@ def check_units(ctx, units, enabler=None):
def is_container_running(ctx: CephadmContext, name: str) -> bool:
- out, err, ret = call_throws(ctx, [
- ctx.container_engine.path, 'ps',
- '--format', '{{.Names}}'])
- return name in out
+ out, err, ret = call(ctx, [  # call(), not call_throws(): presumably inspect fails for an unknown container -- confirm
+ ctx.container_engine.path, 'container', 'inspect',
+ '--format', '{{.State.Status}}', name
+ ])
+ return out.strip() == 'running'  # inspect output ends with a newline; strip it before comparing
def get_legacy_config_fsid(cluster, legacy_dir=None):
@@ -2163,7 +2153,6 @@ def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
if daemon_type in Monitoring.components.keys():
config_json: Dict[str, Any] = get_parm(ctx.config_json)
- required_files = Monitoring.components[daemon_type].get('config-json-files', list())
# Set up directories specific to the monitoring component
config_dir = ''
@@ -2192,10 +2181,14 @@ def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
# populate the config directory for the component from the config-json
- for fname in required_files:
- if 'files' in config_json: # type: ignore
+ if 'files' in config_json:
+ for fname in config_json['files']:
content = dict_get_join(config_json['files'], fname)
- with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
+ if os.path.isabs(fname):
+ fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
+ else:
+ fpath = os.path.join(data_dir_root, config_dir, fname)
+ with open(fpath, 'w', encoding='utf-8') as f:
os.fchown(f.fileno(), uid, gid)
os.fchmod(f.fileno(), 0o600)
f.write(content)
@@ -2267,8 +2260,8 @@ def get_config_and_keyring(ctx):
try:
with open(ctx.config, 'r') as f:
config = f.read()
- except FileNotFoundError:
- raise Error('config file: %s does not exist' % ctx.config)
+ except FileNotFoundError as e:
+ raise Error(e)
if 'key' in ctx and ctx.key:
keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
@@ -2276,8 +2269,8 @@ def get_config_and_keyring(ctx):
try:
with open(ctx.keyring, 'r') as f:
keyring = f.read()
- except FileNotFoundError:
- raise Error('keyring file: %s does not exist' % ctx.keyring)
+ except FileNotFoundError as e:
+ raise Error(e)
return config, keyring
@@ -2682,7 +2675,7 @@ def deploy_daemon_units(
# cmd
data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
with open(data_dir + '/unit.run.new', 'w') as f, \
- open(data_dir + '/unit.meta.new', 'w') as metaf:
+ open(data_dir + '/unit.meta.new', 'w') as metaf:
f.write('set -e\n')
if daemon_type in Ceph.daemons:
@@ -2718,18 +2711,11 @@ def deploy_daemon_units(
memory_limit=ctx.memory_limit,
)
_write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
- elif daemon_type == NFSGanesha.daemon_type:
- # add nfs to the rados grace db
- nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
- prestart = nfs_ganesha.get_rados_grace_container('add')
- _write_container_cmd_to_bash(ctx, f, prestart, 'add daemon to rados grace')
elif daemon_type == CephIscsi.daemon_type:
f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
tcmu_container = ceph_iscsi.get_tcmu_runner_container()
_write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runnter container', background=True)
- elif daemon_type == Keepalived.daemon_type:
- f.write(Keepalived.get_prestart())
_write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))
@@ -2771,11 +2757,6 @@ def deploy_daemon_units(
daemon_id),
)
_write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
- elif daemon_type == NFSGanesha.daemon_type:
- # remove nfs from the rados grace db
- nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
- poststop = nfs_ganesha.get_rados_grace_container('remove')
- _write_container_cmd_to_bash(ctx, f, poststop, 'remove daemon from rados grace')
elif daemon_type == CephIscsi.daemon_type:
# make sure we also stop the tcmu container
ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
@@ -2793,6 +2774,9 @@ def deploy_daemon_units(
os.rename(data_dir + '/unit.image.new',
data_dir + '/unit.image')
+ # sysctl
+ install_sysctl(ctx, fsid, daemon_type)
+
# systemd
install_base_units(ctx, fsid)
unit = get_unit_file(ctx, fsid)
@@ -2935,6 +2919,36 @@ def update_firewalld(ctx, daemon_type):
firewall.apply_rules()
+def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
+ """
+ Write the sysctl settings for daemon_type to a per-cluster file in ctx.sysctl_dir and load them
+ """
+ def _write(conf: Path, lines: List[str]) -> None:  # wrap lines in the cephadm header and write them to conf
+ lines = [
+ '# created by cephadm',
+ '',
+ *lines,
+ '',  # trailing empty entry makes the joined text end with a newline
+ ]
+ with open(conf, 'w') as f:
+ f.write('\n'.join(lines))
+
+ conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
+ lines: Optional[List] = None  # stays None for daemon types without settings
+
+ if daemon_type == 'osd':
+ lines = OSD.get_sysctl_settings()
+ elif daemon_type == 'haproxy':
+ lines = HAproxy.get_sysctl_settings()
+ elif daemon_type == 'keepalived':
+ lines = Keepalived.get_sysctl_settings()
+
+ # apply the sysctl settings
+ if lines:  # other daemon types write nothing and skip the reload
+ _write(conf, lines)
+ call_throws(ctx, ['sysctl', '--system'])  # --system loads settings from all system sysctl configuration files
+
+
def install_base_units(ctx, fsid):
# type: (CephadmContext, str) -> None
"""
@@ -3102,13 +3116,13 @@ class CephContainer:
'run',
'--rm',
'--ipc=host',
+ # some containers (ahem, haproxy) override this, but we want a fast
+ # shutdown always (and, more importantly, a successful exit even if we
+ # fall back to SIGKILL).
+ '--stop-signal=SIGTERM',
]
if isinstance(self.ctx.container_engine, Podman):
- # podman adds the container *name* to /etc/hosts (for 127.0.1.1)
- # by default, which makes python's socket.getfqdn() return that
- # instead of a valid hostname.
- cmd_args.append('--no-hosts')
if os.path.exists('/etc/ceph/podman-auth.json'):
cmd_args.append('--authfile=/etc/ceph/podman-auth.json')
@@ -3966,11 +3980,7 @@ def command_bootstrap(ctx):
except PermissionError:
raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')
- if ctx.config and os.path.exists(ctx.config):
- with open(ctx.config) as f:
- user_conf = f.read()
- else:
- user_conf = None
+ (user_conf, _) = get_config_and_keyring(ctx)
if not ctx.skip_prepare_host:
command_prepare_host(ctx)
@@ -4072,7 +4082,7 @@ def command_bootstrap(ctx):
# the mgr (e.g. mgr/cephadm/container_image_prometheus)
# they don't seem to be stored if there isn't a mgr yet.
# Since re-assimilating the same conf settings should be
- # idempotent we can just do it aain here.
+ # idempotent we can just do it again here.
with tempfile.NamedTemporaryFile(buffering=0) as tmp:
tmp.write(user_conf.encode('utf-8'))
cli(['config', 'assimilate-conf',
@@ -4329,8 +4339,8 @@ def command_deploy(ctx):
ports=daemon_ports)
elif daemon_type == NFSGanesha.daemon_type:
- if not ctx.reconfig and not redeploy:
- daemon_ports.extend(NFSGanesha.port_map.values())
+ if not ctx.reconfig and not redeploy and not daemon_ports:
+ daemon_ports = list(NFSGanesha.port_map.values())
config, keyring = get_config_and_keyring(ctx)
# TODO: extract ganesha uid/gid (997, 994) ?
@@ -4489,6 +4499,10 @@ def command_shell(ctx):
os.path.join(home, f))
mounts[home] = '/root'
+ for i in ctx.volume:
+ a, b = i.split(':', 1)
+ mounts[a] = b
+
c = CephContainer(
ctx,
image=ctx.image,
@@ -5507,10 +5521,10 @@ def command_rm_cluster(ctx):
_zap_osds(ctx)
# rm units
- call_throws(ctx, ['rm', '-f', ctx.unit_dir + # noqa: W504
- '/ceph-%s@.service' % ctx.fsid])
- call_throws(ctx, ['rm', '-f', ctx.unit_dir + # noqa: W504
- '/ceph-%s.target' % ctx.fsid])
+ call_throws(ctx, ['rm', '-f', ctx.unit_dir
+ + '/ceph-%s@.service' % ctx.fsid])
+ call_throws(ctx, ['rm', '-f', ctx.unit_dir
+ + '/ceph-%s.target' % ctx.fsid])
call_throws(ctx, ['rm', '-rf',
ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
# rm data
@@ -5519,12 +5533,21 @@ def command_rm_cluster(ctx):
if not ctx.keep_logs:
# rm logs
call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
- call_throws(ctx, ['rm', '-rf', ctx.log_dir + # noqa: W504
- '/*.wants/ceph-%s@*' % ctx.fsid])
+ call_throws(ctx, ['rm', '-rf', ctx.log_dir
+ + '/*.wants/ceph-%s@*' % ctx.fsid])
# rm logrotate config
call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])
+ # rm cephadm logrotate config if last cluster on host
+ if not os.listdir(ctx.data_dir):
+ call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])
+
+ # rm sysctl settings
+ sysctl_dir = Path(ctx.sysctl_dir)
+ for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
+ p.unlink()
+
# clean up config, keyring, and pub key files
files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
@@ -6712,16 +6735,6 @@ def command_gather_facts(ctx: CephadmContext):
host = HostFacts(ctx)
print(host.dump())
-##################################
-
-
-def command_verify_prereqs(ctx: CephadmContext):
- if ctx.service_type == 'haproxy' or ctx.service_type == 'keepalived':
- out, err, code = call(
- ctx, ['sysctl', '-n', 'net.ipv4.ip_nonlocal_bind']
- )
- if out.strip() != '1':
- raise Error('net.ipv4.ip_nonlocal_bind not set to 1')
##################################
@@ -6803,12 +6816,14 @@ class CephadmDaemonHandler(BaseHTTPRequestHandler):
The token is installed at deployment time and must be provided to
ensure we only respond to callers who know our token i.e. mgr
"""
+
def wrapper(self, *args, **kwargs):
auth = self.headers.get('Authorization', None)
if auth != 'Bearer ' + self.server.token:
self.send_error(401)
return
f(self, *args, **kwargs)
+
return wrapper
def _help_page(self):
@@ -7483,6 +7498,10 @@ def _get_parser():
'--logrotate-dir',
default=LOGROTATE_DIR,
help='location of logrotate configuration files')
+ parser.add_argument(
+ '--sysctl-dir',
+ default=SYSCTL_DIR,
+ help='location of sysctl configuration files')
parser.add_argument(
'--unit-dir',
default=UNIT_DIR,
@@ -7662,6 +7681,11 @@ def _get_parser():
action='append',
default=[],
help='set environment variable')
+ parser_shell.add_argument(
+ '--volume', '-v',
+ action='append',
+ default=[],
+ help='mount a volume into the shell container (SRC:DST, may be repeated)')
parser_shell.add_argument(
'command', nargs=argparse.REMAINDER,
help='command (optional)')
@@ -8083,15 +8107,6 @@ def _get_parser():
help='Maintenance action - enter maintenance, or exit maintenance')
parser_maintenance.set_defaults(func=command_maintenance)
- parser_verify_prereqs = subparsers.add_parser(
- 'verify-prereqs',
- help='verify system prerequisites for a given service are met on this host')
- parser_verify_prereqs.set_defaults(func=command_verify_prereqs)
- parser_verify_prereqs.add_argument(
- '--daemon-type',
- required=True,
- help='service type of service to whose prereqs will be checked')
-
return parser
@@ -8135,6 +8150,18 @@ def cephadm_init(args: List[str]) -> Optional[CephadmContext]:
dictConfig(logging_config)
logger = logging.getLogger()
+ if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
+ with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
+ f.write("""# created by cephadm
+/var/log/ceph/cephadm.log {
+ rotate 7
+ daily
+ compress
+ missingok
+ notifempty
+}
+""")
+
if ctx.verbose:
for handler in logger.handlers:
if handler.name == 'console':