|
|
5d2ee9 |
From 77a273e02c1c811485d13ddca0f844512aed2cff Mon Sep 17 00:00:00 2001
|
|
|
5d2ee9 |
From: Jan Synacek <jsynacek@redhat.com>
|
|
|
5d2ee9 |
Date: Wed, 12 Feb 2020 12:58:54 +0100
|
|
|
5d2ee9 |
Subject: [PATCH] pid1: make sure to restore correct default values for some
|
|
|
5d2ee9 |
rlimits
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
Commit fb39af4ce42d7ef9af63009f271f404038703704 forgot to restore the default
|
|
|
5d2ee9 |
rlimit values (RLIMIT_NOFILE and RLIMIT_MEMLOCK) while PID1 is reloading.
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
This patch extracts the code in charge of initializing the default values for
|
|
|
5d2ee9 |
those rlimits in order to create dedicated functions, which take care of their
|
|
|
5d2ee9 |
initialization.
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
These functions are then called in parse_configuration() so we make sure that
|
|
|
5d2ee9 |
the default values for these rlimits get restored every time PID1 is reloading
|
|
|
5d2ee9 |
its configuration.
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
(cherry picked from commit a9fd4cd1206832a61aaf61fff583bb133e6cb965)
|
|
|
5d2ee9 |
Resolves: #1789930
|
|
|
5d2ee9 |
---
|
|
|
5d2ee9 |
src/core/main.c | 135 +++++++++++++++++++++++++++++++++++++-----------
|
|
|
5d2ee9 |
1 file changed, 106 insertions(+), 29 deletions(-)
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
diff --git a/src/core/main.c b/src/core/main.c
|
|
|
5d2ee9 |
index c83249a8dc..b8c1e567ad 100644
|
|
|
5d2ee9 |
--- a/src/core/main.c
|
|
|
5d2ee9 |
+++ b/src/core/main.c
|
|
|
5d2ee9 |
@@ -136,7 +136,8 @@ static EmergencyAction arg_cad_burst_action;
|
|
|
5d2ee9 |
static CPUSet arg_cpu_affinity;
|
|
|
5d2ee9 |
static NUMAPolicy arg_numa_policy;
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
-static int parse_configuration(void);
|
|
|
5d2ee9 |
+static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
|
|
|
5d2ee9 |
+ const struct rlimit *saved_rlimit_memlock);
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
_noreturn_ static void freeze_or_reboot(void) {
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
@@ -1149,25 +1150,6 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching
|
|
|
5d2ee9 |
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
|
|
|
5d2ee9 |
int r, nr;
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
- assert(saved_rlimit);
|
|
|
5d2ee9 |
-
|
|
|
5d2ee9 |
- /* Save the original RLIMIT_NOFILE so that we can reset it
|
|
|
5d2ee9 |
- * later when transitioning from the initrd to the main
|
|
|
5d2ee9 |
- * systemd or suchlike. */
|
|
|
5d2ee9 |
- if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
|
|
|
5d2ee9 |
- return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
|
|
|
5d2ee9 |
-
|
|
|
5d2ee9 |
- /* Make sure forked processes get the default kernel setting */
|
|
|
5d2ee9 |
- if (!arg_default_rlimit[RLIMIT_NOFILE]) {
|
|
|
5d2ee9 |
- struct rlimit *rl;
|
|
|
5d2ee9 |
-
|
|
|
5d2ee9 |
- rl = newdup(struct rlimit, saved_rlimit, 1);
|
|
|
5d2ee9 |
- if (!rl)
|
|
|
5d2ee9 |
- return log_oom();
|
|
|
5d2ee9 |
-
|
|
|
5d2ee9 |
- arg_default_rlimit[RLIMIT_NOFILE] = rl;
|
|
|
5d2ee9 |
- }
|
|
|
5d2ee9 |
-
|
|
|
5d2ee9 |
/* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows */
|
|
|
5d2ee9 |
nr = read_nr_open();
|
|
|
5d2ee9 |
r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(nr));
|
|
|
5d2ee9 |
@@ -1180,16 +1162,12 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
|
|
|
5d2ee9 |
static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
|
|
|
5d2ee9 |
int r;
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
- assert(saved_rlimit);
|
|
|
5d2ee9 |
assert(getuid() == 0);
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
/* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
|
|
|
5d2ee9 |
* should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
|
|
|
5d2ee9 |
* bump the value high enough for the root user. */
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
- if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
|
|
|
5d2ee9 |
- return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
|
|
|
5d2ee9 |
-
|
|
|
5d2ee9 |
r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL));
|
|
|
5d2ee9 |
if (r < 0)
|
|
|
5d2ee9 |
return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
|
|
|
5d2ee9 |
@@ -1651,6 +1629,8 @@ static void do_reexecute(
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
static int invoke_main_loop(
|
|
|
5d2ee9 |
Manager *m,
|
|
|
5d2ee9 |
+ const struct rlimit *saved_rlimit_nofile,
|
|
|
5d2ee9 |
+ const struct rlimit *saved_rlimit_memlock,
|
|
|
5d2ee9 |
bool *ret_reexecute,
|
|
|
5d2ee9 |
int *ret_retval, /* Return parameters relevant for shutting down */
|
|
|
5d2ee9 |
const char **ret_shutdown_verb, /* … */
|
|
|
5d2ee9 |
@@ -1662,6 +1642,8 @@ static int invoke_main_loop(
|
|
|
5d2ee9 |
int r;
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
assert(m);
|
|
|
5d2ee9 |
+ assert(saved_rlimit_nofile);
|
|
|
5d2ee9 |
+ assert(saved_rlimit_memlock);
|
|
|
5d2ee9 |
assert(ret_reexecute);
|
|
|
5d2ee9 |
assert(ret_retval);
|
|
|
5d2ee9 |
assert(ret_shutdown_verb);
|
|
|
5d2ee9 |
@@ -1691,7 +1673,7 @@ static int invoke_main_loop(
|
|
|
5d2ee9 |
saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
|
|
|
5d2ee9 |
saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
- (void) parse_configuration();
|
|
|
5d2ee9 |
+ (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
set_manager_defaults(m);
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
@@ -1983,6 +1965,80 @@ static int do_queue_default_job(
|
|
|
5d2ee9 |
return 0;
|
|
|
5d2ee9 |
}
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
+static void save_rlimits(struct rlimit *saved_rlimit_nofile,
|
|
|
5d2ee9 |
+ struct rlimit *saved_rlimit_memlock) {
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ assert(saved_rlimit_nofile);
|
|
|
5d2ee9 |
+ assert(saved_rlimit_memlock);
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ if (getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile) < 0)
|
|
|
5d2ee9 |
+ log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock) < 0)
|
|
|
5d2ee9 |
+ log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
|
|
|
5d2ee9 |
+}
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
|
|
|
5d2ee9 |
+ struct rlimit *rl;
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ if (arg_default_rlimit[RLIMIT_NOFILE])
|
|
|
5d2ee9 |
+ return;
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ /* Make sure forked processes get limits based on the original kernel setting */
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
|
|
|
5d2ee9 |
+ if (!rl) {
|
|
|
5d2ee9 |
+ log_oom();
|
|
|
5d2ee9 |
+ return;
|
|
|
5d2ee9 |
+ }
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ /* Bump the hard limit for system services to a substantially higher value. The default
|
|
|
5d2ee9 |
+ * hard limit current kernels set is pretty low (4K), mostly for historical
|
|
|
5d2ee9 |
+ * reasons. According to kernel developers, the fd handling in recent kernels has been
|
|
|
5d2ee9 |
+ * optimized substantially enough, so that we can bump the limit now, without paying too
|
|
|
5d2ee9 |
+ * high a price in memory or performance. Note however that we only bump the hard limit,
|
|
|
5d2ee9 |
+ * not the soft limit. That's because select() works the way it works, and chokes on fds
|
|
|
5d2ee9 |
+ * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
|
|
|
5d2ee9 |
+ * unexpecting programs that they get fds higher than what they can process using
|
|
|
5d2ee9 |
+ * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
|
|
|
5d2ee9 |
+ * this pitfall: programs that are written by folks aware of the select() problem
|
|
|
5d2ee9 |
+ * (and thus use poll()/epoll instead of select(), the way everybody should) can
|
|
|
5d2ee9 |
+ * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
|
|
|
5d2ee9 |
+ * we pass. */
|
|
|
5d2ee9 |
+ if (arg_system) {
|
|
|
5d2ee9 |
+ int nr;
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ /* Get the underlying absolute limit the kernel enforces */
|
|
|
5d2ee9 |
+ nr = read_nr_open();
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
|
|
|
5d2ee9 |
+ }
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ /* If for some reason we were invoked with a soft limit above 1024 (which should never
|
|
|
5d2ee9 |
+ * happen!, but who knows what we get passed in from pam_limit when invoked as --user
|
|
|
5d2ee9 |
+ * instance), then lower what we pass on to not confuse our children */
|
|
|
5d2ee9 |
+ rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ arg_default_rlimit[RLIMIT_NOFILE] = rl;
|
|
|
5d2ee9 |
+}
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
|
|
|
5d2ee9 |
+ struct rlimit *rl;
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ /* Pass the original value down to invoked processes */
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ if (arg_default_rlimit[RLIMIT_MEMLOCK])
|
|
|
5d2ee9 |
+ return;
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
|
|
|
5d2ee9 |
+ if (!rl) {
|
|
|
5d2ee9 |
+ log_oom();
|
|
|
5d2ee9 |
+ return;
|
|
|
5d2ee9 |
+ }
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
+ arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
|
|
|
5d2ee9 |
+}
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
static void reset_arguments(void) {
|
|
|
5d2ee9 |
/* Frees/resets arg_* variables, with a few exceptions commented below. */
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
@@ -2040,9 +2096,13 @@ static void reset_arguments(void) {
|
|
|
5d2ee9 |
numa_policy_reset(&arg_numa_policy);
|
|
|
5d2ee9 |
}
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
-static int parse_configuration(void) {
|
|
|
5d2ee9 |
+static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
|
|
|
5d2ee9 |
+ const struct rlimit *saved_rlimit_memlock) {
|
|
|
5d2ee9 |
int r;
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
+ assert(saved_rlimit_nofile);
|
|
|
5d2ee9 |
+ assert(saved_rlimit_memlock);
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
/* Assign configuration defaults */
|
|
|
5d2ee9 |
@@ -2058,18 +2118,29 @@ static int parse_configuration(void) {
|
|
|
5d2ee9 |
log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
|
|
|
5d2ee9 |
}
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
+ /* Initialize some default rlimits for services if they haven't been configured */
|
|
|
5d2ee9 |
+ fallback_rlimit_nofile(saved_rlimit_nofile);
|
|
|
5d2ee9 |
+ fallback_rlimit_memlock(saved_rlimit_memlock);
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
/* Note that this also parses bits from the kernel command line, including "debug". */
|
|
|
5d2ee9 |
log_parse_environment();
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
return 0;
|
|
|
5d2ee9 |
}
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
-static int load_configuration(int argc, char **argv, const char **ret_error_message) {
|
|
|
5d2ee9 |
+static int load_configuration(
|
|
|
5d2ee9 |
+ int argc,
|
|
|
5d2ee9 |
+ char **argv,
|
|
|
5d2ee9 |
+ const struct rlimit *saved_rlimit_nofile,
|
|
|
5d2ee9 |
+ const struct rlimit *saved_rlimit_memlock,
|
|
|
5d2ee9 |
+ const char **ret_error_message) {
|
|
|
5d2ee9 |
int r;
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
+ assert(saved_rlimit_nofile);
|
|
|
5d2ee9 |
+ assert(saved_rlimit_memlock);
|
|
|
5d2ee9 |
assert(ret_error_message);
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
- (void) parse_configuration();
|
|
|
5d2ee9 |
+ (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
r = parse_argv(argc, argv);
|
|
|
5d2ee9 |
if (r < 0) {
|
|
|
5d2ee9 |
@@ -2403,11 +2474,15 @@ int main(int argc, char *argv[]) {
|
|
|
5d2ee9 |
}
|
|
|
5d2ee9 |
}
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
+ /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset them later when
|
|
|
5d2ee9 |
+ * transitioning from the initrd to the main systemd or suchlike. */
|
|
|
5d2ee9 |
+ save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
|
|
|
5d2ee9 |
+
|
|
|
5d2ee9 |
/* Reset all signal handlers. */
|
|
|
5d2ee9 |
(void) reset_all_signal_handlers();
|
|
|
5d2ee9 |
(void) ignore_signals(SIGNALS_IGNORE, -1);
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
- r = load_configuration(argc, argv, &error_message);
|
|
|
5d2ee9 |
+ r = load_configuration(argc, argv, &saved_rlimit_nofile, &saved_rlimit_memlock, &error_message);
|
|
|
5d2ee9 |
if (r < 0)
|
|
|
5d2ee9 |
goto finish;
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
@@ -2522,6 +2597,8 @@ int main(int argc, char *argv[]) {
|
|
|
5d2ee9 |
}
|
|
|
5d2ee9 |
|
|
|
5d2ee9 |
(void) invoke_main_loop(m,
|
|
|
5d2ee9 |
+ &saved_rlimit_nofile,
|
|
|
5d2ee9 |
+ &saved_rlimit_memlock,
|
|
|
5d2ee9 |
&reexecute,
|
|
|
5d2ee9 |
&retval,
|
|
|
5d2ee9 |
&shutdown_verb,
|