| From 77a273e02c1c811485d13ddca0f844512aed2cff Mon Sep 17 00:00:00 2001 |
| From: Jan Synacek <jsynacek@redhat.com> |
| Date: Wed, 12 Feb 2020 12:58:54 +0100 |
| Subject: [PATCH] pid1: make sure to restore correct default values for some |
| rlimits |
| |
| Commit fb39af4ce42d7ef9af63009f271f404038703704 forgot to restore the default |
| rlimit values (RLIMIT_NOFILE and RLIMIT_MEMLOCK) while PID1 is reloading. |
| |
| This patch extracts the code in charge of initializing the default values for |
| those rlimits into dedicated functions, which take care of their |
| initialization. |
| |
| These functions are then called in parse_configuration() so we make sure that |
| the default values for these rlimits get restored every time PID1 is reloading |
| its configuration. |
| |
| (cherry picked from commit a9fd4cd1206832a61aaf61fff583bb133e6cb965) |
| Resolves: #1789930 |
| |
| src/core/main.c | 135 +++++++++++++++++++++++++++++++++++++----------- |
| 1 file changed, 106 insertions(+), 29 deletions(-) |
| |
| diff --git a/src/core/main.c b/src/core/main.c |
| index c83249a8dc..b8c1e567ad 100644 |
| |
| |
| @@ -136,7 +136,8 @@ static EmergencyAction arg_cad_burst_action; |
| static CPUSet arg_cpu_affinity; |
| static NUMAPolicy arg_numa_policy; |
| |
| -static int parse_configuration(void); |
| +static int parse_configuration(const struct rlimit *saved_rlimit_nofile, |
| + const struct rlimit *saved_rlimit_memlock); |
| |
| _noreturn_ static void freeze_or_reboot(void) { |
| |
| @@ -1149,25 +1150,6 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching |
| static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { |
| int r, nr; |
| |
| - assert(saved_rlimit); |
| - |
| - /* Save the original RLIMIT_NOFILE so that we can reset it |
| - * later when transitioning from the initrd to the main |
| - * systemd or suchlike. */ |
| - if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) |
| - return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m"); |
| - |
| - /* Make sure forked processes get the default kernel setting */ |
| - if (!arg_default_rlimit[RLIMIT_NOFILE]) { |
| - struct rlimit *rl; |
| - |
| - rl = newdup(struct rlimit, saved_rlimit, 1); |
| - if (!rl) |
| - return log_oom(); |
| - |
| - arg_default_rlimit[RLIMIT_NOFILE] = rl; |
| - } |
| - |
| /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows */ |
| nr = read_nr_open(); |
| r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(nr)); |
| @@ -1180,16 +1162,12 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { |
| static int bump_rlimit_memlock(struct rlimit *saved_rlimit) { |
| int r; |
| |
| - assert(saved_rlimit); |
| assert(getuid() == 0); |
| |
| /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which |
| * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's |
| * bump the value high enough for the root user. */ |
| |
| - if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0) |
| - return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m"); |
| - |
| r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL)); |
| if (r < 0) |
| return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m"); |
| @@ -1651,6 +1629,8 @@ static void do_reexecute( |
| |
| static int invoke_main_loop( |
| Manager *m, |
| + const struct rlimit *saved_rlimit_nofile, |
| + const struct rlimit *saved_rlimit_memlock, |
| bool *ret_reexecute, |
| int *ret_retval, /* Return parameters relevant for shutting down */ |
| const char **ret_shutdown_verb, /* … */ |
| @@ -1662,6 +1642,8 @@ static int invoke_main_loop( |
| int r; |
| |
| assert(m); |
| + assert(saved_rlimit_nofile); |
| + assert(saved_rlimit_memlock); |
| assert(ret_reexecute); |
| assert(ret_retval); |
| assert(ret_shutdown_verb); |
| @@ -1691,7 +1673,7 @@ static int invoke_main_loop( |
| saved_log_level = m->log_level_overridden ? log_get_max_level() : -1; |
| saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID; |
| |
| - (void) parse_configuration(); |
| + (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock); |
| |
| set_manager_defaults(m); |
| |
| @@ -1983,6 +1965,80 @@ static int do_queue_default_job( |
| return 0; |
| } |
| |
| +static void save_rlimits(struct rlimit *saved_rlimit_nofile, |
| + struct rlimit *saved_rlimit_memlock) { |
| + /* Query the current RLIMIT_NOFILE/RLIMIT_MEMLOCK into the output structs; failures are only logged. */ |
| + assert(saved_rlimit_nofile); |
| + assert(saved_rlimit_memlock); |
| + |
| + if (getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile) < 0) |
| + log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m"); |
| + |
| + if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock) < 0) |
| + log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m"); |
| +} |
| + |
| +static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) { |
| + struct rlimit *rl; |
| + /* Fill in a default RLIMIT_NOFILE for forked processes, unless one is already set. */ |
| + if (arg_default_rlimit[RLIMIT_NOFILE]) |
| + return; |
| + |
| + /* Make sure forked processes get limits based on the original kernel setting */ |
| + |
| + rl = newdup(struct rlimit, saved_rlimit_nofile, 1); |
| + if (!rl) { |
| + log_oom(); |
| + return; |
| + } |
| + |
| + /* Bump the hard limit for system services to a substantially higher value. The default |
| + * hard limit current kernels set is pretty low (4K), mostly for historical |
| + * reasons. According to kernel developers, the fd handling in recent kernels has been |
| + * optimized substantially enough, so that we can bump the limit now, without paying too |
| + * high a price in memory or performance. Note however that we only bump the hard limit, |
| + * not the soft limit. That's because select() works the way it works, and chokes on fds |
| + * >= 1024. If we bumped the soft limit globally, unsuspecting programs might accidentally |
| + * get fds higher than what they can process using select(). By only bumping the hard |
| + * limit but leaving the soft limit as it is we avoid this pitfall: programs that are |
| + * written by folks aware of the select() problem (and thus use poll()/epoll instead of |
| + * select(), the way everybody should) can explicitly opt into high fds by bumping their |
| + * soft limit beyond 1024, to the hard limit we pass. The new hard limit is the larger of |
| + * the saved one and HIGH_RLIMIT_NOFILE, capped at what read_nr_open() reports. */ |
| + if (arg_system) { |
| + int nr; |
| + |
| + /* Get the underlying absolute limit the kernel enforces */ |
| + nr = read_nr_open(); |
| + |
| + rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE)); |
| + } |
| + |
| + /* If for some reason we were invoked with a soft limit above FD_SETSIZE (which should never |
| + * happen, but who knows what we get passed in from pam_limits when invoked as --user |
| + * instance), then lower what we pass on to not confuse our children */ |
| + rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE); |
| + |
| + arg_default_rlimit[RLIMIT_NOFILE] = rl; |
| +} |
| + |
| +static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) { |
| + struct rlimit *rl; |
| + |
| + /* Pass the original RLIMIT_MEMLOCK value down to invoked processes, unless a default is |
| + * already set. If duplicating the saved value fails, log_oom() is called and no default is set. */ |
| + if (arg_default_rlimit[RLIMIT_MEMLOCK]) |
| + return; |
| + |
| + rl = newdup(struct rlimit, saved_rlimit_memlock, 1); |
| + if (!rl) { |
| + log_oom(); |
| + return; |
| + } |
| + |
| + arg_default_rlimit[RLIMIT_MEMLOCK] = rl; |
| +} |
| + |
| static void reset_arguments(void) { |
| /* Frees/resets arg_* variables, with a few exceptions commented below. */ |
| |
| @@ -2040,9 +2096,13 @@ static void reset_arguments(void) { |
| numa_policy_reset(&arg_numa_policy); |
| } |
| |
| -static int parse_configuration(void) { |
| +static int parse_configuration(const struct rlimit *saved_rlimit_nofile, |
| + const struct rlimit *saved_rlimit_memlock) { |
| int r; |
| |
| + assert(saved_rlimit_nofile); |
| + assert(saved_rlimit_memlock); |
| + |
| arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U); |
| |
| /* Assign configuration defaults */ |
| @@ -2058,18 +2118,29 @@ static int parse_configuration(void) { |
| log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m"); |
| } |
| |
| + /* Initialize some default rlimits for services if they haven't been configured */ |
| + fallback_rlimit_nofile(saved_rlimit_nofile); |
| + fallback_rlimit_memlock(saved_rlimit_memlock); |
| + |
| /* Note that this also parses bits from the kernel command line, including "debug". */ |
| log_parse_environment(); |
| |
| return 0; |
| } |
| |
| -static int load_configuration(int argc, char **argv, const char **ret_error_message) { |
| +static int load_configuration( |
| + int argc, |
| + char **argv, |
| + const struct rlimit *saved_rlimit_nofile, |
| + const struct rlimit *saved_rlimit_memlock, |
| + const char **ret_error_message) { |
| int r; |
| |
| + assert(saved_rlimit_nofile); |
| + assert(saved_rlimit_memlock); |
| assert(ret_error_message); |
| |
| - (void) parse_configuration(); |
| + (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock); |
| |
| r = parse_argv(argc, argv); |
| if (r < 0) { |
| @@ -2403,11 +2474,15 @@ int main(int argc, char *argv[]) { |
| } |
| } |
| |
| + /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset them later when |
| + * transitioning from the initrd to the main systemd or suchlike. */ |
| + save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock); |
| + |
| /* Reset all signal handlers. */ |
| (void) reset_all_signal_handlers(); |
| (void) ignore_signals(SIGNALS_IGNORE, -1); |
| |
| - r = load_configuration(argc, argv, &error_message); |
| + r = load_configuration(argc, argv, &saved_rlimit_nofile, &saved_rlimit_memlock, &error_message); |
| if (r < 0) |
| goto finish; |
| |
| @@ -2522,6 +2597,8 @@ int main(int argc, char *argv[]) { |
| } |
| |
| (void) invoke_main_loop(m, |
| + &saved_rlimit_nofile, |
| + &saved_rlimit_memlock, |
| &reexecute, |
| &retval, |
| &shutdown_verb, |