b8c242
From 62678ec1aa02b53cb116b6f7dd72a54bf61153b7 Mon Sep 17 00:00:00 2001
b8c242
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
b8c242
Date: Tue, 2 Nov 2021 18:18:21 +0100
b8c242
Subject: [PATCH] procfs-util: fix confusion wrt. quantity limit and maximum
b8c242
 value
b8c242
MIME-Version: 1.0
b8c242
Content-Type: text/plain; charset=UTF-8
b8c242
Content-Transfer-Encoding: 8bit
b8c242
b8c242
From packit/rawhide-arm64 logs:
b8c242
Assertion 'limit >= INT_MAX || get_process_ppid(limit+1, NULL) == -ESRCH' failed at src/test/test-process-util.c:855, function test_get_process_ppid(). Aborting.
b8c242
――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
b8c242
b8c242
The kernel has a few different limits. In particular kernel.threads-max can be
b8c242
set to some lower value, and kernel.pid_max can be set to a higher value. This
b8c242
is nice because it reduces PID reuse, even if the number of threads that is
b8c242
allowed is limited. But the tests assumed that we cannot have a thread with
b8c242
PID above MIN(kernel.threads-max, kernel.pid_max-1), which is not valid.
b8c242
b8c242
So let's rework the whole thing: let's expose the helpers to read
b8c242
kernel.threads-max and kernel.pid_max, and print what they return in tests.
b8c242
procfs_tasks_get_limit() was something that is only used in tests, and wasn't
b8c242
very well defined, so let's drop it.
b8c242
b8c242
Fixes #21193.
b8c242
b8c242
(cherry picked from commit c3dead53d50e334f2d072a2248256983d6dc9f8c)
b8c242
b8c242
Related: #1977569
b8c242
---
b8c242
 src/basic/procfs-util.c      | 53 +++++++++---------------------------
b8c242
 src/basic/procfs-util.h      |  4 ++-
b8c242
 src/basic/util.c             | 49 +++++++++++++++++++++++----------
b8c242
 src/test/test-process-util.c | 10 +++++--
b8c242
 src/test/test-procfs-util.c  | 37 +++++++++++++++++++------
b8c242
 5 files changed, 88 insertions(+), 65 deletions(-)
b8c242
b8c242
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
b8c242
index 7aaf95bfce..fa5671dd72 100644
b8c242
--- a/src/basic/procfs-util.c
b8c242
+++ b/src/basic/procfs-util.c
b8c242
@@ -12,54 +12,34 @@
b8c242
 #include "stdio-util.h"
b8c242
 #include "string-util.h"
b8c242
 
b8c242
-int procfs_tasks_get_limit(uint64_t *ret) {
b8c242
+int procfs_get_pid_max(uint64_t *ret) {
b8c242
         _cleanup_free_ char *value = NULL;
b8c242
-        uint64_t pid_max, threads_max;
b8c242
         int r;
b8c242
 
b8c242
         assert(ret);
b8c242
 
b8c242
-        /* So there are two sysctl files that control the system limit of processes:
b8c242
-         *
b8c242
-         * 1. kernel.threads-max: this is probably the sysctl that makes more sense, as it directly puts a limit on
b8c242
-         *    concurrent tasks.
b8c242
-         *
b8c242
-         * 2. kernel.pid_max: this limits the numeric range PIDs can take, and thus indirectly also limits the number
b8c242
-         *    of concurrent threads. AFAICS it's primarily a compatibility concept: some crappy old code used a signed
b8c242
-         *    16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond INT16_MAX by
b8c242
-         *    default.
b8c242
-         *
b8c242
-         * By default #2 is set to much lower values than #1, hence the limit people come into contact with first, as
b8c242
-         * it's the lowest boundary they need to bump when they want higher number of processes.
b8c242
-         *
b8c242
-         * Also note the weird definition of #2: PIDs assigned will be kept below this value, which means the number of
b8c242
-         * tasks that can be created is one lower, as PID 0 is not a valid process ID. */
b8c242
-
b8c242
         r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
b8c242
         if (r < 0)
b8c242
                 return r;
b8c242
 
b8c242
-        r = safe_atou64(value, &pid_max);
b8c242
-        if (r < 0)
b8c242
-                return r;
b8c242
+        return safe_atou64(value, ret);
b8c242
+}
b8c242
 
b8c242
-        value = mfree(value);
b8c242
-        r = read_one_line_file("/proc/sys/kernel/threads-max", &value);
b8c242
-        if (r < 0)
b8c242
-                return r;
b8c242
+int procfs_get_threads_max(uint64_t *ret) {
b8c242
+        _cleanup_free_ char *value = NULL;
b8c242
+        int r;
b8c242
 
b8c242
-        r = safe_atou64(value, &threads_max);
b8c242
+        assert(ret);
b8c242
+
b8c242
+        r = read_one_line_file("/proc/sys/kernel/threads-max", &value);
b8c242
         if (r < 0)
b8c242
                 return r;
b8c242
 
b8c242
-        /* Subtract one from pid_max, since PID 0 is not a valid PID */
b8c242
-        *ret = MIN(pid_max-1, threads_max);
b8c242
-        return 0;
b8c242
+        return safe_atou64(value, ret);
b8c242
 }
b8c242
 
b8c242
 int procfs_tasks_set_limit(uint64_t limit) {
b8c242
         char buffer[DECIMAL_STR_MAX(uint64_t)+1];
b8c242
-        _cleanup_free_ char *value = NULL;
b8c242
         uint64_t pid_max;
b8c242
         int r;
b8c242
 
b8c242
@@ -74,10 +54,7 @@ int procfs_tasks_set_limit(uint64_t limit) {
b8c242
          * set it to the maximum. */
b8c242
         limit = CLAMP(limit, 20U, TASKS_MAX);
b8c242
 
b8c242
-        r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
b8c242
-        if (r < 0)
b8c242
-                return r;
b8c242
-        r = safe_atou64(value, &pid_max);
b8c242
+        r = procfs_get_pid_max(&pid_max);
b8c242
         if (r < 0)
b8c242
                 return r;
b8c242
 
b8c242
@@ -98,14 +75,10 @@ int procfs_tasks_set_limit(uint64_t limit) {
b8c242
                 /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not
b8c242
                  * generate an error */
b8c242
 
b8c242
-                value = mfree(value);
b8c242
-                if (read_one_line_file("/proc/sys/kernel/threads-max", &value) < 0)
b8c242
-                        return r; /* return original error */
b8c242
-
b8c242
-                if (safe_atou64(value, &threads_max) < 0)
b8c242
+                if (procfs_get_threads_max(&threads_max) < 0)
b8c242
                         return r; /* return original error */
b8c242
 
b8c242
-                if (MIN(pid_max-1, threads_max) != limit)
b8c242
+                if (MIN(pid_max - 1, threads_max) != limit)
b8c242
                         return r; /* return original error */
b8c242
 
b8c242
                 /* Yay! Value set already matches what we were trying to set, hence consider this a success. */
b8c242
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
b8c242
index 5a44e9eff7..caaee8b0b6 100644
b8c242
--- a/src/basic/procfs-util.h
b8c242
+++ b/src/basic/procfs-util.h
b8c242
@@ -5,7 +5,9 @@
b8c242
 
b8c242
 #include "time-util.h"
b8c242
 
b8c242
-int procfs_tasks_get_limit(uint64_t *ret);
b8c242
+int procfs_get_pid_max(uint64_t *ret);
b8c242
+int procfs_get_threads_max(uint64_t *ret);
b8c242
+
b8c242
 int procfs_tasks_set_limit(uint64_t limit);
b8c242
 int procfs_tasks_get_current(uint64_t *ret);
b8c242
 
b8c242
diff --git a/src/basic/util.c b/src/basic/util.c
b8c242
index 609f8c2f33..548e3652cc 100644
b8c242
--- a/src/basic/util.c
b8c242
+++ b/src/basic/util.c
b8c242
@@ -527,23 +527,46 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
b8c242
 }
b8c242
 
b8c242
 uint64_t system_tasks_max(void) {
b8c242
-
b8c242
-        uint64_t a = TASKS_MAX, b = TASKS_MAX;
b8c242
+        uint64_t a = TASKS_MAX, b = TASKS_MAX, c = TASKS_MAX;
b8c242
         _cleanup_free_ char *root = NULL;
b8c242
         int r;
b8c242
 
b8c242
-        /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
b8c242
-         * limit:
b8c242
+        /* Determine the maximum number of tasks that may run on this system. We check three sources to
b8c242
+         * determine this limit:
b8c242
+         *
b8c242
+         * a) kernel.threads-max sysctl: the maximum number of tasks (threads) the kernel allows.
b8c242
+         *
b8c242
+         *    This puts a direct limit on the number of concurrent tasks.
b8c242
+         *
b8c242
+         * b) kernel.pid_max sysctl: the maximum PID value.
b8c242
+         *
b8c242
+         *    This limits the numeric range PIDs can take, and thus indirectly also limits the number of
b8c242
+         *    concurrent threads. It's primarily a compatibility concept: some crappy old code used a signed
b8c242
+         *    16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond
b8c242
+         *    INT16_MAX by default.
b8c242
          *
b8c242
-         * a) the maximum tasks value the kernel allows on this architecture
b8c242
-         * b) the cgroups pids_max attribute for the system
b8c242
-         * c) the kernel's configured maximum PID value
b8c242
+         *    Also note the weird definition: PIDs assigned will be kept below this value, which means
b8c242
+         *    the number of tasks that can be created is one lower, as PID 0 is not a valid process ID.
b8c242
          *
b8c242
-         * And then pick the smallest of the three */
b8c242
+         * c) pids.max on the root cgroup: the kernel's configured maximum number of tasks.
b8c242
+         *
b8c242
+         * and then pick the smallest of the three.
b8c242
+         *
b8c242
+         * By default pid_max is set to much lower values than threads-max, hence the limit people come into
b8c242
+         * contact with first, as it's the lowest boundary they need to bump when they want higher number of
b8c242
+         * processes.
b8c242
+         */
b8c242
+
b8c242
+        r = procfs_get_threads_max(&a);
b8c242
+        if (r < 0)
b8c242
+                log_debug_errno(r, "Failed to read kernel.threads-max, ignoring: %m");
b8c242
 
b8c242
-        r = procfs_tasks_get_limit(&a);
b8c242
+        r = procfs_get_pid_max(&b);
b8c242
         if (r < 0)
b8c242
-                log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
b8c242
+                log_debug_errno(r, "Failed to read kernel.pid_max, ignoring: %m");
b8c242
+        else if (b > 0)
b8c242
+                /* Subtract one from pid_max, since PID 0 is not a valid PID */
b8c242
+                b--;
b8c242
 
b8c242
         r = cg_get_root_path(&root);
b8c242
         if (r < 0)
b8c242
@@ -555,15 +578,13 @@ uint64_t system_tasks_max(void) {
b8c242
                 if (r < 0)
b8c242
                         log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
b8c242
                 else if (!streq(value, "max")) {
b8c242
-                        r = safe_atou64(value, &b);
b8c242
+                        r = safe_atou64(value, &c);
b8c242
                         if (r < 0)
b8c242
                                 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
b8c242
                 }
b8c242
         }
b8c242
 
b8c242
-        return MIN3(TASKS_MAX,
b8c242
-                    a <= 0 ? TASKS_MAX : a,
b8c242
-                    b <= 0 ? TASKS_MAX : b);
b8c242
+        return MIN3(a, b, c);
b8c242
 }
b8c242
 
b8c242
 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
b8c242
diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c
b8c242
index 6b14ff592b..6295889b47 100644
b8c242
--- a/src/test/test-process-util.c
b8c242
+++ b/src/test/test-process-util.c
b8c242
@@ -598,8 +598,14 @@ static void test_get_process_ppid(void) {
b8c242
         assert_se(get_process_ppid(1, NULL) == -EADDRNOTAVAIL);
b8c242
 
b8c242
         /* the process with the PID above the global limit definitely doesn't exist. Verify that */
b8c242
-        assert_se(procfs_tasks_get_limit(&limit) >= 0);
b8c242
-        assert_se(limit >= INT_MAX || get_process_ppid(limit+1, NULL) == -ESRCH);
b8c242
+        assert_se(procfs_get_pid_max(&limit) >= 0);
b8c242
+        log_debug("kernel.pid_max = %"PRIu64, limit);
b8c242
+
b8c242
+        if (limit < INT_MAX) {
b8c242
+                r = get_process_ppid(limit + 1, NULL);
b8c242
+                log_debug_errno(r, "get_process_limit(%"PRIu64") → %d/%m", limit + 1, r);
b8c242
+                assert(r == -ESRCH);
b8c242
+        }
b8c242
 
b8c242
         for (pid_t pid = 0;;) {
b8c242
                 _cleanup_free_ char *c1 = NULL, *c2 = NULL;
b8c242
diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c
b8c242
index 1d0612985b..bb6943fed0 100644
b8c242
--- a/src/test/test-procfs-util.c
b8c242
+++ b/src/test/test-procfs-util.c
b8c242
@@ -5,11 +5,13 @@
b8c242
 #include "log.h"
b8c242
 #include "parse-util.h"
b8c242
 #include "procfs-util.h"
b8c242
+#include "process-util.h"
b8c242
+#include "util.h"
b8c242
 
b8c242
 int main(int argc, char *argv[]) {
b8c242
         char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)];
b8c242
         nsec_t nsec;
b8c242
-        uint64_t v;
b8c242
+        uint64_t v, w;
b8c242
         int r;
b8c242
 
b8c242
         log_parse_environment();
b8c242
@@ -24,22 +26,41 @@ int main(int argc, char *argv[]) {
b8c242
         assert_se(procfs_tasks_get_current(&v) >= 0);
b8c242
         log_info("Current number of tasks: %" PRIu64, v);
b8c242
 
b8c242
-        assert_se(procfs_tasks_get_limit(&v) >= 0);
b8c242
+        v = TASKS_MAX;
b8c242
+        r = procfs_get_pid_max(&v);
b8c242
+        assert(r >= 0 || r == -ENOENT || ERRNO_IS_PRIVILEGE(r));
b8c242
+        log_info("kernel.pid_max: %"PRIu64, v);
b8c242
+
b8c242
+        w = TASKS_MAX;
b8c242
+        r = procfs_get_threads_max(&w);
b8c242
+        assert(r >= 0 || r == -ENOENT || ERRNO_IS_PRIVILEGE(r));
b8c242
+        log_info("kernel.threads-max: %"PRIu64, w);
b8c242
+
b8c242
+        v = MIN(v - (v > 0), w);
b8c242
+
b8c242
+        assert_se(r >= 0);
b8c242
         log_info("Limit of tasks: %" PRIu64, v);
b8c242
         assert_se(v > 0);
b8c242
-        assert_se(procfs_tasks_set_limit(v) >= 0);
b8c242
+        r = procfs_tasks_set_limit(v);
b8c242
+        if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r)) {
b8c242
+                log_notice_errno(r, "Skipping test: can't set task limits");
b8c242
+                return EXIT_TEST_SKIP;
b8c242
+        }
b8c242
+        assert(r >= 0);
b8c242
 
b8c242
         if (v > 100) {
b8c242
-                uint64_t w;
b8c242
+                log_info("Reducing limit by one to %"PRIu64"…", v-1);
b8c242
+
b8c242
                 r = procfs_tasks_set_limit(v-1);
b8c242
-                assert_se(IN_SET(r, 0, -EPERM, -EACCES, -EROFS));
b8c242
+                log_info_errno(r, "procfs_tasks_set_limit: %m");
b8c242
+                assert_se(r >= 0 || ERRNO_IS_PRIVILEGE(r));
b8c242
 
b8c242
-                assert_se(procfs_tasks_get_limit(&w) >= 0);
b8c242
-                assert_se((r == 0 && w == v - 1) || (r < 0 && w == v));
b8c242
+                assert_se(procfs_get_threads_max(&w) >= 0);
b8c242
+                assert_se(r >= 0 ? w == v - 1 : w == v);
b8c242
 
b8c242
                 assert_se(procfs_tasks_set_limit(v) >= 0);
b8c242
 
b8c242
-                assert_se(procfs_tasks_get_limit(&w) >= 0);
b8c242
+                assert_se(procfs_get_threads_max(&w) >= 0);
b8c242
                 assert_se(v == w);
b8c242
         }
b8c242