17aa40
From 62678ec1aa02b53cb116b6f7dd72a54bf61153b7 Mon Sep 17 00:00:00 2001
17aa40
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
17aa40
Date: Tue, 2 Nov 2021 18:18:21 +0100
17aa40
Subject: [PATCH] procfs-util: fix confusion wrt. quantity limit and maximum
17aa40
 value
17aa40
MIME-Version: 1.0
17aa40
Content-Type: text/plain; charset=UTF-8
17aa40
Content-Transfer-Encoding: 8bit
17aa40
17aa40
From packit/rawhide-arm64 logs:
17aa40
Assertion 'limit >= INT_MAX || get_process_ppid(limit+1, NULL) == -ESRCH' failed at src/test/test-process-util.c:855, function test_get_process_ppid(). Aborting.
17aa40
――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
17aa40
17aa40
The kernel has a few different limits. In particular kernel.threads-max can be
17aa40
set to some lower value, and kernel.pid_max can be set to a higher value. This
17aa40
is nice because it reduces PID reuse, even if the number of threads that is
17aa40
allowed is limited. But the tests assumed that we cannot have a thread with
17aa40
PID above MIN(kernel.threads-max, kernel.pid_max-1), which is not valid.
17aa40
17aa40
So let's rework the whole thing: let's expose the helpers to read
17aa40
kernel.threads-max and kernel.pid_max, and print what they return in tests.
17aa40
procfs_tasks_get_limit() was something that is only used in tests, and wasn't
17aa40
very well defined, so let's drop it.
17aa40
17aa40
Fixes #21193.
17aa40
17aa40
(cherry picked from commit c3dead53d50e334f2d072a2248256983d6dc9f8c)
17aa40
17aa40
Related: #1977569
17aa40
---
17aa40
 src/basic/procfs-util.c      | 53 +++++++++---------------------------
17aa40
 src/basic/procfs-util.h      |  4 ++-
17aa40
 src/basic/util.c             | 49 +++++++++++++++++++++++----------
17aa40
 src/test/test-process-util.c | 10 +++++--
17aa40
 src/test/test-procfs-util.c  | 37 +++++++++++++++++++------
17aa40
 5 files changed, 88 insertions(+), 65 deletions(-)
17aa40
17aa40
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
17aa40
index 7aaf95bfce..fa5671dd72 100644
17aa40
--- a/src/basic/procfs-util.c
17aa40
+++ b/src/basic/procfs-util.c
17aa40
@@ -12,54 +12,34 @@
17aa40
 #include "stdio-util.h"
17aa40
 #include "string-util.h"
17aa40
 
17aa40
-int procfs_tasks_get_limit(uint64_t *ret) {
17aa40
+int procfs_get_pid_max(uint64_t *ret) {
17aa40
         _cleanup_free_ char *value = NULL;
17aa40
-        uint64_t pid_max, threads_max;
17aa40
         int r;
17aa40
 
17aa40
         assert(ret);
17aa40
 
17aa40
-        /* So there are two sysctl files that control the system limit of processes:
17aa40
-         *
17aa40
-         * 1. kernel.threads-max: this is probably the sysctl that makes more sense, as it directly puts a limit on
17aa40
-         *    concurrent tasks.
17aa40
-         *
17aa40
-         * 2. kernel.pid_max: this limits the numeric range PIDs can take, and thus indirectly also limits the number
17aa40
-         *    of concurrent threads. AFAICS it's primarily a compatibility concept: some crappy old code used a signed
17aa40
-         *    16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond INT16_MAX by
17aa40
-         *    default.
17aa40
-         *
17aa40
-         * By default #2 is set to much lower values than #1, hence the limit people come into contact with first, as
17aa40
-         * it's the lowest boundary they need to bump when they want higher number of processes.
17aa40
-         *
17aa40
-         * Also note the weird definition of #2: PIDs assigned will be kept below this value, which means the number of
17aa40
-         * tasks that can be created is one lower, as PID 0 is not a valid process ID. */
17aa40
-
17aa40
         r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
17aa40
         if (r < 0)
17aa40
                 return r;
17aa40
 
17aa40
-        r = safe_atou64(value, &pid_max);
17aa40
-        if (r < 0)
17aa40
-                return r;
17aa40
+        return safe_atou64(value, ret);
17aa40
+}
17aa40
 
17aa40
-        value = mfree(value);
17aa40
-        r = read_one_line_file("/proc/sys/kernel/threads-max", &value);
17aa40
-        if (r < 0)
17aa40
-                return r;
17aa40
+int procfs_get_threads_max(uint64_t *ret) {
17aa40
+        _cleanup_free_ char *value = NULL;
17aa40
+        int r;
17aa40
 
17aa40
-        r = safe_atou64(value, &threads_max);
17aa40
+        assert(ret);
17aa40
+
17aa40
+        r = read_one_line_file("/proc/sys/kernel/threads-max", &value);
17aa40
         if (r < 0)
17aa40
                 return r;
17aa40
 
17aa40
-        /* Subtract one from pid_max, since PID 0 is not a valid PID */
17aa40
-        *ret = MIN(pid_max-1, threads_max);
17aa40
-        return 0;
17aa40
+        return safe_atou64(value, ret);
17aa40
 }
17aa40
 
17aa40
 int procfs_tasks_set_limit(uint64_t limit) {
17aa40
         char buffer[DECIMAL_STR_MAX(uint64_t)+1];
17aa40
-        _cleanup_free_ char *value = NULL;
17aa40
         uint64_t pid_max;
17aa40
         int r;
17aa40
 
17aa40
@@ -74,10 +54,7 @@ int procfs_tasks_set_limit(uint64_t limit) {
17aa40
          * set it to the maximum. */
17aa40
         limit = CLAMP(limit, 20U, TASKS_MAX);
17aa40
 
17aa40
-        r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
17aa40
-        if (r < 0)
17aa40
-                return r;
17aa40
-        r = safe_atou64(value, &pid_max);
17aa40
+        r = procfs_get_pid_max(&pid_max);
17aa40
         if (r < 0)
17aa40
                 return r;
17aa40
 
17aa40
@@ -98,14 +75,10 @@ int procfs_tasks_set_limit(uint64_t limit) {
17aa40
                 /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not
17aa40
                  * generate an error */
17aa40
 
17aa40
-                value = mfree(value);
17aa40
-                if (read_one_line_file("/proc/sys/kernel/threads-max", &value) < 0)
17aa40
-                        return r; /* return original error */
17aa40
-
17aa40
-                if (safe_atou64(value, &threads_max) < 0)
17aa40
+                if (procfs_get_threads_max(&threads_max) < 0)
17aa40
                         return r; /* return original error */
17aa40
 
17aa40
-                if (MIN(pid_max-1, threads_max) != limit)
17aa40
+                if (MIN(pid_max - 1, threads_max) != limit)
17aa40
                         return r; /* return original error */
17aa40
 
17aa40
                 /* Yay! Value set already matches what we were trying to set, hence consider this a success. */
17aa40
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
17aa40
index 5a44e9eff7..caaee8b0b6 100644
17aa40
--- a/src/basic/procfs-util.h
17aa40
+++ b/src/basic/procfs-util.h
17aa40
@@ -5,7 +5,9 @@
17aa40
 
17aa40
 #include "time-util.h"
17aa40
 
17aa40
-int procfs_tasks_get_limit(uint64_t *ret);
17aa40
+int procfs_get_pid_max(uint64_t *ret);
17aa40
+int procfs_get_threads_max(uint64_t *ret);
17aa40
+
17aa40
 int procfs_tasks_set_limit(uint64_t limit);
17aa40
 int procfs_tasks_get_current(uint64_t *ret);
17aa40
 
17aa40
diff --git a/src/basic/util.c b/src/basic/util.c
17aa40
index 609f8c2f33..548e3652cc 100644
17aa40
--- a/src/basic/util.c
17aa40
+++ b/src/basic/util.c
17aa40
@@ -527,23 +527,46 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
17aa40
 }
17aa40
 
17aa40
 uint64_t system_tasks_max(void) {
17aa40
-
17aa40
-        uint64_t a = TASKS_MAX, b = TASKS_MAX;
17aa40
+        uint64_t a = TASKS_MAX, b = TASKS_MAX, c = TASKS_MAX;
17aa40
         _cleanup_free_ char *root = NULL;
17aa40
         int r;
17aa40
 
17aa40
-        /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
17aa40
-         * limit:
17aa40
+        /* Determine the maximum number of tasks that may run on this system. We check three sources to
17aa40
+         * determine this limit:
17aa40
+         *
17aa40
+         * a) kernel.threads-max sysctl: the maximum number of tasks (threads) the kernel allows.
17aa40
+         *
17aa40
+         *    This puts a direct limit on the number of concurrent tasks.
17aa40
+         *
17aa40
+         * b) kernel.pid_max sysctl: the maximum PID value.
17aa40
+         *
17aa40
+         *    This limits the numeric range PIDs can take, and thus indirectly also limits the number of
17aa40
+         *    concurrent threads. It's primarily a compatibility concept: some crappy old code used a signed
17aa40
+         *    16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond
17aa40
+         *    INT16_MAX by default.
17aa40
          *
17aa40
-         * a) the maximum tasks value the kernel allows on this architecture
17aa40
-         * b) the cgroups pids_max attribute for the system
17aa40
-         * c) the kernel's configured maximum PID value
17aa40
+         *    Also note the weird definition: PIDs assigned will be kept below this value, which means
17aa40
+         *    the number of tasks that can be created is one lower, as PID 0 is not a valid process ID.
17aa40
          *
17aa40
-         * And then pick the smallest of the three */
17aa40
+         * c) pids.max on the root cgroup: the kernel's configured maximum number of tasks.
17aa40
+         *
17aa40
+         * and then pick the smallest of the three.
17aa40
+         *
17aa40
+         * By default pid_max is set to much lower values than threads-max, hence the limit people come into
17aa40
+         * contact with first, as it's the lowest boundary they need to bump when they want higher number of
17aa40
+         * processes.
17aa40
+         */
17aa40
+
17aa40
+        r = procfs_get_threads_max(&a);
17aa40
+        if (r < 0)
17aa40
+                log_debug_errno(r, "Failed to read kernel.threads-max, ignoring: %m");
17aa40
 
17aa40
-        r = procfs_tasks_get_limit(&a);
17aa40
+        r = procfs_get_pid_max(&b);
17aa40
         if (r < 0)
17aa40
-                log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
17aa40
+                log_debug_errno(r, "Failed to read kernel.pid_max, ignoring: %m");
17aa40
+        else if (b > 0)
17aa40
+                /* Subtract one from pid_max, since PID 0 is not a valid PID */
17aa40
+                b--;
17aa40
 
17aa40
         r = cg_get_root_path(&root);
17aa40
         if (r < 0)
17aa40
@@ -555,15 +578,13 @@ uint64_t system_tasks_max(void) {
17aa40
                 if (r < 0)
17aa40
                         log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
17aa40
                 else if (!streq(value, "max")) {
17aa40
-                        r = safe_atou64(value, &b);
17aa40
+                        r = safe_atou64(value, &c);
17aa40
                         if (r < 0)
17aa40
                                 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
17aa40
                 }
17aa40
         }
17aa40
 
17aa40
-        return MIN3(TASKS_MAX,
17aa40
-                    a <= 0 ? TASKS_MAX : a,
17aa40
-                    b <= 0 ? TASKS_MAX : b);
17aa40
+        return MIN3(a, b, c);
17aa40
 }
17aa40
 
17aa40
 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
17aa40
diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c
17aa40
index 6b14ff592b..6295889b47 100644
17aa40
--- a/src/test/test-process-util.c
17aa40
+++ b/src/test/test-process-util.c
17aa40
@@ -598,8 +598,14 @@ static void test_get_process_ppid(void) {
17aa40
         assert_se(get_process_ppid(1, NULL) == -EADDRNOTAVAIL);
17aa40
 
17aa40
         /* the process with the PID above the global limit definitely doesn't exist. Verify that */
17aa40
-        assert_se(procfs_tasks_get_limit(&limit) >= 0);
17aa40
-        assert_se(limit >= INT_MAX || get_process_ppid(limit+1, NULL) == -ESRCH);
17aa40
+        assert_se(procfs_get_pid_max(&limit) >= 0);
17aa40
+        log_debug("kernel.pid_max = %"PRIu64, limit);
17aa40
+
17aa40
+        if (limit < INT_MAX) {
17aa40
+                r = get_process_ppid(limit + 1, NULL);
17aa40
+                log_debug_errno(r, "get_process_limit(%"PRIu64") → %d/%m", limit + 1, r);
17aa40
+                assert(r == -ESRCH);
17aa40
+        }
17aa40
 
17aa40
         for (pid_t pid = 0;;) {
17aa40
                 _cleanup_free_ char *c1 = NULL, *c2 = NULL;
17aa40
diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c
17aa40
index 1d0612985b..bb6943fed0 100644
17aa40
--- a/src/test/test-procfs-util.c
17aa40
+++ b/src/test/test-procfs-util.c
17aa40
@@ -5,11 +5,13 @@
17aa40
 #include "log.h"
17aa40
 #include "parse-util.h"
17aa40
 #include "procfs-util.h"
17aa40
+#include "process-util.h"
17aa40
+#include "util.h"
17aa40
 
17aa40
 int main(int argc, char *argv[]) {
17aa40
         char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)];
17aa40
         nsec_t nsec;
17aa40
-        uint64_t v;
17aa40
+        uint64_t v, w;
17aa40
         int r;
17aa40
 
17aa40
         log_parse_environment();
17aa40
@@ -24,22 +26,41 @@ int main(int argc, char *argv[]) {
17aa40
         assert_se(procfs_tasks_get_current(&v) >= 0);
17aa40
         log_info("Current number of tasks: %" PRIu64, v);
17aa40
 
17aa40
-        assert_se(procfs_tasks_get_limit(&v) >= 0);
17aa40
+        v = TASKS_MAX;
17aa40
+        r = procfs_get_pid_max(&v);
17aa40
+        assert(r >= 0 || r == -ENOENT || ERRNO_IS_PRIVILEGE(r));
17aa40
+        log_info("kernel.pid_max: %"PRIu64, v);
17aa40
+
17aa40
+        w = TASKS_MAX;
17aa40
+        r = procfs_get_threads_max(&w);
17aa40
+        assert(r >= 0 || r == -ENOENT || ERRNO_IS_PRIVILEGE(r));
17aa40
+        log_info("kernel.threads-max: %"PRIu64, w);
17aa40
+
17aa40
+        v = MIN(v - (v > 0), w);
17aa40
+
17aa40
+        assert_se(r >= 0);
17aa40
         log_info("Limit of tasks: %" PRIu64, v);
17aa40
         assert_se(v > 0);
17aa40
-        assert_se(procfs_tasks_set_limit(v) >= 0);
17aa40
+        r = procfs_tasks_set_limit(v);
17aa40
+        if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r)) {
17aa40
+                log_notice_errno(r, "Skipping test: can't set task limits");
17aa40
+                return EXIT_TEST_SKIP;
17aa40
+        }
17aa40
+        assert(r >= 0);
17aa40
 
17aa40
         if (v > 100) {
17aa40
-                uint64_t w;
17aa40
+                log_info("Reducing limit by one to %"PRIu64"…", v-1);
17aa40
+
17aa40
                 r = procfs_tasks_set_limit(v-1);
17aa40
-                assert_se(IN_SET(r, 0, -EPERM, -EACCES, -EROFS));
17aa40
+                log_info_errno(r, "procfs_tasks_set_limit: %m");
17aa40
+                assert_se(r >= 0 || ERRNO_IS_PRIVILEGE(r));
17aa40
 
17aa40
-                assert_se(procfs_tasks_get_limit(&w) >= 0);
17aa40
-                assert_se((r == 0 && w == v - 1) || (r < 0 && w == v));
17aa40
+                assert_se(procfs_get_threads_max(&w) >= 0);
17aa40
+                assert_se(r >= 0 ? w == v - 1 : w == v);
17aa40
 
17aa40
                 assert_se(procfs_tasks_set_limit(v) >= 0);
17aa40
 
17aa40
-                assert_se(procfs_tasks_get_limit(&w) >= 0);
17aa40
+                assert_se(procfs_get_threads_max(&w) >= 0);
17aa40
                 assert_se(v == w);
17aa40
         }
17aa40