be0c12
From 62678ec1aa02b53cb116b6f7dd72a54bf61153b7 Mon Sep 17 00:00:00 2001
be0c12
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
be0c12
Date: Tue, 2 Nov 2021 18:18:21 +0100
be0c12
Subject: [PATCH] procfs-util: fix confusion wrt. quantity limit and maximum
be0c12
 value
be0c12
MIME-Version: 1.0
be0c12
Content-Type: text/plain; charset=UTF-8
be0c12
Content-Transfer-Encoding: 8bit
be0c12
be0c12
From packit/rawhide-arm64 logs:
be0c12
Assertion 'limit >= INT_MAX || get_process_ppid(limit+1, NULL) == -ESRCH' failed at src/test/test-process-util.c:855, function test_get_process_ppid(). Aborting.
be0c12
――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
be0c12
be0c12
The kernel has a few different limits. In particular kernel.threads-max can be
be0c12
set to some lower value, and kernel.pid_max can be set to a higher value. This
be0c12
is nice because it reduces PID reuse, even if the number of threads that is
be0c12
allowed is limited. But the tests assumed that we cannot have a thread with
be0c12
PID above MIN(kernel.threads-max, kernel.pid_max-1), which is not valid.
be0c12
be0c12
So let's rework the whole thing: let's expose the helpers to read
be0c12
kernel.threads-max and kernel.pid_max, and print what they return in tests.
be0c12
procfs_tasks_get_limit() was something that is only used in tests, and wasn't
be0c12
very well defined, so let's drop it.
be0c12
be0c12
Fixes #21193.
be0c12
be0c12
(cherry picked from commit c3dead53d50e334f2d072a2248256983d6dc9f8c)
be0c12
be0c12
Related: #1977569
be0c12
---
be0c12
 src/basic/procfs-util.c      | 53 +++++++++---------------------------
be0c12
 src/basic/procfs-util.h      |  4 ++-
be0c12
 src/basic/util.c             | 49 +++++++++++++++++++++++----------
be0c12
 src/test/test-process-util.c | 10 +++++--
be0c12
 src/test/test-procfs-util.c  | 37 +++++++++++++++++++------
be0c12
 5 files changed, 88 insertions(+), 65 deletions(-)
be0c12
be0c12
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
be0c12
index 7aaf95bfce..fa5671dd72 100644
be0c12
--- a/src/basic/procfs-util.c
be0c12
+++ b/src/basic/procfs-util.c
be0c12
@@ -12,54 +12,34 @@
be0c12
 #include "stdio-util.h"
be0c12
 #include "string-util.h"
be0c12
 
be0c12
-int procfs_tasks_get_limit(uint64_t *ret) {
be0c12
+int procfs_get_pid_max(uint64_t *ret) {
be0c12
         _cleanup_free_ char *value = NULL;
be0c12
-        uint64_t pid_max, threads_max;
be0c12
         int r;
be0c12
 
be0c12
         assert(ret);
be0c12
 
be0c12
-        /* So there are two sysctl files that control the system limit of processes:
be0c12
-         *
be0c12
-         * 1. kernel.threads-max: this is probably the sysctl that makes more sense, as it directly puts a limit on
be0c12
-         *    concurrent tasks.
be0c12
-         *
be0c12
-         * 2. kernel.pid_max: this limits the numeric range PIDs can take, and thus indirectly also limits the number
be0c12
-         *    of concurrent threads. AFAICS it's primarily a compatibility concept: some crappy old code used a signed
be0c12
-         *    16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond INT16_MAX by
be0c12
-         *    default.
be0c12
-         *
be0c12
-         * By default #2 is set to much lower values than #1, hence the limit people come into contact with first, as
be0c12
-         * it's the lowest boundary they need to bump when they want higher number of processes.
be0c12
-         *
be0c12
-         * Also note the weird definition of #2: PIDs assigned will be kept below this value, which means the number of
be0c12
-         * tasks that can be created is one lower, as PID 0 is not a valid process ID. */
be0c12
-
be0c12
         r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
be0c12
         if (r < 0)
be0c12
                 return r;
be0c12
 
be0c12
-        r = safe_atou64(value, &pid_max);
be0c12
-        if (r < 0)
be0c12
-                return r;
be0c12
+        return safe_atou64(value, ret);
be0c12
+}
be0c12
 
be0c12
-        value = mfree(value);
be0c12
-        r = read_one_line_file("/proc/sys/kernel/threads-max", &value);
be0c12
-        if (r < 0)
be0c12
-                return r;
be0c12
+int procfs_get_threads_max(uint64_t *ret) {
be0c12
+        _cleanup_free_ char *value = NULL;
be0c12
+        int r;
be0c12
 
be0c12
-        r = safe_atou64(value, &threads_max);
be0c12
+        assert(ret);
be0c12
+
be0c12
+        r = read_one_line_file("/proc/sys/kernel/threads-max", &value);
be0c12
         if (r < 0)
be0c12
                 return r;
be0c12
 
be0c12
-        /* Subtract one from pid_max, since PID 0 is not a valid PID */
be0c12
-        *ret = MIN(pid_max-1, threads_max);
be0c12
-        return 0;
be0c12
+        return safe_atou64(value, ret);
be0c12
 }
be0c12
 
be0c12
 int procfs_tasks_set_limit(uint64_t limit) {
be0c12
         char buffer[DECIMAL_STR_MAX(uint64_t)+1];
be0c12
-        _cleanup_free_ char *value = NULL;
be0c12
         uint64_t pid_max;
be0c12
         int r;
be0c12
 
be0c12
@@ -74,10 +54,7 @@ int procfs_tasks_set_limit(uint64_t limit) {
be0c12
          * set it to the maximum. */
be0c12
         limit = CLAMP(limit, 20U, TASKS_MAX);
be0c12
 
be0c12
-        r = read_one_line_file("/proc/sys/kernel/pid_max", &value);
be0c12
-        if (r < 0)
be0c12
-                return r;
be0c12
-        r = safe_atou64(value, &pid_max);
be0c12
+        r = procfs_get_pid_max(&pid_max);
be0c12
         if (r < 0)
be0c12
                 return r;
be0c12
 
be0c12
@@ -98,14 +75,10 @@ int procfs_tasks_set_limit(uint64_t limit) {
be0c12
                 /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not
be0c12
                  * generate an error */
be0c12
 
be0c12
-                value = mfree(value);
be0c12
-                if (read_one_line_file("/proc/sys/kernel/threads-max", &value) < 0)
be0c12
-                        return r; /* return original error */
be0c12
-
be0c12
-                if (safe_atou64(value, &threads_max) < 0)
be0c12
+                if (procfs_get_threads_max(&threads_max) < 0)
be0c12
                         return r; /* return original error */
be0c12
 
be0c12
-                if (MIN(pid_max-1, threads_max) != limit)
be0c12
+                if (MIN(pid_max - 1, threads_max) != limit)
be0c12
                         return r; /* return original error */
be0c12
 
be0c12
                 /* Yay! Value set already matches what we were trying to set, hence consider this a success. */
be0c12
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
be0c12
index 5a44e9eff7..caaee8b0b6 100644
be0c12
--- a/src/basic/procfs-util.h
be0c12
+++ b/src/basic/procfs-util.h
be0c12
@@ -5,7 +5,9 @@
be0c12
 
be0c12
 #include "time-util.h"
be0c12
 
be0c12
-int procfs_tasks_get_limit(uint64_t *ret);
be0c12
+int procfs_get_pid_max(uint64_t *ret);
be0c12
+int procfs_get_threads_max(uint64_t *ret);
be0c12
+
be0c12
 int procfs_tasks_set_limit(uint64_t limit);
be0c12
 int procfs_tasks_get_current(uint64_t *ret);
be0c12
 
be0c12
diff --git a/src/basic/util.c b/src/basic/util.c
be0c12
index 609f8c2f33..548e3652cc 100644
be0c12
--- a/src/basic/util.c
be0c12
+++ b/src/basic/util.c
be0c12
@@ -527,23 +527,46 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
be0c12
 }
be0c12
 
be0c12
 uint64_t system_tasks_max(void) {
be0c12
-
be0c12
-        uint64_t a = TASKS_MAX, b = TASKS_MAX;
be0c12
+        uint64_t a = TASKS_MAX, b = TASKS_MAX, c = TASKS_MAX;
be0c12
         _cleanup_free_ char *root = NULL;
be0c12
         int r;
be0c12
 
be0c12
-        /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
be0c12
-         * limit:
be0c12
+        /* Determine the maximum number of tasks that may run on this system. We check three sources to
be0c12
+         * determine this limit:
be0c12
+         *
be0c12
+         * a) kernel.threads-max sysctl: the maximum number of tasks (threads) the kernel allows.
be0c12
+         *
be0c12
+         *    This puts a direct limit on the number of concurrent tasks.
be0c12
+         *
be0c12
+         * b) kernel.pid_max sysctl: the maximum PID value.
be0c12
+         *
be0c12
+         *    This limits the numeric range PIDs can take, and thus indirectly also limits the number of
be0c12
+         *    concurrent threads. It's primarily a compatibility concept: some crappy old code used a signed
be0c12
+         *    16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond
be0c12
+         *    INT16_MAX by default.
be0c12
          *
be0c12
-         * a) the maximum tasks value the kernel allows on this architecture
be0c12
-         * b) the cgroups pids_max attribute for the system
be0c12
-         * c) the kernel's configured maximum PID value
be0c12
+         *    Also note the weird definition: PIDs assigned will be kept below this value, which means
be0c12
+         *    the number of tasks that can be created is one lower, as PID 0 is not a valid process ID.
be0c12
          *
be0c12
-         * And then pick the smallest of the three */
be0c12
+         * c) pids.max on the root cgroup: the kernel's configured maximum number of tasks.
be0c12
+         *
be0c12
+         * and then pick the smallest of the three.
be0c12
+         *
be0c12
+         * By default pid_max is set to much lower values than threads-max, hence the limit people come into
be0c12
+         * contact with first, as it's the lowest boundary they need to bump when they want higher number of
be0c12
+         * processes.
be0c12
+         */
be0c12
+
be0c12
+        r = procfs_get_threads_max(&a);
be0c12
+        if (r < 0)
be0c12
+                log_debug_errno(r, "Failed to read kernel.threads-max, ignoring: %m");
be0c12
 
be0c12
-        r = procfs_tasks_get_limit(&a);
be0c12
+        r = procfs_get_pid_max(&b);
be0c12
         if (r < 0)
be0c12
-                log_debug_errno(r, "Failed to read maximum number of tasks from /proc, ignoring: %m");
be0c12
+                log_debug_errno(r, "Failed to read kernel.pid_max, ignoring: %m");
be0c12
+        else if (b > 0)
be0c12
+                /* Subtract one from pid_max, since PID 0 is not a valid PID */
be0c12
+                b--;
be0c12
 
be0c12
         r = cg_get_root_path(&root);
be0c12
         if (r < 0)
be0c12
@@ -555,15 +578,13 @@ uint64_t system_tasks_max(void) {
be0c12
                 if (r < 0)
be0c12
                         log_debug_errno(r, "Failed to read pids.max attribute of cgroup root, ignoring: %m");
be0c12
                 else if (!streq(value, "max")) {
be0c12
-                        r = safe_atou64(value, &b);
be0c12
+                        r = safe_atou64(value, &c);
be0c12
                         if (r < 0)
be0c12
                                 log_debug_errno(r, "Failed to parse pids.max attribute of cgroup root, ignoring: %m");
be0c12
                 }
be0c12
         }
be0c12
 
be0c12
-        return MIN3(TASKS_MAX,
be0c12
-                    a <= 0 ? TASKS_MAX : a,
be0c12
-                    b <= 0 ? TASKS_MAX : b);
be0c12
+        return MIN3(a, b, c);
be0c12
 }
be0c12
 
be0c12
 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
be0c12
diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c
be0c12
index 6b14ff592b..6295889b47 100644
be0c12
--- a/src/test/test-process-util.c
be0c12
+++ b/src/test/test-process-util.c
be0c12
@@ -598,8 +598,14 @@ static void test_get_process_ppid(void) {
be0c12
         assert_se(get_process_ppid(1, NULL) == -EADDRNOTAVAIL);
be0c12
 
be0c12
         /* the process with the PID above the global limit definitely doesn't exist. Verify that */
be0c12
-        assert_se(procfs_tasks_get_limit(&limit) >= 0);
be0c12
-        assert_se(limit >= INT_MAX || get_process_ppid(limit+1, NULL) == -ESRCH);
be0c12
+        assert_se(procfs_get_pid_max(&limit) >= 0);
be0c12
+        log_debug("kernel.pid_max = %"PRIu64, limit);
be0c12
+
be0c12
+        if (limit < INT_MAX) {
be0c12
+                r = get_process_ppid(limit + 1, NULL);
be0c12
+                log_debug_errno(r, "get_process_limit(%"PRIu64") → %d/%m", limit + 1, r);
be0c12
+                assert(r == -ESRCH);
be0c12
+        }
be0c12
 
be0c12
         for (pid_t pid = 0;;) {
be0c12
                 _cleanup_free_ char *c1 = NULL, *c2 = NULL;
be0c12
diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c
be0c12
index 1d0612985b..bb6943fed0 100644
be0c12
--- a/src/test/test-procfs-util.c
be0c12
+++ b/src/test/test-procfs-util.c
be0c12
@@ -5,11 +5,13 @@
be0c12
 #include "log.h"
be0c12
 #include "parse-util.h"
be0c12
 #include "procfs-util.h"
be0c12
+#include "process-util.h"
be0c12
+#include "util.h"
be0c12
 
be0c12
 int main(int argc, char *argv[]) {
be0c12
         char buf[CONST_MAX(FORMAT_TIMESPAN_MAX, FORMAT_BYTES_MAX)];
be0c12
         nsec_t nsec;
be0c12
-        uint64_t v;
be0c12
+        uint64_t v, w;
be0c12
         int r;
be0c12
 
be0c12
         log_parse_environment();
be0c12
@@ -24,22 +26,41 @@ int main(int argc, char *argv[]) {
be0c12
         assert_se(procfs_tasks_get_current(&v) >= 0);
be0c12
         log_info("Current number of tasks: %" PRIu64, v);
be0c12
 
be0c12
-        assert_se(procfs_tasks_get_limit(&v) >= 0);
be0c12
+        v = TASKS_MAX;
be0c12
+        r = procfs_get_pid_max(&v);
be0c12
+        assert(r >= 0 || r == -ENOENT || ERRNO_IS_PRIVILEGE(r));
be0c12
+        log_info("kernel.pid_max: %"PRIu64, v);
be0c12
+
be0c12
+        w = TASKS_MAX;
be0c12
+        r = procfs_get_threads_max(&w);
be0c12
+        assert(r >= 0 || r == -ENOENT || ERRNO_IS_PRIVILEGE(r));
be0c12
+        log_info("kernel.threads-max: %"PRIu64, w);
be0c12
+
be0c12
+        v = MIN(v - (v > 0), w);
be0c12
+
be0c12
+        assert_se(r >= 0);
be0c12
         log_info("Limit of tasks: %" PRIu64, v);
be0c12
         assert_se(v > 0);
be0c12
-        assert_se(procfs_tasks_set_limit(v) >= 0);
be0c12
+        r = procfs_tasks_set_limit(v);
be0c12
+        if (r == -ENOENT || ERRNO_IS_PRIVILEGE(r)) {
be0c12
+                log_notice_errno(r, "Skipping test: can't set task limits");
be0c12
+                return EXIT_TEST_SKIP;
be0c12
+        }
be0c12
+        assert(r >= 0);
be0c12
 
be0c12
         if (v > 100) {
be0c12
-                uint64_t w;
be0c12
+                log_info("Reducing limit by one to %"PRIu64"…", v-1);
be0c12
+
be0c12
                 r = procfs_tasks_set_limit(v-1);
be0c12
-                assert_se(IN_SET(r, 0, -EPERM, -EACCES, -EROFS));
be0c12
+                log_info_errno(r, "procfs_tasks_set_limit: %m");
be0c12
+                assert_se(r >= 0 || ERRNO_IS_PRIVILEGE(r));
be0c12
 
be0c12
-                assert_se(procfs_tasks_get_limit(&w) >= 0);
be0c12
-                assert_se((r == 0 && w == v - 1) || (r < 0 && w == v));
be0c12
+                assert_se(procfs_get_threads_max(&w) >= 0);
be0c12
+                assert_se(r >= 0 ? w == v - 1 : w == v);
be0c12
 
be0c12
                 assert_se(procfs_tasks_set_limit(v) >= 0);
be0c12
 
be0c12
-                assert_se(procfs_tasks_get_limit(&w) >= 0);
be0c12
+                assert_se(procfs_get_threads_max(&w) >= 0);
be0c12
                 assert_se(v == w);
be0c12
         }
be0c12