803fb7
From 7ec6e537898e139cc33017e03465ef40a86dd433 Mon Sep 17 00:00:00 2001
803fb7
From: Lennart Poettering <lennart@poettering.net>
803fb7
Date: Tue, 19 Jul 2016 15:58:49 +0200
803fb7
Subject: [PATCH] core: support percentage specifications on TasksMax=
803fb7
803fb7
This adds support for a TasksMax=40% syntax for specifying values relative to
803fb7
the system's configured maximum number of processes. This is useful in order to
803fb7
neatly subdivide the available room for tasks within containers.
803fb7
803fb7
Cherry-picked from: 83f8e80857090f63cf6a02c54d381dad3c0fad55
803fb7
Related: #1337244
803fb7
---
de8967
 man/systemd.resource-control.xml | 15 +++---
de8967
 src/core/dbus-cgroup.c           | 22 +++++++++
de8967
 src/core/load-fragment.c         | 15 ++++--
de8967
 src/libsystemd/sd-bus/bus-util.c | 19 ++++++++
de8967
 src/shared/util.c                | 84 ++++++++++++++++++++++++++++++++
de8967
 src/shared/util.h                |  5 ++
de8967
 src/test/test-util.c             | 39 +++++++++++++++
803fb7
 7 files changed, 187 insertions(+), 12 deletions(-)
803fb7
803fb7
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
803fb7
index 217105ee5..f507c6748 100644
803fb7
--- a/man/systemd.resource-control.xml
803fb7
+++ b/man/systemd.resource-control.xml
803fb7
@@ -221,15 +221,12 @@
803fb7
         <term><varname>TasksMax=<replaceable>N</replaceable></varname></term>
803fb7
 
803fb7
         <listitem>
803fb7
-          <para>Specify the maximum number of tasks that may be
803fb7
-          created in the unit. This ensures that the number of tasks
803fb7
-          accounted for the unit (see above) stays below a specific
803fb7
-          limit. If assigned the special value
803fb7
-          <literal>infinity</literal> no tasks limit is applied. This
803fb7
-          controls the <literal>pids.max</literal> control group
803fb7
-          attribute. For details about this control group attribute,
803fb7
-          see <ulink
803fb7
-          url="https://www.kernel.org/doc/Documentation/cgroups/pids.txt">pids.txt</ulink>.</para>
803fb7
+          <para>Specify the maximum number of tasks that may be created in the unit. This ensures that the number of
803fb7
+          tasks accounted for the unit (see above) stays below a specific limit. This either takes an absolute number
803fb7
+          of tasks or a percentage value that is taken relative to the configured maximum number of tasks on the
803fb7
+          system.  If assigned the special value <literal>infinity</literal>, no tasks limit is applied. This controls
803fb7
+          the <literal>pids.max</literal> control group attribute. For details about this control group attribute, see
803fb7
+          <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt">pids.txt</ulink>.</para>
803fb7
 
803fb7
           <para>Implies <literal>TasksAccounting=true</literal>. The
803fb7
           system default for this setting may be controlled with
803fb7
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
803fb7
index a4465dc7a..fa76c60c1 100644
803fb7
--- a/src/core/dbus-cgroup.c
803fb7
+++ b/src/core/dbus-cgroup.c
803fb7
@@ -694,6 +694,8 @@ int bus_cgroup_set_property(
803fb7
                 r = sd_bus_message_read(message, "t", &limit);
803fb7
                 if (r < 0)
803fb7
                         return r;
803fb7
+                if (limit <= 0)
803fb7
+                        return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
803fb7
 
803fb7
                 if (mode != UNIT_CHECK) {
803fb7
                         c->tasks_max = limit;
803fb7
@@ -705,6 +707,26 @@ int bus_cgroup_set_property(
803fb7
                                 unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu64, limit);
803fb7
                 }
803fb7
 
803fb7
+                return 1;
803fb7
+        } else if (streq(name, "TasksMaxScale")) {
803fb7
+                uint64_t limit;
803fb7
+                uint32_t raw;
803fb7
+
803fb7
+                r = sd_bus_message_read(message, "u", &raw);
803fb7
+                if (r < 0)
803fb7
+                        return r;
803fb7
+
803fb7
+                limit = system_tasks_max_scale(raw, UINT32_MAX);
803fb7
+                if (limit <= 0 || limit >= UINT64_MAX)
803fb7
+                        return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
803fb7
+
803fb7
+                if (mode != UNIT_CHECK) {
803fb7
+                        c->tasks_max = limit;
803fb7
+                        u->cgroup_realized_mask &= ~CGROUP_PIDS;
803fb7
+                        unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%",
803fb7
+                                                          (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
803fb7
+                }
803fb7
+
803fb7
                 return 1;
803fb7
         }
803fb7
 
803fb7
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
803fb7
index c1ffee2c7..411475024 100644
803fb7
--- a/src/core/load-fragment.c
803fb7
+++ b/src/core/load-fragment.c
803fb7
@@ -3089,9 +3089,18 @@ int config_parse_tasks_max(
803fb7
                 return 0;
803fb7
         }
803fb7
 
803fb7
-        r = safe_atou64(rvalue, &u);
803fb7
-        if (r < 0 || u < 1) {
803fb7
-                log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
803fb7
+        r = parse_percent(rvalue);
803fb7
+        if (r < 0) {
803fb7
+                r = safe_atou64(rvalue, &u);
803fb7
+                if (r < 0) {
803fb7
+                        log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
803fb7
+                        return 0;
803fb7
+                }
803fb7
+        } else
803fb7
+                u = system_tasks_max_scale(r, 100U);
803fb7
+
803fb7
+        if (u <= 0 || u >= UINT64_MAX) {
803fb7
+                log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
803fb7
                 return 0;
803fb7
         }
803fb7
 
803fb7
diff --git a/src/libsystemd/sd-bus/bus-util.c b/src/libsystemd/sd-bus/bus-util.c
803fb7
index ed0849b63..f46fa2bbf 100644
803fb7
--- a/src/libsystemd/sd-bus/bus-util.c
803fb7
+++ b/src/libsystemd/sd-bus/bus-util.c
803fb7
@@ -1409,7 +1409,26 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
803fb7
                 }
803fb7
 
803fb7
                 r = sd_bus_message_append(m, "v", "t", (uint64_t) bytes);
803fb7
+        } else if (streq(field, "TasksMax")) {
803fb7
+                uint64_t t;
803fb7
+
803fb7
+                if (isempty(eq) || streq(eq, "infinity"))
803fb7
+                        t = (uint64_t) -1;
803fb7
+                else {
803fb7
+                        r = parse_percent(eq);
803fb7
+                        if (r >= 0) {
803fb7
+                                r = sd_bus_message_append(m, "sv", "TasksMaxScale", "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U));
803fb7
+                                if (r < 0)
803fb7
+                                        return bus_log_create_error(r);
803fb7
+                        } else {
803fb7
+                                r = safe_atou64(eq, &t);
803fb7
+                                if (r < 0)
803fb7
+                                        return log_error_errno(r, "Failed to parse maximum tasks specification %s", assignment);
803fb7
+                        }
803fb7
+
803fb7
+                }
803fb7
 
803fb7
+                r = sd_bus_message_append(m, "sv", "TasksMax", "t", t);
803fb7
         } else if (STR_IN_SET(field, "CPUShares", "BlockIOWeight")) {
803fb7
                 uint64_t u;
803fb7
 
803fb7
diff --git a/src/shared/util.c b/src/shared/util.c
803fb7
index cadaddee3..bbb457759 100644
803fb7
--- a/src/shared/util.c
803fb7
+++ b/src/shared/util.c
803fb7
@@ -94,6 +94,7 @@
803fb7
 #include "def.h"
803fb7
 #include "sparse-endian.h"
803fb7
 #include "conf-parser.h"
803fb7
+#include "cgroup-util.h"
803fb7
 
803fb7
 int saved_argc = 0;
803fb7
 char **saved_argv = NULL;
803fb7
@@ -8707,3 +8708,86 @@ int extract_many_words(const char **p, const char *separators, ExtractFlags flag
803fb7
 
803fb7
         return c;
803fb7
 }
803fb7
+
803fb7
+int parse_percent_unbounded(const char *p) {
803fb7
+        const char *pc, *n;
803fb7
+        unsigned v;
803fb7
+        int r;
803fb7
+
803fb7
+        pc = endswith(p, "%");
803fb7
+        if (!pc)
803fb7
+                return -EINVAL;
803fb7
+
803fb7
+        n = strndupa(p, pc - p);
803fb7
+        r = safe_atou(n, &v);
803fb7
+        if (r < 0)
803fb7
+                return r;
803fb7
+
803fb7
+        return (int) v;
803fb7
+}
803fb7
+
803fb7
+int parse_percent(const char *p) {
803fb7
+        int v;
803fb7
+
803fb7
+        v = parse_percent_unbounded(p);
803fb7
+        if (v > 100)
803fb7
+                return -ERANGE;
803fb7
+
803fb7
+        return v;
803fb7
+}
803fb7
+
803fb7
+uint64_t system_tasks_max(void) {
803fb7
+
803fb7
+#if SIZEOF_PID_T == 4
803fb7
+#define TASKS_MAX ((uint64_t) (INT32_MAX-1))
803fb7
+#elif SIZEOF_PID_T == 2
803fb7
+#define TASKS_MAX ((uint64_t) (INT16_MAX-1))
803fb7
+#else
803fb7
+#error "Unknown pid_t size"
803fb7
+#endif
803fb7
+
803fb7
+        _cleanup_free_ char *value = NULL, *root = NULL;
803fb7
+        uint64_t a = TASKS_MAX, b = TASKS_MAX;
803fb7
+
803fb7
+        /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
803fb7
+         * limit:
803fb7
+         *
803fb7
+         * a) the maximum value for the pid_t type
803fb7
+         * b) the cgroups pids_max attribute for the system
803fb7
+         * c) the kernel's configured maximum PID value
803fb7
+         *
803fb7
+         * And then pick the smallest of the three */
803fb7
+
803fb7
+        if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
803fb7
+                (void) safe_atou64(value, &a);
803fb7
+
803fb7
+        if (cg_get_root_path(&root) >= 0) {
803fb7
+                free(value);
803fb7
+                value = NULL;
803fb7
+
803fb7
+                if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
803fb7
+                        (void) safe_atou64(value, &b);
803fb7
+        }
803fb7
+
803fb7
+        return MIN3(TASKS_MAX,
803fb7
+                    a <= 0 ? TASKS_MAX : a,
803fb7
+                    b <= 0 ? TASKS_MAX : b);
803fb7
+}
803fb7
+
803fb7
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
803fb7
+        uint64_t t, m;
803fb7
+
803fb7
+        assert(max > 0);
803fb7
+
803fb7
+        /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
803fb7
+         * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
803fb7
+
803fb7
+        t = system_tasks_max();
803fb7
+        assert(t > 0);
803fb7
+
803fb7
+        m = t * v;
803fb7
+        if (m / t != v) /* overflow? */
803fb7
+                return UINT64_MAX;
803fb7
+
803fb7
+        return m / max;
803fb7
+}
803fb7
diff --git a/src/shared/util.h b/src/shared/util.h
803fb7
index 12afcc342..f1b6c348f 100644
803fb7
--- a/src/shared/util.h
803fb7
+++ b/src/shared/util.h
803fb7
@@ -1098,3 +1098,8 @@ typedef enum ExtractFlags {
803fb7
 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags);
803fb7
 int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue);
803fb7
 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) _sentinel_;
803fb7
+int parse_percent_unbounded(const char *p);
803fb7
+int parse_percent(const char *p);
803fb7
+
803fb7
+uint64_t system_tasks_max(void);
803fb7
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
803fb7
diff --git a/src/test/test-util.c b/src/test/test-util.c
803fb7
index 9ae347b43..971f97d7c 100644
803fb7
--- a/src/test/test-util.c
803fb7
+++ b/src/test/test-util.c
803fb7
@@ -1530,6 +1530,43 @@ static void test_shell_maybe_quote(void) {
803fb7
         test_shell_maybe_quote_one("foo$bar", "\"foo\\$bar\"");
803fb7
 }
803fb7
 
803fb7
+static void test_system_tasks_max(void) {
803fb7
+        uint64_t t;
803fb7
+
803fb7
+        t = system_tasks_max();
803fb7
+        assert_se(t > 0);
803fb7
+        assert_se(t < UINT64_MAX);
803fb7
+
803fb7
+        log_info("Max tasks: %" PRIu64, t);
803fb7
+}
803fb7
+
803fb7
+static void test_system_tasks_max_scale(void) {
803fb7
+        uint64_t t;
803fb7
+
803fb7
+        t = system_tasks_max();
803fb7
+
803fb7
+        assert_se(system_tasks_max_scale(0, 100) == 0);
803fb7
+        assert_se(system_tasks_max_scale(100, 100) == t);
803fb7
+
803fb7
+        assert_se(system_tasks_max_scale(0, 1) == 0);
803fb7
+        assert_se(system_tasks_max_scale(1, 1) == t);
803fb7
+        assert_se(system_tasks_max_scale(2, 1) == 2*t);
803fb7
+
803fb7
+        assert_se(system_tasks_max_scale(0, 2) == 0);
803fb7
+        assert_se(system_tasks_max_scale(1, 2) == t/2);
803fb7
+        assert_se(system_tasks_max_scale(2, 2) == t);
803fb7
+        assert_se(system_tasks_max_scale(3, 2) == (3*t)/2);
803fb7
+        assert_se(system_tasks_max_scale(4, 2) == t*2);
803fb7
+
803fb7
+        assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0);
803fb7
+        assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2);
803fb7
+        assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t);
803fb7
+
803fb7
+        /* overflow */
803fb7
+
803fb7
+        assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
803fb7
+}
803fb7
+
803fb7
 int main(int argc, char *argv[]) {
803fb7
         log_parse_environment();
803fb7
         log_open();
803fb7
@@ -1608,6 +1645,8 @@ int main(int argc, char *argv[]) {
803fb7
         test_uid_ptr();
803fb7
         test_sparse_write();
803fb7
         test_shell_maybe_quote();
803fb7
+        test_system_tasks_max();
803fb7
+        test_system_tasks_max_scale();
803fb7
 
803fb7
         return 0;
803fb7
 }