923a60
From 7ec6e537898e139cc33017e03465ef40a86dd433 Mon Sep 17 00:00:00 2001
923a60
From: Lennart Poettering <lennart@poettering.net>
923a60
Date: Tue, 19 Jul 2016 15:58:49 +0200
923a60
Subject: [PATCH] core: support percentage specifications on TasksMax=
923a60
923a60
This adds support for a TasksMax=40% syntax for specifying values relative to
923a60
the system's configured maximum number of processes. This is useful in order to
923a60
neatly subdivide the available room for tasks within containers.
923a60
923a60
Cherry-picked from: 83f8e80857090f63cf6a02c54d381dad3c0fad55
923a60
Related: #1337244
923a60
---
923a60
 man/systemd.resource-control.xml | 15 +++---
923a60
 src/core/dbus-cgroup.c           | 22 +++++++++
923a60
 src/core/load-fragment.c         | 15 ++++--
923a60
 src/libsystemd/sd-bus/bus-util.c | 19 ++++++++
923a60
 src/shared/util.c                | 84 ++++++++++++++++++++++++++++++++
923a60
 src/shared/util.h                |  5 ++
923a60
 src/test/test-util.c             | 39 +++++++++++++++
923a60
 7 files changed, 187 insertions(+), 12 deletions(-)
923a60
923a60
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
923a60
index 217105ee5a..f507c67487 100644
923a60
--- a/man/systemd.resource-control.xml
923a60
+++ b/man/systemd.resource-control.xml
923a60
@@ -221,15 +221,12 @@
923a60
         <term><varname>TasksMax=<replaceable>N</replaceable></varname></term>
923a60
 
923a60
         <listitem>
923a60
-          <para>Specify the maximum number of tasks that may be
923a60
-          created in the unit. This ensures that the number of tasks
923a60
-          accounted for the unit (see above) stays below a specific
923a60
-          limit. If assigned the special value
923a60
-          <literal>infinity</literal> no tasks limit is applied. This
923a60
-          controls the <literal>pids.max</literal> control group
923a60
-          attribute. For details about this control group attribute,
923a60
-          see <ulink
923a60
-          url="https://www.kernel.org/doc/Documentation/cgroups/pids.txt">pids.txt</ulink>.</para>
923a60
+          <para>Specify the maximum number of tasks that may be created in the unit. This ensures that the number of
923a60
+          tasks accounted for the unit (see above) stays below a specific limit. This either takes an absolute number
923a60
+          of tasks or a percentage value that is taken relative to the configured maximum number of tasks on the
923a60
+          system.  If assigned the special value <literal>infinity</literal>, no tasks limit is applied. This controls
923a60
+          the <literal>pids.max</literal> control group attribute. For details about this control group attribute, see
923a60
+          <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt">pids.txt</ulink>.</para>
923a60
 
923a60
           <para>Implies <literal>TasksAccounting=true</literal>. The
923a60
           system default for this setting may be controlled with
923a60
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
923a60
index a4465dc7aa..fa76c60c1f 100644
923a60
--- a/src/core/dbus-cgroup.c
923a60
+++ b/src/core/dbus-cgroup.c
923a60
@@ -694,6 +694,8 @@ int bus_cgroup_set_property(
923a60
                 r = sd_bus_message_read(message, "t", &limit);
923a60
                 if (r < 0)
923a60
                         return r;
923a60
+                if (limit <= 0)
923a60
+                        return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
923a60
 
923a60
                 if (mode != UNIT_CHECK) {
923a60
                         c->tasks_max = limit;
923a60
@@ -705,6 +707,26 @@ int bus_cgroup_set_property(
923a60
                                 unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu64, limit);
923a60
                 }
923a60
 
923a60
+                return 1;
923a60
+        } else if (streq(name, "TasksMaxScale")) {
923a60
+                uint64_t limit;
923a60
+                uint32_t raw;
923a60
+
923a60
+                r = sd_bus_message_read(message, "u", &raw);
923a60
+                if (r < 0)
923a60
+                        return r;
923a60
+
923a60
+                limit = system_tasks_max_scale(raw, UINT32_MAX);
923a60
+                if (limit <= 0 || limit >= UINT64_MAX)
923a60
+                        return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
923a60
+
923a60
+                if (mode != UNIT_CHECK) {
923a60
+                        c->tasks_max = limit;
923a60
+                        u->cgroup_realized_mask &= ~CGROUP_PIDS;
923a60
+                        unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%",
923a60
+                                                          (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
923a60
+                }
923a60
+
923a60
                 return 1;
923a60
         }
923a60
 
923a60
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
923a60
index c1ffee2c7e..4114750244 100644
923a60
--- a/src/core/load-fragment.c
923a60
+++ b/src/core/load-fragment.c
923a60
@@ -3089,9 +3089,18 @@ int config_parse_tasks_max(
923a60
                 return 0;
923a60
         }
923a60
 
923a60
-        r = safe_atou64(rvalue, &u);
923a60
-        if (r < 0 || u < 1) {
923a60
-                log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
923a60
+        r = parse_percent(rvalue);
923a60
+        if (r < 0) {
923a60
+                r = safe_atou64(rvalue, &u);
923a60
+                if (r < 0) {
923a60
+                        log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
923a60
+                        return 0;
923a60
+                }
923a60
+        } else
923a60
+                u = system_tasks_max_scale(r, 100U);
923a60
+
923a60
+        if (u <= 0 || u >= UINT64_MAX) {
923a60
+                log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
923a60
                 return 0;
923a60
         }
923a60
 
923a60
diff --git a/src/libsystemd/sd-bus/bus-util.c b/src/libsystemd/sd-bus/bus-util.c
923a60
index ed0849b638..f46fa2bbf3 100644
923a60
--- a/src/libsystemd/sd-bus/bus-util.c
923a60
+++ b/src/libsystemd/sd-bus/bus-util.c
923a60
@@ -1409,7 +1409,26 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
923a60
                 }
923a60
 
923a60
                 r = sd_bus_message_append(m, "v", "t", (uint64_t) bytes);
923a60
+        } else if (streq(field, "TasksMax")) {
923a60
+                uint64_t t;
923a60
+
923a60
+                if (isempty(eq) || streq(eq, "infinity"))
923a60
+                        t = (uint64_t) -1;
923a60
+                else {
923a60
+                        r = parse_percent(eq);
923a60
+                        if (r >= 0) {
923a60
+                                r = sd_bus_message_append(m, "sv", "TasksMaxScale", "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U));
923a60
+                                if (r < 0)
923a60
+                                        return bus_log_create_error(r);
923a60
+                        } else {
923a60
+                                r = safe_atou64(eq, &t);
923a60
+                                if (r < 0)
923a60
+                                        return log_error_errno(r, "Failed to parse maximum tasks specification %s", assignment);
923a60
+                        }
923a60
+
923a60
+                }
923a60
 
923a60
+                r = sd_bus_message_append(m, "sv", "TasksMax", "t", t);
923a60
         } else if (STR_IN_SET(field, "CPUShares", "BlockIOWeight")) {
923a60
                 uint64_t u;
923a60
 
923a60
diff --git a/src/shared/util.c b/src/shared/util.c
923a60
index cadaddee32..bbb4577590 100644
923a60
--- a/src/shared/util.c
923a60
+++ b/src/shared/util.c
923a60
@@ -94,6 +94,7 @@
923a60
 #include "def.h"
923a60
 #include "sparse-endian.h"
923a60
 #include "conf-parser.h"
923a60
+#include "cgroup-util.h"
923a60
 
923a60
 int saved_argc = 0;
923a60
 char **saved_argv = NULL;
923a60
@@ -8707,3 +8708,86 @@ int extract_many_words(const char **p, const char *separators, ExtractFlags flag
923a60
 
923a60
         return c;
923a60
 }
923a60
+
923a60
+int parse_percent_unbounded(const char *p) {
923a60
+        const char *pc, *n;
923a60
+        unsigned v;
923a60
+        int r;
923a60
+
923a60
+        pc = endswith(p, "%");
923a60
+        if (!pc)
923a60
+                return -EINVAL;
923a60
+
923a60
+        n = strndupa(p, pc - p);
923a60
+        r = safe_atou(n, &v);
923a60
+        if (r < 0)
923a60
+                return r;
923a60
+
923a60
+        return (int) v;
923a60
+}
923a60
+
923a60
+int parse_percent(const char *p) {
923a60
+        int v;
923a60
+
923a60
+        v = parse_percent_unbounded(p);
923a60
+        if (v > 100)
923a60
+                return -ERANGE;
923a60
+
923a60
+        return v;
923a60
+}
923a60
+
923a60
+uint64_t system_tasks_max(void) {
923a60
+
923a60
+#if SIZEOF_PID_T == 4
923a60
+#define TASKS_MAX ((uint64_t) (INT32_MAX-1))
923a60
+#elif SIZEOF_PID_T == 2
923a60
+#define TASKS_MAX ((uint64_t) (INT16_MAX-1))
923a60
+#else
923a60
+#error "Unknown pid_t size"
923a60
+#endif
923a60
+
923a60
+        _cleanup_free_ char *value = NULL, *root = NULL;
923a60
+        uint64_t a = TASKS_MAX, b = TASKS_MAX;
923a60
+
923a60
+        /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
923a60
+         * limit:
923a60
+         *
923a60
+         * a) the maximum value for the pid_t type
923a60
+         * b) the cgroups pids_max attribute for the system
923a60
+         * c) the kernel's configured maximum PID value
923a60
+         *
923a60
+         * And then pick the smallest of the three */
923a60
+
923a60
+        if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
923a60
+                (void) safe_atou64(value, &a);
923a60
+
923a60
+        if (cg_get_root_path(&root) >= 0) {
923a60
+                free(value);
923a60
+                value = NULL;
923a60
+
923a60
+                if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
923a60
+                        (void) safe_atou64(value, &b);
923a60
+        }
923a60
+
923a60
+        return MIN3(TASKS_MAX,
923a60
+                    a <= 0 ? TASKS_MAX : a,
923a60
+                    b <= 0 ? TASKS_MAX : b);
923a60
+}
923a60
+
923a60
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
923a60
+        uint64_t t, m;
923a60
+
923a60
+        assert(max > 0);
923a60
+
923a60
+        /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
923a60
+         * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
923a60
+
923a60
+        t = system_tasks_max();
923a60
+        assert(t > 0);
923a60
+
923a60
+        m = t * v;
923a60
+        if (m / t != v) /* overflow? */
923a60
+                return UINT64_MAX;
923a60
+
923a60
+        return m / max;
923a60
+}
923a60
diff --git a/src/shared/util.h b/src/shared/util.h
923a60
index 12afcc3429..f1b6c348f8 100644
923a60
--- a/src/shared/util.h
923a60
+++ b/src/shared/util.h
923a60
@@ -1098,3 +1098,8 @@ typedef enum ExtractFlags {
923a60
 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags);
923a60
 int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue);
923a60
 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) _sentinel_;
923a60
+int parse_percent_unbounded(const char *p);
923a60
+int parse_percent(const char *p);
923a60
+
923a60
+uint64_t system_tasks_max(void);
923a60
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
923a60
diff --git a/src/test/test-util.c b/src/test/test-util.c
923a60
index 9ae347b434..971f97d7c3 100644
923a60
--- a/src/test/test-util.c
923a60
+++ b/src/test/test-util.c
923a60
@@ -1530,6 +1530,43 @@ static void test_shell_maybe_quote(void) {
923a60
         test_shell_maybe_quote_one("foo$bar", "\"foo\\$bar\"");
923a60
 }
923a60
 
923a60
+static void test_system_tasks_max(void) {
923a60
+        uint64_t t;
923a60
+
923a60
+        t = system_tasks_max();
923a60
+        assert_se(t > 0);
923a60
+        assert_se(t < UINT64_MAX);
923a60
+
923a60
+        log_info("Max tasks: %" PRIu64, t);
923a60
+}
923a60
+
923a60
+static void test_system_tasks_max_scale(void) {
923a60
+        uint64_t t;
923a60
+
923a60
+        t = system_tasks_max();
923a60
+
923a60
+        assert_se(system_tasks_max_scale(0, 100) == 0);
923a60
+        assert_se(system_tasks_max_scale(100, 100) == t);
923a60
+
923a60
+        assert_se(system_tasks_max_scale(0, 1) == 0);
923a60
+        assert_se(system_tasks_max_scale(1, 1) == t);
923a60
+        assert_se(system_tasks_max_scale(2, 1) == 2*t);
923a60
+
923a60
+        assert_se(system_tasks_max_scale(0, 2) == 0);
923a60
+        assert_se(system_tasks_max_scale(1, 2) == t/2);
923a60
+        assert_se(system_tasks_max_scale(2, 2) == t);
923a60
+        assert_se(system_tasks_max_scale(3, 2) == (3*t)/2);
923a60
+        assert_se(system_tasks_max_scale(4, 2) == t*2);
923a60
+
923a60
+        assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0);
923a60
+        assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2);
923a60
+        assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t);
923a60
+
923a60
+        /* overflow */
923a60
+
923a60
+        assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
923a60
+}
923a60
+
923a60
 int main(int argc, char *argv[]) {
923a60
         log_parse_environment();
923a60
         log_open();
923a60
@@ -1608,6 +1645,8 @@ int main(int argc, char *argv[]) {
923a60
         test_uid_ptr();
923a60
         test_sparse_write();
923a60
         test_shell_maybe_quote();
923a60
+        test_system_tasks_max();
923a60
+        test_system_tasks_max_scale();
923a60
 
923a60
         return 0;
923a60
 }