From 7ec6e537898e139cc33017e03465ef40a86dd433 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 19 Jul 2016 15:58:49 +0200 Subject: [PATCH] core: support percentage specifications on TasksMax= This adds support for a TasksMax=40% syntax for specifying values relative to the system's configured maximum number of processes. This is useful in order to neatly subdivide the available room for tasks within containers. Cherry-picked from: 83f8e80857090f63cf6a02c54d381dad3c0fad55 Related: #1337244 --- man/systemd.resource-control.xml | 15 +++--- src/core/dbus-cgroup.c | 22 +++++++++ src/core/load-fragment.c | 15 ++++-- src/libsystemd/sd-bus/bus-util.c | 19 ++++++++ src/shared/util.c | 84 ++++++++++++++++++++++++++++++++ src/shared/util.h | 5 ++ src/test/test-util.c | 39 +++++++++++++++ 7 files changed, 187 insertions(+), 12 deletions(-) diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 217105ee5..f507c6748 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -221,15 +221,12 @@ TasksMax=N - Specify the maximum number of tasks that may be - created in the unit. This ensures that the number of tasks - accounted for the unit (see above) stays below a specific - limit. If assigned the special value - infinity no tasks limit is applied. This - controls the pids.max control group - attribute. For details about this control group attribute, - see pids.txt. + Specify the maximum number of tasks that may be created in the unit. This ensures that the number of + tasks accounted for the unit (see above) stays below a specific limit. This either takes an absolute number + of tasks or a percentage value that is taken relative to the configured maximum number of tasks on the + system. If assigned the special value infinity, no tasks limit is applied. This controls + the pids.max control group attribute. For details about this control group attribute, see + pids.txt. Implies TasksAccounting=true. The system default for this setting may be controlled with diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index a4465dc7a..fa76c60c1 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -694,6 +694,8 @@ int bus_cgroup_set_property( r = sd_bus_message_read(message, "t", &limit); if (r < 0) return r; + if (limit <= 0) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name); if (mode != UNIT_CHECK) { c->tasks_max = limit; @@ -705,6 +707,26 @@ int bus_cgroup_set_property( unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu64, limit); } + return 1; + } else if (streq(name, "TasksMaxScale")) { + uint64_t limit; + uint32_t raw; + + r = sd_bus_message_read(message, "u", &raw); + if (r < 0) + return r; + + limit = system_tasks_max_scale(raw, UINT32_MAX); + if (limit <= 0 || limit >= UINT64_MAX) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name); + + if (mode != UNIT_CHECK) { + c->tasks_max = limit; + u->cgroup_realized_mask &= ~CGROUP_PIDS; + unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%", + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); + } + return 1; } diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index c1ffee2c7..411475024 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -3089,9 +3089,18 @@ int config_parse_tasks_max( return 0; } - r = safe_atou64(rvalue, &u); - if (r < 0 || u < 1) { - log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); + r = parse_percent(rvalue); + if (r < 0) { + r = safe_atou64(rvalue, &u); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); + return 0; + } + } else + u = system_tasks_max_scale(r, 100U); + + if (u <= 0 || u >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue); return 0; } diff --git a/src/libsystemd/sd-bus/bus-util.c b/src/libsystemd/sd-bus/bus-util.c index ed0849b63..f46fa2bbf 100644 --- a/src/libsystemd/sd-bus/bus-util.c +++ b/src/libsystemd/sd-bus/bus-util.c @@ -1409,7 +1409,26 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen } r = sd_bus_message_append(m, "v", "t", (uint64_t) bytes); + } else if (streq(field, "TasksMax")) { + uint64_t t; + + if (isempty(eq) || streq(eq, "infinity")) + t = (uint64_t) -1; + else { + r = parse_percent(eq); + if (r >= 0) { + r = sd_bus_message_append(m, "sv", "TasksMaxScale", "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U)); + if (r < 0) + return bus_log_create_error(r); + } else { + r = safe_atou64(eq, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse maximum tasks specification %s", assignment); + } + + } + r = sd_bus_message_append(m, "sv", "TasksMax", "t", t); } else if (STR_IN_SET(field, "CPUShares", "BlockIOWeight")) { uint64_t u; diff --git a/src/shared/util.c b/src/shared/util.c index cadaddee3..bbb457759 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -94,6 +94,7 @@ #include "def.h" #include "sparse-endian.h" #include "conf-parser.h" +#include "cgroup-util.h" int saved_argc = 0; char **saved_argv = NULL; @@ -8707,3 +8708,86 @@ int extract_many_words(const char **p, const char *separators, ExtractFlags flag return c; } + +int parse_percent_unbounded(const char *p) { + const char *pc, *n; + unsigned v; + int r; + + pc = endswith(p, "%"); + if (!pc) + return -EINVAL; + + n = strndupa(p, pc - p); + r = safe_atou(n, &v); + if (r < 0) + return r; + + return (int) v; +} + +int parse_percent(const char *p) { + int v; + + v = parse_percent_unbounded(p); + if (v > 100) + return -ERANGE; + + return v; +} + +uint64_t system_tasks_max(void) { + +#if SIZEOF_PID_T == 4 +#define TASKS_MAX ((uint64_t) (INT32_MAX-1)) +#elif SIZEOF_PID_T == 2 +#define TASKS_MAX ((uint64_t) (INT16_MAX-1)) +#else +#error "Unknown pid_t size" +#endif + + _cleanup_free_ char *value = NULL, *root = NULL; + uint64_t a = TASKS_MAX, b = TASKS_MAX; + + /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this + * limit: + * + * a) the maximum value for the pid_t type + * b) the cgroups pids_max attribute for the system + * c) the kernel's configure maximum PID value + * + * And then pick the smallest of the three */ + + if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0) + (void) safe_atou64(value, &a); + + if (cg_get_root_path(&root) >= 0) { + free(value); + value = NULL; + + if (cg_get_attribute("pids", root, "pids.max", &value) >= 0) + (void) safe_atou64(value, &b); + } + + return MIN3(TASKS_MAX, + a <= 0 ? TASKS_MAX : a, + b <= 0 ? TASKS_MAX : b); +} + +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) { + uint64_t t, m; + + assert(max > 0); + + /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages + * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */ + + t = system_tasks_max(); + assert(t > 0); + + m = t * v; + if (m / t != v) /* overflow? */ + return UINT64_MAX; + + return m / max; +} diff --git a/src/shared/util.h b/src/shared/util.h index 12afcc342..f1b6c348f 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -1098,3 +1098,8 @@ typedef enum ExtractFlags { int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags); int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue); int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) _sentinel_; +int parse_percent_unbounded(const char *p); +int parse_percent(const char *p); + +uint64_t system_tasks_max(void); +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max); diff --git a/src/test/test-util.c b/src/test/test-util.c index 9ae347b43..971f97d7c 100644 --- a/src/test/test-util.c +++ b/src/test/test-util.c @@ -1530,6 +1530,43 @@ static void test_shell_maybe_quote(void) { test_shell_maybe_quote_one("foo$bar", "\"foo\\$bar\""); } +static void test_system_tasks_max(void) { + uint64_t t; + + t = system_tasks_max(); + assert_se(t > 0); + assert_se(t < UINT64_MAX); + + log_info("Max tasks: %" PRIu64, t); +} + +static void test_system_tasks_max_scale(void) { + uint64_t t; + + t = system_tasks_max(); + + assert_se(system_tasks_max_scale(0, 100) == 0); + assert_se(system_tasks_max_scale(100, 100) == t); + + assert_se(system_tasks_max_scale(0, 1) == 0); + assert_se(system_tasks_max_scale(1, 1) == t); + assert_se(system_tasks_max_scale(2, 1) == 2*t); + + assert_se(system_tasks_max_scale(0, 2) == 0); + assert_se(system_tasks_max_scale(1, 2) == t/2); + assert_se(system_tasks_max_scale(2, 2) == t); + assert_se(system_tasks_max_scale(3, 2) == (3*t)/2); + assert_se(system_tasks_max_scale(4, 2) == t*2); + + assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0); + assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2); + assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t); + + /* overflow */ + + assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX); +} + int main(int argc, char *argv[]) { log_parse_environment(); log_open(); @@ -1608,6 +1645,8 @@ int main(int argc, char *argv[]) { test_uid_ptr(); test_sparse_write(); test_shell_maybe_quote(); + test_system_tasks_max(); + test_system_tasks_max_scale(); return 0; }