From 11df25536319a4c0f3276c2218054243d9ee213c Mon Sep 17 00:00:00 2001
From: Filipe Brandenburger <filbranden@google.com>
Date: Fri, 2 Nov 2018 09:21:57 -0700
Subject: [PATCH] core: add CPUQuotaPeriodSec=
This new setting allows configuration of CFS period on the CPU cgroup, instead
of using a hardcoded default of 100ms.
Tested:
- Legacy cgroup + Unified cgroup
- systemctl set-property
- systemctl show
- Confirmed that the cgroup settings (such as cpu.cfs_period_ns) were set
appropriately, including updating the CPU quota (cpu.cfs_quota_ns) when
CPUQuotaPeriodSec= is updated.
- Checked that clamping works properly when either period or (quota * period)
are below the resolution of 1ms, or if period is above the max of 1s.
(cherry picked from commit 10f28641115733c61754342d5dcbe70b083bea4b)
Resolves: #1770379
---
doc/TRANSIENT-SETTINGS.md | 1 +
man/systemd.resource-control.xml | 19 +++++++++
src/core/cgroup.c | 61 ++++++++++++++++++++++-----
src/core/cgroup.h | 3 ++
src/core/dbus-cgroup.c | 23 ++++++++++
src/core/load-fragment-gperf.gperf.m4 | 1 +
src/core/load-fragment.c | 1 +
src/shared/bus-unit-util.c | 14 ++++++
src/test/meson.build | 5 +++
src/test/test-cgroup-cpu.c | 38 +++++++++++++++++
10 files changed, 155 insertions(+), 11 deletions(-)
create mode 100644 src/test/test-cgroup-cpu.c
diff --git a/doc/TRANSIENT-SETTINGS.md b/doc/TRANSIENT-SETTINGS.md
index 23fe84e4d1..0d2d3e9065 100644
--- a/doc/TRANSIENT-SETTINGS.md
+++ b/doc/TRANSIENT-SETTINGS.md
@@ -218,6 +218,7 @@ All cgroup/resource control settings are available for transient units
✓ CPUShares=
✓ StartupCPUShares=
✓ CPUQuota=
+✓ CPUQuotaPeriodSec=
✓ AllowedCPUs=
✓ AllowedMemoryNodes=
✓ MemoryAccounting=
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index b0064bf98f..cfe19a6574 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -231,6 +231,25 @@
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>CPUQuotaPeriodSec=</varname></term>
+
+ <listitem>
+ <para>Assign the duration over which the CPU time quota specified by <varname>CPUQuota=</varname> is measured.
+ Takes a time duration value in seconds, with an optional suffix such as "ms" for milliseconds (or "s" for seconds.)
+ The default setting is 100ms. The period is clamped to the range supported by the kernel, which is [1ms, 1000ms].
+ Additionally, the period is adjusted up so that the quota interval is also at least 1ms.
+ Setting <varname>CPUQuotaPeriodSec=</varname> to an empty value resets it to the default.</para>
+
+ <para>This controls the second field of <literal>cpu.max</literal> attribute on the unified control group hierarchy
+ and <literal>cpu.cfs_period_us</literal> on legacy. For details about these control group attributes, see
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and
+ <ulink url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para>
+
+ <para>Example: <varname>CPUQuotaPeriodSec=10ms</varname> to request that the CPU quota is measured in periods of 10ms.</para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><varname>MemoryAccounting=</varname></term>
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 7aa7db9261..45fd64a394 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -23,7 +23,7 @@
#include "string-util.h"
#include "virt.h"
-#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
+#define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
bool manager_owns_root_cgroup(Manager *m) {
assert(m);
@@ -77,6 +77,7 @@ void cgroup_context_init(CGroupContext *c) {
.cpu_weight = CGROUP_WEIGHT_INVALID,
.startup_cpu_weight = CGROUP_WEIGHT_INVALID,
.cpu_quota_per_sec_usec = USEC_INFINITY,
+ .cpu_quota_period_usec = USEC_INFINITY,
.cpu_shares = CGROUP_CPU_SHARES_INVALID,
.startup_cpu_shares = CGROUP_CPU_SHARES_INVALID,
@@ -190,6 +191,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupDeviceAllow *a;
IPAddressAccessItem *iaai;
char u[FORMAT_TIMESPAN_MAX];
+ char v[FORMAT_TIMESPAN_MAX];
assert(c);
assert(f);
@@ -211,6 +213,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sCPUShares=%" PRIu64 "\n"
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n"
+ "%sCPUQuotaPeriodSec=%s\n"
"%sAllowedCPUs=%s\n"
"%sAllowedMemoryNodes=%s\n"
"%sIOWeight=%" PRIu64 "\n"
@@ -236,6 +239,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->cpu_shares,
prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
+ prefix, format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1),
prefix, cpuset_cpus,
prefix, cpuset_mems,
prefix, c->io_weight,
@@ -515,7 +519,40 @@ static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state)
return CGROUP_CPU_SHARES_DEFAULT;
}
-static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) {
+usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period) {
+ /* kernel uses a minimum resolution of 1ms, so both period and (quota * period)
+ * need to be higher than that boundary. quota is specified in USecPerSec.
+ * Additionally, period must be at most max_period. */
+ assert(quota > 0);
+
+ return MIN(MAX3(period, resolution, resolution * USEC_PER_SEC / quota), max_period);
+}
+
+static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) {
+ usec_t new_period;
+
+ if (quota == USEC_INFINITY)
+ /* Always use default period for infinity quota. */
+ return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
+
+ if (period == USEC_INFINITY)
+ /* Default period was requested. */
+ period = CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
+
+ /* Clamp to interval [1ms, 1s] */
+ new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC);
+
+ if (new_period != period) {
+ char v[FORMAT_TIMESPAN_MAX];
+ log_unit_full(u, LOG_WARNING, 0,
+ "Clamping CPU interval for cpu.max: period is now %s",
+ format_timespan(v, sizeof(v), new_period, 1));
+ }
+
+ return new_period;
+}
+
+static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota, usec_t period) {
char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)];
int r;
@@ -525,11 +562,12 @@ static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t q
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set cpu.weight: %m");
+ period = cgroup_cpu_adjust_period_and_log(u, period, quota);
if (quota != USEC_INFINITY)
xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
- quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+ MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC), period);
else
- xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+ xsprintf(buf, "max " USEC_FMT "\n", period);
r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf);
@@ -538,7 +576,7 @@ static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t q
"Failed to set cpu.max: %m");
}
-static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) {
+static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota, usec_t period) {
char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
int r;
@@ -548,20 +586,21 @@ static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t qu
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set cpu.shares: %m");
- xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+ period = cgroup_cpu_adjust_period_and_log(u, period, quota);
+
+ xsprintf(buf, USEC_FMT "\n", period);
r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf);
if (r < 0)
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set cpu.cfs_period_us: %m");
if (quota != USEC_INFINITY) {
- xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
- r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf);
- } else
+ xsprintf(buf, USEC_FMT "\n", MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC));
r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1");
if (r < 0)
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set cpu.cfs_quota_us: %m");
+ }
}
static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
@@ -815,7 +854,7 @@ static void cgroup_context_apply(
} else
weight = CGROUP_WEIGHT_DEFAULT;
- cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec);
+ cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
} else {
uint64_t shares;
@@ -831,7 +870,7 @@ static void cgroup_context_apply(
else
shares = CGROUP_CPU_SHARES_DEFAULT;
- cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
+ cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
}
}
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index f7365b4c46..2ba57d3ded 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -84,6 +84,7 @@ struct CGroupContext {
uint64_t cpu_weight;
uint64_t startup_cpu_weight;
usec_t cpu_quota_per_sec_usec;
+ usec_t cpu_quota_period_usec;
CPUSet cpuset_cpus;
CPUSet cpuset_mems;
@@ -136,6 +137,8 @@ typedef enum CGroupIPAccountingMetric {
typedef struct Unit Unit;
typedef struct Manager Manager;
+usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period);
+
void cgroup_context_init(CGroupContext *c);
void cgroup_context_done(CGroupContext *c);
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix);
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index a1d3014d61..c8b918e45d 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -334,6 +334,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
+ SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0),
SD_BUS_PROPERTY("AllowedCPUs", "ay", property_get_cpuset, offsetof(CGroupContext, cpuset_cpus), 0),
SD_BUS_PROPERTY("AllowedMemoryNodes", "ay", property_get_cpuset, offsetof(CGroupContext, cpuset_mems), 0),
SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0),
@@ -725,6 +726,28 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "CPUQuotaPeriodUSec")) {
+ uint64_t u64;
+
+ r = sd_bus_message_read(message, "t", &u64);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->cpu_quota_period_usec = u64;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+ if (c->cpu_quota_period_usec == USEC_INFINITY)
+ unit_write_setting(u, flags, "CPUQuotaPeriodSec", "CPUQuotaPeriodSec=");
+ else {
+ char v[FORMAT_TIMESPAN_MAX];
+ unit_write_settingf(u, flags, "CPUQuotaPeriodSec",
+ "CPUQuotaPeriodSec=%s",
+ format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1));
+ }
+ }
+
+ return 1;
+
} else if (STR_IN_SET(name, "AllowedCPUs", "AllowedMemoryNodes")) {
const void *a;
size_t n;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 23879c001f..4defa82ac1 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -169,6 +169,7 @@ $1.StartupCPUWeight, config_parse_cg_weight, 0,
$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares)
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
+$1.CPUQuotaPeriodSec, config_parse_sec_def_infinity, 0, offsetof($1, cgroup_context.cpu_quota_period_usec)
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 1e22013b75..762b106007 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -55,6 +55,7 @@
#include "unit-name.h"
#include "unit-printf.h"
#include "user-util.h"
+#include "time-util.h"
#include "web-util.h"
static int supported_socket_protocol_from_string(const char *s) {
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 3c1ecf2027..ec45d6f86d 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -480,6 +480,20 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
return 1;
}
+ if (streq(field, "CPUQuotaPeriodSec")) {
+ usec_t u = USEC_INFINITY;
+
+ r = parse_sec_def_infinity(eq, &u);
+ if (r < 0)
+ return log_error_errno(r, "CPU quota period '%s' invalid.", eq);
+
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPeriodUSec", "t", u);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
if (streq(field, "DeviceAllow")) {
if (isempty(eq))
diff --git a/src/test/meson.build b/src/test/meson.build
index ead000e30c..22264d034c 100644
--- a/src/test/meson.build
+++ b/src/test/meson.build
@@ -513,6 +513,11 @@ tests += [
[],
'', 'manual'],
+ [['src/test/test-cgroup-cpu.c'],
+ [libcore,
+ libshared],
+ []],
+
[['src/test/test-cgroup-mask.c',
'src/test/test-helper.c'],
[libcore,
diff --git a/src/test/test-cgroup-cpu.c b/src/test/test-cgroup-cpu.c
new file mode 100644
index 0000000000..a445acc955
--- /dev/null
+++ b/src/test/test-cgroup-cpu.c
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "cgroup.h"
+#include "log.h"
+
+static void test_cgroup_cpu_adjust_period(void) {
+ log_info("/* %s */", __func__);
+
+ /* Period 1ms, quota 40% -> Period 2.5ms */
+ assert_se(2500 == cgroup_cpu_adjust_period(USEC_PER_MSEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10ms, quota 10% -> keep. */
+ assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 100 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 1ms, quota 1000% -> keep. */
+ assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(USEC_PER_MSEC, 10000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 100ms, quota 30% -> keep. */
+ assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(100 * USEC_PER_MSEC, 300 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 5s, quota 40% -> adjust to 1s. */
+ assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(5 * USEC_PER_SEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 2s, quota 250% -> adjust to 1s. */
+ assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(2 * USEC_PER_SEC, 2500 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10us, quota 5,000,000% -> adjust to 1ms. */
+ assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(10, 50000000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10ms, quota 50,000% -> keep. */
+ assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 500000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10ms, quota 1% -> adjust to 100ms. */
+ assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 10ms, quota .001% -> adjust to 1s. */
+ assert_se(1 * USEC_PER_SEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 0ms, quota 200% -> adjust to 1ms. */
+ assert_se(1 * USEC_PER_MSEC == cgroup_cpu_adjust_period(0, 2 * USEC_PER_SEC, USEC_PER_MSEC, USEC_PER_SEC));
+ /* Period 0ms, quota 40% -> adjust to 2.5ms. */
+ assert_se(2500 == cgroup_cpu_adjust_period(0, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+}
+
+int main(int argc, char *argv[]) {
+ test_cgroup_cpu_adjust_period();
+ return 0;
+}