From 2240e7955d64260e94dd52a3ab9855d267c2af89 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Jun 2018 14:16:35 -0700 Subject: [PATCH] core: add IODeviceLatencyTargetSec This adds support for the following proposed latency based IO control mechanism. https://lkml.org/lkml/2018/6/5/428 (cherry picked from commit 6ae4283cb14c4e4a895f4bbba703804e4128c86c) Resolves: #1831519 --- man/systemd.resource-control.xml | 29 +++++-- src/core/cgroup.c | 56 +++++++++++-- src/core/cgroup.h | 9 +++ src/core/dbus-cgroup.c | 111 ++++++++++++++++++++++++++ src/core/load-fragment-gperf.gperf.m4 | 1 + src/core/load-fragment.c | 72 +++++++++++++++++ src/core/load-fragment.h | 1 + src/shared/bus-unit-util.c | 31 +++++++ src/systemctl/systemctl.c | 22 +++++ 9 files changed, 320 insertions(+), 12 deletions(-) diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 4329742e94..b0064bf98f 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -417,11 +417,11 @@ Set the per-device overall block I/O weight for the executed processes, if the unified control group hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify - the device specific weight value, between 1 and 10000. (Example: "/dev/sda 1000"). The file path may be - specified as path to a block device node or as any other file, in which case the backing block device of the - file system of the file is determined. This controls the io.weight control group - attribute, which defaults to 100. Use this option multiple times to set weights for multiple devices. For - details about this control group attribute, see /dev/sda 1000). The file + path may be specified as path to a block device node or as any other file, in which case the backing block + device of the file system of the file is determined. This controls the io.weight control + group attribute, which defaults to 100. Use this option multiple times to set weights for multiple devices. + For details about this control group attribute, see cgroup-v2.txt. Implies IOAccounting=true. @@ -482,6 +482,25 @@ + + IODeviceLatencyTargetSec=device target + + + Set the per-device average target I/O latency for the executed processes, if the unified control group + hierarchy is used on the system. Takes a file path and a timespan separated by a space to specify + the device specific latency target. (Example: "/dev/sda 25ms"). The file path may be specified + as path to a block device node or as any other file, in which case the backing block device of the file + system of the file is determined. This controls the io.latency control group + attribute. Use this option multiple times to set latency target for multiple devices. For details about this + control group attribute, see cgroup-v2.txt. + + Implies IOAccounting=true. + + These settings are supported only if the unified control group hierarchy is used. + + + IPAccounting= diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 9e4c3c7dac..ad8219bd79 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -114,6 +114,15 @@ void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight free(w); } +void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l) { + assert(c); + assert(l); + + LIST_REMOVE(device_latencies, c->io_device_latencies, l); + free(l->path); + free(l); +} + void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) { assert(c); assert(l); @@ -147,6 +156,9 @@ void cgroup_context_done(CGroupContext *c) { while (c->io_device_weights) cgroup_context_free_io_device_weight(c, c->io_device_weights); + while (c->io_device_latencies) + cgroup_context_free_io_device_latency(c, c->io_device_latencies); + while (c->io_device_limits) cgroup_context_free_io_device_limit(c, c->io_device_limits); @@ -171,6 +183,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { _cleanup_free_ char *cpuset_mems = NULL; CGroupIODeviceLimit *il; CGroupIODeviceWeight *iw; + CGroupIODeviceLatency *l; CGroupBlockIODeviceBandwidth *b; CGroupBlockIODeviceWeight *w; CGroupDeviceAllow *a; @@ -256,11 +269,18 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { LIST_FOREACH(device_weights, iw, c->io_device_weights) fprintf(f, - "%sIODeviceWeight=%s %" PRIu64, + "%sIODeviceWeight=%s %" PRIu64 "\n", prefix, iw->path, iw->weight); + LIST_FOREACH(device_latencies, l, c->io_device_latencies) + fprintf(f, + "%sIODeviceLatencyTargetSec=%s %s\n", + prefix, + l->path, + format_timespan(u, sizeof(u), l->target_usec, 1)); + LIST_FOREACH(device_limits, il, c->io_device_limits) { char buf[FORMAT_BYTES_MAX]; CGroupIOLimitType type; @@ -573,6 +593,7 @@ static bool cgroup_context_has_io_config(CGroupContext *c) { c->io_weight != CGROUP_WEIGHT_INVALID || c->startup_io_weight != CGROUP_WEIGHT_INVALID || c->io_device_weights || + c->io_device_latencies || c->io_device_limits; } @@ -646,6 +667,26 @@ static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint "Failed to set blkio.weight_device: %m"); } +static void cgroup_apply_io_device_latency(Unit *u, const char *dev_path, usec_t target) { + char buf[DECIMAL_STR_MAX(dev_t)*2+2+7+DECIMAL_STR_MAX(uint64_t)+1]; + dev_t dev; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return; + + if (target != USEC_INFINITY) + xsprintf(buf, "%u:%u target=%" PRIu64 "\n", major(dev), minor(dev), target); + else + xsprintf(buf, "%u:%u target=max\n", major(dev), minor(dev)); + + r = cg_set_attribute("io", u->cgroup_path, "io.latency", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.latency on cgroup %s: %m", u->cgroup_path); +} + static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) { char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)]; char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4]; @@ -827,13 +868,11 @@ static void cgroup_context_apply( if (has_io) { CGroupIODeviceWeight *w; - /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->io_device_weights) cgroup_apply_io_device_weight(u, w->path, w->weight); } else if (has_blockio) { CGroupBlockIODeviceWeight *w; - /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->blockio_device_weights) { weight = cgroup_weight_blkio_to_io(w->weight); @@ -843,9 +882,15 @@ static void cgroup_context_apply( cgroup_apply_io_device_weight(u, w->path, weight); } } + + if (has_io) { + CGroupIODeviceLatency *l; + + LIST_FOREACH(device_latencies, l, c->io_device_latencies) + cgroup_apply_io_device_latency(u, l->path, l->target_usec); + } } - /* Apply limits and free ones without config. */ if (has_io) { CGroupIODeviceLimit *l; @@ -902,7 +947,6 @@ static void cgroup_context_apply( if (has_io) { CGroupIODeviceWeight *w; - /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->io_device_weights) { weight = cgroup_weight_io_to_blkio(w->weight); @@ -914,13 +958,11 @@ static void cgroup_context_apply( } else if (has_blockio) { CGroupBlockIODeviceWeight *w; - /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->blockio_device_weights) cgroup_apply_blkio_device_weight(u, w->path, w->weight); } } - /* Apply limits and free ones without config. */ if (has_io) { CGroupIODeviceLimit *l; diff --git a/src/core/cgroup.h b/src/core/cgroup.h index da10575394..f7365b4c46 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -13,6 +13,7 @@ typedef struct CGroupContext CGroupContext; typedef struct CGroupDeviceAllow CGroupDeviceAllow; typedef struct CGroupIODeviceWeight CGroupIODeviceWeight; typedef struct CGroupIODeviceLimit CGroupIODeviceLimit; +typedef struct CGroupIODeviceLatency CGroupIODeviceLatency; typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight; typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth; @@ -52,6 +53,12 @@ struct CGroupIODeviceLimit { uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; }; +struct CGroupIODeviceLatency { + LIST_FIELDS(CGroupIODeviceLatency, device_latencies); + char *path; + usec_t target_usec; +}; + struct CGroupBlockIODeviceWeight { LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights); char *path; @@ -85,6 +92,7 @@ struct CGroupContext { uint64_t startup_io_weight; LIST_HEAD(CGroupIODeviceWeight, io_device_weights); LIST_HEAD(CGroupIODeviceLimit, io_device_limits); + LIST_HEAD(CGroupIODeviceLatency, io_device_latencies); uint64_t memory_low; uint64_t memory_high; @@ -137,6 +145,7 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c); void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a); void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w); void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l); +void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l); void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w); void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 30d4e83932..a1d3014d61 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -140,6 +140,36 @@ static int property_get_io_device_limits( return sd_bus_message_close_container(reply); } +static int property_get_io_device_latency( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + CGroupIODeviceLatency *l; + int r; + + assert(bus); + assert(reply); + assert(c); + + r = sd_bus_message_open_container(reply, 'a', "(st)"); + if (r < 0) + return r; + + LIST_FOREACH(device_latencies, l, c->io_device_latencies) { + r = sd_bus_message_append(reply, "(st)", l->path, l->target_usec); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + static int property_get_blockio_device_weight( sd_bus *bus, const char *path, @@ -314,6 +344,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IODeviceLatencyTargetUSec", "a(st)", property_get_io_device_latency, 0, 0), SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0), SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0), SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0), @@ -898,6 +929,86 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "IODeviceLatencyTargetUSec")) { + const char *path; + uint64_t target; + unsigned n = 0; + + r = sd_bus_message_enter_container(message, 'a', "(st)"); + if (r < 0) + return r; + + while ((r = sd_bus_message_read(message, "(st)", &path, &target)) > 0) { + + if (!path_is_normalized(path)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + CGroupIODeviceLatency *a = NULL, *b; + + LIST_FOREACH(device_latencies, b, c->io_device_latencies) { + if (path_equal(b->path, path)) { + a = b; + break; + } + } + + if (!a) { + a = new0(CGroupIODeviceLatency, 1); + if (!a) + return -ENOMEM; + + a->path = strdup(path); + if (!a->path) { + free(a); + return -ENOMEM; + } + LIST_PREPEND(device_latencies, c->io_device_latencies, a); + } + + a->target_usec = target; + } + + n++; + } + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + _cleanup_free_ char *buf = NULL; + _cleanup_fclose_ FILE *f = NULL; + char ts[FORMAT_TIMESPAN_MAX]; + CGroupIODeviceLatency *a; + size_t size = 0; + + if (n == 0) { + while (c->io_device_latencies) + cgroup_context_free_io_device_latency(c, c->io_device_latencies); + } + + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + f = open_memstream(&buf, &size); + if (!f) + return -ENOMEM; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + fputs("IODeviceLatencyTargetSec=\n", f); + LIST_FOREACH(device_latencies, a, c->io_device_latencies) + fprintf(f, "IODeviceLatencyTargetSec=%s %s\n", + a->path, format_timespan(ts, sizeof(ts), a->target_usec, 1)); + + r = fflush_and_check(f); + if (r < 0) + return r; + unit_write_setting(u, flags, name, buf); + } + + return 1; + } else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { const char *path; bool read = true; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 8883818ff2..23879c001f 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -185,6 +185,7 @@ $1.IOReadBandwidthMax, config_parse_io_limit, 0, $1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IODeviceLatencyTargetSec, config_parse_io_device_latency, 0, offsetof($1, cgroup_context) $1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting) $1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight) $1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 9b2724307d..1e22013b75 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -3383,6 +3383,77 @@ int config_parse_io_device_weight( return 0; } +int config_parse_io_device_latency( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL, *resolved = NULL; + CGroupIODeviceLatency *l; + CGroupContext *c = data; + const char *p = rvalue; + usec_t usec; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + while (c->io_device_latencies) + cgroup_context_free_io_device_latency(c, c->io_device_latencies); + + return 0; + } + + r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Invalid syntax, ignoring: %s", rvalue); + return 0; + } + if (r == 0 || isempty(p)) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Failed to extract device path and latency from '%s', ignoring.", rvalue); + return 0; + } + + r = unit_full_printf(userdata, path, &resolved); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to resolve unit specifiers in '%s', ignoring: %m", path); + return 0; + } + + r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue); + if (r < 0) + return 0; + + if (parse_sec(p, &usec) < 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", p); + return 0; + } + + l = new0(CGroupIODeviceLatency, 1); + if (!l) + return log_oom(); + + l->path = TAKE_PTR(resolved); + l->target_usec = usec; + + LIST_PREPEND(device_latencies, c->io_device_latencies, l); + return 0; +} + int config_parse_io_limit( const char *unit, const char *filename, @@ -4572,6 +4643,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_device_policy, "POLICY" }, { config_parse_io_limit, "LIMIT" }, { config_parse_io_device_weight, "DEVICEWEIGHT" }, + { config_parse_io_device_latency, "DEVICELATENCY" }, { config_parse_blockio_bandwidth, "BANDWIDTH" }, { config_parse_blockio_weight, "WEIGHT" }, { config_parse_blockio_device_weight, "DEVICEWEIGHT" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 424fa478a7..65a94d53cc 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -68,6 +68,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max); CONFIG_PARSER_PROTOTYPE(config_parse_delegate); CONFIG_PARSER_PROTOTYPE(config_parse_device_policy); CONFIG_PARSER_PROTOTYPE(config_parse_device_allow); +CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency); CONFIG_PARSER_PROTOTYPE(config_parse_io_device_weight); CONFIG_PARSER_PROTOTYPE(config_parse_io_limit); CONFIG_PARSER_PROTOTYPE(config_parse_blockio_weight); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index e0b2cfb170..3c1ecf2027 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -566,6 +566,37 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons return 1; } + if (streq(field, "IODeviceLatencyTargetSec")) { + const char *field_usec = "IODeviceLatencyTargetUSec"; + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", USEC_INFINITY); + else { + const char *path, *target, *e; + usec_t usec; + + e = strchr(eq, ' '); + if (!e) { + log_error("Failed to parse %s value %s.", field, eq); + return -EINVAL; + } + + path = strndupa(eq, e - eq); + target = e+1; + + r = parse_sec(target, &usec); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s: %m", field, target); + + r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 1, path, usec); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) { unsigned char prefixlen; union in_addr_union prefix = {}; diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index a3074bc5e3..559e49f104 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -4875,6 +4875,28 @@ static int print_property(const char *name, sd_bus_message *m, bool value, bool return 1; + } else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN && + streq(name, "IODeviceLatencyTargetUSec")) { + char ts[FORMAT_TIMESPAN_MAX]; + const char *path; + uint64_t target; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(st)"); + if (r < 0) + return bus_log_parse_error(r); + + while ((r = sd_bus_message_read(m, "(st)", &path, &target)) > 0) + print_prop(name, "%s %s", strna(path), + format_timespan(ts, sizeof(ts), target, 1)); + if (r < 0) + return bus_log_parse_error(r); + + r = sd_bus_message_exit_container(m); + if (r < 0) + return bus_log_parse_error(r); + + return 1; + } else if (contents[0] == SD_BUS_TYPE_BYTE && streq(name, "StandardInputData")) { _cleanup_free_ char *h = NULL; const void *p;