| From 2ccd5198faa8ca65001f90c551924e86bf737a85 Mon Sep 17 00:00:00 2001 |
| From: Anita Zhang <the.anitazha@gmail.com> |
| Date: Mon, 25 Jan 2021 23:56:23 -0800 |
| Subject: [PATCH 1/7] oom: shorten xattr name |
| |
| |
| src/core/cgroup.c | 2 +- |
| src/oom/oomd-util.c | 4 ++-- |
| src/oom/test-oomd-util.c | 2 +- |
| 3 files changed, 4 insertions(+), 4 deletions(-) |
| |
| diff --git a/src/core/cgroup.c b/src/core/cgroup.c |
| index c9cf7fb16c6..70282a7abda 100644 |
| |
| |
| @@ -2746,7 +2746,7 @@ int unit_check_oomd_kill(Unit *u) { |
| else if (r == 0) |
| return 0; |
| |
| - r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.systemd_oomd_kill", &value); |
| + r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value); |
| if (r < 0 && r != -ENODATA) |
| return r; |
| |
| diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c |
| index fcccddb92ea..80b9583440c 100644 |
| |
| |
| @@ -201,9 +201,9 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) { |
| if (r < 0) |
| return r; |
| |
| - r = increment_oomd_xattr(path, "user.systemd_oomd_kill", set_size(pids_killed)); |
| + r = increment_oomd_xattr(path, "user.oomd_kill", set_size(pids_killed)); |
| if (r < 0) |
| - log_debug_errno(r, "Failed to set user.systemd_oomd_kill on kill: %m"); |
| + log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m"); |
| |
| return set_size(pids_killed) != 0; |
| } |
| diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c |
| index 54fe2a03d14..3dec4f0ff06 100644 |
| |
| |
| @@ -79,7 +79,7 @@ static void test_oomd_cgroup_kill(void) { |
| sleep(2); |
| assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, cgroup) == true); |
| |
| - assert_se(cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.systemd_oomd_kill", &v) >= 0); |
| + assert_se(cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_kill", &v) >= 0); |
| assert_se(memcmp(v, i == 0 ? "2" : "4", 2) == 0); |
| } |
| } |
| |
| From d38916b398127e005d0cf131092a99317661ec3c Mon Sep 17 00:00:00 2001 |
| From: Anita Zhang <the.anitazha@gmail.com> |
| Date: Fri, 5 Feb 2021 03:00:11 -0800 |
| Subject: [PATCH 2/7] oom: wrap reply.path with empty_to_root |
| |
| |
| src/oom/oomd-manager.c | 6 +++--- |
| 1 file changed, 3 insertions(+), 3 deletions(-) |
| |
| diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c |
| index 338935b3ec6..825fe38e189 100644 |
| |
| |
| @@ -93,7 +93,7 @@ static int process_managed_oom_reply( |
| m->monitored_swap_cgroup_contexts : m->monitored_mem_pressure_cgroup_contexts; |
| |
| if (reply.mode == MANAGED_OOM_AUTO) { |
| - (void) oomd_cgroup_context_free(hashmap_remove(monitor_hm, reply.path)); |
| + (void) oomd_cgroup_context_free(hashmap_remove(monitor_hm, empty_to_root(reply.path))); |
| continue; |
| } |
| |
| @@ -109,7 +109,7 @@ static int process_managed_oom_reply( |
| } |
| } |
| |
| - ret = oomd_insert_cgroup_context(NULL, monitor_hm, reply.path); |
| + ret = oomd_insert_cgroup_context(NULL, monitor_hm, empty_to_root(reply.path)); |
| if (ret == -ENOMEM) { |
| r = ret; |
| goto finish; |
| @@ -117,7 +117,7 @@ static int process_managed_oom_reply( |
| |
| /* Always update the limit in case it was changed. For non-memory pressure detection the value is |
| * ignored so always updating it here is not a problem. */ |
| - ctx = hashmap_get(monitor_hm, reply.path); |
| + ctx = hashmap_get(monitor_hm, empty_to_root(reply.path)); |
| if (ctx) |
| ctx->mem_pressure_limit = limit; |
| } |
| |
| From a695da238e7a6bd6eb440facc784aa6fca6c3d90 Mon Sep 17 00:00:00 2001 |
| From: Anita Zhang <the.anitazha@gmail.com> |
| Date: Wed, 27 Jan 2021 23:43:13 -0800 |
| Subject: [PATCH 3/7] oom: sort by pgscan and memory usage |
| |
| If 2 candidates have the same pgscan, prioritize the one with the larger |
| memory usage. |
| |
| src/oom/oomd-util.c | 2 +- |
| src/oom/oomd-util.h | 5 ++++- |
| src/oom/test-oomd-util.c | 24 ++++++++++++++---------- |
| 3 files changed, 19 insertions(+), 12 deletions(-) |
| |
| diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c |
| index 80b9583440c..8f138d64c6c 100644 |
| |
| |
| @@ -214,7 +214,7 @@ int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) { |
| |
| assert(h); |
| |
| - r = oomd_sort_cgroup_contexts(h, compare_pgscan, prefix, &sorted); |
| + r = oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, prefix, &sorted); |
| if (r < 0) |
| return r; |
| |
| diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h |
| index d7a9890e7a2..f0648c5dcdd 100644 |
| |
| |
| @@ -61,10 +61,13 @@ bool oomd_memory_reclaim(Hashmap *h); |
| /* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. */ |
| bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent); |
| |
| -static inline int compare_pgscan(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) { |
| +static inline int compare_pgscan_and_memory_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) { |
| assert(c1); |
| assert(c2); |
| |
| + if ((*c2)->pgscan == (*c1)->pgscan) |
| + return CMP((*c2)->current_memory_usage, (*c1)->current_memory_usage); |
| + |
| return CMP((*c2)->pgscan, (*c1)->pgscan); |
| } |
| |
| diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c |
| index 3dec4f0ff06..a1fe78806a1 100644 |
| |
| |
| @@ -292,16 +292,20 @@ static void test_oomd_sort_cgroups(void) { |
| OomdCGroupContext ctx[4] = { |
| { .path = paths[0], |
| .swap_usage = 20, |
| - .pgscan = 60 }, |
| + .pgscan = 60, |
| + .current_memory_usage = 10 }, |
| { .path = paths[1], |
| .swap_usage = 60, |
| - .pgscan = 40 }, |
| + .pgscan = 40, |
| + .current_memory_usage = 20 }, |
| { .path = paths[2], |
| .swap_usage = 40, |
| - .pgscan = 20 }, |
| + .pgscan = 40, |
| + .current_memory_usage = 40 }, |
| { .path = paths[3], |
| .swap_usage = 10, |
| - .pgscan = 80 }, |
| + .pgscan = 80, |
| + .current_memory_usage = 10 }, |
| }; |
| |
| assert_se(h = hashmap_new(&string_hash_ops)); |
| @@ -318,16 +322,16 @@ static void test_oomd_sort_cgroups(void) { |
| assert_se(sorted_cgroups[3] == &ctx[3]); |
| sorted_cgroups = mfree(sorted_cgroups); |
| |
| - assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan, NULL, &sorted_cgroups) == 4); |
| + assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, NULL, &sorted_cgroups) == 4); |
| assert_se(sorted_cgroups[0] == &ctx[3]); |
| assert_se(sorted_cgroups[1] == &ctx[0]); |
| - assert_se(sorted_cgroups[2] == &ctx[1]); |
| - assert_se(sorted_cgroups[3] == &ctx[2]); |
| + assert_se(sorted_cgroups[2] == &ctx[2]); |
| + assert_se(sorted_cgroups[3] == &ctx[1]); |
| sorted_cgroups = mfree(sorted_cgroups); |
| |
| - assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan, "/herp.slice/derp.scope", &sorted_cgroups) == 2); |
| - assert_se(sorted_cgroups[0] == &ctx[1]); |
| - assert_se(sorted_cgroups[1] == &ctx[2]); |
| + assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, "/herp.slice/derp.scope", &sorted_cgroups) == 2); |
| + assert_se(sorted_cgroups[0] == &ctx[2]); |
| + assert_se(sorted_cgroups[1] == &ctx[1]); |
| assert_se(sorted_cgroups[2] == 0); |
| assert_se(sorted_cgroups[3] == 0); |
| sorted_cgroups = mfree(sorted_cgroups); |
| |
| From c73a2c3a6788a2a28899f29579fdd68816f60d59 Mon Sep 17 00:00:00 2001 |
| From: Anita Zhang <the.anitazha@gmail.com> |
| Date: Thu, 28 Jan 2021 15:47:26 -0800 |
| Subject: [PATCH 4/7] oom: skip over cgroups with no memory usage |
| |
| |
| src/oom/oomd-util.c | 3 ++- |
| 1 file changed, 2 insertions(+), 1 deletion(-) |
| |
| diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c |
| index 8f138d64c6c..fa8b8b70b19 100644 |
| |
| |
| @@ -219,7 +219,8 @@ int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) { |
| return r; |
| |
| for (int i = 0; i < r; i++) { |
| - if (sorted[i]->pgscan == 0) |
| + /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure */ |
| + if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0) |
| break; |
| |
| r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); |
| |
| From 63d6d9160523a2c1a71e96ff4125a1440d827b32 Mon Sep 17 00:00:00 2001 |
| From: Anita Zhang <the.anitazha@gmail.com> |
| Date: Tue, 26 Jan 2021 00:57:36 -0800 |
| Subject: [PATCH 5/7] oom: implement avoid/omit xattr support |
| |
| There may be situations where a cgroup should be protected from killing |
| or deprioritized as a candidate. In FB oomd, xattrs are used to bias oomd |
| away from supervisor cgroups and towards worker cgroups in container |
| tasks. On desktops this can be used to protect important units with |
| unpredictable resource consumption. |
| |
| The patch allows systemd-oomd to understand 2 xattrs: |
| "user.oomd_avoid" and "user.oomd_omit". If systemd-oomd sees these |
| xattrs set to 1 on a candidate cgroup (i.e. while attempting to kill something) |
| AND the cgroup is owned by root:root, it will either deprioritize the cgroup as |
| a candidate (avoid) or remove it completely as a candidate (omit). |
| |
| Usage is restricted to root:root cgroups so that an unprivileged user cannot |
| rank their own cgroups lower in the kill priority than another user's, or |
| omit their own units from systemd-oomd killing altogether. |
| |
| src/basic/cgroup-util.c | 22 +++++++++ |
| src/basic/cgroup-util.h | 1 + |
| src/oom/oomd-util.c | 35 ++++++++++++--- |
| src/oom/oomd-util.h | 11 +++++ |
| src/oom/test-oomd-util.c | 54 +++++++++++++++++++++-- |
| test/test-functions | 1 + |
| test/units/testsuite-56-testmunch.service | 7 +++ |
| test/units/testsuite-56.sh | 31 +++++++++++-- |
| 8 files changed, 149 insertions(+), 13 deletions(-) |
| create mode 100644 test/units/testsuite-56-testmunch.service |
| |
| diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c |
| index b567822b7ef..45dc1142048 100644 |
| |
| |
| @@ -1703,6 +1703,28 @@ int cg_get_attribute_as_bool(const char *controller, const char *path, const cha |
| return 0; |
| } |
| |
| +/* Stores the owner uid/gid of the filesystem path cg_get_path() resolves for controller/path; returns 0 or a negative errno. */ |
| +int cg_get_owner(const char *controller, const char *path, uid_t *ret_uid, gid_t *ret_gid) { |
| + _cleanup_free_ char *fs_path = NULL; |
| + struct stat st; |
| + int r; |
| + |
| + assert(ret_uid); |
| + assert(ret_gid); |
| + |
| + r = cg_get_path(controller, path, NULL, &fs_path); |
| + if (r < 0) |
| + return r; |
| + |
| + /* stat() reports failure through errno; hand it back as a negative errno value. */ |
| + if (stat(fs_path, &st) < 0) |
| + return -errno; |
| + |
| + *ret_uid = st.st_uid; |
| + *ret_gid = st.st_gid; |
| + return 0; |
| +} |
| + |
| int cg_get_keyed_attribute_full( |
| const char *controller, |
| const char *path, |
| diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h |
| index bdc0d0d086c..63bd25f703e 100644 |
| |
| |
| @@ -212,6 +212,7 @@ int cg_get_attribute_as_uint64(const char *controller, const char *path, const c |
| int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret); |
| |
| int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid); |
| +int cg_get_owner(const char *controller, const char *path, uid_t *ret_uid, gid_t *ret_gid); |
| |
| int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags); |
| int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size); |
| diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c |
| index fa8b8b70b19..db6383bf436 100644 |
| |
| |
| @@ -159,7 +159,8 @@ int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const cha |
| return -ENOMEM; |
| |
| HASHMAP_FOREACH(item, h) { |
| - if (item->path && prefix && !path_startswith(item->path, prefix)) |
| + /* Skip over cgroups that are not valid candidates or are explicitly marked for omission */ |
| + if ((item->path && prefix && !path_startswith(item->path, prefix)) || item->omit) |
| continue; |
| |
| sorted[k++] = item; |
| @@ -219,9 +220,10 @@ int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) { |
| return r; |
| |
| for (int i = 0; i < r; i++) { |
| - /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure */ |
| + /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */ |
| + /* Don't break since there might be "avoid" cgroups at the end. */ |
| if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0) |
| - break; |
| + continue; |
| |
| r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); |
| if (r > 0 || r == -ENOMEM) |
| @@ -244,8 +246,10 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) { |
| /* Try to kill cgroups with non-zero swap usage until we either succeed in |
| * killing or we get to a cgroup with no swap usage. */ |
| for (int i = 0; i < r; i++) { |
| + /* Skip over cgroups with no resource usage. Don't break since there might be "avoid" |
| + * cgroups at the end. */ |
| if (sorted[i]->swap_usage == 0) |
| - break; |
| + continue; |
| |
| r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); |
| if (r > 0 || r == -ENOMEM) |
| @@ -257,8 +261,10 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) { |
| |
| int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) { |
| _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL; |
| - _cleanup_free_ char *p = NULL, *val = NULL; |
| + _cleanup_free_ char *p = NULL, *val = NULL, *avoid_val = NULL, *omit_val = NULL; |
| bool is_root; |
| + uid_t uid; |
| + gid_t gid; |
| int r; |
| |
| assert(path); |
| @@ -278,6 +284,25 @@ int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) { |
| if (r < 0) |
| return log_debug_errno(r, "Error parsing memory pressure from %s: %m", p); |
| |
| + r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, path, &uid, &gid); |
| + if (r < 0) |
| + log_debug_errno(r, "Failed to get owner/group from %s: %m", path); |
| + else if (uid == 0 && gid == 0) { |
| + /* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used |
| + * as an optional feature of systemd-oomd (and the system might not even support them). */ |
| + r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, path, "user.oomd_avoid", &avoid_val); |
| + if (r >= 0 && streq(avoid_val, "1")) |
| + ctx->avoid = true; |
| + else if (r == -ENOMEM) |
| + return r; |
| + |
| + r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, path, "user.oomd_omit", &omit_val); |
| + if (r >= 0 && streq(omit_val, "1")) |
| + ctx->omit = true; |
| + else if (r == -ENOMEM) |
| + return r; |
| + } |
| + |
| if (is_root) { |
| r = procfs_memory_get_used(&ctx->current_memory_usage); |
| if (r < 0) |
| diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h |
| index f0648c5dcdd..ab6a8da1ef6 100644 |
| |
| |
| @@ -29,6 +29,9 @@ struct OomdCGroupContext { |
| uint64_t last_pgscan; |
| uint64_t pgscan; |
| |
| + bool avoid; |
| + bool omit; |
| + |
| /* These are only used by oomd_pressure_above for acting on high memory pressure. */ |
| loadavg_t mem_pressure_limit; |
| usec_t mem_pressure_duration_usec; |
| @@ -61,10 +64,15 @@ bool oomd_memory_reclaim(Hashmap *h); |
| /* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. */ |
| bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent); |
| |
| +/* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end |
| + * (after the smallest values). */ |
| static inline int compare_pgscan_and_memory_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) { |
| assert(c1); |
| assert(c2); |
| |
| + if ((*c1)->avoid != (*c2)->avoid) |
| + return CMP((*c1)->avoid, (*c2)->avoid); |
| + |
| if ((*c2)->pgscan == (*c1)->pgscan) |
| return CMP((*c2)->current_memory_usage, (*c1)->current_memory_usage); |
| |
| @@ -75,6 +83,9 @@ static inline int compare_swap_usage(OomdCGroupContext * const *c1, OomdCGroupCo |
| assert(c1); |
| assert(c2); |
| |
| + if ((*c1)->avoid != (*c2)->avoid) |
| + return CMP((*c1)->avoid, (*c2)->avoid); |
| + |
| return CMP((*c2)->swap_usage, (*c1)->swap_usage); |
| } |
| |
| diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c |
| index a1fe78806a1..193edee0eba 100644 |
| |
| |
| @@ -89,6 +89,8 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) { |
| _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL; |
| _cleanup_free_ char *cgroup = NULL; |
| OomdCGroupContext *c1, *c2; |
| + bool test_xattrs; |
| + int r; |
| |
| if (geteuid() != 0) |
| return (void) log_tests_skipped("not root"); |
| @@ -101,6 +103,16 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) { |
| |
| assert_se(cg_pid_get_path(NULL, 0, &cgroup) >= 0); |
| |
| + /* If we don't have permissions to set xattrs we're likely in a userns or missing capabilities |
| + * so skip the xattr portions of the test. */ |
| + r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_test", "1", 1, 0); |
| + test_xattrs = !ERRNO_IS_PRIVILEGE(r) && !ERRNO_IS_NOT_SUPPORTED(r); |
| + |
| + if (test_xattrs) { |
| + assert_se(cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_omit", "1", 1, 0) >= 0); |
| + assert_se(cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_avoid", "1", 1, 0) >= 0); |
| + } |
| + |
| assert_se(oomd_cgroup_context_acquire(cgroup, &ctx) == 0); |
| |
| assert_se(streq(ctx->path, cgroup)); |
| @@ -110,12 +122,21 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) { |
| assert_se(ctx->swap_usage == 0); |
| assert_se(ctx->last_pgscan == 0); |
| assert_se(ctx->pgscan == 0); |
| + if (test_xattrs) { |
| + assert_se(ctx->omit == true); |
| + assert_se(ctx->avoid == true); |
| + } else { |
| + assert_se(ctx->omit == false); |
| + assert_se(ctx->avoid == false); |
| + } |
| ctx = oomd_cgroup_context_free(ctx); |
| |
| /* Test the root cgroup */ |
| assert_se(oomd_cgroup_context_acquire("", &ctx) == 0); |
| assert_se(streq(ctx->path, "/")); |
| assert_se(ctx->current_memory_usage > 0); |
| + assert_se(ctx->omit == false); |
| + assert_se(ctx->avoid == false); |
| |
| /* Test hashmap inserts */ |
| assert_se(h1 = hashmap_new(&oomd_cgroup_ctx_hash_ops)); |
| @@ -137,6 +158,15 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) { |
| assert_se(c2->last_pgscan == 5555); |
| assert_se(c2->mem_pressure_limit == 6789); |
| assert_se(c2->last_hit_mem_pressure_limit == 42); |
| + |
| + /* Assert that avoid/omit are not set if the cgroup is not owned by root */ |
| + if (test_xattrs) { |
| + ctx = oomd_cgroup_context_free(ctx); |
| + assert_se(cg_set_access(SYSTEMD_CGROUP_CONTROLLER, cgroup, 65534, 65534) >= 0); |
| + assert_se(oomd_cgroup_context_acquire(cgroup, &ctx) == 0); |
| + assert_se(ctx->omit == false); |
| + assert_se(ctx->avoid == false); |
| + } |
| } |
| |
| static void test_oomd_system_context_acquire(void) { |
| @@ -287,9 +317,11 @@ static void test_oomd_sort_cgroups(void) { |
| char **paths = STRV_MAKE("/herp.slice", |
| "/herp.slice/derp.scope", |
| "/herp.slice/derp.scope/sheep.service", |
| - "/zupa.slice"); |
| + "/zupa.slice", |
| + "/omitted.slice", |
| + "/avoid.slice"); |
| |
| - OomdCGroupContext ctx[4] = { |
| + OomdCGroupContext ctx[6] = { |
| { .path = paths[0], |
| .swap_usage = 20, |
| .pgscan = 60, |
| @@ -306,6 +338,14 @@ static void test_oomd_sort_cgroups(void) { |
| .swap_usage = 10, |
| .pgscan = 80, |
| .current_memory_usage = 10 }, |
| + { .path = paths[4], |
| + .swap_usage = 90, |
| + .pgscan = 100, |
| + .omit = true }, |
| + { .path = paths[5], |
| + .swap_usage = 99, |
| + .pgscan = 200, |
| + .avoid = true }, |
| }; |
| |
| assert_se(h = hashmap_new(&string_hash_ops)); |
| @@ -314,19 +354,23 @@ static void test_oomd_sort_cgroups(void) { |
| assert_se(hashmap_put(h, "/herp.slice/derp.scope", &ctx[1]) >= 0); |
| assert_se(hashmap_put(h, "/herp.slice/derp.scope/sheep.service", &ctx[2]) >= 0); |
| assert_se(hashmap_put(h, "/zupa.slice", &ctx[3]) >= 0); |
| + assert_se(hashmap_put(h, "/omitted.slice", &ctx[4]) >= 0); |
| + assert_se(hashmap_put(h, "/avoid.slice", &ctx[5]) >= 0); |
| |
| - assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 4); |
| + assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 5); |
| assert_se(sorted_cgroups[0] == &ctx[1]); |
| assert_se(sorted_cgroups[1] == &ctx[2]); |
| assert_se(sorted_cgroups[2] == &ctx[0]); |
| assert_se(sorted_cgroups[3] == &ctx[3]); |
| + assert_se(sorted_cgroups[4] == &ctx[5]); |
| sorted_cgroups = mfree(sorted_cgroups); |
| |
| - assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, NULL, &sorted_cgroups) == 4); |
| + assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, NULL, &sorted_cgroups) == 5); |
| assert_se(sorted_cgroups[0] == &ctx[3]); |
| assert_se(sorted_cgroups[1] == &ctx[0]); |
| assert_se(sorted_cgroups[2] == &ctx[2]); |
| assert_se(sorted_cgroups[3] == &ctx[1]); |
| + assert_se(sorted_cgroups[4] == &ctx[5]); |
| sorted_cgroups = mfree(sorted_cgroups); |
| |
| assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, "/herp.slice/derp.scope", &sorted_cgroups) == 2); |
| @@ -334,6 +378,8 @@ static void test_oomd_sort_cgroups(void) { |
| assert_se(sorted_cgroups[1] == &ctx[1]); |
| assert_se(sorted_cgroups[2] == 0); |
| assert_se(sorted_cgroups[3] == 0); |
| + assert_se(sorted_cgroups[4] == 0); |
| + assert_se(sorted_cgroups[5] == 0); |
| sorted_cgroups = mfree(sorted_cgroups); |
| } |
| |
| diff --git a/test/test-functions b/test/test-functions |
| index df6022982c2..6996cd74752 100644 |
| |
| |
| @@ -124,6 +124,7 @@ BASICTOOLS=( |
| rmdir |
| sed |
| seq |
| + setfattr |
| setfont |
| setsid |
| sfdisk |
| diff --git a/test/units/testsuite-56-testmunch.service b/test/units/testsuite-56-testmunch.service |
| new file mode 100644 |
| index 00000000000..b4b925a7af0 |
| |
| |
| @@ -0,0 +1,7 @@ |
| +[Unit] |
| +Description=Create some memory pressure |
| + |
| +[Service] |
| +MemoryHigh=2M |
| +Slice=testsuite-56-workload.slice |
| +ExecStart=/usr/lib/systemd/tests/testdata/units/testsuite-56-slowgrowth.sh |
| diff --git a/test/units/testsuite-56.sh b/test/units/testsuite-56.sh |
| index 8b01fe37ed4..88c185b8869 100755 |
| |
| |
| @@ -23,20 +23,43 @@ oomctl | grep "/testsuite-56-workload.slice" |
| oomctl | grep "1.00%" |
| oomctl | grep "Default Memory Pressure Duration: 5s" |
| |
| -# systemd-oomd watches for elevated pressure for 30 seconds before acting. |
| -# It can take time to build up pressure so either wait 5 minutes or for the service to fail. |
| -timeout=$(date -ud "5 minutes" +%s) |
| +# systemd-oomd watches for elevated pressure for 5 seconds before acting. |
| +# It can take time to build up pressure so either wait 2 minutes or for the service to fail. |
| +timeout=$(date -ud "2 minutes" +%s) |
| while [[ $(date -u +%s) -le $timeout ]]; do |
| if ! systemctl status testsuite-56-testbloat.service; then |
| break |
| fi |
| - sleep 15 |
| + sleep 5 |
| done |
| |
| # testbloat should be killed and testchill should be fine |
| if systemctl status testsuite-56-testbloat.service; then exit 42; fi |
| if ! systemctl status testsuite-56-testchill.service; then exit 24; fi |
| |
| +# only run this portion of the test if we can set xattrs |
| +if setfattr -n user.xattr_test -v 1 /sys/fs/cgroup/; then |
| + sleep 120 # wait for systemd-oomd kill cool down and elevated memory pressure to come down |
| + |
| + systemctl start testsuite-56-testchill.service |
| + systemctl start testsuite-56-testmunch.service |
| + systemctl start testsuite-56-testbloat.service |
| + setfattr -n user.oomd_avoid -v 1 /sys/fs/cgroup/testsuite.slice/testsuite-56.slice/testsuite-56-workload.slice/testsuite-56-testbloat.service |
| + |
| + timeout=$(date -ud "2 minutes" +%s) |
| + while [[ $(date -u +%s) -le $timeout ]]; do |
| + if ! systemctl status testsuite-56-testmunch.service; then |
| + break |
| + fi |
| + sleep 5 |
| + done |
| + |
| + # testmunch should be killed since testbloat had the avoid xattr on it |
| + if ! systemctl status testsuite-56-testbloat.service; then exit 25; fi |
| + if systemctl status testsuite-56-testmunch.service; then exit 43; fi |
| + if ! systemctl status testsuite-56-testchill.service; then exit 24; fi |
| +fi |
| + |
| systemd-analyze log-level info |
| |
| echo OK > /testok |
| |
| From d87ecfecdb6fb77097f843888e2a05945b6b396b Mon Sep 17 00:00:00 2001 |
| From: Anita Zhang <the.anitazha@gmail.com> |
| Date: Thu, 28 Jan 2021 02:31:44 -0800 |
| Subject: [PATCH 6/7] oom: add unit file settings for oomd avoid/omit xattrs |
| |
| |
| docs/TRANSIENT-SETTINGS.md | 1 + |
| src/core/cgroup.c | 58 ++++++++++++++++++--- |
| src/core/cgroup.h | 15 ++++++ |
| src/core/dbus-cgroup.c | 22 ++++++++ |
| src/core/execute.c | 4 ++ |
| src/core/load-fragment-gperf.gperf.m4 | 1 + |
| src/core/load-fragment.c | 1 + |
| src/core/load-fragment.h | 1 + |
| src/shared/bus-unit-util.c | 3 +- |
| src/test/test-tables.c | 1 + |
| test/fuzz/fuzz-unit-file/directives.service | 4 ++ |
| test/units/testsuite-56.sh | 8 ++- |
| 12 files changed, 109 insertions(+), 10 deletions(-) |
| |
| diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md |
| index 50370602543..9f69a3162a0 100644 |
| |
| |
| @@ -273,6 +273,7 @@ All cgroup/resource control settings are available for transient units |
| ✓ ManagedOOMSwap= |
| ✓ ManagedOOMMemoryPressure= |
| ✓ ManagedOOMMemoryPressureLimit= |
| +✓ ManagedOOMPreference= |
| ``` |
| |
| ## Process Killing Settings |
| diff --git a/src/core/cgroup.c b/src/core/cgroup.c |
| index 70282a7abda..833b434b555 100644 |
| |
| |
| @@ -131,6 +131,7 @@ void cgroup_context_init(CGroupContext *c) { |
| |
| .moom_swap = MANAGED_OOM_AUTO, |
| .moom_mem_pressure = MANAGED_OOM_AUTO, |
| + .moom_preference = MANAGED_OOM_PREFERENCE_NONE, |
| }; |
| } |
| |
| @@ -417,7 +418,8 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) { |
| "%sDelegate: %s\n" |
| "%sManagedOOMSwap: %s\n" |
| "%sManagedOOMMemoryPressure: %s\n" |
| - "%sManagedOOMMemoryPressureLimit: %" PRIu32 ".%02" PRIu32 "%%\n", |
| + "%sManagedOOMMemoryPressureLimit: %" PRIu32 ".%02" PRIu32 "%%\n" |
| + "%sManagedOOMPreference: %s\n", |
| prefix, yes_no(c->cpu_accounting), |
| prefix, yes_no(c->io_accounting), |
| prefix, yes_no(c->blockio_accounting), |
| @@ -450,7 +452,8 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) { |
| prefix, yes_no(c->delegate), |
| prefix, managed_oom_mode_to_string(c->moom_swap), |
| prefix, managed_oom_mode_to_string(c->moom_mem_pressure), |
| - prefix, c->moom_mem_pressure_limit_permyriad / 100, c->moom_mem_pressure_limit_permyriad % 100); |
| + prefix, c->moom_mem_pressure_limit_permyriad / 100, c->moom_mem_pressure_limit_permyriad % 100, |
| + prefix, managed_oom_preference_to_string(c->moom_preference)); |
| |
| if (c->delegate) { |
| _cleanup_free_ char *t = NULL; |
| @@ -600,6 +603,35 @@ int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) |
| UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_low); |
| UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_min); |
| |
| +void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path) { |
| + CGroupContext *c; |
| + int r; |
| + /* Syncs the user.oomd_avoid/user.oomd_omit xattrs on cgroup_path with the unit's moom_preference. Best effort: failures are only logged. */ |
| + assert(u); |
| + |
| + c = unit_get_cgroup_context(u); |
| + if (!c) |
| + return; |
| + |
| + r = cg_remove_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_avoid"); |
| + if (r < 0 && r != -ENODATA) /* -ENODATA only means the flag was not set, and success must not be logged as a failure */ |
| + log_unit_debug_errno(u, r, "Failed to remove oomd_avoid flag on control group %s, ignoring: %m", cgroup_path); |
| + |
| + r = cg_remove_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_omit"); |
| + if (r < 0 && r != -ENODATA) /* same as above: only real errors are worth a log line */ |
| + log_unit_debug_errno(u, r, "Failed to remove oomd_omit flag on control group %s, ignoring: %m", cgroup_path); |
| + |
| + if (c->moom_preference == MANAGED_OOM_PREFERENCE_AVOID) { |
| + r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_avoid", "1", 1, 0); |
| + if (r < 0) |
| + log_unit_debug_errno(u, r, "Failed to set oomd_avoid flag on control group %s, ignoring: %m", cgroup_path); |
| + } else if (c->moom_preference == MANAGED_OOM_PREFERENCE_OMIT) { |
| + r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_omit", "1", 1, 0); |
| + if (r < 0) |
| + log_unit_debug_errno(u, r, "Failed to set oomd_omit flag on control group %s, ignoring: %m", cgroup_path); |
| + } |
| +} |
| + |
| static void cgroup_xattr_apply(Unit *u) { |
| char ids[SD_ID128_STRING_MAX]; |
| int r; |
| @@ -630,6 +662,8 @@ static void cgroup_xattr_apply(Unit *u) { |
| if (r != -ENODATA) |
| log_unit_debug_errno(u, r, "Failed to remove delegate flag on control group %s, ignoring: %m", u->cgroup_path); |
| } |
| + |
| + cgroup_oomd_xattr_apply(u, u->cgroup_path); |
| } |
| |
| static int lookup_block_device(const char *p, dev_t *ret) { |
| @@ -3737,12 +3771,6 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action) { |
| return 1; |
| } |
| |
| -static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { |
| - [CGROUP_DEVICE_POLICY_AUTO] = "auto", |
| - [CGROUP_DEVICE_POLICY_CLOSED] = "closed", |
| - [CGROUP_DEVICE_POLICY_STRICT] = "strict", |
| -}; |
| - |
| int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) { |
| _cleanup_free_ char *v = NULL; |
| int r; |
| @@ -3771,6 +3799,12 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) { |
| return parse_cpu_set_full(v, cpus, false, NULL, NULL, 0, NULL); |
| } |
| |
| +static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { |
| + [CGROUP_DEVICE_POLICY_AUTO] = "auto", |
| + [CGROUP_DEVICE_POLICY_CLOSED] = "closed", |
| + [CGROUP_DEVICE_POLICY_STRICT] = "strict", |
| +}; |
| + |
| DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy); |
| |
| static const char* const freezer_action_table[_FREEZER_ACTION_MAX] = { |
| @@ -3779,3 +3813,11 @@ static const char* const freezer_action_table[_FREEZER_ACTION_MAX] = { |
| }; |
| |
| DEFINE_STRING_TABLE_LOOKUP(freezer_action, FreezerAction); |
| + |
| +static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = { /* string forms used by the ManagedOOMPreference property and cgroup_context_dump() */ |
| + [MANAGED_OOM_PREFERENCE_NONE] = "none", /* cgroup_oomd_xattr_apply() sets neither oomd xattr */ |
| + [MANAGED_OOM_PREFERENCE_AVOID] = "avoid", /* cgroup_oomd_xattr_apply() sets user.oomd_avoid */ |
| + [MANAGED_OOM_PREFERENCE_OMIT] = "omit", /* cgroup_oomd_xattr_apply() sets user.oomd_omit */ |
| +}; |
| + |
| +DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference); |
| diff --git a/src/core/cgroup.h b/src/core/cgroup.h |
| index 9fbfabbb7e3..7d9ab4ae6b8 100644 |
| |
| |
| @@ -94,6 +94,15 @@ struct CGroupBlockIODeviceBandwidth { |
| uint64_t wbps; |
| }; |
| |
| +typedef enum ManagedOOMPreference { /* per-unit preference that cgroup_oomd_xattr_apply() turns into oomd xattrs on the cgroup */ |
| + MANAGED_OOM_PREFERENCE_NONE, /* default from cgroup_context_init(); neither xattr is set */ |
| + MANAGED_OOM_PREFERENCE_AVOID, /* user.oomd_avoid is set to "1" */ |
| + MANAGED_OOM_PREFERENCE_OMIT, /* user.oomd_omit is set to "1" */ |
| + |
| + _MANAGED_OOM_PREFERENCE_MAX, /* number of real values; sizes managed_oom_preference_table */ |
| + _MANAGED_OOM_PREFERENCE_INVALID = -1 /* negative sentinel; callers treat a result < 0 from the string lookup as a parse failure */ |
| +} ManagedOOMPreference; |
| + |
| struct CGroupContext { |
| bool cpu_accounting; |
| bool io_accounting; |
| @@ -164,6 +173,7 @@ struct CGroupContext { |
| ManagedOOMMode moom_swap; |
| ManagedOOMMode moom_mem_pressure; |
| uint32_t moom_mem_pressure_limit_permyriad; |
| + ManagedOOMPreference moom_preference; |
| }; |
| |
| /* Used when querying IP accounting data */ |
| @@ -204,6 +214,8 @@ void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockI |
| |
| int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode); |
| |
| +void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path); |
| + |
| CGroupMask unit_get_own_mask(Unit *u); |
| CGroupMask unit_get_delegate_mask(Unit *u); |
| CGroupMask unit_get_members_mask(Unit *u); |
| @@ -294,3 +306,6 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action); |
| |
| const char* freezer_action_to_string(FreezerAction a) _const_; |
| FreezerAction freezer_action_from_string(const char *s) _pure_; |
| + |
| +const char* managed_oom_preference_to_string(ManagedOOMPreference a) _const_; |
| +ManagedOOMPreference managed_oom_preference_from_string(const char *s) _pure_; |
| diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c |
| index 6f309feb236..0b2d945283e 100644 |
| |
| |
| @@ -21,6 +21,7 @@ BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_res |
| |
| static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy); |
| static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_managed_oom_mode, managed_oom_mode, ManagedOOMMode); |
| +static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_managed_oom_preference, managed_oom_preference, ManagedOOMPreference); |
| |
| static int property_get_cgroup_mask( |
| sd_bus *bus, |
| @@ -395,6 +396,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { |
| SD_BUS_PROPERTY("ManagedOOMSwap", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_swap), 0), |
| SD_BUS_PROPERTY("ManagedOOMMemoryPressure", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_mem_pressure), 0), |
| SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimitPermyriad", "u", NULL, offsetof(CGroupContext, moom_mem_pressure_limit_permyriad), 0), |
| + SD_BUS_PROPERTY("ManagedOOMPreference", "s", property_get_managed_oom_preference, offsetof(CGroupContext, moom_preference), 0), |
| SD_BUS_VTABLE_END |
| }; |
| |
| @@ -1720,6 +1722,26 @@ int bus_cgroup_set_property( |
| return 1; |
| } |
| |
| + if (streq(name, "ManagedOOMPreference")) { |
| + ManagedOOMPreference p; |
| + const char *pref; |
| + |
| + r = sd_bus_message_read(message, "s", &pref); |
| + if (r < 0) |
| + return r; |
| + |
| + p = managed_oom_preference_from_string(pref); |
| + if (p < 0) |
| + return -EINVAL; |
| + |
| + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { |
| + c->moom_preference = p; |
| + unit_write_settingf(u, flags, name, "ManagedOOMPreference=%s", pref); |
| + } |
| + |
| + return 1; |
| + } |
| + |
| if (streq(name, "DisableControllers") || (u->transient && u->load_state == UNIT_STUB)) |
| return bus_cgroup_set_transient_property(u, c, name, message, flags, error); |
| |
| diff --git a/src/core/execute.c b/src/core/execute.c |
| index b7d78f2197e..0368582884c 100644 |
| |
| |
| @@ -4701,6 +4701,10 @@ int exec_spawn(Unit *unit, |
| r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path); |
| if (r < 0) |
| return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path); |
| + |
| + /* Normally we would not propagate the oomd xattrs to children but since we created this |
| + * sub-cgroup internally we should do it. */ |
| + cgroup_oomd_xattr_apply(unit, subcgroup_path); |
| } |
| } |
| |
| diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 |
| index 81f4561a572..dbcbe645934 100644 |
| |
| |
| @@ -230,6 +230,7 @@ $1.IPEgressFilterPath, config_parse_ip_filter_bpf_progs, |
| $1.ManagedOOMSwap, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_swap) |
| $1.ManagedOOMMemoryPressure, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_mem_pressure) |
| $1.ManagedOOMMemoryPressureLimit, config_parse_managed_oom_mem_pressure_limit, 0, offsetof($1, cgroup_context.moom_mem_pressure_limit_permyriad) |
| +$1.ManagedOOMPreference, config_parse_managed_oom_preference, 0, offsetof($1, cgroup_context.moom_preference) |
| $1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0' |
| )m4_dnl |
| Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description) |
| diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c |
| index 06b71aaf157..c6b017556f9 100644 |
| |
| |
| @@ -133,6 +133,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceR |
| DEFINE_CONFIG_PARSE_ENUM(config_parse_service_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode, "Failed to parse timeout failure mode"); |
| DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value"); |
| DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy"); |
| +DEFINE_CONFIG_PARSE_ENUM(config_parse_managed_oom_preference, managed_oom_preference, ManagedOOMPreference, "Failed to parse ManagedOOMPreference="); |
| DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value"); |
| DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint64_t, "Invalid block IO weight"); |
| DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight"); |
| diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h |
| index 6b2175cd2af..e4a5cb79869 100644 |
| |
| |
| @@ -78,6 +78,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max); |
| CONFIG_PARSER_PROTOTYPE(config_parse_delegate); |
| CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mode); |
| CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mem_pressure_limit); |
| +CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_preference); |
| CONFIG_PARSER_PROTOTYPE(config_parse_device_policy); |
| CONFIG_PARSER_PROTOTYPE(config_parse_device_allow); |
| CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency); |
| diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c |
| index 84f57d94d23..5bbaa07dd1c 100644 |
| |
| |
| @@ -435,7 +435,8 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons |
| if (STR_IN_SET(field, "DevicePolicy", |
| "Slice", |
| "ManagedOOMSwap", |
| - "ManagedOOMMemoryPressure")) |
| + "ManagedOOMMemoryPressure", |
| + "ManagedOOMPreference")) |
| return bus_append_string(m, field, eq); |
| |
| if (STR_IN_SET(field, "ManagedOOMMemoryPressureLimit")) { |
| diff --git a/src/test/test-tables.c b/src/test/test-tables.c |
| index 641cadec858..cc93bbbc749 100644 |
| |
| |
| @@ -73,6 +73,7 @@ int main(int argc, char **argv) { |
| test_table(log_target, LOG_TARGET); |
| test_table(mac_address_policy, MAC_ADDRESS_POLICY); |
| test_table(managed_oom_mode, MANAGED_OOM_MODE); |
| + test_table(managed_oom_preference, MANAGED_OOM_PREFERENCE); |
| test_table(manager_state, MANAGER_STATE); |
| test_table(manager_timestamp, MANAGER_TIMESTAMP); |
| test_table(mount_exec_command, MOUNT_EXEC_COMMAND); |
| diff --git a/test/fuzz/fuzz-unit-file/directives.service b/test/fuzz/fuzz-unit-file/directives.service |
| index 15fa556dd64..0c7ded6786a 100644 |
| |
| |
| @@ -138,6 +138,10 @@ MakeDirectory= |
| Mark= |
| MaxConnections= |
| MaxConnectionsPerSource= |
| +ManagedOOMSwap= |
| +ManagedOOMMemoryPressure= |
| +ManagedOOMMemoryPressureLimit= |
| +ManagedOOMPreference= |
| MemoryAccounting= |
| MemoryHigh= |
| MemoryLimit= |
| diff --git a/test/units/testsuite-56.sh b/test/units/testsuite-56.sh |
| index 88c185b8869..1884f814689 100755 |
| |
| |
| @@ -13,6 +13,8 @@ if [[ "$cgroup_type" != *"cgroup2"* ]] && [[ "$cgroup_type" != *"0x63677270"* ]] |
| fi |
| [[ -e /skipped ]] && exit 0 || true |
| |
| +rm -rf /etc/systemd/system/testsuite-56-testbloat.service.d |
| + |
| echo "DefaultMemoryPressureDurationSec=5s" >> /etc/systemd/oomd.conf |
| |
| systemctl start testsuite-56-testchill.service |
| @@ -41,10 +43,14 @@ if ! systemctl status testsuite-56-testchill.service; then exit 24; fi |
| if setfattr -n user.xattr_test -v 1 /sys/fs/cgroup/; then |
| sleep 120 # wait for systemd-oomd kill cool down and elevated memory pressure to come down |
| |
| + mkdir -p /etc/systemd/system/testsuite-56-testbloat.service.d/ |
| + echo "[Service]" > /etc/systemd/system/testsuite-56-testbloat.service.d/override.conf |
| + echo "ManagedOOMPreference=avoid" >> /etc/systemd/system/testsuite-56-testbloat.service.d/override.conf |
| + |
| + systemctl daemon-reload |
| systemctl start testsuite-56-testchill.service |
| systemctl start testsuite-56-testmunch.service |
| systemctl start testsuite-56-testbloat.service |
| - setfattr -n user.oomd_avoid -v 1 /sys/fs/cgroup/testsuite.slice/testsuite-56.slice/testsuite-56-workload.slice/testsuite-56-testbloat.service |
| |
| timeout=$(date -ud "2 minutes" +%s) |
| while [[ $(date -u +%s) -le $timeout ]]; do |
| |
| From 32d695eccfeef00023992cdf20bf39f9d0288c67 Mon Sep 17 00:00:00 2001 |
| From: Anita Zhang <the.anitazha@gmail.com> |
| Date: Thu, 28 Jan 2021 17:35:17 -0800 |
| Subject: [PATCH 7/7] man: document ManagedOOMPreference= |
| |
| |
| man/org.freedesktop.systemd1.xml | 36 ++++++++++++++++++++++++++++++++ |
| man/systemd.resource-control.xml | 32 ++++++++++++++++++++++++++++ |
| 2 files changed, 68 insertions(+) |
| |
| diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml |
| index 7543a617b78..1d419ac495e 100644 |
| |
| |
| @@ -2450,6 +2450,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { |
| readonly s ManagedOOMMemoryPressure = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; |
| + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| + readonly s ManagedOOMPreference = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| readonly as Environment = ['...', ...]; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| @@ -2974,6 +2976,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { |
| |
| <!--property ManagedOOMMemoryPressureLimitPermyriad is not documented!--> |
| |
| + <!--property ManagedOOMPreference is not documented!--> |
| + |
| <!--property EnvironmentFiles is not documented!--> |
| |
| <!--property PassEnvironment is not documented!--> |
| @@ -3538,6 +3542,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMMemoryPressureLimitPermyriad"/> |
| |
| + <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/> |
| + |
| <variablelist class="dbus-property" generated="True" extra-ref="Environment"/> |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/> |
| @@ -4204,6 +4210,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { |
| readonly s ManagedOOMMemoryPressure = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; |
| + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| + readonly s ManagedOOMPreference = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| readonly as Environment = ['...', ...]; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| @@ -4756,6 +4764,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { |
| |
| <!--property ManagedOOMMemoryPressureLimitPermyriad is not documented!--> |
| |
| + <!--property ManagedOOMPreference is not documented!--> |
| + |
| <!--property EnvironmentFiles is not documented!--> |
| |
| <!--property PassEnvironment is not documented!--> |
| @@ -5318,6 +5328,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMMemoryPressureLimitPermyriad"/> |
| |
| + <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/> |
| + |
| <variablelist class="dbus-property" generated="True" extra-ref="Environment"/> |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/> |
| @@ -5897,6 +5909,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { |
| readonly s ManagedOOMMemoryPressure = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; |
| + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| + readonly s ManagedOOMPreference = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| readonly as Environment = ['...', ...]; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| @@ -6377,6 +6391,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { |
| |
| <!--property ManagedOOMMemoryPressureLimitPermyriad is not documented!--> |
| |
| + <!--property ManagedOOMPreference is not documented!--> |
| + |
| <!--property EnvironmentFiles is not documented!--> |
| |
| <!--property PassEnvironment is not documented!--> |
| @@ -6857,6 +6873,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMMemoryPressureLimitPermyriad"/> |
| |
| + <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/> |
| + |
| <variablelist class="dbus-property" generated="True" extra-ref="Environment"/> |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/> |
| @@ -7557,6 +7575,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { |
| readonly s ManagedOOMMemoryPressure = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; |
| + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| + readonly s ManagedOOMPreference = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| readonly as Environment = ['...', ...]; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| @@ -8023,6 +8043,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { |
| |
| <!--property ManagedOOMMemoryPressureLimitPermyriad is not documented!--> |
| |
| + <!--property ManagedOOMPreference is not documented!--> |
| + |
| <!--property EnvironmentFiles is not documented!--> |
| |
| <!--property PassEnvironment is not documented!--> |
| @@ -8489,6 +8511,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMMemoryPressureLimitPermyriad"/> |
| |
| + <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/> |
| + |
| <variablelist class="dbus-property" generated="True" extra-ref="Environment"/> |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/> |
| @@ -9042,6 +9066,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { |
| readonly s ManagedOOMMemoryPressure = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; |
| + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| + readonly s ManagedOOMPreference = '...'; |
| }; |
| interface org.freedesktop.DBus.Peer { ... }; |
| interface org.freedesktop.DBus.Introspectable { ... }; |
| @@ -9178,6 +9204,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { |
| |
| <!--property ManagedOOMMemoryPressureLimitPermyriad is not documented!--> |
| |
| + <!--property ManagedOOMPreference is not documented!--> |
| + |
| <!--Autogenerated cross-references for systemd.directives, do not edit--> |
| |
| <variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Unit"/> |
| @@ -9318,6 +9346,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMMemoryPressureLimitPermyriad"/> |
| |
| + <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/> |
| + |
| <!--End of Autogenerated section--> |
| |
| <refsect2> |
| @@ -9477,6 +9507,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { |
| readonly s ManagedOOMMemoryPressure = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; |
| + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") |
| + readonly s ManagedOOMPreference = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| readonly s KillMode = '...'; |
| @org.freedesktop.DBus.Property.EmitsChangedSignal("const") |
| @@ -9629,6 +9661,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { |
| |
| <!--property ManagedOOMMemoryPressureLimitPermyriad is not documented!--> |
| |
| + <!--property ManagedOOMPreference is not documented!--> |
| + |
| <!--property KillMode is not documented!--> |
| |
| <!--property KillSignal is not documented!--> |
| @@ -9795,6 +9829,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMMemoryPressureLimitPermyriad"/> |
| |
| + <variablelist class="dbus-property" generated="True" extra-ref="ManagedOOMPreference"/> |
| + |
| <variablelist class="dbus-property" generated="True" extra-ref="KillMode"/> |
| |
| <variablelist class="dbus-property" generated="True" extra-ref="KillSignal"/> |
| diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml |
| index be9c35057db..13ff7e9a740 100644 |
| |
| |
| @@ -913,6 +913,38 @@ DeviceAllow=/dev/loop-control |
| </para> |
| </listitem> |
| </varlistentry> |
| + |
| + <varlistentry> |
| + <term><varname>ManagedOOMPreference=none|avoid|omit</varname></term> |
| + |
| + <listitem> |
| + <para>Allows deprioritizing or omitting this unit's cgroup as a candidate when <command>systemd-oomd</command> |
| + needs to act. Requires support for extended attributes (see |
| + <citerefentry project='man-pages'><refentrytitle>xattr</refentrytitle><manvolnum>7</manvolnum></citerefentry>) |
| + in order to use <option>avoid</option> or <option>omit</option>. Additionally, <command>systemd-oomd</command> |
| + will ignore these extended attributes if the unit's cgroup is not owned by the root user and group.</para> |
| + |
| + <para>If this property is set to <option>avoid</option>, the service manager will set the |
| + "user.oomd_avoid" extended attribute on the unit's cgroup to "1". If <command>systemd-oomd</command> sees |
| + this extended attribute on a cgroup set to "1" when choosing between candidates, it will only select the |
| + cgroup with "user.oomd_avoid" if there are no other viable candidates.</para> |
| + |
| + <para>If this property is set to <option>omit</option>, the service manager will set the "user.oomd_omit" |
| + extended attribute on the unit's cgroup to "1". If <command>systemd-oomd</command> sees this extended |
| + attribute on the cgroup set to "1", it will ignore the cgroup as a candidate and will not perform any actions |
| + on the cgroup.</para> |
| + |
| + <para>It is recommended to use <option>avoid</option> and <option>omit</option> sparingly as they can adversely |
| + affect <command>systemd-oomd</command>'s kill behavior. Also note that these extended attributes are not |
| + applied recursively to cgroups under this unit's cgroup.</para> |
| + |
| + <para>Defaults to <option>none</option> which means no extended attributes will be set and <command>systemd-oomd</command> will |
| + sort this unit's cgroup as defined in |
| + <citerefentry><refentrytitle>systemd-oomd.service</refentrytitle><manvolnum>8</manvolnum></citerefentry> |
| + and <citerefentry><refentrytitle>oomd.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> (if this |
| + unit's cgroup becomes a candidate).</para> |
| + </listitem> |
| + </varlistentry> |
| </variablelist> |
| </refsect1> |
| |