From 1992c5552fbdf72b58eabfe86ece16dd507f7344 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Feb 23 2021 01:15:45 +0000 Subject: Version 248-rc1 --- diff --git a/17829.patch b/17829.patch deleted file mode 100644 index 176b969..0000000 --- a/17829.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 14d044da23d6f2fa03066aedcc2600a479c1f731 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Wed, 2 Dec 2020 14:41:38 -0800 -Subject: [PATCH] test: fix TEST-56-OOMD thresholds for linux 5.9 changes - -Fixes #17533 - -The memory pressure values of the units in TEST-56-OOMD seemed to be a -lot lower after updating to linux 5.9. This is likely due to a fix from -https://github.com/torvalds/linux/commit/e22c6ed90aa91abc08f107344428ebb8c2629e98. - -To account for this, I lowered memory.high on testbloat.service to -throttle it even more. This was enough to generate the 50%+ value to trigger -oomd for the test, but as an extra precaution I also lowered the oomd -threshold to 1% so it's certain to try and kill testbloat.service. ---- - test/units/testsuite-56-testbloat.service | 6 +++--- - test/units/testsuite-56-workload.slice | 2 +- - test/units/testsuite-56.sh | 2 +- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/test/units/testsuite-56-testbloat.service b/test/units/testsuite-56-testbloat.service -index 40cf5a9f36f..6163aae1dba 100644 ---- a/test/units/testsuite-56-testbloat.service -+++ b/test/units/testsuite-56-testbloat.service -@@ -2,8 +2,8 @@ - Description=Create a lot of memory pressure - - [Service] --# A very small memory.high will cause the script (trying to use a lot of memory) --# to throttle and be put under heavy pressure --MemoryHigh=2M -+# A VERY small memory.high will cause the script (trying to use a lot of memory) -+# to throttle and be put under heavy pressure. 
-+MemoryHigh=1M - Slice=testsuite-56-workload.slice - ExecStart=/usr/lib/systemd/tests/testdata/units/testsuite-56-slowgrowth.sh -diff --git a/test/units/testsuite-56-workload.slice b/test/units/testsuite-56-workload.slice -index 3d542ec2bae..45b04914c63 100644 ---- a/test/units/testsuite-56-workload.slice -+++ b/test/units/testsuite-56-workload.slice -@@ -7,4 +7,4 @@ MemoryAccounting=true - IOAccounting=true - TasksAccounting=true - ManagedOOMMemoryPressure=kill --ManagedOOMMemoryPressureLimitPercent=50% -+ManagedOOMMemoryPressureLimitPercent=1% -diff --git a/test/units/testsuite-56.sh b/test/units/testsuite-56.sh -index 37d62d943c0..1846248855b 100755 ---- a/test/units/testsuite-56.sh -+++ b/test/units/testsuite-56.sh -@@ -19,7 +19,7 @@ systemctl start testsuite-56-testchill.service - - # Verify systemd-oomd is monitoring the expected units - oomctl | grep "/testsuite-56-workload.slice" --oomctl | grep "50%" -+oomctl | grep "1%" - - # systemd-oomd watches for elevated pressure for 30 seconds before acting. - # It can take time to build up pressure so either wait 5 minutes or for the service to fail. diff --git a/18361.patch b/18361.patch deleted file mode 100644 index 282b7f3..0000000 --- a/18361.patch +++ /dev/null @@ -1,403 +0,0 @@ -From c20aa7b17166b9f331da33ad9288f9ede75c72db Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Sun, 24 Jan 2021 00:16:19 -0800 -Subject: [PATCH 1/4] oom: make memory pressure duration configurable through - oomd.conf - ---- - man/oomd.conf.xml | 12 +++++++++++- - src/oom/oomd-manager.c | 13 +++++++++---- - src/oom/oomd-manager.h | 5 +++-- - src/oom/oomd-util.h | 1 + - src/oom/oomd.c | 4 +++- - src/oom/oomd.conf | 1 + - test/units/testsuite-56.sh | 3 +++ - 7 files changed, 31 insertions(+), 8 deletions(-) - -diff --git a/man/oomd.conf.xml b/man/oomd.conf.xml -index 35a0686bc50..bb5da87c548 100644 ---- a/man/oomd.conf.xml -+++ b/man/oomd.conf.xml -@@ -65,13 +65,23 @@ - will take action. 
A unit can override this value with ManagedOOMMemoryPressureLimitPercent=. - The memory pressure for this property represents the fraction of time in a 10 second window in which all tasks - in the cgroup were delayed. For each monitored cgroup, if the memory pressure on that cgroup exceeds the -- limit set for more than 30 seconds, systemd-oomd will act on eligible descendant cgroups, -+ limit set for longer than the duration set by DefaultMemoryPressureDurationSec=, -+ systemd-oomd will act on eligible descendant cgroups, - starting from the ones with the most reclaim activity to the least reclaim activity. Which cgroups are - monitored and what action gets taken depends on what the unit has configured for - ManagedOOMMemoryPressure=. Takes a percentage value between 0% and 100%, inclusive. - Defaults to 60%. - - -+ -+ DefaultMemoryPressureDurationSec= -+ -+ Sets the amount of time a unit's cgroup needs to have exceeded memory pressure limits before -+ systemd-oomd will take action. Memory pressure limits are defined by -+ DefaultMemoryPressureLimitPercent= and ManagedOOMMemoryPressureLimitPercent=. -+ Defaults to 30 seconds when this property is unset or set to 0. 
-+ -+ - - - -diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c -index fec96519e01..e8ed6a52739 100644 ---- a/src/oom/oomd-manager.c -+++ b/src/oom/oomd-manager.c -@@ -306,7 +306,7 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo - m->post_action_delay_start = 0; - } - -- r = oomd_pressure_above(m->monitored_mem_pressure_cgroup_contexts, PRESSURE_DURATION_USEC, &targets); -+ r = oomd_pressure_above(m->monitored_mem_pressure_cgroup_contexts, m->default_mem_pressure_duration_usec, &targets); - if (r == -ENOMEM) - return log_error_errno(r, "Failed to check if memory pressure exceeded limits"); - else if (r == 1) { -@@ -325,7 +325,7 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo - - SET_FOREACH(t, targets) { - log_notice("Memory pressure for %s is greater than %lu for more than %"PRIu64" seconds and there was reclaim activity", -- t->path, LOAD_INT(t->mem_pressure_limit), PRESSURE_DURATION_USEC / USEC_PER_SEC); -+ t->path, LOAD_INT(t->mem_pressure_limit), m->default_mem_pressure_duration_usec / USEC_PER_SEC); - - r = oomd_kill_by_pgscan(candidates, t->path, m->dry_run); - if (r == -ENOMEM) -@@ -471,7 +471,7 @@ static int manager_connect_bus(Manager *m) { - return 0; - } - --int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit) { -+int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit, usec_t mem_pressure_usec) { - unsigned long l; - int r; - -@@ -487,6 +487,8 @@ int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressur - if (r < 0) - return r; - -+ m->default_mem_pressure_duration_usec = mem_pressure_usec ?: DEFAULT_MEM_PRESSURE_DURATION_USEC; -+ - r = manager_connect_bus(m); - if (r < 0) - return r; -@@ -505,6 +507,7 @@ int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressur - int manager_get_dump_string(Manager *m, char **ret) { - _cleanup_free_ char *dump = NULL; - 
_cleanup_fclose_ FILE *f = NULL; -+ char buf[FORMAT_TIMESPAN_MAX]; - OomdCGroupContext *c; - size_t size; - char *key; -@@ -521,10 +524,12 @@ int manager_get_dump_string(Manager *m, char **ret) { - "Dry Run: %s\n" - "Swap Used Limit: %u%%\n" - "Default Memory Pressure Limit: %lu%%\n" -+ "Default Memory Pressure Duration: %s\n" - "System Context:\n", - yes_no(m->dry_run), - m->swap_used_limit, -- LOAD_INT(m->default_mem_pressure_limit)); -+ LOAD_INT(m->default_mem_pressure_limit), -+ format_timespan(buf, sizeof(buf), m->default_mem_pressure_duration_usec, USEC_PER_SEC)); - oomd_dump_system_context(&m->system_context, f, "\t"); - - fprintf(f, "Swap Monitored CGroups:\n"); -diff --git a/src/oom/oomd-manager.h b/src/oom/oomd-manager.h -index 3f3eb5aa4b6..ede9903e5a6 100644 ---- a/src/oom/oomd-manager.h -+++ b/src/oom/oomd-manager.h -@@ -16,7 +16,7 @@ - * percentage of time all tasks were delayed (i.e. unproductive). - * Generally 60 or higher might be acceptable for something like system.slice with no memory.high set; processes in - * system.slice are assumed to be less latency sensitive. */ --#define PRESSURE_DURATION_USEC (30 * USEC_PER_SEC) -+#define DEFAULT_MEM_PRESSURE_DURATION_USEC (30 * USEC_PER_SEC) - #define DEFAULT_MEM_PRESSURE_LIMIT 60 - #define DEFAULT_SWAP_USED_LIMIT 90 - -@@ -33,6 +33,7 @@ struct Manager { - bool dry_run; - unsigned swap_used_limit; - loadavg_t default_mem_pressure_limit; -+ usec_t default_mem_pressure_duration_usec; - - /* k: cgroup paths -> v: OomdCGroupContext - * Used to detect when to take action. 
*/ -@@ -53,7 +54,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free); - - int manager_new(Manager **ret); - --int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit); -+int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit, usec_t mem_pressure_usec); - - int manager_get_dump_string(Manager *m, char **ret); - -diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h -index 0834cbf09d7..d7a9890e7a2 100644 ---- a/src/oom/oomd-util.h -+++ b/src/oom/oomd-util.h -@@ -31,6 +31,7 @@ struct OomdCGroupContext { - - /* These are only used by oomd_pressure_above for acting on high memory pressure. */ - loadavg_t mem_pressure_limit; -+ usec_t mem_pressure_duration_usec; - usec_t last_hit_mem_pressure_limit; - }; - -diff --git a/src/oom/oomd.c b/src/oom/oomd.c -index 8cf776ec0f5..1b0f8ff6c40 100644 ---- a/src/oom/oomd.c -+++ b/src/oom/oomd.c -@@ -19,11 +19,13 @@ - static bool arg_dry_run = false; - static int arg_swap_used_limit = -1; - static int arg_mem_pressure_limit = -1; -+static usec_t arg_mem_pressure_usec = 0; - - static int parse_config(void) { - static const ConfigTableItem items[] = { - { "OOM", "SwapUsedLimitPercent", config_parse_percent, 0, &arg_swap_used_limit }, - { "OOM", "DefaultMemoryPressureLimitPercent", config_parse_percent, 0, &arg_mem_pressure_limit }, -+ { "OOM", "DefaultMemoryPressureDurationSec", config_parse_sec, 0, &arg_mem_pressure_usec }, - {} - }; - -@@ -160,7 +162,7 @@ static int run(int argc, char *argv[]) { - if (r < 0) - return log_error_errno(r, "Failed to create manager: %m"); - -- r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit); -+ r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit, arg_mem_pressure_usec); - if (r < 0) - return log_error_errno(r, "Failed to start up daemon: %m"); - -diff --git a/src/oom/oomd.conf b/src/oom/oomd.conf -index 8ac97169610..766cb1717f7 100644 ---- a/src/oom/oomd.conf -+++ 
b/src/oom/oomd.conf -@@ -14,3 +14,4 @@ - [OOM] - #SwapUsedLimitPercent=90% - #DefaultMemoryPressureLimitPercent=60% -+#DefaultMemoryPressureDurationSec=30s -diff --git a/test/units/testsuite-56.sh b/test/units/testsuite-56.sh -index 1846248855b..6e7941a57fc 100755 ---- a/test/units/testsuite-56.sh -+++ b/test/units/testsuite-56.sh -@@ -14,12 +14,15 @@ if [[ "$cgroup_type" != *"cgroup2"* ]] && [[ "$cgroup_type" != *"0x63677270"* ]] - fi - [[ -e /skipped ]] && exit 0 || true - -+echo "DefaultMemoryPressureDurationSec=5s" >> /etc/systemd/oomd.conf -+ - systemctl start testsuite-56-testbloat.service - systemctl start testsuite-56-testchill.service - - # Verify systemd-oomd is monitoring the expected units - oomctl | grep "/testsuite-56-workload.slice" - oomctl | grep "1%" -+oomctl | grep "Default Memory Pressure Duration: 5s" - - # systemd-oomd watches for elevated pressure for 30 seconds before acting. - # It can take time to build up pressure so either wait 5 minutes or for the service to fail. - -From 408a3bbd76326793ea5d1cf4e0a9444a4c252d86 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Sat, 23 Jan 2021 22:10:42 -0800 -Subject: [PATCH 2/4] oom: make swap a soft requirement - ---- - man/systemd-oomd.service.xml | 4 ++-- - src/oom/oomd-manager.c | 8 ++++++-- - src/oom/oomd.c | 6 ++---- - src/oom/test-oomd-util.c | 11 +++++++++++ - 4 files changed, 21 insertions(+), 8 deletions(-) - -diff --git a/man/systemd-oomd.service.xml b/man/systemd-oomd.service.xml -index 9cb9c6076a9..ebd2467ee23 100644 ---- a/man/systemd-oomd.service.xml -+++ b/man/systemd-oomd.service.xml -@@ -56,8 +56,8 @@ - - You will need a kernel compiled with PSI support. This is available in Linux 4.20 and above. - -- The system must also have swap enabled for systemd-oomd to function correctly. With swap -- enabled, the system spends enough time swapping pages to let systemd-oomd react. -+ It is highly recommended for the system to have swap enabled for systemd-oomd to function -+ optimally. 
With swap enabled, the system spends enough time swapping pages to let systemd-oomd react. - Without swap, the system enters a livelocked state much more quickly and may prevent systemd-oomd - from responding in a reasonable amount of time. See - "In defence of swap: common misconceptions" -diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c -index e8ed6a52739..814fda51f31 100644 ---- a/src/oom/oomd-manager.c -+++ b/src/oom/oomd-manager.c -@@ -6,6 +6,7 @@ - #include "cgroup-util.h" - #include "fd-util.h" - #include "fileio.h" -+#include "memory-util.h" - #include "oomd-manager-bus.h" - #include "oomd-manager.h" - #include "path-util.h" -@@ -294,9 +295,12 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo - return log_error_errno(r, "Failed to update monitored memory pressure cgroup contexts"); - - r = oomd_system_context_acquire("/proc/swaps", &m->system_context); -- /* If there aren't units depending on swap actions, the only error we exit on is ENOMEM */ -- if (r == -ENOMEM || (r < 0 && !hashmap_isempty(m->monitored_swap_cgroup_contexts))) -+ /* If there aren't units depending on swap actions, the only error we exit on is ENOMEM. -+ * Allow ENOENT in the event that swap is disabled on the system. 
*/ -+ if (r == -ENOMEM || (r < 0 && r != -ENOENT && !hashmap_isempty(m->monitored_swap_cgroup_contexts))) - return log_error_errno(r, "Failed to acquire system context"); -+ else if (r == -ENOENT) -+ zero(m->system_context); - - /* If we're still recovering from a kill, don't try to kill again yet */ - if (m->post_action_delay_start > 0) { -diff --git a/src/oom/oomd.c b/src/oom/oomd.c -index 1b0f8ff6c40..1fbcf41492d 100644 ---- a/src/oom/oomd.c -+++ b/src/oom/oomd.c -@@ -142,10 +142,8 @@ static int run(int argc, char *argv[]) { - return log_error_errno(r, "Failed to get SwapTotal from /proc/meminfo: %m"); - - r = safe_atollu(swap, &s); -- if (r < 0) -- return log_error_errno(r, "Failed to parse SwapTotal from /proc/meminfo: %s: %m", swap); -- if (s == 0) -- return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Requires swap to operate"); -+ if (r < 0 || s == 0) -+ log_warning("Swap is currently not detected; memory pressure usage will be degraded"); - - if (!is_pressure_supported()) - return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Pressure Stall Information (PSI) is not supported"); -diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c -index 8143408902b..54fe2a03d14 100644 ---- a/src/oom/test-oomd-util.c -+++ b/src/oom/test-oomd-util.c -@@ -159,6 +159,11 @@ static void test_oomd_system_context_acquire(void) { - assert_se(ctx.swap_total == 0); - assert_se(ctx.swap_used == 0); - -+ assert_se(write_string_file(path, "Filename Type Size Used Priority", WRITE_STRING_FILE_CREATE) == 0); -+ assert_se(oomd_system_context_acquire(path, &ctx) == 0); -+ assert_se(ctx.swap_total == 0); -+ assert_se(ctx.swap_used == 0); -+ - assert_se(write_string_file(path, "Filename Type Size Used Priority\n" - "/swapvol/swapfile file 18971644 0 -3\n" - "/dev/vda2 partition 1999868 993780 -2", WRITE_STRING_FILE_CREATE) == 0); -@@ -268,6 +273,12 @@ static void test_oomd_swap_free_below(void) { - .swap_used = 3310136 * 1024U, - }; - assert_se(oomd_swap_free_below(&ctx, 20) == 
false); -+ -+ ctx = (OomdSystemContext) { -+ .swap_total = 0, -+ .swap_used = 0, -+ }; -+ assert_se(oomd_swap_free_below(&ctx, 20) == false); - } - - static void test_oomd_sort_cgroups(void) { - -From 924c89e9fe95d47b6ad94544bfdd5f087646daea Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Sun, 24 Jan 2021 01:22:51 -0800 -Subject: [PATCH 3/4] oom: fix reclaim activity detection - -This should have been checking for any reclaim activity within a larger interval -of time rather than within the past second. On systems with swap this -doesn't seem to have mattered too much as reclaim would always increase when -memory pressure was elevated. But testing in the no swap case having -this larger interval made a difference between oomd killing or not. ---- - src/oom/oomd-manager.c | 7 +++++-- - src/oom/oomd-manager.h | 2 ++ - 2 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c -index 814fda51f31..3efa629002e 100644 ---- a/src/oom/oomd-manager.c -+++ b/src/oom/oomd-manager.c -@@ -302,6 +302,9 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo - else if (r == -ENOENT) - zero(m->system_context); - -+ if (oomd_memory_reclaim(m->monitored_mem_pressure_cgroup_contexts)) -+ m->last_reclaim_at = usec_now; -+ - /* If we're still recovering from a kill, don't try to kill again yet */ - if (m->post_action_delay_start > 0) { - if (m->post_action_delay_start + POST_ACTION_DELAY_USEC > usec_now) -@@ -314,12 +317,12 @@ static int monitor_cgroup_contexts_handler(sd_event_source *s, uint64_t usec, vo - if (r == -ENOMEM) - return log_error_errno(r, "Failed to check if memory pressure exceeded limits"); - else if (r == 1) { -- /* Check if there was reclaim activity in the last interval. The concern is the following case: -+ /* Check if there was reclaim activity in the given interval. 
The concern is the following case: - * Pressure climbed, a lot of high-frequency pages were reclaimed, and we killed the offending - * cgroup. Even after this, well-behaved processes will fault in recently resident pages and - * this will cause pressure to remain high. Thus if there isn't any reclaim pressure, no need - * to kill something (it won't help anyways). */ -- if (oomd_memory_reclaim(m->monitored_mem_pressure_cgroup_contexts)) { -+ if ((usec_now - m->last_reclaim_at) <= RECLAIM_DURATION_USEC) { - _cleanup_hashmap_free_ Hashmap *candidates = NULL; - OomdCGroupContext *t; - -diff --git a/src/oom/oomd-manager.h b/src/oom/oomd-manager.h -index ede9903e5a6..ee17abced26 100644 ---- a/src/oom/oomd-manager.h -+++ b/src/oom/oomd-manager.h -@@ -20,6 +20,7 @@ - #define DEFAULT_MEM_PRESSURE_LIMIT 60 - #define DEFAULT_SWAP_USED_LIMIT 90 - -+#define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC) - #define POST_ACTION_DELAY_USEC (15 * USEC_PER_SEC) - - typedef struct Manager Manager; -@@ -42,6 +43,7 @@ struct Manager { - - OomdSystemContext system_context; - -+ usec_t last_reclaim_at; - usec_t post_action_delay_start; - - sd_event_source *cgroup_context_event_source; - -From 2e744a2cd89fc0ea67cf78cfba617b5105a26215 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Sun, 24 Jan 2021 01:34:23 -0800 -Subject: [PATCH 4/4] oom: update extended test to remove swap gating - ---- - test/units/testsuite-56.sh | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/test/units/testsuite-56.sh b/test/units/testsuite-56.sh -index 6e7941a57fc..4dc9d8c7a86 100755 ---- a/test/units/testsuite-56.sh -+++ b/test/units/testsuite-56.sh -@@ -6,7 +6,6 @@ systemd-analyze log-level debug - systemd-analyze log-target console - - # Loose checks to ensure the environment has the necessary features for systemd-oomd --[[ "$( awk '/SwapTotal/ { print $2 }' /proc/meminfo )" != "0" ]] || echo "no swap" >> /skipped - [[ -e /proc/pressure ]] || echo "no PSI" >> /skipped - cgroup_type=$(stat 
-fc %T /sys/fs/cgroup/) - if [[ "$cgroup_type" != *"cgroup2"* ]] && [[ "$cgroup_type" != *"0x63677270"* ]]; then -@@ -16,8 +15,8 @@ fi - - echo "DefaultMemoryPressureDurationSec=5s" >> /etc/systemd/oomd.conf - --systemctl start testsuite-56-testbloat.service - systemctl start testsuite-56-testchill.service -+systemctl start testsuite-56-testbloat.service - - # Verify systemd-oomd is monitoring the expected units - oomctl | grep "/testsuite-56-workload.slice" diff --git a/18401.patch b/18401.patch deleted file mode 100644 index c42ae7e..0000000 --- a/18401.patch +++ /dev/null @@ -1,1201 +0,0 @@ -From 2ccd5198faa8ca65001f90c551924e86bf737a85 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Mon, 25 Jan 2021 23:56:23 -0800 -Subject: [PATCH 1/7] oom: shorten xattr name - ---- - src/core/cgroup.c | 2 +- - src/oom/oomd-util.c | 4 ++-- - src/oom/test-oomd-util.c | 2 +- - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/core/cgroup.c b/src/core/cgroup.c -index c9cf7fb16c6..70282a7abda 100644 ---- a/src/core/cgroup.c -+++ b/src/core/cgroup.c -@@ -2746,7 +2746,7 @@ int unit_check_oomd_kill(Unit *u) { - else if (r == 0) - return 0; - -- r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.systemd_oomd_kill", &value); -+ r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value); - if (r < 0 && r != -ENODATA) - return r; - -diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c -index fcccddb92ea..80b9583440c 100644 ---- a/src/oom/oomd-util.c -+++ b/src/oom/oomd-util.c -@@ -201,9 +201,9 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) { - if (r < 0) - return r; - -- r = increment_oomd_xattr(path, "user.systemd_oomd_kill", set_size(pids_killed)); -+ r = increment_oomd_xattr(path, "user.oomd_kill", set_size(pids_killed)); - if (r < 0) -- log_debug_errno(r, "Failed to set user.systemd_oomd_kill on kill: %m"); -+ log_debug_errno(r, "Failed to set user.oomd_kill on kill: %m"); - - return 
set_size(pids_killed) != 0; - } -diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c -index 54fe2a03d14..3dec4f0ff06 100644 ---- a/src/oom/test-oomd-util.c -+++ b/src/oom/test-oomd-util.c -@@ -79,7 +79,7 @@ static void test_oomd_cgroup_kill(void) { - sleep(2); - assert_se(cg_is_empty(SYSTEMD_CGROUP_CONTROLLER, cgroup) == true); - -- assert_se(cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.systemd_oomd_kill", &v) >= 0); -+ assert_se(cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_kill", &v) >= 0); - assert_se(memcmp(v, i == 0 ? "2" : "4", 2) == 0); - } - } - -From d38916b398127e005d0cf131092a99317661ec3c Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Fri, 5 Feb 2021 03:00:11 -0800 -Subject: [PATCH 2/7] oom: wrap reply.path with empty_to_root - ---- - src/oom/oomd-manager.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c -index 338935b3ec6..825fe38e189 100644 ---- a/src/oom/oomd-manager.c -+++ b/src/oom/oomd-manager.c -@@ -93,7 +93,7 @@ static int process_managed_oom_reply( - m->monitored_swap_cgroup_contexts : m->monitored_mem_pressure_cgroup_contexts; - - if (reply.mode == MANAGED_OOM_AUTO) { -- (void) oomd_cgroup_context_free(hashmap_remove(monitor_hm, reply.path)); -+ (void) oomd_cgroup_context_free(hashmap_remove(monitor_hm, empty_to_root(reply.path))); - continue; - } - -@@ -109,7 +109,7 @@ static int process_managed_oom_reply( - } - } - -- ret = oomd_insert_cgroup_context(NULL, monitor_hm, reply.path); -+ ret = oomd_insert_cgroup_context(NULL, monitor_hm, empty_to_root(reply.path)); - if (ret == -ENOMEM) { - r = ret; - goto finish; -@@ -117,7 +117,7 @@ static int process_managed_oom_reply( - - /* Always update the limit in case it was changed. For non-memory pressure detection the value is - * ignored so always updating it here is not a problem. 
*/ -- ctx = hashmap_get(monitor_hm, reply.path); -+ ctx = hashmap_get(monitor_hm, empty_to_root(reply.path)); - if (ctx) - ctx->mem_pressure_limit = limit; - } - -From a695da238e7a6bd6eb440facc784aa6fca6c3d90 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Wed, 27 Jan 2021 23:43:13 -0800 -Subject: [PATCH 3/7] oom: sort by pgscan and memory usage - -If 2 candidates have the same pgscan, prioritize the one with the larger -memory usage. ---- - src/oom/oomd-util.c | 2 +- - src/oom/oomd-util.h | 5 ++++- - src/oom/test-oomd-util.c | 24 ++++++++++++++---------- - 3 files changed, 19 insertions(+), 12 deletions(-) - -diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c -index 80b9583440c..8f138d64c6c 100644 ---- a/src/oom/oomd-util.c -+++ b/src/oom/oomd-util.c -@@ -214,7 +214,7 @@ int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) { - - assert(h); - -- r = oomd_sort_cgroup_contexts(h, compare_pgscan, prefix, &sorted); -+ r = oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, prefix, &sorted); - if (r < 0) - return r; - -diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h -index d7a9890e7a2..f0648c5dcdd 100644 ---- a/src/oom/oomd-util.h -+++ b/src/oom/oomd-util.h -@@ -61,10 +61,13 @@ bool oomd_memory_reclaim(Hashmap *h); - /* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. 
*/ - bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent); - --static inline int compare_pgscan(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) { -+static inline int compare_pgscan_and_memory_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) { - assert(c1); - assert(c2); - -+ if ((*c2)->pgscan == (*c1)->pgscan) -+ return CMP((*c2)->current_memory_usage, (*c1)->current_memory_usage); -+ - return CMP((*c2)->pgscan, (*c1)->pgscan); - } - -diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c -index 3dec4f0ff06..a1fe78806a1 100644 ---- a/src/oom/test-oomd-util.c -+++ b/src/oom/test-oomd-util.c -@@ -292,16 +292,20 @@ static void test_oomd_sort_cgroups(void) { - OomdCGroupContext ctx[4] = { - { .path = paths[0], - .swap_usage = 20, -- .pgscan = 60 }, -+ .pgscan = 60, -+ .current_memory_usage = 10 }, - { .path = paths[1], - .swap_usage = 60, -- .pgscan = 40 }, -+ .pgscan = 40, -+ .current_memory_usage = 20 }, - { .path = paths[2], - .swap_usage = 40, -- .pgscan = 20 }, -+ .pgscan = 40, -+ .current_memory_usage = 40 }, - { .path = paths[3], - .swap_usage = 10, -- .pgscan = 80 }, -+ .pgscan = 80, -+ .current_memory_usage = 10 }, - }; - - assert_se(h = hashmap_new(&string_hash_ops)); -@@ -318,16 +322,16 @@ static void test_oomd_sort_cgroups(void) { - assert_se(sorted_cgroups[3] == &ctx[3]); - sorted_cgroups = mfree(sorted_cgroups); - -- assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan, NULL, &sorted_cgroups) == 4); -+ assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, NULL, &sorted_cgroups) == 4); - assert_se(sorted_cgroups[0] == &ctx[3]); - assert_se(sorted_cgroups[1] == &ctx[0]); -- assert_se(sorted_cgroups[2] == &ctx[1]); -- assert_se(sorted_cgroups[3] == &ctx[2]); -+ assert_se(sorted_cgroups[2] == &ctx[2]); -+ assert_se(sorted_cgroups[3] == &ctx[1]); - sorted_cgroups = mfree(sorted_cgroups); - -- assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan, 
"/herp.slice/derp.scope", &sorted_cgroups) == 2); -- assert_se(sorted_cgroups[0] == &ctx[1]); -- assert_se(sorted_cgroups[1] == &ctx[2]); -+ assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, "/herp.slice/derp.scope", &sorted_cgroups) == 2); -+ assert_se(sorted_cgroups[0] == &ctx[2]); -+ assert_se(sorted_cgroups[1] == &ctx[1]); - assert_se(sorted_cgroups[2] == 0); - assert_se(sorted_cgroups[3] == 0); - sorted_cgroups = mfree(sorted_cgroups); - -From c73a2c3a6788a2a28899f29579fdd68816f60d59 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Thu, 28 Jan 2021 15:47:26 -0800 -Subject: [PATCH 4/7] oom: skip over cgroups with no memory usage - ---- - src/oom/oomd-util.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c -index 8f138d64c6c..fa8b8b70b19 100644 ---- a/src/oom/oomd-util.c -+++ b/src/oom/oomd-util.c -@@ -219,7 +219,8 @@ int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) { - return r; - - for (int i = 0; i < r; i++) { -- if (sorted[i]->pgscan == 0) -+ /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure */ -+ if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0) - break; - - r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); - -From 63d6d9160523a2c1a71e96ff4125a1440d827b32 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Tue, 26 Jan 2021 00:57:36 -0800 -Subject: [PATCH 5/7] oom: implement avoid/omit xattr support - -There may be situations where a cgroup should be protected from killing -or deprioritized as a candidate. In FB oomd xattrs are used to bias oomd -away from supervisor cgroups and towards worker cgroups in container -tasks. On desktops this can be used to protect important units with -unpredictable resource consumption. - -The patch allows systemd-oomd to understand 2 xattrs: -"user.oomd_avoid" and "user.oomd_omit". If systemd-oomd sees these -xattrs set to 1 on a candidate cgroup (i.e. 
while attempting to kill something) -AND the cgroup is owned by root:root, it will either deprioritize the cgroup as -a candidate (avoid) or remove it completely as a candidate (omit). - -Usage is restricted to root:root cgroups to prevent situations where an -unprivileged user can set their own cgroups lower in the kill priority than -another user's (and prevent them from omitting their units from -systemd-oomd killing). ---- - src/basic/cgroup-util.c | 22 +++++++++ - src/basic/cgroup-util.h | 1 + - src/oom/oomd-util.c | 35 ++++++++++++--- - src/oom/oomd-util.h | 11 +++++ - src/oom/test-oomd-util.c | 54 +++++++++++++++++++++-- - test/test-functions | 1 + - test/units/testsuite-56-testmunch.service | 7 +++ - test/units/testsuite-56.sh | 31 +++++++++++-- - 8 files changed, 149 insertions(+), 13 deletions(-) - create mode 100644 test/units/testsuite-56-testmunch.service - -diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c -index b567822b7ef..45dc1142048 100644 ---- a/src/basic/cgroup-util.c -+++ b/src/basic/cgroup-util.c -@@ -1703,6 +1703,28 @@ int cg_get_attribute_as_bool(const char *controller, const char *path, const cha - return 0; - } - -+ -+int cg_get_owner(const char *controller, const char *path, uid_t *ret_uid, gid_t *ret_gid) { -+ _cleanup_free_ char *f = NULL; -+ struct stat stats; -+ int r; -+ -+ assert(ret_uid); -+ assert(ret_gid); -+ -+ r = cg_get_path(controller, path, NULL, &f); -+ if (r < 0) -+ return r; -+ -+ r = stat(f, &stats); -+ if (r < 0) -+ return -errno; -+ -+ *ret_uid = stats.st_uid; -+ *ret_gid = stats.st_gid; -+ return 0; -+} -+ - int cg_get_keyed_attribute_full( - const char *controller, - const char *path, -diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h -index bdc0d0d086c..63bd25f703e 100644 ---- a/src/basic/cgroup-util.h -+++ b/src/basic/cgroup-util.h -@@ -212,6 +212,7 @@ int cg_get_attribute_as_uint64(const char *controller, const char *path, const c - int cg_get_attribute_as_bool(const char *controller, 
const char *path, const char *attribute, bool *ret); - - int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid); -+int cg_get_owner(const char *controller, const char *path, uid_t *ret_uid, gid_t *ret_gid); - - int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags); - int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size); -diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c -index fa8b8b70b19..db6383bf436 100644 ---- a/src/oom/oomd-util.c -+++ b/src/oom/oomd-util.c -@@ -159,7 +159,8 @@ int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const cha - return -ENOMEM; - - HASHMAP_FOREACH(item, h) { -- if (item->path && prefix && !path_startswith(item->path, prefix)) -+ /* Skip over cgroups that are not valid candidates or are explicitly marked for omission */ -+ if ((item->path && prefix && !path_startswith(item->path, prefix)) || item->omit) - continue; - - sorted[k++] = item; -@@ -219,9 +220,10 @@ int oomd_kill_by_pgscan(Hashmap *h, const char *prefix, bool dry_run) { - return r; - - for (int i = 0; i < r; i++) { -- /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure */ -+ /* Skip cgroups with no reclaim and memory usage; it won't alleviate pressure. */ -+ /* Don't break since there might be "avoid" cgroups at the end. */ - if (sorted[i]->pgscan == 0 && sorted[i]->current_memory_usage == 0) -- break; -+ continue; - - r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); - if (r > 0 || r == -ENOMEM) -@@ -244,8 +246,10 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) { - /* Try to kill cgroups with non-zero swap usage until we either succeed in - * killing or we get to a cgroup with no swap usage. */ - for (int i = 0; i < r; i++) { -+ /* Skip over cgroups with no resource usage. Don't break since there might be "avoid" -+ * cgroups at the end. 
*/ - if (sorted[i]->swap_usage == 0) -- break; -+ continue; - - r = oomd_cgroup_kill(sorted[i]->path, true, dry_run); - if (r > 0 || r == -ENOMEM) -@@ -257,8 +261,10 @@ int oomd_kill_by_swap_usage(Hashmap *h, bool dry_run) { - - int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) { - _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL; -- _cleanup_free_ char *p = NULL, *val = NULL; -+ _cleanup_free_ char *p = NULL, *val = NULL, *avoid_val = NULL, *omit_val = NULL; - bool is_root; -+ uid_t uid; -+ gid_t gid; - int r; - - assert(path); -@@ -278,6 +284,25 @@ int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) { - if (r < 0) - return log_debug_errno(r, "Error parsing memory pressure from %s: %m", p); - -+ r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, path, &uid, &gid); -+ if (r < 0) -+ log_debug_errno(r, "Failed to get owner/group from %s: %m", path); -+ else if (uid == 0 && gid == 0) { -+ /* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used -+ * as an optional feature of systemd-oomd (and the system might not even support them). */ -+ r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, path, "user.oomd_avoid", &avoid_val); -+ if (r >= 0 && streq(avoid_val, "1")) -+ ctx->avoid = true; -+ else if (r == -ENOMEM) -+ return r; -+ -+ r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, path, "user.oomd_omit", &omit_val); -+ if (r >= 0 && streq(omit_val, "1")) -+ ctx->omit = true; -+ else if (r == -ENOMEM) -+ return r; -+ } -+ - if (is_root) { - r = procfs_memory_get_used(&ctx->current_memory_usage); - if (r < 0) -diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h -index f0648c5dcdd..ab6a8da1ef6 100644 ---- a/src/oom/oomd-util.h -+++ b/src/oom/oomd-util.h -@@ -29,6 +29,9 @@ struct OomdCGroupContext { - uint64_t last_pgscan; - uint64_t pgscan; - -+ bool avoid; -+ bool omit; -+ - /* These are only used by oomd_pressure_above for acting on high memory pressure. 
*/ - loadavg_t mem_pressure_limit; - usec_t mem_pressure_duration_usec; -@@ -61,10 +64,15 @@ bool oomd_memory_reclaim(Hashmap *h); - /* Returns true if the amount of swap free is below the percentage of swap specified by `threshold_percent`. */ - bool oomd_swap_free_below(const OomdSystemContext *ctx, uint64_t threshold_percent); - -+/* The compare functions will sort from largest to smallest, putting all the contexts with "avoid" at the end -+ * (after the smallest values). */ - static inline int compare_pgscan_and_memory_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) { - assert(c1); - assert(c2); - -+ if ((*c1)->avoid != (*c2)->avoid) -+ return CMP((*c1)->avoid, (*c2)->avoid); -+ - if ((*c2)->pgscan == (*c1)->pgscan) - return CMP((*c2)->current_memory_usage, (*c1)->current_memory_usage); - -@@ -75,6 +83,9 @@ static inline int compare_swap_usage(OomdCGroupContext * const *c1, OomdCGroupCo - assert(c1); - assert(c2); - -+ if ((*c1)->avoid != (*c2)->avoid) -+ return CMP((*c1)->avoid, (*c2)->avoid); -+ - return CMP((*c2)->swap_usage, (*c1)->swap_usage); - } - -diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c -index a1fe78806a1..193edee0eba 100644 ---- a/src/oom/test-oomd-util.c -+++ b/src/oom/test-oomd-util.c -@@ -89,6 +89,8 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) { - _cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL; - _cleanup_free_ char *cgroup = NULL; - OomdCGroupContext *c1, *c2; -+ bool test_xattrs; -+ int r; - - if (geteuid() != 0) - return (void) log_tests_skipped("not root"); -@@ -101,6 +103,16 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) { - - assert_se(cg_pid_get_path(NULL, 0, &cgroup) >= 0); - -+ /* If we don't have permissions to set xattrs we're likely in a userns or missing capabilities -+ * so skip the xattr portions of the test. 
*/ -+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_test", "1", 1, 0); -+ test_xattrs = !ERRNO_IS_PRIVILEGE(r) && !ERRNO_IS_NOT_SUPPORTED(r); -+ -+ if (test_xattrs) { -+ assert_se(cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_omit", "1", 1, 0) >= 0); -+ assert_se(cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup, "user.oomd_avoid", "1", 1, 0) >= 0); -+ } -+ - assert_se(oomd_cgroup_context_acquire(cgroup, &ctx) == 0); - - assert_se(streq(ctx->path, cgroup)); -@@ -110,12 +122,21 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) { - assert_se(ctx->swap_usage == 0); - assert_se(ctx->last_pgscan == 0); - assert_se(ctx->pgscan == 0); -+ if (test_xattrs) { -+ assert_se(ctx->omit == true); -+ assert_se(ctx->avoid == true); -+ } else { -+ assert_se(ctx->omit == false); -+ assert_se(ctx->avoid == false); -+ } - ctx = oomd_cgroup_context_free(ctx); - - /* Test the root cgroup */ - assert_se(oomd_cgroup_context_acquire("", &ctx) == 0); - assert_se(streq(ctx->path, "/")); - assert_se(ctx->current_memory_usage > 0); -+ assert_se(ctx->omit == false); -+ assert_se(ctx->avoid == false); - - /* Test hashmap inserts */ - assert_se(h1 = hashmap_new(&oomd_cgroup_ctx_hash_ops)); -@@ -137,6 +158,15 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) { - assert_se(c2->last_pgscan == 5555); - assert_se(c2->mem_pressure_limit == 6789); - assert_se(c2->last_hit_mem_pressure_limit == 42); -+ -+ /* Assert that avoid/omit are not set if the cgroup is not owned by root */ -+ if (test_xattrs) { -+ ctx = oomd_cgroup_context_free(ctx); -+ assert_se(cg_set_access(SYSTEMD_CGROUP_CONTROLLER, cgroup, 65534, 65534) >= 0); -+ assert_se(oomd_cgroup_context_acquire(cgroup, &ctx) == 0); -+ assert_se(ctx->omit == false); -+ assert_se(ctx->avoid == false); -+ } - } - - static void test_oomd_system_context_acquire(void) { -@@ -287,9 +317,11 @@ static void test_oomd_sort_cgroups(void) { - char **paths = STRV_MAKE("/herp.slice", - "/herp.slice/derp.scope", - 
"/herp.slice/derp.scope/sheep.service", -- "/zupa.slice"); -+ "/zupa.slice", -+ "/omitted.slice", -+ "/avoid.slice"); - -- OomdCGroupContext ctx[4] = { -+ OomdCGroupContext ctx[6] = { - { .path = paths[0], - .swap_usage = 20, - .pgscan = 60, -@@ -306,6 +338,14 @@ static void test_oomd_sort_cgroups(void) { - .swap_usage = 10, - .pgscan = 80, - .current_memory_usage = 10 }, -+ { .path = paths[4], -+ .swap_usage = 90, -+ .pgscan = 100, -+ .omit = true }, -+ { .path = paths[5], -+ .swap_usage = 99, -+ .pgscan = 200, -+ .avoid = true }, - }; - - assert_se(h = hashmap_new(&string_hash_ops)); -@@ -314,19 +354,23 @@ static void test_oomd_sort_cgroups(void) { - assert_se(hashmap_put(h, "/herp.slice/derp.scope", &ctx[1]) >= 0); - assert_se(hashmap_put(h, "/herp.slice/derp.scope/sheep.service", &ctx[2]) >= 0); - assert_se(hashmap_put(h, "/zupa.slice", &ctx[3]) >= 0); -+ assert_se(hashmap_put(h, "/omitted.slice", &ctx[4]) >= 0); -+ assert_se(hashmap_put(h, "/avoid.slice", &ctx[5]) >= 0); - -- assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 4); -+ assert_se(oomd_sort_cgroup_contexts(h, compare_swap_usage, NULL, &sorted_cgroups) == 5); - assert_se(sorted_cgroups[0] == &ctx[1]); - assert_se(sorted_cgroups[1] == &ctx[2]); - assert_se(sorted_cgroups[2] == &ctx[0]); - assert_se(sorted_cgroups[3] == &ctx[3]); -+ assert_se(sorted_cgroups[4] == &ctx[5]); - sorted_cgroups = mfree(sorted_cgroups); - -- assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, NULL, &sorted_cgroups) == 4); -+ assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, NULL, &sorted_cgroups) == 5); - assert_se(sorted_cgroups[0] == &ctx[3]); - assert_se(sorted_cgroups[1] == &ctx[0]); - assert_se(sorted_cgroups[2] == &ctx[2]); - assert_se(sorted_cgroups[3] == &ctx[1]); -+ assert_se(sorted_cgroups[4] == &ctx[5]); - sorted_cgroups = mfree(sorted_cgroups); - - assert_se(oomd_sort_cgroup_contexts(h, compare_pgscan_and_memory_usage, 
"/herp.slice/derp.scope", &sorted_cgroups) == 2); -@@ -334,6 +378,8 @@ static void test_oomd_sort_cgroups(void) { - assert_se(sorted_cgroups[1] == &ctx[1]); - assert_se(sorted_cgroups[2] == 0); - assert_se(sorted_cgroups[3] == 0); -+ assert_se(sorted_cgroups[4] == 0); -+ assert_se(sorted_cgroups[5] == 0); - sorted_cgroups = mfree(sorted_cgroups); - } - -diff --git a/test/test-functions b/test/test-functions -index df6022982c2..6996cd74752 100644 ---- a/test/test-functions -+++ b/test/test-functions -@@ -124,6 +124,7 @@ BASICTOOLS=( - rmdir - sed - seq -+ setfattr - setfont - setsid - sfdisk -diff --git a/test/units/testsuite-56-testmunch.service b/test/units/testsuite-56-testmunch.service -new file mode 100644 -index 00000000000..b4b925a7af0 ---- /dev/null -+++ b/test/units/testsuite-56-testmunch.service -@@ -0,0 +1,7 @@ -+[Unit] -+Description=Create some memory pressure -+ -+[Service] -+MemoryHigh=2M -+Slice=testsuite-56-workload.slice -+ExecStart=/usr/lib/systemd/tests/testdata/units/testsuite-56-slowgrowth.sh -diff --git a/test/units/testsuite-56.sh b/test/units/testsuite-56.sh -index 8b01fe37ed4..88c185b8869 100755 ---- a/test/units/testsuite-56.sh -+++ b/test/units/testsuite-56.sh -@@ -23,20 +23,43 @@ oomctl | grep "/testsuite-56-workload.slice" - oomctl | grep "1.00%" - oomctl | grep "Default Memory Pressure Duration: 5s" - --# systemd-oomd watches for elevated pressure for 30 seconds before acting. --# It can take time to build up pressure so either wait 5 minutes or for the service to fail. --timeout=$(date -ud "5 minutes" +%s) -+# systemd-oomd watches for elevated pressure for 5 seconds before acting. -+# It can take time to build up pressure so either wait 2 minutes or for the service to fail. -+timeout=$(date -ud "2 minutes" +%s) - while [[ $(date -u +%s) -le $timeout ]]; do - if ! 
systemctl status testsuite-56-testbloat.service; then - break - fi -- sleep 15 -+ sleep 5 - done - - # testbloat should be killed and testchill should be fine - if systemctl status testsuite-56-testbloat.service; then exit 42; fi - if ! systemctl status testsuite-56-testchill.service; then exit 24; fi - -+# only run this portion of the test if we can set xattrs -+if setfattr -n user.xattr_test -v 1 /sys/fs/cgroup/; then -+ sleep 120 # wait for systemd-oomd kill cool down and elevated memory pressure to come down -+ -+ systemctl start testsuite-56-testchill.service -+ systemctl start testsuite-56-testmunch.service -+ systemctl start testsuite-56-testbloat.service -+ setfattr -n user.oomd_avoid -v 1 /sys/fs/cgroup/testsuite.slice/testsuite-56.slice/testsuite-56-workload.slice/testsuite-56-testbloat.service -+ -+ timeout=$(date -ud "2 minutes" +%s) -+ while [[ $(date -u +%s) -le $timeout ]]; do -+ if ! systemctl status testsuite-56-testmunch.service; then -+ break -+ fi -+ sleep 5 -+ done -+ -+ # testmunch should be killed since testbloat had the avoid xattr on it -+ if ! systemctl status testsuite-56-testbloat.service; then exit 25; fi -+ if systemctl status testsuite-56-testmunch.service; then exit 43; fi -+ if ! 
systemctl status testsuite-56-testchill.service; then exit 24; fi -+fi -+ - systemd-analyze log-level info - - echo OK > /testok - -From d87ecfecdb6fb77097f843888e2a05945b6b396b Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Thu, 28 Jan 2021 02:31:44 -0800 -Subject: [PATCH 6/7] oom: add unit file settings for oomd avoid/omit xattrs - ---- - docs/TRANSIENT-SETTINGS.md | 1 + - src/core/cgroup.c | 58 ++++++++++++++++++--- - src/core/cgroup.h | 15 ++++++ - src/core/dbus-cgroup.c | 22 ++++++++ - src/core/execute.c | 4 ++ - src/core/load-fragment-gperf.gperf.m4 | 1 + - src/core/load-fragment.c | 1 + - src/core/load-fragment.h | 1 + - src/shared/bus-unit-util.c | 3 +- - src/test/test-tables.c | 1 + - test/fuzz/fuzz-unit-file/directives.service | 4 ++ - test/units/testsuite-56.sh | 8 ++- - 12 files changed, 109 insertions(+), 10 deletions(-) - -diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md -index 50370602543..9f69a3162a0 100644 ---- a/docs/TRANSIENT-SETTINGS.md -+++ b/docs/TRANSIENT-SETTINGS.md -@@ -273,6 +273,7 @@ All cgroup/resource control settings are available for transient units - ✓ ManagedOOMSwap= - ✓ ManagedOOMMemoryPressure= - ✓ ManagedOOMMemoryPressureLimit= -+✓ ManagedOOMPreference= - ``` - - ## Process Killing Settings -diff --git a/src/core/cgroup.c b/src/core/cgroup.c -index 70282a7abda..833b434b555 100644 ---- a/src/core/cgroup.c -+++ b/src/core/cgroup.c -@@ -131,6 +131,7 @@ void cgroup_context_init(CGroupContext *c) { - - .moom_swap = MANAGED_OOM_AUTO, - .moom_mem_pressure = MANAGED_OOM_AUTO, -+ .moom_preference = MANAGED_OOM_PREFERENCE_NONE, - }; - } - -@@ -417,7 +418,8 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) { - "%sDelegate: %s\n" - "%sManagedOOMSwap: %s\n" - "%sManagedOOMMemoryPressure: %s\n" -- "%sManagedOOMMemoryPressureLimit: %" PRIu32 ".%02" PRIu32 "%%\n", -+ "%sManagedOOMMemoryPressureLimit: %" PRIu32 ".%02" PRIu32 "%%\n" -+ "%sManagedOOMPreference: %s%%\n", - prefix, yes_no(c->cpu_accounting), - 
prefix, yes_no(c->io_accounting), - prefix, yes_no(c->blockio_accounting), -@@ -450,7 +452,8 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) { - prefix, yes_no(c->delegate), - prefix, managed_oom_mode_to_string(c->moom_swap), - prefix, managed_oom_mode_to_string(c->moom_mem_pressure), -- prefix, c->moom_mem_pressure_limit_permyriad / 100, c->moom_mem_pressure_limit_permyriad % 100); -+ prefix, c->moom_mem_pressure_limit_permyriad / 100, c->moom_mem_pressure_limit_permyriad % 100, -+ prefix, managed_oom_preference_to_string(c->moom_preference)); - - if (c->delegate) { - _cleanup_free_ char *t = NULL; -@@ -600,6 +603,35 @@ int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) - UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_low); - UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_min); - -+void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path) { -+ CGroupContext *c; -+ int r; -+ -+ assert(u); -+ -+ c = unit_get_cgroup_context(u); -+ if (!c) -+ return; -+ -+ r = cg_remove_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_avoid"); -+ if (r != -ENODATA) -+ log_unit_debug_errno(u, r, "Failed to remove oomd_avoid flag on control group %s, ignoring: %m", cgroup_path); -+ -+ r = cg_remove_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_omit"); -+ if (r != -ENODATA) -+ log_unit_debug_errno(u, r, "Failed to remove oomd_omit flag on control group %s, ignoring: %m", cgroup_path); -+ -+ if (c->moom_preference == MANAGED_OOM_PREFERENCE_AVOID) { -+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_avoid", "1", 1, 0); -+ if (r < 0) -+ log_unit_debug_errno(u, r, "Failed to set oomd_avoid flag on control group %s, ignoring: %m", cgroup_path); -+ } else if (c->moom_preference == MANAGED_OOM_PREFERENCE_OMIT) { -+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_omit", "1", 1, 0); -+ if (r < 0) -+ log_unit_debug_errno(u, r, "Failed to set oomd_omit flag on control group %s, ignoring: %m", 
cgroup_path); -+ } -+} -+ - static void cgroup_xattr_apply(Unit *u) { - char ids[SD_ID128_STRING_MAX]; - int r; -@@ -630,6 +662,8 @@ static void cgroup_xattr_apply(Unit *u) { - if (r != -ENODATA) - log_unit_debug_errno(u, r, "Failed to remove delegate flag on control group %s, ignoring: %m", u->cgroup_path); - } -+ -+ cgroup_oomd_xattr_apply(u, u->cgroup_path); - } - - static int lookup_block_device(const char *p, dev_t *ret) { -@@ -3737,12 +3771,6 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action) { - return 1; - } - --static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { -- [CGROUP_DEVICE_POLICY_AUTO] = "auto", -- [CGROUP_DEVICE_POLICY_CLOSED] = "closed", -- [CGROUP_DEVICE_POLICY_STRICT] = "strict", --}; -- - int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) { - _cleanup_free_ char *v = NULL; - int r; -@@ -3771,6 +3799,12 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) { - return parse_cpu_set_full(v, cpus, false, NULL, NULL, 0, NULL); - } - -+static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { -+ [CGROUP_DEVICE_POLICY_AUTO] = "auto", -+ [CGROUP_DEVICE_POLICY_CLOSED] = "closed", -+ [CGROUP_DEVICE_POLICY_STRICT] = "strict", -+}; -+ - DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy); - - static const char* const freezer_action_table[_FREEZER_ACTION_MAX] = { -@@ -3779,3 +3813,11 @@ static const char* const freezer_action_table[_FREEZER_ACTION_MAX] = { - }; - - DEFINE_STRING_TABLE_LOOKUP(freezer_action, FreezerAction); -+ -+static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = { -+ [MANAGED_OOM_PREFERENCE_NONE] = "none", -+ [MANAGED_OOM_PREFERENCE_AVOID] = "avoid", -+ [MANAGED_OOM_PREFERENCE_OMIT] = "omit", -+}; -+ -+DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference); -diff --git a/src/core/cgroup.h b/src/core/cgroup.h -index 9fbfabbb7e3..7d9ab4ae6b8 100644 ---- a/src/core/cgroup.h -+++ 
b/src/core/cgroup.h -@@ -94,6 +94,15 @@ struct CGroupBlockIODeviceBandwidth { - uint64_t wbps; - }; - -+typedef enum ManagedOOMPreference { -+ MANAGED_OOM_PREFERENCE_NONE, -+ MANAGED_OOM_PREFERENCE_AVOID, -+ MANAGED_OOM_PREFERENCE_OMIT, -+ -+ _MANAGED_OOM_PREFERENCE_MAX, -+ _MANAGED_OOM_PREFERENCE_INVALID = -1 -+} ManagedOOMPreference; -+ - struct CGroupContext { - bool cpu_accounting; - bool io_accounting; -@@ -164,6 +173,7 @@ struct CGroupContext { - ManagedOOMMode moom_swap; - ManagedOOMMode moom_mem_pressure; - uint32_t moom_mem_pressure_limit_permyriad; -+ ManagedOOMPreference moom_preference; - }; - - /* Used when querying IP accounting data */ -@@ -204,6 +214,8 @@ void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockI - - int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode); - -+void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path); -+ - CGroupMask unit_get_own_mask(Unit *u); - CGroupMask unit_get_delegate_mask(Unit *u); - CGroupMask unit_get_members_mask(Unit *u); -@@ -294,3 +306,6 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action); - - const char* freezer_action_to_string(FreezerAction a) _const_; - FreezerAction freezer_action_from_string(const char *s) _pure_; -+ -+const char* managed_oom_preference_to_string(ManagedOOMPreference a) _const_; -+ManagedOOMPreference managed_oom_preference_from_string(const char *s) _pure_; -diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c -index 6f309feb236..0b2d945283e 100644 ---- a/src/core/dbus-cgroup.c -+++ b/src/core/dbus-cgroup.c -@@ -21,6 +21,7 @@ BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_res - - static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy); - static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_managed_oom_mode, managed_oom_mode, ManagedOOMMode); -+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_managed_oom_preference, 
managed_oom_preference, ManagedOOMPreference); - - static int property_get_cgroup_mask( - sd_bus *bus, -@@ -395,6 +396,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { - SD_BUS_PROPERTY("ManagedOOMSwap", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_swap), 0), - SD_BUS_PROPERTY("ManagedOOMMemoryPressure", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_mem_pressure), 0), - SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimitPermyriad", "u", NULL, offsetof(CGroupContext, moom_mem_pressure_limit_permyriad), 0), -+ SD_BUS_PROPERTY("ManagedOOMPreference", "s", property_get_managed_oom_preference, offsetof(CGroupContext, moom_preference), 0), - SD_BUS_VTABLE_END - }; - -@@ -1720,6 +1722,26 @@ int bus_cgroup_set_property( - return 1; - } - -+ if (streq(name, "ManagedOOMPreference")) { -+ ManagedOOMPreference p; -+ const char *pref; -+ -+ r = sd_bus_message_read(message, "s", &pref); -+ if (r < 0) -+ return r; -+ -+ p = managed_oom_preference_from_string(pref); -+ if (p < 0) -+ return -EINVAL; -+ -+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { -+ c->moom_preference = p; -+ unit_write_settingf(u, flags, name, "ManagedOOMPreference=%s", pref); -+ } -+ -+ return 1; -+ } -+ - if (streq(name, "DisableControllers") || (u->transient && u->load_state == UNIT_STUB)) - return bus_cgroup_set_transient_property(u, c, name, message, flags, error); - -diff --git a/src/core/execute.c b/src/core/execute.c -index b7d78f2197e..0368582884c 100644 ---- a/src/core/execute.c -+++ b/src/core/execute.c -@@ -4701,6 +4701,10 @@ int exec_spawn(Unit *unit, - r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path); - if (r < 0) - return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path); -+ -+ /* Normally we would not propagate the oomd xattrs to children but since we created this -+ * sub-cgroup interally we should do it. 
*/ -+ cgroup_oomd_xattr_apply(unit, subcgroup_path); - } - } - -diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 -index 81f4561a572..dbcbe645934 100644 ---- a/src/core/load-fragment-gperf.gperf.m4 -+++ b/src/core/load-fragment-gperf.gperf.m4 -@@ -230,6 +230,7 @@ $1.IPEgressFilterPath, config_parse_ip_filter_bpf_progs, - $1.ManagedOOMSwap, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_swap) - $1.ManagedOOMMemoryPressure, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_mem_pressure) - $1.ManagedOOMMemoryPressureLimit, config_parse_managed_oom_mem_pressure_limit, 0, offsetof($1, cgroup_context.moom_mem_pressure_limit_permyriad) -+$1.ManagedOOMPreference, config_parse_managed_oom_preference, 0, offsetof($1, cgroup_context.moom_preference) - $1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0' - )m4_dnl - Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description) -diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c -index 06b71aaf157..c6b017556f9 100644 ---- a/src/core/load-fragment.c -+++ b/src/core/load-fragment.c -@@ -133,6 +133,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceR - DEFINE_CONFIG_PARSE_ENUM(config_parse_service_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode, "Failed to parse timeout failure mode"); - DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value"); - DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy"); -+DEFINE_CONFIG_PARSE_ENUM(config_parse_managed_oom_preference, managed_oom_preference, ManagedOOMPreference, "Failed to parse ManagedOOMPreference="); - DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value"); - 
DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint64_t, "Invalid block IO weight"); - DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight"); -diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h -index 6b2175cd2af..e4a5cb79869 100644 ---- a/src/core/load-fragment.h -+++ b/src/core/load-fragment.h -@@ -78,6 +78,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max); - CONFIG_PARSER_PROTOTYPE(config_parse_delegate); - CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mode); - CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mem_pressure_limit); -+CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_preference); - CONFIG_PARSER_PROTOTYPE(config_parse_device_policy); - CONFIG_PARSER_PROTOTYPE(config_parse_device_allow); - CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency); -diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c -index 84f57d94d23..5bbaa07dd1c 100644 ---- a/src/shared/bus-unit-util.c -+++ b/src/shared/bus-unit-util.c -@@ -435,7 +435,8 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons - if (STR_IN_SET(field, "DevicePolicy", - "Slice", - "ManagedOOMSwap", -- "ManagedOOMMemoryPressure")) -+ "ManagedOOMMemoryPressure", -+ "ManagedOOMPreference")) - return bus_append_string(m, field, eq); - - if (STR_IN_SET(field, "ManagedOOMMemoryPressureLimit")) { -diff --git a/src/test/test-tables.c b/src/test/test-tables.c -index 641cadec858..cc93bbbc749 100644 ---- a/src/test/test-tables.c -+++ b/src/test/test-tables.c -@@ -73,6 +73,7 @@ int main(int argc, char **argv) { - test_table(log_target, LOG_TARGET); - test_table(mac_address_policy, MAC_ADDRESS_POLICY); - test_table(managed_oom_mode, MANAGED_OOM_MODE); -+ test_table(managed_oom_preference, MANAGED_OOM_PREFERENCE); - test_table(manager_state, MANAGER_STATE); - test_table(manager_timestamp, MANAGER_TIMESTAMP); - test_table(mount_exec_command, MOUNT_EXEC_COMMAND); -diff --git 
a/test/fuzz/fuzz-unit-file/directives.service b/test/fuzz/fuzz-unit-file/directives.service -index 15fa556dd64..0c7ded6786a 100644 ---- a/test/fuzz/fuzz-unit-file/directives.service -+++ b/test/fuzz/fuzz-unit-file/directives.service -@@ -138,6 +138,10 @@ MakeDirectory= - Mark= - MaxConnections= - MaxConnectionsPerSource= -+ManagedOOMSwap= -+ManagedOOMMemoryPressure= -+ManagedOOMMemoryPressureLimitPercent= -+ManagedOOMPreference= - MemoryAccounting= - MemoryHigh= - MemoryLimit= -diff --git a/test/units/testsuite-56.sh b/test/units/testsuite-56.sh -index 88c185b8869..1884f814689 100755 ---- a/test/units/testsuite-56.sh -+++ b/test/units/testsuite-56.sh -@@ -13,6 +13,8 @@ if [[ "$cgroup_type" != *"cgroup2"* ]] && [[ "$cgroup_type" != *"0x63677270"* ]] - fi - [[ -e /skipped ]] && exit 0 || true - -+rm -rf /etc/systemd/system/testsuite-56-testbloat.service.d -+ - echo "DefaultMemoryPressureDurationSec=5s" >> /etc/systemd/oomd.conf - - systemctl start testsuite-56-testchill.service -@@ -41,10 +43,14 @@ if ! 
systemctl status testsuite-56-testchill.service; then exit 24; fi - if setfattr -n user.xattr_test -v 1 /sys/fs/cgroup/; then - sleep 120 # wait for systemd-oomd kill cool down and elevated memory pressure to come down - -+ mkdir -p /etc/systemd/system/testsuite-56-testbloat.service.d/ -+ echo "[Service]" > /etc/systemd/system/testsuite-56-testbloat.service.d/override.conf -+ echo "ManagedOOMPreference=avoid" >> /etc/systemd/system/testsuite-56-testbloat.service.d/override.conf -+ -+ systemctl daemon-reload - systemctl start testsuite-56-testchill.service - systemctl start testsuite-56-testmunch.service - systemctl start testsuite-56-testbloat.service -- setfattr -n user.oomd_avoid -v 1 /sys/fs/cgroup/testsuite.slice/testsuite-56.slice/testsuite-56-workload.slice/testsuite-56-testbloat.service - - timeout=$(date -ud "2 minutes" +%s) - while [[ $(date -u +%s) -le $timeout ]]; do - -From 32d695eccfeef00023992cdf20bf39f9d0288c67 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Thu, 28 Jan 2021 17:35:17 -0800 -Subject: [PATCH 7/7] man: document ManagedOOMPreference= - ---- - man/org.freedesktop.systemd1.xml | 36 ++++++++++++++++++++++++++++++++ - man/systemd.resource-control.xml | 32 ++++++++++++++++++++++++++++ - 2 files changed, 68 insertions(+) - -diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml -index 7543a617b78..1d419ac495e 100644 ---- a/man/org.freedesktop.systemd1.xml -+++ b/man/org.freedesktop.systemd1.xml -@@ -2450,6 +2450,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; -+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -+ readonly s ManagedOOMPreference = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly as Environment = ['...', ...]; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ 
-2974,6 +2976,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { - - - -+ -+ - - - -@@ -3538,6 +3542,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { - - - -+ -+ - - - -@@ -4204,6 +4210,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; -+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -+ readonly s ManagedOOMPreference = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly as Environment = ['...', ...]; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -4756,6 +4764,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { - - - -+ -+ - - - -@@ -5318,6 +5328,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { - - - -+ -+ - - - -@@ -5897,6 +5909,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; -+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -+ readonly s ManagedOOMPreference = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly as Environment = ['...', ...]; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -6377,6 +6391,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { - - - -+ -+ - - - -@@ -6857,6 +6873,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { - - - -+ -+ - - - -@@ -7557,6 +7575,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; -+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -+ readonly s ManagedOOMPreference = '...'; - 
@org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly as Environment = ['...', ...]; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -8023,6 +8043,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { - - - -+ -+ - - - -@@ -8489,6 +8511,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { - - - -+ -+ - - - -@@ -9042,6 +9066,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; -+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -+ readonly s ManagedOOMPreference = '...'; - }; - interface org.freedesktop.DBus.Peer { ... }; - interface org.freedesktop.DBus.Introspectable { ... }; -@@ -9178,6 +9204,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { - - - -+ -+ - - - -@@ -9318,6 +9346,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { - - - -+ -+ - - - -@@ -9477,6 +9507,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; -+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -+ readonly s ManagedOOMPreference = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly s KillMode = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -9629,6 +9661,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { - - - -+ -+ - - - -@@ -9795,6 +9829,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { - - - -+ -+ - - - -diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml -index be9c35057db..13ff7e9a740 100644 ---- a/man/systemd.resource-control.xml -+++ b/man/systemd.resource-control.xml -@@ -913,6 +913,38 @@ DeviceAllow=/dev/loop-control - - - -+ -+ -+ 
ManagedOOMPreference=none|avoid|omit -+ -+ -+ Allows deprioritizing or omitting this unit's cgroup as a candidate when systemd-oomd -+ needs to act. Requires support for extended attributes (see -+ xattr7) -+ in order to use or . Additionally, systemd-oomd -+ will ignore these extended attributes if the unit's cgroup is not owned by the root user and group. -+ -+ If this property is set to , the service manager will set the -+ "user.oomd_avoid" extended attribute on the unit's cgroup to "1". If systemd-oomd sees -+ this extended attribute on a cgroup set to "1" when choosing between candidates, it will only select the -+ cgroup with "user.oomd_avoid" if there are no other viable candidates. -+ -+ If this property is set to , the service manager will set the "user.oomd_omit" -+ extended attribute on the unit's cgroup to "1". If systemd-oomd sees the this extended -+ attribute on the cgroup set to "1", it will ignore the cgroup as a candidate and will not perform any actions -+ on the cgroup. -+ -+ It is recommended to use and sparingly as it can adversely -+ affect systemd-oomd's kill behavior. Also note that these extended attributes are not -+ applied recursively to cgroups under this unit's cgroup. -+ -+ Defaults to which means no extended attributes will be set and systemd-oomd will -+ sort this unit's cgroup as defined in -+ systemd-oomd.service8 -+ and oomd.conf5 (if this -+ unit's cgroup becomes a candidate). 
-+ -+ - - - diff --git a/18444.patch b/18444.patch deleted file mode 100644 index 7b1b066..0000000 --- a/18444.patch +++ /dev/null @@ -1,987 +0,0 @@ -From a9b1927c15fce3c9945ac249d8e8ddc42028a057 Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Tue, 2 Feb 2021 01:47:08 -0800 -Subject: [PATCH 1/2] parse-util: add permyriad parsing - ---- - src/basic/parse-util.c | 137 ++++++++++++++++++++++++++----------- - src/basic/parse-util.h | 3 + - src/test/test-parse-util.c | 68 ++++++++++++++++++ - 3 files changed, 169 insertions(+), 39 deletions(-) - -diff --git a/src/basic/parse-util.c b/src/basic/parse-util.c -index 5d4dafe3a5..a0fb2c9d17 100644 ---- a/src/basic/parse-util.c -+++ b/src/basic/parse-util.c -@@ -671,11 +671,11 @@ int parse_fractional_part_u(const char **p, size_t digits, unsigned *res) { - return 0; - } - --int parse_percent_unbounded(const char *p) { -+static int parse_parts_value_whole(const char *p, const char *symbol) { - const char *pc, *n; - int r, v; - -- pc = endswith(p, "%"); -+ pc = endswith(p, symbol); - if (!pc) - return -EINVAL; - -@@ -689,6 +689,74 @@ int parse_percent_unbounded(const char *p) { - return v; - } - -+static int parse_parts_value_with_tenths_place(const char *p, const char *symbol) { -+ const char *pc, *dot, *n; -+ int r, q, v; -+ -+ pc = endswith(p, symbol); -+ if (!pc) -+ return -EINVAL; -+ -+ dot = memchr(p, '.', pc - p); -+ if (dot) { -+ if (dot + 2 != pc) -+ return -EINVAL; -+ if (dot[1] < '0' || dot[1] > '9') -+ return -EINVAL; -+ q = dot[1] - '0'; -+ n = strndupa(p, dot - p); -+ } else { -+ q = 0; -+ n = strndupa(p, pc - p); -+ } -+ r = safe_atoi(n, &v); -+ if (r < 0) -+ return r; -+ if (v < 0) -+ return -ERANGE; -+ if (v > (INT_MAX - q) / 10) -+ return -ERANGE; -+ -+ v = v * 10 + q; -+ return v; -+} -+ -+static int parse_parts_value_with_hundredths_place(const char *p, const char *symbol) { -+ const char *pc, *dot, *n; -+ int r, q, v; -+ -+ pc = endswith(p, symbol); -+ if (!pc) -+ return -EINVAL; -+ -+ dot = memchr(p, 
'.', pc - p); -+ if (dot) { -+ if (dot + 3 != pc) -+ return -EINVAL; -+ if (dot[1] < '0' || dot[1] > '9' || dot[2] < '0' || dot[2] > '9') -+ return -EINVAL; -+ q = (dot[1] - '0') * 10 + (dot[2] - '0'); -+ n = strndupa(p, dot - p); -+ } else { -+ q = 0; -+ n = strndupa(p, pc - p); -+ } -+ r = safe_atoi(n, &v); -+ if (r < 0) -+ return r; -+ if (v < 0) -+ return -ERANGE; -+ if (v > (INT_MAX - q) / 100) -+ return -ERANGE; -+ -+ v = v * 100 + q; -+ return v; -+} -+ -+int parse_percent_unbounded(const char *p) { -+ return parse_parts_value_whole(p, "%"); -+} -+ - int parse_percent(const char *p) { - int v; - -@@ -700,46 +768,13 @@ int parse_percent(const char *p) { - } - - int parse_permille_unbounded(const char *p) { -- const char *pc, *pm, *dot, *n; -- int r, q, v; -+ const char *pm; - - pm = endswith(p, "‰"); -- if (pm) { -- n = strndupa(p, pm - p); -- r = safe_atoi(n, &v); -- if (r < 0) -- return r; -- if (v < 0) -- return -ERANGE; -- } else { -- pc = endswith(p, "%"); -- if (!pc) -- return -EINVAL; -- -- dot = memchr(p, '.', pc - p); -- if (dot) { -- if (dot + 2 != pc) -- return -EINVAL; -- if (dot[1] < '0' || dot[1] > '9') -- return -EINVAL; -- q = dot[1] - '0'; -- n = strndupa(p, dot - p); -- } else { -- q = 0; -- n = strndupa(p, pc - p); -- } -- r = safe_atoi(n, &v); -- if (r < 0) -- return r; -- if (v < 0) -- return -ERANGE; -- if (v > (INT_MAX - q) / 10) -- return -ERANGE; -+ if (pm) -+ return parse_parts_value_whole(p, "‰"); - -- v = v * 10 + q; -- } -- -- return v; -+ return parse_parts_value_with_tenths_place(p, "%"); - } - - int parse_permille(const char *p) { -@@ -752,6 +787,30 @@ int parse_permille(const char *p) { - return v; - } - -+int parse_permyriad_unbounded(const char *p) { -+ const char *pm; -+ -+ pm = endswith(p, "‱"); -+ if (pm) -+ return parse_parts_value_whole(p, "‱"); -+ -+ pm = endswith(p, "‰"); -+ if (pm) -+ return parse_parts_value_with_tenths_place(p, "‰"); -+ -+ return parse_parts_value_with_hundredths_place(p, "%"); -+} -+ -+int 
parse_permyriad(const char *p) { -+ int v; -+ -+ v = parse_permyriad_unbounded(p); -+ if (v > 10000) -+ return -ERANGE; -+ -+ return v; -+} -+ - int parse_nice(const char *p, int *ret) { - int n, r; - -diff --git a/src/basic/parse-util.h b/src/basic/parse-util.h -index 81478ed059..3e29291f26 100644 ---- a/src/basic/parse-util.h -+++ b/src/basic/parse-util.h -@@ -136,6 +136,9 @@ int parse_percent(const char *p); - int parse_permille_unbounded(const char *p); - int parse_permille(const char *p); - -+int parse_permyriad_unbounded(const char *p); -+int parse_permyriad(const char *p); -+ - int parse_nice(const char *p, int *ret); - - int parse_ip_port(const char *s, uint16_t *ret); -diff --git a/src/test/test-parse-util.c b/src/test/test-parse-util.c -index 1c969091ef..6e23efe134 100644 ---- a/src/test/test-parse-util.c -+++ b/src/test/test-parse-util.c -@@ -790,6 +790,72 @@ static void test_parse_permille_unbounded(void) { - assert_se(parse_permille_unbounded("429496729.6%") == -ERANGE); - } - -+static void test_parse_permyriad(void) { -+ assert_se(parse_permyriad("") == -EINVAL); -+ assert_se(parse_permyriad("foo") == -EINVAL); -+ assert_se(parse_permyriad("0") == -EINVAL); -+ assert_se(parse_permyriad("50") == -EINVAL); -+ assert_se(parse_permyriad("100") == -EINVAL); -+ assert_se(parse_permyriad("-1") == -EINVAL); -+ -+ assert_se(parse_permyriad("0‱") == 0); -+ assert_se(parse_permyriad("555‱") == 555); -+ assert_se(parse_permyriad("1000‱") == 1000); -+ assert_se(parse_permyriad("-7‱") == -ERANGE); -+ assert_se(parse_permyriad("10007‱") == -ERANGE); -+ assert_se(parse_permyriad("‱") == -EINVAL); -+ assert_se(parse_permyriad("‱‱") == -EINVAL); -+ assert_se(parse_permyriad("‱1") == -EINVAL); -+ assert_se(parse_permyriad("1‱‱") == -EINVAL); -+ assert_se(parse_permyriad("3.2‱") == -EINVAL); -+ -+ assert_se(parse_permyriad("0‰") == 0); -+ assert_se(parse_permyriad("555.5‰") == 5555); -+ assert_se(parse_permyriad("1000.0‰") == 10000); -+ assert_se(parse_permyriad("-7‰") 
== -ERANGE); -+ assert_se(parse_permyriad("1007‰") == -ERANGE); -+ assert_se(parse_permyriad("‰") == -EINVAL); -+ assert_se(parse_permyriad("‰‰") == -EINVAL); -+ assert_se(parse_permyriad("‰1") == -EINVAL); -+ assert_se(parse_permyriad("1‰‰") == -EINVAL); -+ assert_se(parse_permyriad("3.22‰") == -EINVAL); -+ -+ assert_se(parse_permyriad("0%") == 0); -+ assert_se(parse_permyriad("55%") == 5500); -+ assert_se(parse_permyriad("55.53%") == 5553); -+ assert_se(parse_permyriad("100%") == 10000); -+ assert_se(parse_permyriad("-7%") == -ERANGE); -+ assert_se(parse_permyriad("107%") == -ERANGE); -+ assert_se(parse_permyriad("%") == -EINVAL); -+ assert_se(parse_permyriad("%%") == -EINVAL); -+ assert_se(parse_permyriad("%1") == -EINVAL); -+ assert_se(parse_permyriad("1%%") == -EINVAL); -+ assert_se(parse_permyriad("3.212%") == -EINVAL); -+} -+ -+static void test_parse_permyriad_unbounded(void) { -+ assert_se(parse_permyriad_unbounded("1001‱") == 1001); -+ assert_se(parse_permyriad_unbounded("4000‱") == 4000); -+ assert_se(parse_permyriad_unbounded("2147483647‱") == 2147483647); -+ assert_se(parse_permyriad_unbounded("2147483648‱") == -ERANGE); -+ assert_se(parse_permyriad_unbounded("4294967295‱") == -ERANGE); -+ assert_se(parse_permyriad_unbounded("4294967296‱") == -ERANGE); -+ -+ assert_se(parse_permyriad_unbounded("101‰") == 1010); -+ assert_se(parse_permyriad_unbounded("400‰") == 4000); -+ assert_se(parse_permyriad_unbounded("214748364.7‰") == 2147483647); -+ assert_se(parse_permyriad_unbounded("214748364.8‰") == -ERANGE); -+ assert_se(parse_permyriad_unbounded("429496729.5‰") == -ERANGE); -+ assert_se(parse_permyriad_unbounded("429496729.6‰") == -ERANGE); -+ -+ assert_se(parse_permyriad_unbounded("99%") == 9900); -+ assert_se(parse_permyriad_unbounded("40%") == 4000); -+ assert_se(parse_permyriad_unbounded("21474836.47%") == 2147483647); -+ assert_se(parse_permyriad_unbounded("21474836.48%") == -ERANGE); -+ assert_se(parse_permyriad_unbounded("42949672.95%") == -ERANGE); 
-+ assert_se(parse_permyriad_unbounded("42949672.96%") == -ERANGE); -+} -+ - static void test_parse_nice(void) { - int n; - -@@ -987,6 +1053,8 @@ int main(int argc, char *argv[]) { - test_parse_percent_unbounded(); - test_parse_permille(); - test_parse_permille_unbounded(); -+ test_parse_permyriad(); -+ test_parse_permyriad_unbounded(); - test_parse_nice(); - test_parse_dev(); - test_parse_errno(); --- -2.29.2 - - -From 5fdc5d3384f81888704a0a19db3cb33bce2d8bdb Mon Sep 17 00:00:00 2001 -From: Anita Zhang -Date: Tue, 2 Feb 2021 14:16:03 -0800 -Subject: [PATCH 2/2] oom: rework *MemoryPressureLimit= properties to have - 1/10000 precision - -Requested in -https://github.com/systemd/systemd/pull/15206#discussion_r505506657, -preserve the full granularity for memory pressure limits (permyriad) -instead of capping out at percent. ---- - docs/TRANSIENT-SETTINGS.md | 2 +- - man/oomd.conf.xml | 6 ++--- - man/org.freedesktop.systemd1.xml | 36 +++++++++++++------------- - man/systemd.resource-control.xml | 2 +- - src/core/cgroup.c | 4 +-- - src/core/cgroup.h | 2 +- - src/core/core-varlink.c | 2 +- - src/core/dbus-cgroup.c | 16 +++++++++--- - src/core/dbus-util.c | 29 --------------------- - src/core/dbus-util.h | 1 - - src/core/load-fragment-gperf.gperf.m4 | 2 +- - src/core/load-fragment.c | 6 ++--- - src/oom/oomd-manager.c | 24 +++++++++++------ - src/oom/oomd-manager.h | 4 +-- - src/oom/oomd-util.c | 4 +-- - src/oom/oomd.c | 10 +++---- - src/oom/oomd.conf | 2 +- - src/shared/bus-get-properties.c | 17 ------------ - src/shared/bus-get-properties.h | 1 - - src/shared/bus-unit-util.c | 19 ++++++++++++-- - src/shared/conf-parser.c | 1 + - src/shared/conf-parser.h | 1 + - test/units/testsuite-56-workload.slice | 2 +- - test/units/testsuite-56.sh | 2 +- - 24 files changed, 91 insertions(+), 104 deletions(-) - -diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md -index 50b9a42fa1..5037060254 100644 ---- a/docs/TRANSIENT-SETTINGS.md -+++ b/docs/TRANSIENT-SETTINGS.md 
-@@ -272,7 +272,7 @@ All cgroup/resource control settings are available for transient units - ✓ IPAddressDeny= - ✓ ManagedOOMSwap= - ✓ ManagedOOMMemoryPressure= --✓ ManagedOOMMemoryPressureLimitPercent= -+✓ ManagedOOMMemoryPressureLimit= - ``` - - ## Process Killing Settings -diff --git a/man/oomd.conf.xml b/man/oomd.conf.xml -index bb5da87c54..2a12be8cad 100644 ---- a/man/oomd.conf.xml -+++ b/man/oomd.conf.xml -@@ -59,10 +59,10 @@ - - - -- DefaultMemoryPressureLimitPercent= -+ DefaultMemoryPressureLimit= - - Sets the limit for memory pressure on the unit's cgroup before systemd-oomd -- will take action. A unit can override this value with ManagedOOMMemoryPressureLimitPercent=. -+ will take action. A unit can override this value with ManagedOOMMemoryPressureLimit=. - The memory pressure for this property represents the fraction of time in a 10 second window in which all tasks - in the cgroup were delayed. For each monitored cgroup, if the memory pressure on that cgroup exceeds the - limit set for longer than the duration set by DefaultMemoryPressureDurationSec=, -@@ -78,7 +78,7 @@ - - Sets the amount of time a unit's cgroup needs to have exceeded memory pressure limits before - systemd-oomd will take action. Memory pressure limits are defined by -- DefaultMemoryPressureLimitPercent= and ManagedOOMMemoryPressureLimitPercent=. -+ DefaultMemoryPressureLimit= and ManagedOOMMemoryPressureLimit=. - Defaults to 30 seconds when this property is unset or set to 0. 
- - -diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml -index 78fd0b3378..7809b65062 100644 ---- a/man/org.freedesktop.systemd1.xml -+++ b/man/org.freedesktop.systemd1.xml -@@ -2419,7 +2419,7 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -- readonly s ManagedOOMMemoryPressureLimitPercent = '...'; -+ readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly as Environment = ['...', ...]; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -2938,7 +2938,7 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { - - - -- -+ - - - -@@ -3494,7 +3494,7 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { - - - -- -+ - - - -@@ -4146,7 +4146,7 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -- readonly s ManagedOOMMemoryPressureLimitPercent = '...'; -+ readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly as Environment = ['...', ...]; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -4693,7 +4693,7 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { - - - -- -+ - - - -@@ -5251,7 +5251,7 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { - - - -- -+ - - - -@@ -5827,7 +5827,7 @@ node /org/freedesktop/systemd1/unit/home_2emount { - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -- readonly s ManagedOOMMemoryPressureLimitPercent = '...'; -+ 
readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly as Environment = ['...', ...]; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -6302,7 +6302,7 @@ node /org/freedesktop/systemd1/unit/home_2emount { - - - -- -+ - - - -@@ -6778,7 +6778,7 @@ node /org/freedesktop/systemd1/unit/home_2emount { - - - -- -+ - - - -@@ -7475,7 +7475,7 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -- readonly s ManagedOOMMemoryPressureLimitPercent = '...'; -+ readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly as Environment = ['...', ...]; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -7936,7 +7936,7 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { - - - -- -+ - - - -@@ -8398,7 +8398,7 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { - - - -- -+ - - - -@@ -8948,7 +8948,7 @@ node /org/freedesktop/systemd1/unit/system_2eslice { - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -- readonly s ManagedOOMMemoryPressureLimitPercent = '...'; -+ readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; - }; - interface org.freedesktop.DBus.Peer { ... }; - interface org.freedesktop.DBus.Introspectable { ... 
}; -@@ -9083,7 +9083,7 @@ node /org/freedesktop/systemd1/unit/system_2eslice { - - - -- -+ - - - -@@ -9223,7 +9223,7 @@ node /org/freedesktop/systemd1/unit/system_2eslice { - - - -- -+ - - - -@@ -9383,7 +9383,7 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") - readonly s ManagedOOMMemoryPressure = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("false") -- readonly s ManagedOOMMemoryPressureLimitPercent = '...'; -+ readonly u ManagedOOMMemoryPressureLimitPermyriad = ...; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") - readonly s KillMode = '...'; - @org.freedesktop.DBus.Property.EmitsChangedSignal("const") -@@ -9534,7 +9534,7 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { - - - -- -+ - - - -@@ -9700,7 +9700,7 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { - - - -- -+ - - - -diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml -index 26dedda3fd..4381c4e1b7 100644 ---- a/man/systemd.resource-control.xml -+++ b/man/systemd.resource-control.xml -@@ -901,7 +901,7 @@ DeviceAllow=/dev/loop-control - - - -- ManagedOOMMemoryPressureLimitPercent= -+ ManagedOOMMemoryPressureLimit= - - - Overrides the default memory pressure limit set by -diff --git a/src/core/cgroup.c b/src/core/cgroup.c -index 7dc6c20bb7..e2ed0e546e 100644 ---- a/src/core/cgroup.c -+++ b/src/core/cgroup.c -@@ -417,7 +417,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) { - "%sDelegate: %s\n" - "%sManagedOOMSwap: %s\n" - "%sManagedOOMMemoryPressure: %s\n" -- "%sManagedOOMMemoryPressureLimitPercent: %d%%\n", -+ "%sManagedOOMMemoryPressureLimit: %" PRIu32 ".%02" PRIu32 "%%\n", - prefix, yes_no(c->cpu_accounting), - prefix, yes_no(c->io_accounting), - prefix, yes_no(c->blockio_accounting), -@@ -450,7 +450,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) { - prefix, yes_no(c->delegate), - prefix, 
managed_oom_mode_to_string(c->moom_swap), - prefix, managed_oom_mode_to_string(c->moom_mem_pressure), -- prefix, c->moom_mem_pressure_limit); -+ prefix, c->moom_mem_pressure_limit_permyriad / 100, c->moom_mem_pressure_limit_permyriad % 100); - - if (c->delegate) { - _cleanup_free_ char *t = NULL; -diff --git a/src/core/cgroup.h b/src/core/cgroup.h -index 66f3a63b82..9fbfabbb7e 100644 ---- a/src/core/cgroup.h -+++ b/src/core/cgroup.h -@@ -163,7 +163,7 @@ struct CGroupContext { - /* Settings for systemd-oomd */ - ManagedOOMMode moom_swap; - ManagedOOMMode moom_mem_pressure; -- int moom_mem_pressure_limit; -+ uint32_t moom_mem_pressure_limit_permyriad; - }; - - /* Used when querying IP accounting data */ -diff --git a/src/core/core-varlink.c b/src/core/core-varlink.c -index dd6c11ab4d..17fb9bc83f 100644 ---- a/src/core/core-varlink.c -+++ b/src/core/core-varlink.c -@@ -83,7 +83,7 @@ static int build_managed_oom_json_array_element(Unit *u, const char *property, J - JSON_BUILD_PAIR("mode", JSON_BUILD_STRING(mode)), - JSON_BUILD_PAIR("path", JSON_BUILD_STRING(u->cgroup_path)), - JSON_BUILD_PAIR("property", JSON_BUILD_STRING(property)), -- JSON_BUILD_PAIR_CONDITION(use_limit, "limit", JSON_BUILD_UNSIGNED(c->moom_mem_pressure_limit)))); -+ JSON_BUILD_PAIR_CONDITION(use_limit, "limit", JSON_BUILD_UNSIGNED(c->moom_mem_pressure_limit_permyriad)))); - } - - int manager_varlink_send_managed_oom_update(Unit *u) { -diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c -index 37c581fb22..df35ec114d 100644 ---- a/src/core/dbus-cgroup.c -+++ b/src/core/dbus-cgroup.c -@@ -395,7 +395,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { - SD_BUS_PROPERTY("DisableControllers", "as", property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0), - SD_BUS_PROPERTY("ManagedOOMSwap", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_swap), 0), - SD_BUS_PROPERTY("ManagedOOMMemoryPressure", "s", property_get_managed_oom_mode, offsetof(CGroupContext, 
moom_mem_pressure), 0), -- SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimitPercent", "s", bus_property_get_percent, offsetof(CGroupContext, moom_mem_pressure_limit), 0), -+ SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimitPermyriad", "u", NULL, offsetof(CGroupContext, moom_mem_pressure_limit_permyriad), 0), - SD_BUS_VTABLE_END - }; - -@@ -1697,14 +1697,24 @@ int bus_cgroup_set_property( - return 1; - } - -- if (streq(name, "ManagedOOMMemoryPressureLimitPercent")) { -+ if (streq(name, "ManagedOOMMemoryPressureLimitPermyriad")) { -+ uint32_t v; -+ - if (!UNIT_VTABLE(u)->can_set_managed_oom) - return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot set %s for this unit type", name); - -- r = bus_set_transient_percent(u, name, &c->moom_mem_pressure_limit, message, flags, error); -+ r = sd_bus_message_read(message, "u", &v); - if (r < 0) - return r; - -+ if (v > 10000) -+ return -ERANGE; -+ -+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { -+ c->moom_mem_pressure_limit_permyriad = v; -+ unit_write_settingf(u, flags, name, "ManagedOOMMemoryPressureLimit=%" PRIu32 ".%02" PRIu32 "%%", v / 100, v % 100); -+ } -+ - if (c->moom_mem_pressure == MANAGED_OOM_KILL) - (void) manager_varlink_send_managed_oom_update(u); - -diff --git a/src/core/dbus-util.c b/src/core/dbus-util.c -index d6223db305..eb03d30cf7 100644 ---- a/src/core/dbus-util.c -+++ b/src/core/dbus-util.c -@@ -91,35 +91,6 @@ int bus_set_transient_bool( - return 1; - } - --int bus_set_transient_percent( -- Unit *u, -- const char *name, -- int *p, -- sd_bus_message *message, -- UnitWriteFlags flags, -- sd_bus_error *error) { -- -- const char *v; -- int r; -- -- assert(p); -- -- r = sd_bus_message_read(message, "s", &v); -- if (r < 0) -- return r; -- -- r = parse_percent(v); -- if (r < 0) -- return r; -- -- if (!UNIT_WRITE_FLAGS_NOOP(flags)) { -- *p = r; -- unit_write_settingf(u, flags, name, "%s=%d%%", name, r); -- } -- -- return 1; --} -- - int bus_set_transient_usec_internal( - Unit *u, - const char *name, -diff --git 
a/src/core/dbus-util.h b/src/core/dbus-util.h -index 4e7c68e843..b68ec38ada 100644 ---- a/src/core/dbus-util.h -+++ b/src/core/dbus-util.h -@@ -240,7 +240,6 @@ int bus_set_transient_user_relaxed(Unit *u, const char *name, char **p, sd_bus_m - int bus_set_transient_path(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); - int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); - int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); --int bus_set_transient_percent(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); - int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); - static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) { - return bus_set_transient_usec_internal(u, name, p, false, message, flags, error); -diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 -index 946862c398..db2a4e28a8 100644 ---- a/src/core/load-fragment-gperf.gperf.m4 -+++ b/src/core/load-fragment-gperf.gperf.m4 -@@ -226,7 +226,7 @@ $1.IPIngressFilterPath, config_parse_ip_filter_bpf_progs, - $1.IPEgressFilterPath, config_parse_ip_filter_bpf_progs, 0, offsetof($1, cgroup_context.ip_filters_egress) - $1.ManagedOOMSwap, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_swap) - $1.ManagedOOMMemoryPressure, config_parse_managed_oom_mode, 0, offsetof($1, cgroup_context.moom_mem_pressure) --$1.ManagedOOMMemoryPressureLimitPercent, config_parse_managed_oom_mem_pressure_limit, 0, offsetof($1, cgroup_context.moom_mem_pressure_limit) -+$1.ManagedOOMMemoryPressureLimit, 
config_parse_managed_oom_mem_pressure_limit, 0, offsetof($1, cgroup_context.moom_mem_pressure_limit_permyriad) - $1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0' - )m4_dnl - Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description) -diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c -index 4964249bf2..e0e9920e06 100644 ---- a/src/core/load-fragment.c -+++ b/src/core/load-fragment.c -@@ -3859,7 +3859,7 @@ int config_parse_managed_oom_mem_pressure_limit( - const char *rvalue, - void *data, - void *userdata) { -- int *limit = data; -+ uint32_t *limit = data; - UnitType t; - int r; - -@@ -3874,9 +3874,9 @@ int config_parse_managed_oom_mem_pressure_limit( - return 0; - } - -- r = parse_percent(rvalue); -+ r = parse_permyriad(rvalue); - if (r < 0) { -- log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse limit percent value, ignoring: %s", rvalue); -+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse memory pressure limit value, ignoring: %s", rvalue); - return 0; - } - -diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c -index 3efa629002..338935b3ec 100644 ---- a/src/oom/oomd-manager.c -+++ b/src/oom/oomd-manager.c -@@ -100,10 +100,10 @@ static int process_managed_oom_reply( - limit = m->default_mem_pressure_limit; - - if (streq(reply.property, "ManagedOOMMemoryPressure")) { -- if (reply.limit > 100) -+ if (reply.limit > 10000) - continue; - else if (reply.limit != 0) { -- ret = store_loadavg_fixed_point((unsigned long) reply.limit, 0, &limit); -+ ret = store_loadavg_fixed_point((unsigned long) reply.limit / 100, (unsigned long) reply.limit % 100, &limit); - if (ret < 0) - continue; - } -@@ -478,8 +478,8 @@ static int manager_connect_bus(Manager *m) { - return 0; - } - --int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit, usec_t mem_pressure_usec) { -- unsigned long l; -+int manager_start(Manager *m, bool dry_run, int swap_used_limit, int 
mem_pressure_limit_permyriad, usec_t mem_pressure_usec) { -+ unsigned long l, f; - int r; - - assert(m); -@@ -489,8 +489,16 @@ int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressur - m->swap_used_limit = swap_used_limit != -1 ? swap_used_limit : DEFAULT_SWAP_USED_LIMIT; - assert(m->swap_used_limit <= 100); - -- l = mem_pressure_limit != -1 ? mem_pressure_limit : DEFAULT_MEM_PRESSURE_LIMIT; -- r = store_loadavg_fixed_point(l, 0, &m->default_mem_pressure_limit); -+ if (mem_pressure_limit_permyriad != -1) { -+ assert(mem_pressure_limit_permyriad <= 10000); -+ -+ l = mem_pressure_limit_permyriad / 100; -+ f = mem_pressure_limit_permyriad % 100; -+ } else { -+ l = DEFAULT_MEM_PRESSURE_LIMIT_PERCENT; -+ f = 0; -+ } -+ r = store_loadavg_fixed_point(l, f, &m->default_mem_pressure_limit); - if (r < 0) - return r; - -@@ -530,12 +538,12 @@ int manager_get_dump_string(Manager *m, char **ret) { - fprintf(f, - "Dry Run: %s\n" - "Swap Used Limit: %u%%\n" -- "Default Memory Pressure Limit: %lu%%\n" -+ "Default Memory Pressure Limit: %lu.%02lu%%\n" - "Default Memory Pressure Duration: %s\n" - "System Context:\n", - yes_no(m->dry_run), - m->swap_used_limit, -- LOAD_INT(m->default_mem_pressure_limit), -+ LOAD_INT(m->default_mem_pressure_limit), LOAD_FRAC(m->default_mem_pressure_limit), - format_timespan(buf, sizeof(buf), m->default_mem_pressure_duration_usec, USEC_PER_SEC)); - oomd_dump_system_context(&m->system_context, f, "\t"); - -diff --git a/src/oom/oomd-manager.h b/src/oom/oomd-manager.h -index ee17abced2..521665e0a8 100644 ---- a/src/oom/oomd-manager.h -+++ b/src/oom/oomd-manager.h -@@ -17,7 +17,7 @@ - * Generally 60 or higher might be acceptable for something like system.slice with no memory.high set; processes in - * system.slice are assumed to be less latency sensitive. 
*/ - #define DEFAULT_MEM_PRESSURE_DURATION_USEC (30 * USEC_PER_SEC) --#define DEFAULT_MEM_PRESSURE_LIMIT 60 -+#define DEFAULT_MEM_PRESSURE_LIMIT_PERCENT 60 - #define DEFAULT_SWAP_USED_LIMIT 90 - - #define RECLAIM_DURATION_USEC (30 * USEC_PER_SEC) -@@ -56,7 +56,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free); - - int manager_new(Manager **ret); - --int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit, usec_t mem_pressure_usec); -+int manager_start(Manager *m, bool dry_run, int swap_used_limit, int mem_pressure_limit_permyriad, usec_t mem_pressure_usec); - - int manager_get_dump_string(Manager *m, char **ret); - -diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c -index cec656f6fa..fcccddb92e 100644 ---- a/src/oom/oomd-util.c -+++ b/src/oom/oomd-util.c -@@ -415,11 +415,11 @@ void oomd_dump_memory_pressure_cgroup_context(const OomdCGroupContext *ctx, FILE - - fprintf(f, - "%sPath: %s\n" -- "%s\tMemory Pressure Limit: %lu%%\n" -+ "%s\tMemory Pressure Limit: %lu.%02lu%%\n" - "%s\tPressure: Avg10: %lu.%02lu Avg60: %lu.%02lu Avg300: %lu.%02lu Total: %s\n" - "%s\tCurrent Memory Usage: %s\n", - strempty(prefix), ctx->path, -- strempty(prefix), LOAD_INT(ctx->mem_pressure_limit), -+ strempty(prefix), LOAD_INT(ctx->mem_pressure_limit), LOAD_FRAC(ctx->mem_pressure_limit), - strempty(prefix), - LOAD_INT(ctx->memory_pressure.avg10), LOAD_FRAC(ctx->memory_pressure.avg10), - LOAD_INT(ctx->memory_pressure.avg60), LOAD_FRAC(ctx->memory_pressure.avg60), -diff --git a/src/oom/oomd.c b/src/oom/oomd.c -index 1fbcf41492..811d211b58 100644 ---- a/src/oom/oomd.c -+++ b/src/oom/oomd.c -@@ -18,14 +18,14 @@ - - static bool arg_dry_run = false; - static int arg_swap_used_limit = -1; --static int arg_mem_pressure_limit = -1; -+static int arg_mem_pressure_limit_permyriad = -1; - static usec_t arg_mem_pressure_usec = 0; - - static int parse_config(void) { - static const ConfigTableItem items[] = { -- { "OOM", "SwapUsedLimitPercent", 
config_parse_percent, 0, &arg_swap_used_limit }, -- { "OOM", "DefaultMemoryPressureLimitPercent", config_parse_percent, 0, &arg_mem_pressure_limit }, -- { "OOM", "DefaultMemoryPressureDurationSec", config_parse_sec, 0, &arg_mem_pressure_usec }, -+ { "OOM", "SwapUsedLimitPercent", config_parse_percent, 0, &arg_swap_used_limit }, -+ { "OOM", "DefaultMemoryPressureLimit", config_parse_permyriad, 0, &arg_mem_pressure_limit_permyriad }, -+ { "OOM", "DefaultMemoryPressureDurationSec", config_parse_sec, 0, &arg_mem_pressure_usec }, - {} - }; - -@@ -160,7 +160,7 @@ static int run(int argc, char *argv[]) { - if (r < 0) - return log_error_errno(r, "Failed to create manager: %m"); - -- r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit, arg_mem_pressure_usec); -+ r = manager_start(m, arg_dry_run, arg_swap_used_limit, arg_mem_pressure_limit_permyriad, arg_mem_pressure_usec); - if (r < 0) - return log_error_errno(r, "Failed to start up daemon: %m"); - -diff --git a/src/oom/oomd.conf b/src/oom/oomd.conf -index 766cb1717f..bd6a9391c6 100644 ---- a/src/oom/oomd.conf -+++ b/src/oom/oomd.conf -@@ -13,5 +13,5 @@ - - [OOM] - #SwapUsedLimitPercent=90% --#DefaultMemoryPressureLimitPercent=60% -+#DefaultMemoryPressureLimit=60% - #DefaultMemoryPressureDurationSec=30s -diff --git a/src/shared/bus-get-properties.c b/src/shared/bus-get-properties.c -index 32f68d5e6a..a5ce7ef17f 100644 ---- a/src/shared/bus-get-properties.c -+++ b/src/shared/bus-get-properties.c -@@ -55,23 +55,6 @@ int bus_property_get_id128( - return sd_bus_message_append_array(reply, 'y', id->bytes, 16); - } - --int bus_property_get_percent( -- sd_bus *bus, -- const char *path, -- const char *interface, -- const char *property, -- sd_bus_message *reply, -- void *userdata, -- sd_bus_error *error) { -- -- char pstr[DECIMAL_STR_MAX(int) + 2]; -- int p = *(int*) userdata; -- -- xsprintf(pstr, "%d%%", p); -- -- return sd_bus_message_append_basic(reply, 's', pstr); --} -- - #if __SIZEOF_SIZE_T__ != 8 - 
int bus_property_get_size( - sd_bus *bus, -diff --git a/src/shared/bus-get-properties.h b/src/shared/bus-get-properties.h -index 9832c0d067..26f3e8588c 100644 ---- a/src/shared/bus-get-properties.h -+++ b/src/shared/bus-get-properties.h -@@ -8,7 +8,6 @@ - int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); - int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error); - int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); --int bus_property_get_percent(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); - - #define bus_property_get_usec ((sd_bus_property_get_t) NULL) - #define bus_property_set_usec ((sd_bus_property_set_t) NULL) -diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c -index 2bab2299fb..f96059c699 100644 ---- a/src/shared/bus-unit-util.c -+++ b/src/shared/bus-unit-util.c -@@ -435,10 +435,25 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons - if (STR_IN_SET(field, "DevicePolicy", - "Slice", - "ManagedOOMSwap", -- "ManagedOOMMemoryPressure", -- "ManagedOOMMemoryPressureLimitPercent")) -+ "ManagedOOMMemoryPressure")) - return bus_append_string(m, field, eq); - -+ if (STR_IN_SET(field, "ManagedOOMMemoryPressureLimit")) { -+ char *n; -+ -+ r = parse_permyriad(eq); -+ if (r < 0) -+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq); -+ -+ n = strjoina(field, "Permyriad"); -+ -+ r = sd_bus_message_append(m, "(sv)", n, "u", (uint32_t) r); -+ if (r < 0) -+ return bus_log_create_error(r); -+ -+ return 1; -+ } -+ - if (STR_IN_SET(field, "CPUAccounting", - "MemoryAccounting", - "IOAccounting", 
-diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c -index 35d301d9db..c8c253d603 100644 ---- a/src/shared/conf-parser.c -+++ b/src/shared/conf-parser.c -@@ -1245,3 +1245,4 @@ int config_parse_vlanprotocol(const char* unit, - } - - DEFINE_CONFIG_PARSE(config_parse_percent, parse_percent, "Failed to parse percent value"); -+DEFINE_CONFIG_PARSE(config_parse_permyriad, parse_permyriad, "Failed to parse permyriad value"); -diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h -index f115cb23af..988d81e43a 100644 ---- a/src/shared/conf-parser.h -+++ b/src/shared/conf-parser.h -@@ -148,6 +148,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_mtu); - CONFIG_PARSER_PROTOTYPE(config_parse_rlimit); - CONFIG_PARSER_PROTOTYPE(config_parse_vlanprotocol); - CONFIG_PARSER_PROTOTYPE(config_parse_percent); -+CONFIG_PARSER_PROTOTYPE(config_parse_permyriad); - - typedef enum Disabled { - DISABLED_CONFIGURATION, -diff --git a/test/units/testsuite-56-workload.slice b/test/units/testsuite-56-workload.slice -index 45b04914c6..8c32b28094 100644 ---- a/test/units/testsuite-56-workload.slice -+++ b/test/units/testsuite-56-workload.slice -@@ -7,4 +7,4 @@ MemoryAccounting=true - IOAccounting=true - TasksAccounting=true - ManagedOOMMemoryPressure=kill --ManagedOOMMemoryPressureLimitPercent=1% -+ManagedOOMMemoryPressureLimit=1% -diff --git a/test/units/testsuite-56.sh b/test/units/testsuite-56.sh -index 4dc9d8c7a8..8b01fe37ed 100755 ---- a/test/units/testsuite-56.sh -+++ b/test/units/testsuite-56.sh -@@ -20,7 +20,7 @@ systemctl start testsuite-56-testbloat.service - - # Verify systemd-oomd is monitoring the expected units - oomctl | grep "/testsuite-56-workload.slice" --oomctl | grep "1%" -+oomctl | grep "1.00%" - oomctl | grep "Default Memory Pressure Duration: 5s" - - # systemd-oomd watches for elevated pressure for 30 seconds before acting. 
--- -2.29.2 - diff --git a/95ca39f04efa278ac93881e6e364a6ae520b03e7.patch b/95ca39f04efa278ac93881e6e364a6ae520b03e7.patch deleted file mode 100644 index 478902a..0000000 --- a/95ca39f04efa278ac93881e6e364a6ae520b03e7.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 95ca39f04efa278ac93881e6e364a6ae520b03e7 Mon Sep 17 00:00:00 2001 -From: Yu Watanabe -Date: Fri, 27 Nov 2020 08:29:20 +0900 -Subject: [PATCH] oom: use CMP() macro - ---- - src/oom/oomd-util.h | 14 ++------------ - 1 file changed, 2 insertions(+), 12 deletions(-) - -diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h -index 87ecda80fbc..0834cbf09d7 100644 ---- a/src/oom/oomd-util.h -+++ b/src/oom/oomd-util.h -@@ -64,24 +64,14 @@ static inline int compare_pgscan(OomdCGroupContext * const *c1, OomdCGroupContex - assert(c1); - assert(c2); - -- if ((*c1)->pgscan > (*c2)->pgscan) -- return -1; -- else if ((*c1)->pgscan < (*c2)->pgscan) -- return 1; -- else -- return 0; -+ return CMP((*c2)->pgscan, (*c1)->pgscan); - } - - static inline int compare_swap_usage(OomdCGroupContext * const *c1, OomdCGroupContext * const *c2) { - assert(c1); - assert(c2); - -- if ((*c1)->swap_usage > (*c2)->swap_usage) -- return -1; -- else if ((*c1)->swap_usage < (*c2)->swap_usage) -- return 1; -- else -- return 0; -+ return CMP((*c2)->swap_usage, (*c1)->swap_usage); - } - - /* Get an array of OomdCGroupContexts from `h`, qsorted from largest to smallest values according to `compare_func`. diff --git a/systemd.spec b/systemd.spec index 848de06..f5a6587 100644 --- a/systemd.spec +++ b/systemd.spec @@ -1,7 +1,7 @@ #global commit c4b843473a75fb38ed5bf54e9d3cfb1cb3719efa %{?commit:%global shortcommit %(c=%{commit}; echo ${c:0:7})} -%global stable 1 +#global stable 1 # We ship a .pc file but don't want to have a dep on pkg-config. 
We # strip the automatically generated dep here and instead co-own the @@ -20,8 +20,8 @@ Name: systemd Url: https://www.freedesktop.org/wiki/Software/systemd -Version: 247.3 -Release: 3%{?dist} +Version: 248~rc1 +Release: 1%{?dist} # For a breakdown of the licensing, see README License: LGPLv2+ and MIT and GPLv2+ Summary: System and Service Manager @@ -70,14 +70,6 @@ i=1; for j in 00*patch; do printf "Patch%04d: %s\n" $i $j; i=$((i+1));done| GIT_DIR=../../src/systemd/.git git diffab -M v233..master@{2017-06-15} -- hwdb/[67]* hwdb/parse_hwdb.py > hwdb.patch %endif -# Backports of patches from upstream (0000–0499) -# systemd-oomd refinements for https://fedoraproject.org/wiki/Changes/EnableSystemdOomd -Patch0000: https://github.com/systemd/systemd/pull/17829.patch -Patch0001: https://github.com/systemd/systemd/pull/18361.patch -Patch0002: https://github.com/systemd/systemd/pull/18444.patch -Patch0003: https://github.com/systemd/systemd/pull/17732/commits/95ca39f04efa278ac93881e6e364a6ae520b03e7.patch -Patch0004: https://github.com/systemd/systemd/pull/18401.patch - # Downstream-only patches (5000–9999) # https://bugzilla.redhat.com/show_bug.cgi?id=1738828 Patch0500: use-bfq-scheduler.patch @@ -928,6 +920,14 @@ getent passwd systemd-network &>/dev/null || useradd -r -u 192 -l -g systemd-net %files standalone-sysusers -f .file-list-standalone-sysusers %changelog +* Tue Feb 23 2021 Zbigniew Jędrzejewski-Szmek - 248~rc1-1 +- Latest upstream prerelease, see + https://github.com/systemd/systemd/blob/v248-rc1/NEWS. +- Fixes #1614751 by only restarting services at the end of transaction. + Various packages need to be rebuilt to have the updated macros. +- Fixes #1879028, though probably not completely. +- Fixes #1925805, #1928235. + * Wed Feb 17 2021 Michel Alexandre Salim - 247.3-3 - Increase oomd user memory pressure limit to 10% (#1929856)