From 9adc5d825e13e7979441c5d7790527974d99f496 Mon Sep 17 00:00:00 2001 From: Anita Zhang Date: Jun 10 2021 18:14:08 +0000 Subject: 248.2-1.4: Backport PR #19811 to fix issues with mount sd-event rate limiting --- diff --git a/SOURCES/19811.patch b/SOURCES/19811.patch new file mode 100644 index 0000000..fb7c0e2 --- /dev/null +++ b/SOURCES/19811.patch @@ -0,0 +1,220 @@ +From 0c81900965a72b29eb76e0737ed899b925ee75b6 Mon Sep 17 00:00:00 2001 +From: Anita Zhang +Date: Tue, 8 Jun 2021 00:04:35 -0700 +Subject: [PATCH 1/2] test: add extended test for triggering mount rate limit + +It's hard to trigger the failure to exit the rate limit state in +isolation as it needs multiple event sources in order to show that it +gets stuck in the queue. Hence why this is an extended test. +--- + test/TEST-60-MOUNT-RATELIMIT/Makefile | 1 + + test/TEST-60-MOUNT-RATELIMIT/test.sh | 7 +++ + test/units/testsuite-60.service | 6 +++ + test/units/testsuite-60.sh | 73 +++++++++++++++++++++++++++ + 4 files changed, 87 insertions(+) + create mode 120000 test/TEST-60-MOUNT-RATELIMIT/Makefile + create mode 100755 test/TEST-60-MOUNT-RATELIMIT/test.sh + create mode 100644 test/units/testsuite-60.service + create mode 100755 test/units/testsuite-60.sh + +diff --git a/test/TEST-60-MOUNT-RATELIMIT/Makefile b/test/TEST-60-MOUNT-RATELIMIT/Makefile +new file mode 120000 +index 000000000000..e9f93b1104cd +--- /dev/null ++++ b/test/TEST-60-MOUNT-RATELIMIT/Makefile +@@ -0,0 +1 @@ ++../TEST-01-BASIC/Makefile +\ No newline at end of file +diff --git a/test/TEST-60-MOUNT-RATELIMIT/test.sh b/test/TEST-60-MOUNT-RATELIMIT/test.sh +new file mode 100755 +index 000000000000..f9eb11ccb441 +--- /dev/null ++++ b/test/TEST-60-MOUNT-RATELIMIT/test.sh +@@ -0,0 +1,7 @@ ++#!/usr/bin/env bash ++set -e ++TEST_DESCRIPTION="Test that mount/unmount storms can enter/exit rate limit state and will not leak units" ++ ++. $TEST_BASE_DIR/test-functions ++ ++do_test "$@" +diff --git a/test/units/testsuite-60.service b/test/units/testsuite-60.service +new file mode 100644 +index 000000000000..3715c4f341c8 +--- /dev/null ++++ b/test/units/testsuite-60.service +@@ -0,0 +1,6 @@ ++[Unit] ++Description=TEST-60-MOUNT-RATELIMIT ++ ++[Service] ++Type=oneshot ++ExecStart=/usr/lib/systemd/tests/testdata/units/%N.sh +diff --git a/test/units/testsuite-60.sh b/test/units/testsuite-60.sh +new file mode 100755 +index 000000000000..815875466762 +--- /dev/null ++++ b/test/units/testsuite-60.sh +@@ -0,0 +1,73 @@ ++#!/usr/bin/env bash ++set -eux ++set -o pipefail ++ ++systemd-analyze log-level debug ++systemd-analyze log-target journal ++ ++NUM_DIRS=20 ++ ++# mount/unmount enough times to trigger the /proc/self/mountinfo parsing rate limiting ++ ++for ((i = 0; i < NUM_DIRS; i++)); do ++ mkdir "/tmp/meow${i}" ++done ++ ++for ((i = 0; i < NUM_DIRS; i++)); do ++ mount -t tmpfs tmpfs "/tmp/meow${i}" ++done ++ ++systemctl daemon-reload ++systemctl list-units -t mount tmp-meow* | grep -q tmp-meow ++ ++for ((i = 0; i < NUM_DIRS; i++)); do ++ umount "/tmp/meow${i}" ++done ++ ++# figure out if we have entered the rate limit state ++ ++exited_rl=0 ++timeout="$(date -ud "2 minutes" +%s)" ++while [[ $(date -u +%s) -le ${timeout} ]]; do ++ if journalctl -u init.scope | grep -q "(mount-monitor-dispatch) entered rate limit"; then ++ entered_rl=1 ++ break ++ fi ++ sleep 5 ++done ++ ++# if the infra is slow we might not enter the rate limit state; in that case skip the exit check ++ ++if [ "${entered_rl}" = "1" ]; then ++ exited_rl=0 ++ timeout="$(date -ud "2 minutes" +%s)" ++ while [[ $(date -u +%s) -le ${timeout} ]]; do ++ if journalctl -u init.scope | grep -q "(mount-monitor-dispatch) left rate limit"; then ++ exited_rl=1 ++ break ++ fi ++ sleep 5 ++ done ++ ++ if [ "${exited_rl}" = "0" ]; then ++ exit 24 ++ fi ++fi ++ ++# give some time for units to settle so we don't race between exiting the rate limit state and cleaning up the units ++ ++sleep 60 ++systemctl daemon-reload ++sleep 60 ++ ++# verify that the mount units are always cleaned up at the end ++ ++if systemctl list-units -t mount tmp-meow* | grep -q tmp-meow; then ++ exit 42 ++fi ++ ++systemd-analyze log-level info ++ ++echo OK >/testok ++ ++exit 0 + +From 81107b8419c39f726fd2805517a5b9faab204e59 Mon Sep 17 00:00:00 2001 +From: Lennart Poettering +Date: Tue, 8 Jun 2021 00:07:51 -0700 +Subject: [PATCH 2/2] sd-event: change ordering of pending/ratelimited events + +Instead of ordering non-pending before pending we should order +"non-pending OR ratelimited" before "pending AND not-ratelimited". +This fixes a bug where ratelimited events were ordered at the end of the +priority queue and could be stuck there for an indeterminate amount of +time. +--- + src/libsystemd/sd-event/sd-event.c | 45 ++++++++++++++---------------- + 1 file changed, 21 insertions(+), 24 deletions(-) + +diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c +index 611af45293ac..489878a3e967 100644 +--- a/src/libsystemd/sd-event/sd-event.c ++++ b/src/libsystemd/sd-event/sd-event.c +@@ -237,25 +237,6 @@ static usec_t time_event_source_next(const sd_event_source *s) { + return USEC_INFINITY; + } + +-static int earliest_time_prioq_compare(const void *a, const void *b) { +- const sd_event_source *x = a, *y = b; +- +- /* Enabled ones first */ +- if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF) +- return -1; +- if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF) +- return 1; +- +- /* Move the pending ones to the end */ +- if (!x->pending && y->pending) +- return -1; +- if (x->pending && !y->pending) +- return 1; +- +- /* Order by time */ +- return CMP(time_event_source_next(x), time_event_source_next(y)); +-} +- + static usec_t time_event_source_latest(const sd_event_source *s) { + assert(s); + +@@ -274,7 +255,15 @@ static usec_t time_event_source_latest(const sd_event_source *s) { + return USEC_INFINITY; + } + +-static int latest_time_prioq_compare(const void *a, const void *b) { ++static bool event_source_timer_candidate(const sd_event_source *s) { ++ assert(s); ++ ++ /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending) ++ * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */ ++ return !s->pending || s->ratelimited; ++} ++ ++static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) { + const sd_event_source *x = a, *y = b; + + /* Enabled ones first */ +@@ -283,14 +272,22 @@ static int latest_time_prioq_compare(const void *a, const void *b) { + if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF) + return 1; + +- /* Move the pending ones to the end */ +- if (!x->pending && y->pending) ++ /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */ ++ if (event_source_timer_candidate(x) && !event_source_timer_candidate(y)) + return -1; +- if (x->pending && !y->pending) ++ if (!event_source_timer_candidate(x) && event_source_timer_candidate(y)) + return 1; + + /* Order by time */ +- return CMP(time_event_source_latest(x), time_event_source_latest(y)); ++ return CMP(time_func(x), time_func(y)); ++} ++ ++static int earliest_time_prioq_compare(const void *a, const void *b) { ++ return time_prioq_compare(a, b, time_event_source_next); ++} ++ ++static int latest_time_prioq_compare(const void *a, const void *b) { ++ return time_prioq_compare(a, b, time_event_source_latest); + } + + static int exit_prioq_compare(const void *a, const void *b) { diff --git a/SPECS/systemd.spec b/SPECS/systemd.spec index eb61505..3d89b31 100644 --- a/SPECS/systemd.spec +++ b/SPECS/systemd.spec @@ -26,7 +26,7 @@ Name: systemd Url: https://www.freedesktop.org/wiki/Software/systemd Version: 248.2 -Release: 1.3%{?dist} +Release: 1.4%{?dist} # For a breakdown of the licensing, see README License: LGPLv2+ and MIT and GPLv2+ Summary: System and Service Manager @@ -94,6 +94,7 @@ Patch0102: 17495-rebased.patch # PR 18621: Quiet "proc: Bad value for 'hidepid'" messages Patch0101: https://github.com/systemd/systemd/pull/18621.patch %endif +Patch0103: https://github.com/systemd/systemd/pull/19811.patch # Downstream-only patches (0500–9999) @@ -959,6 +960,9 @@ fi %endif %changelog +* Thu Jun 10 2021 Anita Zhang - 248.2-1.4 +- Backport PR #19811 to fix issues with mount sd-event rate limiting + * Wed May 19 2021 Davide Cavalca - 248.2-1.3 - Add BuildRequires for python-jinja2 in preparation for 249 (see PR#19630)