mrc0mmand / rpms / lvm2

Forked from rpms/lvm2 2 years ago
Clone

Blame SOURCES/lvm2-2_02_104-workaround-vg-refresh-during-autoactivation-by-retrying-the-refresh.patch

191583
 WHATS_NEW      |  1 +
191583
 tools/pvscan.c | 34 +++++++++++++++++++++++++++++++++-
191583
 2 files changed, 34 insertions(+), 1 deletion(-)
191583
191583
diff --git a/WHATS_NEW b/WHATS_NEW
191583
index aabfc78..7fd107a 100644
191583
--- a/WHATS_NEW
191583
+++ b/WHATS_NEW
191583
@@ -1,5 +1,6 @@
191583
 Version 2.02.104 - 
191583
 ===================================
191583
+  Work around VG refresh race during autoactivation by retrying the refresh.
191583
   Add dev-block-<major>:<minor>.device systemd alias for complete PV tracking.
191583
   Use major:minor as short form of --major and --minor arg for pvscan --cache.
191583
   Fix lvconvert swap of poolmetadata volume for active thin pool.
191583
diff --git a/tools/pvscan.c b/tools/pvscan.c
191583
index b6a07bd..ce8c446 100644
191583
--- a/tools/pvscan.c
191583
+++ b/tools/pvscan.c
191583
@@ -91,10 +91,15 @@ static void _pvscan_display_single(struct cmd_context *cmd,
191583
 				display_size(cmd, (uint64_t) (pv_pe_count(pv) - pv_pe_alloc_count(pv)) * pv_pe_size(pv)));
191583
 }
191583
 
191583
+#define REFRESH_BEFORE_AUTOACTIVATION_RETRIES 5
191583
+#define REFRESH_BEFORE_AUTOACTIVATION_RETRY_USLEEP_DELAY 100000
191583
+
191583
 static int _auto_activation_handler(struct cmd_context *cmd,
191583
 				    const char *vgid, int partial,
191583
 				    activation_change_t activate)
191583
 {
191583
+	unsigned int refresh_retries = REFRESH_BEFORE_AUTOACTIVATION_RETRIES;
191583
+	int refresh_done = 0;
191583
 	struct volume_group *vg;
191583
 	int consistent = 0;
191583
 	struct id vgid_raw;
191583
@@ -115,7 +120,34 @@ static int _auto_activation_handler(struct cmd_context *cmd,
191583
 		r = 1; goto out;
191583
 	}
191583
 
191583
-	if (!vg_refresh_visible(vg->cmd, vg)) {
191583
+	/* FIXME: There's a tiny race when suspending the device which is part
191583
+	 * of the refresh because when suspend ioctl is performed, the dm
191583
+	 * kernel driver executes (do_suspend and dm_suspend kernel fn):
191583
+	 *
191583
+	 *          step 1: a check whether the dev is already suspended and
191583
+	 *                  if yes it returns success immediately as there's
191583
+	 *                  nothing to do
191583
+	 *          step 2: it grabs the suspend lock
191583
+	 *          step 3: another check whether the dev is already suspended
191583
+	 *                  and if found suspended, it exits with -EINVAL now
191583
+	 *
191583
+	 * The race can occur in between step 1 and step 2. To prevent premature
191583
+	 * autoactivation failure, we're using a simple retry logic here before
191583
+	 * we fail completely. For a complete solution, we need to fix the
191583
+	 * locking so there's no possibility for suspend calls to interleave
191583
+	 * with each other and cause this kind of race.
191583
+	 *
191583
+	 * Remove this workaround with "refresh_retries" once we have proper locking in!
191583
+	 */
191583
+	while (refresh_retries--) {
191583
+		if (vg_refresh_visible(vg->cmd, vg)) {
191583
+			refresh_done = 1;
191583
+			break;
191583
+		}
191583
+		usleep(REFRESH_BEFORE_AUTOACTIVATION_RETRY_USLEEP_DELAY);
191583
+	}
191583
+
191583
+	if (!refresh_done) {
191583
 		log_error("%s: refresh before autoactivation failed.", vg->name);
191583
 		goto out;
191583
 	}