WHATS_NEW | 1 +
tools/pvscan.c | 34 +++++++++++++++++++++++++++++++++-
2 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/WHATS_NEW b/WHATS_NEW
index aabfc78..7fd107a 100644
--- a/WHATS_NEW
+++ b/WHATS_NEW
@@ -1,5 +1,6 @@
Version 2.02.104 -
===================================
+ Workaround VG refresh race during autoactivation by retrying the refresh.
Add dev-block-<major>:<minor>.device systemd alias for complete PV tracking.
Use major:minor as short form of --major and --minor arg for pvscan --cache.
Fix lvconvert swap of poolmetadata volume for active thin pool.
diff --git a/tools/pvscan.c b/tools/pvscan.c
index b6a07bd..ce8c446 100644
--- a/tools/pvscan.c
+++ b/tools/pvscan.c
@@ -91,10 +91,15 @@ static void _pvscan_display_single(struct cmd_context *cmd,
display_size(cmd, (uint64_t) (pv_pe_count(pv) - pv_pe_alloc_count(pv)) * pv_pe_size(pv)));
}
+#define REFRESH_BEFORE_AUTOACTIVATION_RETRIES 5
+#define REFRESH_BEFORE_AUTOACTIVATION_RETRY_USLEEP_DELAY 100000
+
static int _auto_activation_handler(struct cmd_context *cmd,
const char *vgid, int partial,
activation_change_t activate)
{
+ unsigned int refresh_retries = REFRESH_BEFORE_AUTOACTIVATION_RETRIES;
+ int refresh_done = 0;
struct volume_group *vg;
int consistent = 0;
struct id vgid_raw;
@@ -115,7 +120,34 @@ static int _auto_activation_handler(struct cmd_context *cmd,
r = 1; goto out;
}
- if (!vg_refresh_visible(vg->cmd, vg)) {
+ /* FIXME: There's a tiny race when suspending the device which is part
+ * of the refresh because when suspend ioctl is performed, the dm
+ * kernel driver executes (do_suspend and dm_suspend kernel fn):
+ *
+ * step 1: a check whether the dev is already suspended and
+ * if yes it returns success immediately as there's
+ * nothing to do
+ * step 2: it grabs the suspend lock
+ * step 3: another check whether the dev is already suspended
+ * and if found suspended, it exits with -EINVAL now
+ *
+ * The race can occur in between step 1 and step 2. To prevent premature
+ * autoactivation failure, we're using a simple retry logic here before
+ * we fail completely. For a complete solution, we need to fix the
+ * locking so there's no possibility for suspend calls to interleave
+ * each other to cause this kind of race.
+ *
+ * Remove this workaround with "refresh_retries" once we have proper locking in!
+ */
+ while (refresh_retries--) {
+ if (vg_refresh_visible(vg->cmd, vg)) {
+ refresh_done = 1;
+ break;
+ }
+ usleep(REFRESH_BEFORE_AUTOACTIVATION_RETRY_USLEEP_DELAY);
+ }
+
+ if (!refresh_done) {
log_error("%s: refresh before autoactivation failed.", vg->name);
goto out;
}