|
|
05be62 |
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
|
05be62 |
From: Benjamin Marzinski <bmarzins@redhat.com>
|
|
|
05be62 |
Date: Thu, 14 Jan 2021 20:20:24 -0600
|
|
|
05be62 |
Subject: [PATCH] multipathd: avoid io_err_stat ABBA deadlock
|
|
|
05be62 |
|
|
|
05be62 |
When the checker thread enqueues paths for the io_err_stat thread to
|
|
|
05be62 |
check, it calls enqueue_io_err_stat_by_path() with the vecs lock held.
|
|
|
05be62 |
start_io_err_stat_thread() is also called with the vecs lock held.
|
|
|
05be62 |
These two functions both lock io_err_pathvec_lock. When the io_err_stat
|
|
|
05be62 |
thread updates the paths in vecs->pathvec in poll_io_err_stat(), it has
|
|
|
05be62 |
the io_err_pathvec_lock held, and then locks the vecs lock. This can
|
|
|
05be62 |
cause an ABBA deadlock.
|
|
|
05be62 |
|
|
|
05be62 |
To solve this, service_paths() no longer updates the paths in
|
|
|
05be62 |
vecs->pathvec with the io_err_pathvec_lock held. It does this by moving
|
|
|
05be62 |
the io_err_stat_path from io_err_pathvec to a local vector when it needs
|
|
|
05be62 |
to update the path. After releasing the io_err_pathvec_lock, it goes
|
|
|
05be62 |
through this temporary vector, updates the paths with the vecs lock
|
|
|
05be62 |
held, and then frees everything.
|
|
|
05be62 |
|
|
|
05be62 |
This change fixes a bug in service_paths() where elements were being
|
|
|
05be62 |
deleted from io_err_pathvec without the index being decremented,
|
|
|
05be62 |
causing the loop to skip elements. Also, service_paths() could be
|
|
|
05be62 |
cancelled while holding the io_err_pathvec_lock, so it should have a
|
|
|
05be62 |
cleanup handler.
|
|
|
05be62 |
|
|
|
05be62 |
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
|
|
05be62 |
Reviewed-by: Martin Wilck <mwilck@suse.com>
|
|
|
05be62 |
---
|
|
|
05be62 |
libmultipath/io_err_stat.c | 56 ++++++++++++++++++++++----------------
|
|
|
05be62 |
1 file changed, 32 insertions(+), 24 deletions(-)
|
|
|
05be62 |
|
|
|
05be62 |
diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
|
|
|
05be62 |
index f6c564f0..63ee2e07 100644
|
|
|
05be62 |
--- a/libmultipath/io_err_stat.c
|
|
|
05be62 |
+++ b/libmultipath/io_err_stat.c
|
|
|
05be62 |
@@ -390,20 +390,6 @@ recover:
|
|
|
05be62 |
return 0;
|
|
|
05be62 |
}
|
|
|
05be62 |
|
|
|
05be62 |
-static int delete_io_err_stat_by_addr(struct io_err_stat_path *p)
|
|
|
05be62 |
-{
|
|
|
05be62 |
- int i;
|
|
|
05be62 |
-
|
|
|
05be62 |
- i = find_slot(io_err_pathvec, p);
|
|
|
05be62 |
- if (i != -1)
|
|
|
05be62 |
- vector_del_slot(io_err_pathvec, i);
|
|
|
05be62 |
-
|
|
|
05be62 |
- destroy_directio_ctx(p);
|
|
|
05be62 |
- free_io_err_stat_path(p);
|
|
|
05be62 |
-
|
|
|
05be62 |
- return 0;
|
|
|
05be62 |
-}
|
|
|
05be62 |
-
|
|
|
05be62 |
static void account_async_io_state(struct io_err_stat_path *pp, int rc)
|
|
|
05be62 |
{
|
|
|
05be62 |
switch (rc) {
|
|
|
05be62 |
@@ -420,17 +406,26 @@ static void account_async_io_state(struct io_err_stat_path *pp, int rc)
|
|
|
05be62 |
}
|
|
|
05be62 |
}
|
|
|
05be62 |
|
|
|
05be62 |
-static int poll_io_err_stat(struct vectors *vecs, struct io_err_stat_path *pp)
|
|
|
05be62 |
+static int io_err_stat_time_up(struct io_err_stat_path *pp)
|
|
|
05be62 |
{
|
|
|
05be62 |
struct timespec currtime, difftime;
|
|
|
05be62 |
- struct path *path;
|
|
|
05be62 |
- double err_rate;
|
|
|
05be62 |
|
|
|
05be62 |
if (clock_gettime(CLOCK_MONOTONIC, &currtime) != 0)
|
|
|
05be62 |
- return 1;
|
|
|
05be62 |
+ return 0;
|
|
|
05be62 |
timespecsub(&currtime, &pp->start_time, &difftime);
|
|
|
05be62 |
if (difftime.tv_sec < pp->total_time)
|
|
|
05be62 |
return 0;
|
|
|
05be62 |
+ return 1;
|
|
|
05be62 |
+}
|
|
|
05be62 |
+
|
|
|
05be62 |
+static void end_io_err_stat(struct io_err_stat_path *pp)
|
|
|
05be62 |
+{
|
|
|
05be62 |
+ struct timespec currtime;
|
|
|
05be62 |
+ struct path *path;
|
|
|
05be62 |
+ double err_rate;
|
|
|
05be62 |
+
|
|
|
05be62 |
+ if (clock_gettime(CLOCK_MONOTONIC, &currtime) != 0)
|
|
|
05be62 |
+ currtime = pp->start_time;
|
|
|
05be62 |
|
|
|
05be62 |
io_err_stat_log(4, "%s: check end", pp->devname);
|
|
|
05be62 |
|
|
|
05be62 |
@@ -469,10 +464,6 @@ static int poll_io_err_stat(struct vectors *vecs, struct io_err_stat_path *pp)
|
|
|
05be62 |
pp->devname);
|
|
|
05be62 |
}
|
|
|
05be62 |
lock_cleanup_pop(vecs->lock);
|
|
|
05be62 |
-
|
|
|
05be62 |
- delete_io_err_stat_by_addr(pp);
|
|
|
05be62 |
-
|
|
|
05be62 |
- return 0;
|
|
|
05be62 |
}
|
|
|
05be62 |
|
|
|
05be62 |
static int send_each_async_io(struct dio_ctx *ct, int fd, char *dev)
|
|
|
05be62 |
@@ -632,6 +623,7 @@ static void process_async_ios_event(int timeout_nsecs, char *dev)
|
|
|
05be62 |
struct timespec timeout = { .tv_nsec = timeout_nsecs };
|
|
|
05be62 |
|
|
|
05be62 |
errno = 0;
|
|
|
05be62 |
+ pthread_testcancel();
|
|
|
05be62 |
n = io_getevents(ioctx, 1L, CONCUR_NR_EVENT, events, &timeout);
|
|
|
05be62 |
if (n < 0) {
|
|
|
05be62 |
io_err_stat_log(3, "%s: async io events returned %d (errno=%s)",
|
|
|
05be62 |
@@ -644,17 +636,33 @@ static void process_async_ios_event(int timeout_nsecs, char *dev)
|
|
|
05be62 |
|
|
|
05be62 |
static void service_paths(void)
|
|
|
05be62 |
{
|
|
|
05be62 |
+ struct _vector _pathvec = {0};
|
|
|
05be62 |
+ /* avoid gcc warnings that &_pathvec will never be NULL in vector ops */
|
|
|
05be62 |
+ struct _vector * const tmp_pathvec = &_pathvec;
|
|
|
05be62 |
struct io_err_stat_path *pp;
|
|
|
05be62 |
int i;
|
|
|
05be62 |
|
|
|
05be62 |
pthread_mutex_lock(&io_err_pathvec_lock);
|
|
|
05be62 |
+ pthread_cleanup_push(cleanup_mutex, &io_err_pathvec_lock);
|
|
|
05be62 |
vector_foreach_slot(io_err_pathvec, pp, i) {
|
|
|
05be62 |
send_batch_async_ios(pp);
|
|
|
05be62 |
process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp->devname);
|
|
|
05be62 |
poll_async_io_timeout();
|
|
|
05be62 |
- poll_io_err_stat(vecs, pp);
|
|
|
05be62 |
+ if (io_err_stat_time_up(pp)) {
|
|
|
05be62 |
+ if (!vector_alloc_slot(tmp_pathvec))
|
|
|
05be62 |
+ continue;
|
|
|
05be62 |
+ vector_del_slot(io_err_pathvec, i--);
|
|
|
05be62 |
+ vector_set_slot(tmp_pathvec, pp);
|
|
|
05be62 |
+ }
|
|
|
05be62 |
}
|
|
|
05be62 |
- pthread_mutex_unlock(&io_err_pathvec_lock);
|
|
|
05be62 |
+ pthread_cleanup_pop(1);
|
|
|
05be62 |
+ vector_foreach_slot_backwards(tmp_pathvec, pp, i) {
|
|
|
05be62 |
+ end_io_err_stat(pp);
|
|
|
05be62 |
+ vector_del_slot(tmp_pathvec, i);
|
|
|
05be62 |
+ destroy_directio_ctx(pp);
|
|
|
05be62 |
+ free_io_err_stat_path(pp);
|
|
|
05be62 |
+ }
|
|
|
05be62 |
+ vector_reset(tmp_pathvec);
|
|
|
05be62 |
}
|
|
|
05be62 |
|
|
|
05be62 |
static void cleanup_exited(__attribute__((unused)) void *arg)
|
|
|
05be62 |
--
|
|
|
05be62 |
2.17.2
|
|
|
05be62 |
|