Blame SOURCES/0054-multipathd-avoid-io_err_stat-crash-during-shutdown.patch

05be62
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
05be62
From: Benjamin Marzinski <bmarzins@redhat.com>
05be62
Date: Thu, 14 Jan 2021 20:20:23 -0600
05be62
Subject: [PATCH] multipathd: avoid io_err_stat crash during shutdown
05be62
05be62
The checker thread is reponsible for enqueueing paths for the
05be62
io_err_stat thread to check. During shutdown, the io_err_stat thread is
05be62
shut down and cleaned up before the checker thread.  There is no code
05be62
to make sure that the checker thread isn't accessing the io_err_stat
05be62
pathvec or its mutex while they are being freed, which can lead to
05be62
memory corruption crashes.
05be62
05be62
To solve this, get rid of the io_err_stat_pathvec structure, and
05be62
statically define the mutex.  This means that the mutex is always valid
05be62
to access, and the io_err_stat pathvec can only be accessed while
05be62
holding it.  If the io_err_stat thread has already been cleaned up
05be62
when the checker tries to access the pathvec, it will be NULL, and the
05be62
checker will simply fail to enqueue the path.
05be62
05be62
This change also fixes a bug in free_io_err_pathvec(), which previously
05be62
only attempted to free the pathvec if it was not set, instead of when it
05be62
was set.
05be62
05be62
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
05be62
Reviewed-by: Martin Wilck <mwilck@suse.com>
05be62
---
05be62
 libmultipath/io_err_stat.c | 112 +++++++++++++++----------------------
05be62
 libmultipath/util.c        |   5 ++
05be62
 libmultipath/util.h        |   1 +
05be62
 3 files changed, 50 insertions(+), 68 deletions(-)
05be62
05be62
diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
05be62
index 449760a0..f6c564f0 100644
05be62
--- a/libmultipath/io_err_stat.c
05be62
+++ b/libmultipath/io_err_stat.c
05be62
@@ -34,6 +34,7 @@
05be62
 #include "lock.h"
05be62
 #include "time-util.h"
05be62
 #include "io_err_stat.h"
05be62
+#include "util.h"
05be62
 
05be62
 #define IOTIMEOUT_SEC			60
05be62
 #define TIMEOUT_NO_IO_NSEC		10000000 /*10ms = 10000000ns*/
05be62
@@ -46,12 +47,6 @@
05be62
 #define io_err_stat_log(prio, fmt, args...) \
05be62
 	condlog(prio, "io error statistic: " fmt, ##args)
05be62
 
05be62
-
05be62
-struct io_err_stat_pathvec {
05be62
-	pthread_mutex_t mutex;
05be62
-	vector		pathvec;
05be62
-};
05be62
-
05be62
 struct dio_ctx {
05be62
 	struct timespec	io_starttime;
05be62
 	unsigned int	blksize;
05be62
@@ -76,9 +71,10 @@ pthread_attr_t		io_err_stat_attr;
05be62
 
05be62
 static pthread_mutex_t io_err_thread_lock = PTHREAD_MUTEX_INITIALIZER;
05be62
 static pthread_cond_t io_err_thread_cond = PTHREAD_COND_INITIALIZER;
05be62
+static pthread_mutex_t io_err_pathvec_lock = PTHREAD_MUTEX_INITIALIZER;
05be62
 static int io_err_thread_running = 0;
05be62
 
05be62
-static struct io_err_stat_pathvec *paths;
05be62
+static vector io_err_pathvec;
05be62
 struct vectors *vecs;
05be62
 io_context_t	ioctx;
05be62
 
05be62
@@ -208,46 +204,23 @@ static void free_io_err_stat_path(struct io_err_stat_path *p)
05be62
 	FREE(p);
05be62
 }
05be62
 
05be62
-static struct io_err_stat_pathvec *alloc_pathvec(void)
05be62
-{
05be62
-	struct io_err_stat_pathvec *p;
05be62
-	int r;
05be62
-
05be62
-	p = (struct io_err_stat_pathvec *)MALLOC(sizeof(*p));
05be62
-	if (!p)
05be62
-		return NULL;
05be62
-	p->pathvec = vector_alloc();
05be62
-	if (!p->pathvec)
05be62
-		goto out_free_struct_pathvec;
05be62
-	r = pthread_mutex_init(&p->mutex, NULL);
05be62
-	if (r)
05be62
-		goto out_free_member_pathvec;
05be62
-
05be62
-	return p;
05be62
-
05be62
-out_free_member_pathvec:
05be62
-	vector_free(p->pathvec);
05be62
-out_free_struct_pathvec:
05be62
-	FREE(p);
05be62
-	return NULL;
05be62
-}
05be62
-
05be62
-static void free_io_err_pathvec(struct io_err_stat_pathvec *p)
05be62
+static void free_io_err_pathvec(void)
05be62
 {
05be62
 	struct io_err_stat_path *path;
05be62
 	int i;
05be62
 
05be62
-	if (!p)
05be62
-		return;
05be62
-	pthread_mutex_destroy(&p->mutex);
05be62
-	if (!p->pathvec) {
05be62
-		vector_foreach_slot(p->pathvec, path, i) {
05be62
-			destroy_directio_ctx(path);
05be62
-			free_io_err_stat_path(path);
05be62
-		}
05be62
-		vector_free(p->pathvec);
05be62
+	pthread_mutex_lock(&io_err_pathvec_lock);
05be62
+	pthread_cleanup_push(cleanup_mutex, &io_err_pathvec_lock);
05be62
+	if (!io_err_pathvec)
05be62
+		goto out;
05be62
+	vector_foreach_slot(io_err_pathvec, path, i) {
05be62
+		destroy_directio_ctx(path);
05be62
+		free_io_err_stat_path(path);
05be62
 	}
05be62
-	FREE(p);
05be62
+	vector_free(io_err_pathvec);
05be62
+	io_err_pathvec = NULL;
05be62
+out:
05be62
+	pthread_cleanup_pop(1);
05be62
 }
05be62
 
05be62
 /*
05be62
@@ -259,13 +232,13 @@ static int enqueue_io_err_stat_by_path(struct path *path)
05be62
 {
05be62
 	struct io_err_stat_path *p;
05be62
 
05be62
-	pthread_mutex_lock(&paths->mutex);
05be62
-	p = find_err_path_by_dev(paths->pathvec, path->dev);
05be62
+	pthread_mutex_lock(&io_err_pathvec_lock);
05be62
+	p = find_err_path_by_dev(io_err_pathvec, path->dev);
05be62
 	if (p) {
05be62
-		pthread_mutex_unlock(&paths->mutex);
05be62
+		pthread_mutex_unlock(&io_err_pathvec_lock);
05be62
 		return 0;
05be62
 	}
05be62
-	pthread_mutex_unlock(&paths->mutex);
05be62
+	pthread_mutex_unlock(&io_err_pathvec_lock);
05be62
 
05be62
 	p = alloc_io_err_stat_path();
05be62
 	if (!p)
05be62
@@ -277,18 +250,18 @@ static int enqueue_io_err_stat_by_path(struct path *path)
05be62
 
05be62
 	if (setup_directio_ctx(p))
05be62
 		goto free_ioerr_path;
05be62
-	pthread_mutex_lock(&paths->mutex);
05be62
-	if (!vector_alloc_slot(paths->pathvec))
05be62
+	pthread_mutex_lock(&io_err_pathvec_lock);
05be62
+	if (!vector_alloc_slot(io_err_pathvec))
05be62
 		goto unlock_destroy;
05be62
-	vector_set_slot(paths->pathvec, p);
05be62
-	pthread_mutex_unlock(&paths->mutex);
05be62
+	vector_set_slot(io_err_pathvec, p);
05be62
+	pthread_mutex_unlock(&io_err_pathvec_lock);
05be62
 
05be62
 	io_err_stat_log(2, "%s: enqueue path %s to check",
05be62
 			path->mpp->alias, path->dev);
05be62
 	return 0;
05be62
 
05be62
 unlock_destroy:
05be62
-	pthread_mutex_unlock(&paths->mutex);
05be62
+	pthread_mutex_unlock(&io_err_pathvec_lock);
05be62
 	destroy_directio_ctx(p);
05be62
 free_ioerr_path:
05be62
 	free_io_err_stat_path(p);
05be62
@@ -421,9 +394,9 @@ static int delete_io_err_stat_by_addr(struct io_err_stat_path *p)
05be62
 {
05be62
 	int i;
05be62
 
05be62
-	i = find_slot(paths->pathvec, p);
05be62
+	i = find_slot(io_err_pathvec, p);
05be62
 	if (i != -1)
05be62
-		vector_del_slot(paths->pathvec, i);
05be62
+		vector_del_slot(io_err_pathvec, i);
05be62
 
05be62
 	destroy_directio_ctx(p);
05be62
 	free_io_err_stat_path(p);
05be62
@@ -594,7 +567,7 @@ static void poll_async_io_timeout(void)
05be62
 
05be62
 	if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0)
05be62
 		return;
05be62
-	vector_foreach_slot(paths->pathvec, pp, i) {
05be62
+	vector_foreach_slot(io_err_pathvec, pp, i) {
05be62
 		for (j = 0; j < CONCUR_NR_EVENT; j++) {
05be62
 			rc = try_to_cancel_timeout_io(pp->dio_ctx_array + j,
05be62
 					&curr_time, pp->devname);
05be62
@@ -640,7 +613,7 @@ static void handle_async_io_done_event(struct io_event *io_evt)
05be62
 	int rc = PATH_UNCHECKED;
05be62
 	int i, j;
05be62
 
05be62
-	vector_foreach_slot(paths->pathvec, pp, i) {
05be62
+	vector_foreach_slot(io_err_pathvec, pp, i) {
05be62
 		for (j = 0; j < CONCUR_NR_EVENT; j++) {
05be62
 			ct = pp->dio_ctx_array + j;
05be62
 			if (&ct->io == io_evt->obj) {
05be62
@@ -674,19 +647,14 @@ static void service_paths(void)
05be62
 	struct io_err_stat_path *pp;
05be62
 	int i;
05be62
 
05be62
-	pthread_mutex_lock(&paths->mutex);
05be62
-	vector_foreach_slot(paths->pathvec, pp, i) {
05be62
+	pthread_mutex_lock(&io_err_pathvec_lock);
05be62
+	vector_foreach_slot(io_err_pathvec, pp, i) {
05be62
 		send_batch_async_ios(pp);
05be62
 		process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp->devname);
05be62
 		poll_async_io_timeout();
05be62
 		poll_io_err_stat(vecs, pp);
05be62
 	}
05be62
-	pthread_mutex_unlock(&paths->mutex);
05be62
-}
05be62
-
05be62
-static void cleanup_unlock(void *arg)
05be62
-{
05be62
-	pthread_mutex_unlock((pthread_mutex_t*) arg);
05be62
+	pthread_mutex_unlock(&io_err_pathvec_lock);
05be62
 }
05be62
 
05be62
 static void cleanup_exited(__attribute__((unused)) void *arg)
05be62
@@ -744,12 +712,17 @@ int start_io_err_stat_thread(void *data)
05be62
 		io_err_stat_log(4, "io_setup failed");
05be62
 		return 1;
05be62
 	}
05be62
-	paths = alloc_pathvec();
05be62
-	if (!paths)
05be62
+
05be62
+	pthread_mutex_lock(&io_err_pathvec_lock);
05be62
+	io_err_pathvec = vector_alloc();
05be62
+	if (!io_err_pathvec) {
05be62
+		pthread_mutex_unlock(&io_err_pathvec_lock);
05be62
 		goto destroy_ctx;
05be62
+	}
05be62
+	pthread_mutex_unlock(&io_err_pathvec_lock);
05be62
 
05be62
 	pthread_mutex_lock(&io_err_thread_lock);
05be62
-	pthread_cleanup_push(cleanup_unlock, &io_err_thread_lock);
05be62
+	pthread_cleanup_push(cleanup_mutex, &io_err_thread_lock);
05be62
 
05be62
 	ret = pthread_create(&io_err_stat_thr, &io_err_stat_attr,
05be62
 			     io_err_stat_loop, data);
05be62
@@ -769,7 +742,10 @@ int start_io_err_stat_thread(void *data)
05be62
 	return 0;
05be62
 
05be62
 out_free:
05be62
-	free_io_err_pathvec(paths);
05be62
+	pthread_mutex_lock(&io_err_pathvec_lock);
05be62
+	vector_free(io_err_pathvec);
05be62
+	io_err_pathvec = NULL;
05be62
+	pthread_mutex_unlock(&io_err_pathvec_lock);
05be62
 destroy_ctx:
05be62
 	io_destroy(ioctx);
05be62
 	io_err_stat_log(0, "failed to start io_error statistic thread");
05be62
@@ -785,6 +761,6 @@ void stop_io_err_stat_thread(void)
05be62
 		pthread_cancel(io_err_stat_thr);
05be62
 
05be62
 	pthread_join(io_err_stat_thr, NULL);
05be62
-	free_io_err_pathvec(paths);
05be62
+	free_io_err_pathvec();
05be62
 	io_destroy(ioctx);
05be62
 }
05be62
diff --git a/libmultipath/util.c b/libmultipath/util.c
05be62
index 51c38c87..dd30a46e 100644
05be62
--- a/libmultipath/util.c
05be62
+++ b/libmultipath/util.c
05be62
@@ -469,3 +469,8 @@ void close_fd(void *arg)
05be62
 {
05be62
 	close((long)arg);
05be62
 }
05be62
+
05be62
+void cleanup_mutex(void *arg)
05be62
+{
05be62
+	pthread_mutex_unlock(arg);
05be62
+}
05be62
diff --git a/libmultipath/util.h b/libmultipath/util.h
05be62
index 56bd78c7..ce277680 100644
05be62
--- a/libmultipath/util.h
05be62
+++ b/libmultipath/util.h
05be62
@@ -44,6 +44,7 @@ void set_max_fds(rlim_t max_fds);
05be62
 	pthread_cleanup_push(((void (*)(void *))&f), (arg))
05be62
 
05be62
 void close_fd(void *arg);
05be62
+void cleanup_mutex(void *arg);
05be62
 
05be62
 struct scandir_result {
05be62
 	struct dirent **di;
05be62
-- 
05be62
2.17.2
05be62