Blame SOURCES/0054-multipathd-avoid-io_err_stat-crash-during-shutdown.patch

a5a7cf
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
a5a7cf
From: Benjamin Marzinski <bmarzins@redhat.com>
a5a7cf
Date: Thu, 14 Jan 2021 20:20:23 -0600
a5a7cf
Subject: [PATCH] multipathd: avoid io_err_stat crash during shutdown
a5a7cf
a5a7cf
The checker thread is reponsible for enqueueing paths for the
a5a7cf
io_err_stat thread to check. During shutdown, the io_err_stat thread is
a5a7cf
shut down and cleaned up before the checker thread.  There is no code
a5a7cf
to make sure that the checker thread isn't accessing the io_err_stat
a5a7cf
pathvec or its mutex while they are being freed, which can lead to
a5a7cf
memory corruption crashes.
a5a7cf
a5a7cf
To solve this, get rid of the io_err_stat_pathvec structure, and
a5a7cf
statically define the mutex.  This means that the mutex is always valid
a5a7cf
to access, and the io_err_stat pathvec can only be accessed while
a5a7cf
holding it.  If the io_err_stat thread has already been cleaned up
a5a7cf
when the checker tries to access the pathvec, it will be NULL, and the
a5a7cf
checker will simply fail to enqueue the path.
a5a7cf
a5a7cf
This change also fixes a bug in free_io_err_pathvec(), which previously
a5a7cf
only attempted to free the pathvec if it was not set, instead of when it
a5a7cf
was set.
a5a7cf
a5a7cf
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
a5a7cf
Reviewed-by: Martin Wilck <mwilck@suse.com>
a5a7cf
---
a5a7cf
 libmultipath/io_err_stat.c | 112 +++++++++++++++----------------------
a5a7cf
 libmultipath/util.c        |   5 ++
a5a7cf
 libmultipath/util.h        |   1 +
a5a7cf
 3 files changed, 50 insertions(+), 68 deletions(-)
a5a7cf
a5a7cf
diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
a5a7cf
index 449760a0..f6c564f0 100644
a5a7cf
--- a/libmultipath/io_err_stat.c
a5a7cf
+++ b/libmultipath/io_err_stat.c
a5a7cf
@@ -34,6 +34,7 @@
a5a7cf
 #include "lock.h"
a5a7cf
 #include "time-util.h"
a5a7cf
 #include "io_err_stat.h"
a5a7cf
+#include "util.h"
a5a7cf
 
a5a7cf
 #define IOTIMEOUT_SEC			60
a5a7cf
 #define TIMEOUT_NO_IO_NSEC		10000000 /*10ms = 10000000ns*/
a5a7cf
@@ -46,12 +47,6 @@
a5a7cf
 #define io_err_stat_log(prio, fmt, args...) \
a5a7cf
 	condlog(prio, "io error statistic: " fmt, ##args)
a5a7cf
 
a5a7cf
-
a5a7cf
-struct io_err_stat_pathvec {
a5a7cf
-	pthread_mutex_t mutex;
a5a7cf
-	vector		pathvec;
a5a7cf
-};
a5a7cf
-
a5a7cf
 struct dio_ctx {
a5a7cf
 	struct timespec	io_starttime;
a5a7cf
 	unsigned int	blksize;
a5a7cf
@@ -76,9 +71,10 @@ pthread_attr_t		io_err_stat_attr;
a5a7cf
 
a5a7cf
 static pthread_mutex_t io_err_thread_lock = PTHREAD_MUTEX_INITIALIZER;
a5a7cf
 static pthread_cond_t io_err_thread_cond = PTHREAD_COND_INITIALIZER;
a5a7cf
+static pthread_mutex_t io_err_pathvec_lock = PTHREAD_MUTEX_INITIALIZER;
a5a7cf
 static int io_err_thread_running = 0;
a5a7cf
 
a5a7cf
-static struct io_err_stat_pathvec *paths;
a5a7cf
+static vector io_err_pathvec;
a5a7cf
 struct vectors *vecs;
a5a7cf
 io_context_t	ioctx;
a5a7cf
 
a5a7cf
@@ -208,46 +204,23 @@ static void free_io_err_stat_path(struct io_err_stat_path *p)
a5a7cf
 	FREE(p);
a5a7cf
 }
a5a7cf
 
a5a7cf
-static struct io_err_stat_pathvec *alloc_pathvec(void)
a5a7cf
-{
a5a7cf
-	struct io_err_stat_pathvec *p;
a5a7cf
-	int r;
a5a7cf
-
a5a7cf
-	p = (struct io_err_stat_pathvec *)MALLOC(sizeof(*p));
a5a7cf
-	if (!p)
a5a7cf
-		return NULL;
a5a7cf
-	p->pathvec = vector_alloc();
a5a7cf
-	if (!p->pathvec)
a5a7cf
-		goto out_free_struct_pathvec;
a5a7cf
-	r = pthread_mutex_init(&p->mutex, NULL);
a5a7cf
-	if (r)
a5a7cf
-		goto out_free_member_pathvec;
a5a7cf
-
a5a7cf
-	return p;
a5a7cf
-
a5a7cf
-out_free_member_pathvec:
a5a7cf
-	vector_free(p->pathvec);
a5a7cf
-out_free_struct_pathvec:
a5a7cf
-	FREE(p);
a5a7cf
-	return NULL;
a5a7cf
-}
a5a7cf
-
a5a7cf
-static void free_io_err_pathvec(struct io_err_stat_pathvec *p)
a5a7cf
+static void free_io_err_pathvec(void)
a5a7cf
 {
a5a7cf
 	struct io_err_stat_path *path;
a5a7cf
 	int i;
a5a7cf
 
a5a7cf
-	if (!p)
a5a7cf
-		return;
a5a7cf
-	pthread_mutex_destroy(&p->mutex);
a5a7cf
-	if (!p->pathvec) {
a5a7cf
-		vector_foreach_slot(p->pathvec, path, i) {
a5a7cf
-			destroy_directio_ctx(path);
a5a7cf
-			free_io_err_stat_path(path);
a5a7cf
-		}
a5a7cf
-		vector_free(p->pathvec);
a5a7cf
+	pthread_mutex_lock(&io_err_pathvec_lock);
a5a7cf
+	pthread_cleanup_push(cleanup_mutex, &io_err_pathvec_lock);
a5a7cf
+	if (!io_err_pathvec)
a5a7cf
+		goto out;
a5a7cf
+	vector_foreach_slot(io_err_pathvec, path, i) {
a5a7cf
+		destroy_directio_ctx(path);
a5a7cf
+		free_io_err_stat_path(path);
a5a7cf
 	}
a5a7cf
-	FREE(p);
a5a7cf
+	vector_free(io_err_pathvec);
a5a7cf
+	io_err_pathvec = NULL;
a5a7cf
+out:
a5a7cf
+	pthread_cleanup_pop(1);
a5a7cf
 }
a5a7cf
 
a5a7cf
 /*
a5a7cf
@@ -259,13 +232,13 @@ static int enqueue_io_err_stat_by_path(struct path *path)
a5a7cf
 {
a5a7cf
 	struct io_err_stat_path *p;
a5a7cf
 
a5a7cf
-	pthread_mutex_lock(&paths->mutex);
a5a7cf
-	p = find_err_path_by_dev(paths->pathvec, path->dev);
a5a7cf
+	pthread_mutex_lock(&io_err_pathvec_lock);
a5a7cf
+	p = find_err_path_by_dev(io_err_pathvec, path->dev);
a5a7cf
 	if (p) {
a5a7cf
-		pthread_mutex_unlock(&paths->mutex);
a5a7cf
+		pthread_mutex_unlock(&io_err_pathvec_lock);
a5a7cf
 		return 0;
a5a7cf
 	}
a5a7cf
-	pthread_mutex_unlock(&paths->mutex);
a5a7cf
+	pthread_mutex_unlock(&io_err_pathvec_lock);
a5a7cf
 
a5a7cf
 	p = alloc_io_err_stat_path();
a5a7cf
 	if (!p)
a5a7cf
@@ -277,18 +250,18 @@ static int enqueue_io_err_stat_by_path(struct path *path)
a5a7cf
 
a5a7cf
 	if (setup_directio_ctx(p))
a5a7cf
 		goto free_ioerr_path;
a5a7cf
-	pthread_mutex_lock(&paths->mutex);
a5a7cf
-	if (!vector_alloc_slot(paths->pathvec))
a5a7cf
+	pthread_mutex_lock(&io_err_pathvec_lock);
a5a7cf
+	if (!vector_alloc_slot(io_err_pathvec))
a5a7cf
 		goto unlock_destroy;
a5a7cf
-	vector_set_slot(paths->pathvec, p);
a5a7cf
-	pthread_mutex_unlock(&paths->mutex);
a5a7cf
+	vector_set_slot(io_err_pathvec, p);
a5a7cf
+	pthread_mutex_unlock(&io_err_pathvec_lock);
a5a7cf
 
a5a7cf
 	io_err_stat_log(2, "%s: enqueue path %s to check",
a5a7cf
 			path->mpp->alias, path->dev);
a5a7cf
 	return 0;
a5a7cf
 
a5a7cf
 unlock_destroy:
a5a7cf
-	pthread_mutex_unlock(&paths->mutex);
a5a7cf
+	pthread_mutex_unlock(&io_err_pathvec_lock);
a5a7cf
 	destroy_directio_ctx(p);
a5a7cf
 free_ioerr_path:
a5a7cf
 	free_io_err_stat_path(p);
a5a7cf
@@ -421,9 +394,9 @@ static int delete_io_err_stat_by_addr(struct io_err_stat_path *p)
a5a7cf
 {
a5a7cf
 	int i;
a5a7cf
 
a5a7cf
-	i = find_slot(paths->pathvec, p);
a5a7cf
+	i = find_slot(io_err_pathvec, p);
a5a7cf
 	if (i != -1)
a5a7cf
-		vector_del_slot(paths->pathvec, i);
a5a7cf
+		vector_del_slot(io_err_pathvec, i);
a5a7cf
 
a5a7cf
 	destroy_directio_ctx(p);
a5a7cf
 	free_io_err_stat_path(p);
a5a7cf
@@ -594,7 +567,7 @@ static void poll_async_io_timeout(void)
a5a7cf
 
a5a7cf
 	if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0)
a5a7cf
 		return;
a5a7cf
-	vector_foreach_slot(paths->pathvec, pp, i) {
a5a7cf
+	vector_foreach_slot(io_err_pathvec, pp, i) {
a5a7cf
 		for (j = 0; j < CONCUR_NR_EVENT; j++) {
a5a7cf
 			rc = try_to_cancel_timeout_io(pp->dio_ctx_array + j,
a5a7cf
 					&curr_time, pp->devname);
a5a7cf
@@ -640,7 +613,7 @@ static void handle_async_io_done_event(struct io_event *io_evt)
a5a7cf
 	int rc = PATH_UNCHECKED;
a5a7cf
 	int i, j;
a5a7cf
 
a5a7cf
-	vector_foreach_slot(paths->pathvec, pp, i) {
a5a7cf
+	vector_foreach_slot(io_err_pathvec, pp, i) {
a5a7cf
 		for (j = 0; j < CONCUR_NR_EVENT; j++) {
a5a7cf
 			ct = pp->dio_ctx_array + j;
a5a7cf
 			if (&ct->io == io_evt->obj) {
a5a7cf
@@ -674,19 +647,14 @@ static void service_paths(void)
a5a7cf
 	struct io_err_stat_path *pp;
a5a7cf
 	int i;
a5a7cf
 
a5a7cf
-	pthread_mutex_lock(&paths->mutex);
a5a7cf
-	vector_foreach_slot(paths->pathvec, pp, i) {
a5a7cf
+	pthread_mutex_lock(&io_err_pathvec_lock);
a5a7cf
+	vector_foreach_slot(io_err_pathvec, pp, i) {
a5a7cf
 		send_batch_async_ios(pp);
a5a7cf
 		process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp->devname);
a5a7cf
 		poll_async_io_timeout();
a5a7cf
 		poll_io_err_stat(vecs, pp);
a5a7cf
 	}
a5a7cf
-	pthread_mutex_unlock(&paths->mutex);
a5a7cf
-}
a5a7cf
-
a5a7cf
-static void cleanup_unlock(void *arg)
a5a7cf
-{
a5a7cf
-	pthread_mutex_unlock((pthread_mutex_t*) arg);
a5a7cf
+	pthread_mutex_unlock(&io_err_pathvec_lock);
a5a7cf
 }
a5a7cf
 
a5a7cf
 static void cleanup_exited(__attribute__((unused)) void *arg)
a5a7cf
@@ -744,12 +712,17 @@ int start_io_err_stat_thread(void *data)
a5a7cf
 		io_err_stat_log(4, "io_setup failed");
a5a7cf
 		return 1;
a5a7cf
 	}
a5a7cf
-	paths = alloc_pathvec();
a5a7cf
-	if (!paths)
a5a7cf
+
a5a7cf
+	pthread_mutex_lock(&io_err_pathvec_lock);
a5a7cf
+	io_err_pathvec = vector_alloc();
a5a7cf
+	if (!io_err_pathvec) {
a5a7cf
+		pthread_mutex_unlock(&io_err_pathvec_lock);
a5a7cf
 		goto destroy_ctx;
a5a7cf
+	}
a5a7cf
+	pthread_mutex_unlock(&io_err_pathvec_lock);
a5a7cf
 
a5a7cf
 	pthread_mutex_lock(&io_err_thread_lock);
a5a7cf
-	pthread_cleanup_push(cleanup_unlock, &io_err_thread_lock);
a5a7cf
+	pthread_cleanup_push(cleanup_mutex, &io_err_thread_lock);
a5a7cf
 
a5a7cf
 	ret = pthread_create(&io_err_stat_thr, &io_err_stat_attr,
a5a7cf
 			     io_err_stat_loop, data);
a5a7cf
@@ -769,7 +742,10 @@ int start_io_err_stat_thread(void *data)
a5a7cf
 	return 0;
a5a7cf
 
a5a7cf
 out_free:
a5a7cf
-	free_io_err_pathvec(paths);
a5a7cf
+	pthread_mutex_lock(&io_err_pathvec_lock);
a5a7cf
+	vector_free(io_err_pathvec);
a5a7cf
+	io_err_pathvec = NULL;
a5a7cf
+	pthread_mutex_unlock(&io_err_pathvec_lock);
a5a7cf
 destroy_ctx:
a5a7cf
 	io_destroy(ioctx);
a5a7cf
 	io_err_stat_log(0, "failed to start io_error statistic thread");
a5a7cf
@@ -785,6 +761,6 @@ void stop_io_err_stat_thread(void)
a5a7cf
 		pthread_cancel(io_err_stat_thr);
a5a7cf
 
a5a7cf
 	pthread_join(io_err_stat_thr, NULL);
a5a7cf
-	free_io_err_pathvec(paths);
a5a7cf
+	free_io_err_pathvec();
a5a7cf
 	io_destroy(ioctx);
a5a7cf
 }
a5a7cf
diff --git a/libmultipath/util.c b/libmultipath/util.c
a5a7cf
index 51c38c87..dd30a46e 100644
a5a7cf
--- a/libmultipath/util.c
a5a7cf
+++ b/libmultipath/util.c
a5a7cf
@@ -469,3 +469,8 @@ void close_fd(void *arg)
a5a7cf
 {
a5a7cf
 	close((long)arg);
a5a7cf
 }
a5a7cf
+
a5a7cf
+void cleanup_mutex(void *arg)
a5a7cf
+{
a5a7cf
+	pthread_mutex_unlock(arg);
a5a7cf
+}
a5a7cf
diff --git a/libmultipath/util.h b/libmultipath/util.h
a5a7cf
index 56bd78c7..ce277680 100644
a5a7cf
--- a/libmultipath/util.h
a5a7cf
+++ b/libmultipath/util.h
a5a7cf
@@ -44,6 +44,7 @@ void set_max_fds(rlim_t max_fds);
a5a7cf
 	pthread_cleanup_push(((void (*)(void *))&f), (arg))
a5a7cf
 
a5a7cf
 void close_fd(void *arg);
a5a7cf
+void cleanup_mutex(void *arg);
a5a7cf
 
a5a7cf
 struct scandir_result {
a5a7cf
 	struct dirent **di;
a5a7cf
-- 
a5a7cf
2.17.2
a5a7cf