Blame SOURCES/0054-multipathd-avoid-io_err_stat-crash-during-shutdown.patch

b7337d
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
b7337d
From: Benjamin Marzinski <bmarzins@redhat.com>
b7337d
Date: Thu, 14 Jan 2021 20:20:23 -0600
b7337d
Subject: [PATCH] multipathd: avoid io_err_stat crash during shutdown
b7337d
b7337d
The checker thread is reponsible for enqueueing paths for the
b7337d
io_err_stat thread to check. During shutdown, the io_err_stat thread is
b7337d
shut down and cleaned up before the checker thread.  There is no code
b7337d
to make sure that the checker thread isn't accessing the io_err_stat
b7337d
pathvec or its mutex while they are being freed, which can lead to
b7337d
memory corruption crashes.
b7337d
b7337d
To solve this, get rid of the io_err_stat_pathvec structure, and
b7337d
statically define the mutex.  This means that the mutex is always valid
b7337d
to access, and the io_err_stat pathvec can only be accessed while
b7337d
holding it.  If the io_err_stat thread has already been cleaned up
b7337d
when the checker tries to access the pathvec, it will be NULL, and the
b7337d
checker will simply fail to enqueue the path.
b7337d
b7337d
This change also fixes a bug in free_io_err_pathvec(), which previously
b7337d
only attempted to free the pathvec if it was not set, instead of when it
b7337d
was set.
b7337d
b7337d
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
b7337d
Reviewed-by: Martin Wilck <mwilck@suse.com>
b7337d
---
b7337d
 libmultipath/io_err_stat.c | 112 +++++++++++++++----------------------
b7337d
 libmultipath/util.c        |   5 ++
b7337d
 libmultipath/util.h        |   1 +
b7337d
 3 files changed, 50 insertions(+), 68 deletions(-)
b7337d
b7337d
diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
b7337d
index 449760a0..f6c564f0 100644
b7337d
--- a/libmultipath/io_err_stat.c
b7337d
+++ b/libmultipath/io_err_stat.c
b7337d
@@ -34,6 +34,7 @@
b7337d
 #include "lock.h"
b7337d
 #include "time-util.h"
b7337d
 #include "io_err_stat.h"
b7337d
+#include "util.h"
b7337d
 
b7337d
 #define IOTIMEOUT_SEC			60
b7337d
 #define TIMEOUT_NO_IO_NSEC		10000000 /*10ms = 10000000ns*/
b7337d
@@ -46,12 +47,6 @@
b7337d
 #define io_err_stat_log(prio, fmt, args...) \
b7337d
 	condlog(prio, "io error statistic: " fmt, ##args)
b7337d
 
b7337d
-
b7337d
-struct io_err_stat_pathvec {
b7337d
-	pthread_mutex_t mutex;
b7337d
-	vector		pathvec;
b7337d
-};
b7337d
-
b7337d
 struct dio_ctx {
b7337d
 	struct timespec	io_starttime;
b7337d
 	unsigned int	blksize;
b7337d
@@ -76,9 +71,10 @@ pthread_attr_t		io_err_stat_attr;
b7337d
 
b7337d
 static pthread_mutex_t io_err_thread_lock = PTHREAD_MUTEX_INITIALIZER;
b7337d
 static pthread_cond_t io_err_thread_cond = PTHREAD_COND_INITIALIZER;
b7337d
+static pthread_mutex_t io_err_pathvec_lock = PTHREAD_MUTEX_INITIALIZER;
b7337d
 static int io_err_thread_running = 0;
b7337d
 
b7337d
-static struct io_err_stat_pathvec *paths;
b7337d
+static vector io_err_pathvec;
b7337d
 struct vectors *vecs;
b7337d
 io_context_t	ioctx;
b7337d
 
b7337d
@@ -208,46 +204,23 @@ static void free_io_err_stat_path(struct io_err_stat_path *p)
b7337d
 	FREE(p);
b7337d
 }
b7337d
 
b7337d
-static struct io_err_stat_pathvec *alloc_pathvec(void)
b7337d
-{
b7337d
-	struct io_err_stat_pathvec *p;
b7337d
-	int r;
b7337d
-
b7337d
-	p = (struct io_err_stat_pathvec *)MALLOC(sizeof(*p));
b7337d
-	if (!p)
b7337d
-		return NULL;
b7337d
-	p->pathvec = vector_alloc();
b7337d
-	if (!p->pathvec)
b7337d
-		goto out_free_struct_pathvec;
b7337d
-	r = pthread_mutex_init(&p->mutex, NULL);
b7337d
-	if (r)
b7337d
-		goto out_free_member_pathvec;
b7337d
-
b7337d
-	return p;
b7337d
-
b7337d
-out_free_member_pathvec:
b7337d
-	vector_free(p->pathvec);
b7337d
-out_free_struct_pathvec:
b7337d
-	FREE(p);
b7337d
-	return NULL;
b7337d
-}
b7337d
-
b7337d
-static void free_io_err_pathvec(struct io_err_stat_pathvec *p)
b7337d
+static void free_io_err_pathvec(void)
b7337d
 {
b7337d
 	struct io_err_stat_path *path;
b7337d
 	int i;
b7337d
 
b7337d
-	if (!p)
b7337d
-		return;
b7337d
-	pthread_mutex_destroy(&p->mutex);
b7337d
-	if (!p->pathvec) {
b7337d
-		vector_foreach_slot(p->pathvec, path, i) {
b7337d
-			destroy_directio_ctx(path);
b7337d
-			free_io_err_stat_path(path);
b7337d
-		}
b7337d
-		vector_free(p->pathvec);
b7337d
+	pthread_mutex_lock(&io_err_pathvec_lock);
b7337d
+	pthread_cleanup_push(cleanup_mutex, &io_err_pathvec_lock);
b7337d
+	if (!io_err_pathvec)
b7337d
+		goto out;
b7337d
+	vector_foreach_slot(io_err_pathvec, path, i) {
b7337d
+		destroy_directio_ctx(path);
b7337d
+		free_io_err_stat_path(path);
b7337d
 	}
b7337d
-	FREE(p);
b7337d
+	vector_free(io_err_pathvec);
b7337d
+	io_err_pathvec = NULL;
b7337d
+out:
b7337d
+	pthread_cleanup_pop(1);
b7337d
 }
b7337d
 
b7337d
 /*
b7337d
@@ -259,13 +232,13 @@ static int enqueue_io_err_stat_by_path(struct path *path)
b7337d
 {
b7337d
 	struct io_err_stat_path *p;
b7337d
 
b7337d
-	pthread_mutex_lock(&paths->mutex);
b7337d
-	p = find_err_path_by_dev(paths->pathvec, path->dev);
b7337d
+	pthread_mutex_lock(&io_err_pathvec_lock);
b7337d
+	p = find_err_path_by_dev(io_err_pathvec, path->dev);
b7337d
 	if (p) {
b7337d
-		pthread_mutex_unlock(&paths->mutex);
b7337d
+		pthread_mutex_unlock(&io_err_pathvec_lock);
b7337d
 		return 0;
b7337d
 	}
b7337d
-	pthread_mutex_unlock(&paths->mutex);
b7337d
+	pthread_mutex_unlock(&io_err_pathvec_lock);
b7337d
 
b7337d
 	p = alloc_io_err_stat_path();
b7337d
 	if (!p)
b7337d
@@ -277,18 +250,18 @@ static int enqueue_io_err_stat_by_path(struct path *path)
b7337d
 
b7337d
 	if (setup_directio_ctx(p))
b7337d
 		goto free_ioerr_path;
b7337d
-	pthread_mutex_lock(&paths->mutex);
b7337d
-	if (!vector_alloc_slot(paths->pathvec))
b7337d
+	pthread_mutex_lock(&io_err_pathvec_lock);
b7337d
+	if (!vector_alloc_slot(io_err_pathvec))
b7337d
 		goto unlock_destroy;
b7337d
-	vector_set_slot(paths->pathvec, p);
b7337d
-	pthread_mutex_unlock(&paths->mutex);
b7337d
+	vector_set_slot(io_err_pathvec, p);
b7337d
+	pthread_mutex_unlock(&io_err_pathvec_lock);
b7337d
 
b7337d
 	io_err_stat_log(2, "%s: enqueue path %s to check",
b7337d
 			path->mpp->alias, path->dev);
b7337d
 	return 0;
b7337d
 
b7337d
 unlock_destroy:
b7337d
-	pthread_mutex_unlock(&paths->mutex);
b7337d
+	pthread_mutex_unlock(&io_err_pathvec_lock);
b7337d
 	destroy_directio_ctx(p);
b7337d
 free_ioerr_path:
b7337d
 	free_io_err_stat_path(p);
b7337d
@@ -421,9 +394,9 @@ static int delete_io_err_stat_by_addr(struct io_err_stat_path *p)
b7337d
 {
b7337d
 	int i;
b7337d
 
b7337d
-	i = find_slot(paths->pathvec, p);
b7337d
+	i = find_slot(io_err_pathvec, p);
b7337d
 	if (i != -1)
b7337d
-		vector_del_slot(paths->pathvec, i);
b7337d
+		vector_del_slot(io_err_pathvec, i);
b7337d
 
b7337d
 	destroy_directio_ctx(p);
b7337d
 	free_io_err_stat_path(p);
b7337d
@@ -594,7 +567,7 @@ static void poll_async_io_timeout(void)
b7337d
 
b7337d
 	if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0)
b7337d
 		return;
b7337d
-	vector_foreach_slot(paths->pathvec, pp, i) {
b7337d
+	vector_foreach_slot(io_err_pathvec, pp, i) {
b7337d
 		for (j = 0; j < CONCUR_NR_EVENT; j++) {
b7337d
 			rc = try_to_cancel_timeout_io(pp->dio_ctx_array + j,
b7337d
 					&curr_time, pp->devname);
b7337d
@@ -640,7 +613,7 @@ static void handle_async_io_done_event(struct io_event *io_evt)
b7337d
 	int rc = PATH_UNCHECKED;
b7337d
 	int i, j;
b7337d
 
b7337d
-	vector_foreach_slot(paths->pathvec, pp, i) {
b7337d
+	vector_foreach_slot(io_err_pathvec, pp, i) {
b7337d
 		for (j = 0; j < CONCUR_NR_EVENT; j++) {
b7337d
 			ct = pp->dio_ctx_array + j;
b7337d
 			if (&ct->io == io_evt->obj) {
b7337d
@@ -674,19 +647,14 @@ static void service_paths(void)
b7337d
 	struct io_err_stat_path *pp;
b7337d
 	int i;
b7337d
 
b7337d
-	pthread_mutex_lock(&paths->mutex);
b7337d
-	vector_foreach_slot(paths->pathvec, pp, i) {
b7337d
+	pthread_mutex_lock(&io_err_pathvec_lock);
b7337d
+	vector_foreach_slot(io_err_pathvec, pp, i) {
b7337d
 		send_batch_async_ios(pp);
b7337d
 		process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp->devname);
b7337d
 		poll_async_io_timeout();
b7337d
 		poll_io_err_stat(vecs, pp);
b7337d
 	}
b7337d
-	pthread_mutex_unlock(&paths->mutex);
b7337d
-}
b7337d
-
b7337d
-static void cleanup_unlock(void *arg)
b7337d
-{
b7337d
-	pthread_mutex_unlock((pthread_mutex_t*) arg);
b7337d
+	pthread_mutex_unlock(&io_err_pathvec_lock);
b7337d
 }
b7337d
 
b7337d
 static void cleanup_exited(__attribute__((unused)) void *arg)
b7337d
@@ -744,12 +712,17 @@ int start_io_err_stat_thread(void *data)
b7337d
 		io_err_stat_log(4, "io_setup failed");
b7337d
 		return 1;
b7337d
 	}
b7337d
-	paths = alloc_pathvec();
b7337d
-	if (!paths)
b7337d
+
b7337d
+	pthread_mutex_lock(&io_err_pathvec_lock);
b7337d
+	io_err_pathvec = vector_alloc();
b7337d
+	if (!io_err_pathvec) {
b7337d
+		pthread_mutex_unlock(&io_err_pathvec_lock);
b7337d
 		goto destroy_ctx;
b7337d
+	}
b7337d
+	pthread_mutex_unlock(&io_err_pathvec_lock);
b7337d
 
b7337d
 	pthread_mutex_lock(&io_err_thread_lock);
b7337d
-	pthread_cleanup_push(cleanup_unlock, &io_err_thread_lock);
b7337d
+	pthread_cleanup_push(cleanup_mutex, &io_err_thread_lock);
b7337d
 
b7337d
 	ret = pthread_create(&io_err_stat_thr, &io_err_stat_attr,
b7337d
 			     io_err_stat_loop, data);
b7337d
@@ -769,7 +742,10 @@ int start_io_err_stat_thread(void *data)
b7337d
 	return 0;
b7337d
 
b7337d
 out_free:
b7337d
-	free_io_err_pathvec(paths);
b7337d
+	pthread_mutex_lock(&io_err_pathvec_lock);
b7337d
+	vector_free(io_err_pathvec);
b7337d
+	io_err_pathvec = NULL;
b7337d
+	pthread_mutex_unlock(&io_err_pathvec_lock);
b7337d
 destroy_ctx:
b7337d
 	io_destroy(ioctx);
b7337d
 	io_err_stat_log(0, "failed to start io_error statistic thread");
b7337d
@@ -785,6 +761,6 @@ void stop_io_err_stat_thread(void)
b7337d
 		pthread_cancel(io_err_stat_thr);
b7337d
 
b7337d
 	pthread_join(io_err_stat_thr, NULL);
b7337d
-	free_io_err_pathvec(paths);
b7337d
+	free_io_err_pathvec();
b7337d
 	io_destroy(ioctx);
b7337d
 }
b7337d
diff --git a/libmultipath/util.c b/libmultipath/util.c
b7337d
index 51c38c87..dd30a46e 100644
b7337d
--- a/libmultipath/util.c
b7337d
+++ b/libmultipath/util.c
b7337d
@@ -469,3 +469,8 @@ void close_fd(void *arg)
b7337d
 {
b7337d
 	close((long)arg);
b7337d
 }
b7337d
+
b7337d
+void cleanup_mutex(void *arg)
b7337d
+{
b7337d
+	pthread_mutex_unlock(arg);
b7337d
+}
b7337d
diff --git a/libmultipath/util.h b/libmultipath/util.h
b7337d
index 56bd78c7..ce277680 100644
b7337d
--- a/libmultipath/util.h
b7337d
+++ b/libmultipath/util.h
b7337d
@@ -44,6 +44,7 @@ void set_max_fds(rlim_t max_fds);
b7337d
 	pthread_cleanup_push(((void (*)(void *))&f), (arg))
b7337d
 
b7337d
 void close_fd(void *arg);
b7337d
+void cleanup_mutex(void *arg);
b7337d
 
b7337d
 struct scandir_result {
b7337d
 	struct dirent **di;
b7337d
-- 
b7337d
2.17.2
b7337d