Blame SOURCES/nfs-utils-2.3.3-gssd-timeout-thread.patch

4cb8ad
diff -up nfs-utils-2.3.3/nfs.conf.orig nfs-utils-2.3.3/nfs.conf
4cb8ad
--- nfs-utils-2.3.3/nfs.conf.orig	2021-07-19 09:45:40.441448059 -0400
4cb8ad
+++ nfs-utils-2.3.3/nfs.conf	2021-07-19 12:08:55.314182838 -0400
4cb8ad
@@ -22,6 +22,8 @@ use-gss-proxy=1
4cb8ad
 # cred-cache-directory=
4cb8ad
 # preferred-realm=
4cb8ad
 # set-home=1
4cb8ad
+# upcall-timeout=30
4cb8ad
+# cancel-timed-out-upcalls=0
4cb8ad
 #
4cb8ad
 [lockd]
4cb8ad
 # port=0
4cb8ad
diff -up nfs-utils-2.3.3/utils/gssd/gssd.c.orig nfs-utils-2.3.3/utils/gssd/gssd.c
4cb8ad
--- nfs-utils-2.3.3/utils/gssd/gssd.c.orig	2021-07-19 09:45:40.448448246 -0400
4cb8ad
+++ nfs-utils-2.3.3/utils/gssd/gssd.c	2021-07-19 12:08:55.315182865 -0400
4cb8ad
@@ -96,8 +96,29 @@ pthread_mutex_t clp_lock = PTHREAD_MUTEX
4cb8ad
 static bool signal_received = false;
4cb8ad
 static struct event_base *evbase = NULL;
4cb8ad
 
4cb8ad
+int upcall_timeout = DEF_UPCALL_TIMEOUT;
4cb8ad
+static bool cancel_timed_out_upcalls = false;
4cb8ad
+
4cb8ad
 TAILQ_HEAD(topdir_list_head, topdir) topdir_list;
4cb8ad
 
4cb8ad
+/*
4cb8ad
+ * active_thread_list:
4cb8ad
+ *
4cb8ad
+ * 	used to track upcalls for timeout purposes.
4cb8ad
+ *
4cb8ad
+ * 	protected by the active_thread_list_lock mutex.
4cb8ad
+ *
4cb8ad
+ * 	upcall_thread_info structures are added to the tail of the list
4cb8ad
+ * 	by start_upcall_thread(), so entries closer to the head of the list
4cb8ad
+ * 	will be closer to hitting the upcall timeout.
4cb8ad
+ *
4cb8ad
+ * 	upcall_thread_info structures are removed from the list upon a
4cb8ad
+ * 	successful join of the upcall thread by the watchdog thread (via
4cb8ad
+ * 	scan_active_thread_list()).
4cb8ad
+ */
4cb8ad
+TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list;
4cb8ad
+pthread_mutex_t active_thread_list_lock = PTHREAD_MUTEX_INITIALIZER;
4cb8ad
+
4cb8ad
 struct topdir {
4cb8ad
 	TAILQ_ENTRY(topdir) list;
4cb8ad
 	TAILQ_HEAD(clnt_list_head, clnt_info) clnt_list;
4cb8ad
@@ -436,6 +457,138 @@ gssd_clnt_krb5_cb(int UNUSED(fd), short
4cb8ad
 	handle_krb5_upcall(clp);
4cb8ad
 }
4cb8ad
 
4cb8ad
+/*
4cb8ad
+ * scan_active_thread_list:
4cb8ad
+ *
4cb8ad
+ * Walks the active_thread_list, trying to join as many upcall threads as
4cb8ad
+ * possible.  For threads that have terminated, the corresponding
4cb8ad
+ * upcall_thread_info will be removed from the list and freed.  Threads that
4cb8ad
+ * are still busy and have exceeded the upcall_timeout will cause an error to
4cb8ad
+ * be logged and may be canceled (depending on the value of
4cb8ad
+ * cancel_timed_out_upcalls).
4cb8ad
+ *
4cb8ad
+ * Returns the number of seconds that the watchdog thread should wait before
4cb8ad
+ * calling scan_active_thread_list() again.
4cb8ad
+ */
4cb8ad
+static int
4cb8ad
+scan_active_thread_list(void)
4cb8ad
+{
4cb8ad
+	struct upcall_thread_info *info;
4cb8ad
+	struct timespec now;
4cb8ad
+	unsigned int sleeptime;
4cb8ad
+	bool sleeptime_set = false;
4cb8ad
+	int err;
4cb8ad
+	void *tret, *saveprev;
4cb8ad
+
4cb8ad
+	sleeptime = upcall_timeout;
4cb8ad
+	pthread_mutex_lock(&active_thread_list_lock);
4cb8ad
+	clock_gettime(CLOCK_MONOTONIC, &now);
4cb8ad
+	TAILQ_FOREACH(info, &active_thread_list, list) {
4cb8ad
+		err = pthread_tryjoin_np(info->tid, &tret);
4cb8ad
+		switch (err) {
4cb8ad
+		case 0:
4cb8ad
+			/*
4cb8ad
+			 * The upcall thread has either completed successfully, or
4cb8ad
+			 * has been canceled _and_ has acted on the cancellation request
4cb8ad
+			 * (i.e. has hit a cancellation point).  We can now remove the
4cb8ad
+			 * upcall_thread_info from the list and free it.
4cb8ad
+			 */
4cb8ad
+			if (tret == PTHREAD_CANCELED)
4cb8ad
+				printerr(3, "watchdog: thread id 0x%lx cancelled successfully\n",
4cb8ad
+						info->tid);
4cb8ad
+			saveprev = info->list.tqe_prev;
4cb8ad
+			TAILQ_REMOVE(&active_thread_list, info, list);
4cb8ad
+			free(info);
4cb8ad
+			info = saveprev;
4cb8ad
+			break;
4cb8ad
+		case EBUSY:
4cb8ad
+			/*
4cb8ad
+			 * The upcall thread is still running.  If the timeout has expired
4cb8ad
+			 * then we either cancel the thread, log an error, and do an error
4cb8ad
+			 * downcall to the kernel (cancel_timed_out_upcalls=true) or simply
4cb8ad
+			 * log an error (cancel_timed_out_upcalls=false).  In either case,
4cb8ad
+			 * the error is logged only once.
4cb8ad
+			 */
4cb8ad
+			if (now.tv_sec >= info->timeout.tv_sec) {
4cb8ad
+				if (cancel_timed_out_upcalls && !(info->flags & UPCALL_THREAD_CANCELED)) {
4cb8ad
+					printerr(0, "watchdog: thread id 0x%lx timed out\n",
4cb8ad
+							info->tid);
4cb8ad
+					pthread_cancel(info->tid);
4cb8ad
+					info->flags |= (UPCALL_THREAD_CANCELED|UPCALL_THREAD_WARNED);
4cb8ad
+					do_error_downcall(info->fd, info->uid, -ETIMEDOUT);
4cb8ad
+				} else {
4cb8ad
+					if (!(info->flags & UPCALL_THREAD_WARNED)) {
4cb8ad
+						printerr(0, "watchdog: thread id 0x%lx running for %ld seconds\n",
4cb8ad
+								info->tid,
4cb8ad
+								now.tv_sec - info->timeout.tv_sec + upcall_timeout);
4cb8ad
+						info->flags |= UPCALL_THREAD_WARNED;
4cb8ad
+					}
4cb8ad
+				}
4cb8ad
+			} else if (!sleeptime_set) {
4cb8ad
+			/*
4cb8ad
+			 * The upcall thread is still running, but the timeout has not yet
4cb8ad
+			 * expired.  Calculate the time remaining until the timeout will
4cb8ad
+			 * expire.  This is the amount of time the watchdog thread will
4cb8ad
+			 * wait before running again.  We only need to do this for the busy
4cb8ad
+			 * thread closest to the head of the list - entries appearing later
4cb8ad
+			 * in the list will time out later.
4cb8ad
+			 */
4cb8ad
+				sleeptime = info->timeout.tv_sec - now.tv_sec;
4cb8ad
+				sleeptime_set = true;
4cb8ad
+			}
4cb8ad
+			break;
4cb8ad
+		default:
4cb8ad
+			/* EDEADLK, EINVAL, and ESRCH... none of which should happen! */
4cb8ad
+			printerr(0, "watchdog: attempt to join thread id 0x%lx returned %d (%s)!\n",
4cb8ad
+					info->tid, err, strerror(err));
4cb8ad
+			break;
4cb8ad
+		}
4cb8ad
+	}
4cb8ad
+	pthread_mutex_unlock(&active_thread_list_lock);
4cb8ad
+
4cb8ad
+	return sleeptime;
4cb8ad
+}
4cb8ad
+
4cb8ad
+static void *
4cb8ad
+watchdog_thread_fn(void *UNUSED(arg))
4cb8ad
+{
4cb8ad
+	unsigned int sleeptime;
4cb8ad
+
4cb8ad
+	for (;;) {
4cb8ad
+		sleeptime = scan_active_thread_list();
4cb8ad
+		printerr(4, "watchdog: sleeping %u secs\n", sleeptime);
4cb8ad
+		sleep(sleeptime);
4cb8ad
+	}
4cb8ad
+	return (void *)0;
4cb8ad
+}
4cb8ad
+
4cb8ad
+static int
4cb8ad
+start_watchdog_thread(void)
4cb8ad
+{
4cb8ad
+	pthread_attr_t attr;
4cb8ad
+	pthread_t th;
4cb8ad
+	int ret;
4cb8ad
+
4cb8ad
+	ret = pthread_attr_init(&attr);
4cb8ad
+	if (ret != 0) {
4cb8ad
+		printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n",
4cb8ad
+			 ret, strerror(errno));
4cb8ad
+		return ret;
4cb8ad
+	}
4cb8ad
+	ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
4cb8ad
+	if (ret != 0) {
4cb8ad
+		printerr(0, "ERROR: failed to create pthread attr: ret %d: %s\n",
4cb8ad
+			 ret, strerror(errno));
4cb8ad
+		return ret;
4cb8ad
+	}
4cb8ad
+	ret = pthread_create(&th, &attr, watchdog_thread_fn, NULL);
4cb8ad
+	if (ret != 0) {
4cb8ad
+		printerr(0, "ERROR: pthread_create failed: ret %d: %s\n",
4cb8ad
+			 ret, strerror(errno));
4cb8ad
+	}
4cb8ad
+	return ret;
4cb8ad
+}
4cb8ad
+
4cb8ad
 static struct clnt_info *
4cb8ad
 gssd_get_clnt(struct topdir *tdi, const char *name)
4cb8ad
 {
4cb8ad
@@ -810,7 +963,7 @@ sig_die(int signal)
4cb8ad
 static void
4cb8ad
 usage(char *progname)
4cb8ad
 {
4cb8ad
-	fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H]\n",
4cb8ad
+	fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H] [-U upcall timeout] [-C]\n",
4cb8ad
 		progname);
4cb8ad
 	exit(1);
4cb8ad
 }
4cb8ad
@@ -831,6 +984,9 @@ read_gss_conf(void)
4cb8ad
 #endif
4cb8ad
 	context_timeout = conf_get_num("gssd", "context-timeout", context_timeout);
4cb8ad
 	rpc_timeout = conf_get_num("gssd", "rpc-timeout", rpc_timeout);
4cb8ad
+	upcall_timeout = conf_get_num("gssd", "upcall-timeout", upcall_timeout);
4cb8ad
+	cancel_timed_out_upcalls = conf_get_bool("gssd", "cancel-timed-out-upcalls",
4cb8ad
+						cancel_timed_out_upcalls);
4cb8ad
 	s = conf_get_str("gssd", "pipefs-directory");
4cb8ad
 	if (!s)
4cb8ad
 		s = conf_get_str("general", "pipefs-directory");
4cb8ad
@@ -872,7 +1028,7 @@ main(int argc, char *argv[])
4cb8ad
 	verbosity = conf_get_num("gssd", "verbosity", verbosity);
4cb8ad
 	rpc_verbosity = conf_get_num("gssd", "rpc-verbosity", rpc_verbosity);
4cb8ad
 
4cb8ad
-	while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:")) != -1) {
4cb8ad
+	while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:U:C")) != -1) {
4cb8ad
 		switch (opt) {
4cb8ad
 			case 'f':
4cb8ad
 				fg = 1;
4cb8ad
@@ -923,6 +1079,12 @@ main(int argc, char *argv[])
4cb8ad
 			case 'H':
4cb8ad
 				set_home = false;
4cb8ad
 				break;
4cb8ad
+			case 'U':
4cb8ad
+				upcall_timeout = atoi(optarg);
4cb8ad
+				break;
4cb8ad
+			case 'C':
4cb8ad
+				cancel_timed_out_upcalls = true;
4cb8ad
+				break;
4cb8ad
 			default:
4cb8ad
 				usage(argv[0]);
4cb8ad
 				break;
4cb8ad
@@ -995,6 +1157,11 @@ main(int argc, char *argv[])
4cb8ad
 	else
4cb8ad
 		progname = argv[0];
4cb8ad
 
4cb8ad
+	if (upcall_timeout > MAX_UPCALL_TIMEOUT)
4cb8ad
+		upcall_timeout = MAX_UPCALL_TIMEOUT;
4cb8ad
+	else if (upcall_timeout < MIN_UPCALL_TIMEOUT)
4cb8ad
+		upcall_timeout = MIN_UPCALL_TIMEOUT;
4cb8ad
+
4cb8ad
 	initerr(progname, verbosity, fg);
4cb8ad
 #ifdef HAVE_LIBTIRPC_SET_DEBUG
4cb8ad
 	/*
4cb8ad
@@ -1045,6 +1212,14 @@ main(int argc, char *argv[])
4cb8ad
 			       gssd_inotify_cb, NULL);
4cb8ad
 	event_add(inotify_ev, NULL);
4cb8ad
 
4cb8ad
+	TAILQ_INIT(&active_thread_list);
4cb8ad
+
4cb8ad
+	rc = start_watchdog_thread();
4cb8ad
+	if (rc != 0) {
4cb8ad
+		printerr(0, "ERROR: failed to start watchdog thread: %d\n", rc);
4cb8ad
+		exit(EXIT_FAILURE);
4cb8ad
+	}
4cb8ad
+
4cb8ad
 	TAILQ_INIT(&topdir_list);
4cb8ad
 	gssd_scan();
4cb8ad
 	daemon_ready();
4cb8ad
diff -up nfs-utils-2.3.3/utils/gssd/gssd.h.orig nfs-utils-2.3.3/utils/gssd/gssd.h
4cb8ad
--- nfs-utils-2.3.3/utils/gssd/gssd.h.orig	2021-07-19 09:45:40.449448272 -0400
4cb8ad
+++ nfs-utils-2.3.3/utils/gssd/gssd.h	2021-07-19 12:08:55.315182865 -0400
4cb8ad
@@ -50,6 +50,12 @@
4cb8ad
 #define GSSD_DEFAULT_KEYTAB_FILE		"/etc/krb5.keytab"
4cb8ad
 #define GSSD_SERVICE_NAME			"nfs"
4cb8ad
 #define RPC_CHAN_BUF_SIZE			32768
4cb8ad
+
4cb8ad
+/* timeouts are in seconds */
4cb8ad
+#define MIN_UPCALL_TIMEOUT			5
4cb8ad
+#define DEF_UPCALL_TIMEOUT			30
4cb8ad
+#define MAX_UPCALL_TIMEOUT			600
4cb8ad
+
4cb8ad
 /*
4cb8ad
  * The gss mechanisms that we can handle
4cb8ad
  */
4cb8ad
@@ -91,10 +97,22 @@ struct clnt_upcall_info {
4cb8ad
 	char			*service;
4cb8ad
 };
4cb8ad
 
4cb8ad
+struct upcall_thread_info {
4cb8ad
+	TAILQ_ENTRY(upcall_thread_info) list;
4cb8ad
+	pthread_t		tid;
4cb8ad
+	struct timespec		timeout;
4cb8ad
+	uid_t			uid;
4cb8ad
+	int			fd;
4cb8ad
+	unsigned short		flags;
4cb8ad
+#define UPCALL_THREAD_CANCELED	0x0001
4cb8ad
+#define UPCALL_THREAD_WARNED	0x0002
4cb8ad
+};
4cb8ad
+
4cb8ad
 void handle_krb5_upcall(struct clnt_info *clp);
4cb8ad
 void handle_gssd_upcall(struct clnt_info *clp);
4cb8ad
 void free_upcall_info(struct clnt_upcall_info *info);
4cb8ad
 void gssd_free_client(struct clnt_info *clp);
4cb8ad
+int do_error_downcall(int k5_fd, uid_t uid, int err);
4cb8ad
 
4cb8ad
 
4cb8ad
 #endif /* _RPC_GSSD_H_ */
4cb8ad
diff -up nfs-utils-2.3.3/utils/gssd/gssd.man.orig nfs-utils-2.3.3/utils/gssd/gssd.man
4cb8ad
--- nfs-utils-2.3.3/utils/gssd/gssd.man.orig	2021-07-19 09:45:40.443448112 -0400
4cb8ad
+++ nfs-utils-2.3.3/utils/gssd/gssd.man	2021-07-19 12:08:55.315182865 -0400
4cb8ad
@@ -8,7 +8,7 @@
4cb8ad
 rpc.gssd \- RPCSEC_GSS daemon
4cb8ad
 .SH SYNOPSIS
4cb8ad
 .B rpc.gssd
4cb8ad
-.RB [ \-DfMnlvrH ]
4cb8ad
+.RB [ \-DfMnlvrHC ]
4cb8ad
 .RB [ \-k
4cb8ad
 .IR keytab ]
4cb8ad
 .RB [ \-p
4cb8ad
@@ -17,6 +17,10 @@ rpc.gssd \- RPCSEC_GSS daemon
4cb8ad
 .IR ccachedir ]
4cb8ad
 .RB [ \-t
4cb8ad
 .IR timeout ]
4cb8ad
+.RB [ \-T
4cb8ad
+.IR timeout ]
4cb8ad
+.RB [ \-U
4cb8ad
+.IR timeout ]
4cb8ad
 .RB [ \-R
4cb8ad
 .IR realm ]
4cb8ad
 .SH INTRODUCTION
4cb8ad
@@ -290,7 +294,7 @@ seconds, which allows changing Kerberos
4cb8ad
 The default is no explicit timeout, which means the kernel context will live
4cb8ad
 the lifetime of the Kerberos service ticket used in its creation.
4cb8ad
 .TP
4cb8ad
-.B -T timeout
4cb8ad
+.BI "-T " timeout
4cb8ad
 Timeout, in seconds, to create an RPC connection with a server while
4cb8ad
 establishing an authenticated gss context for a user.
4cb8ad
 The default timeout is set to 5 seconds.
4cb8ad
@@ -298,6 +302,18 @@ If you get messages like "WARNING: can't
4cb8ad
 %servername% for user with uid %uid%: RPC: Remote system error -
4cb8ad
 Connection timed out", you should consider an increase of this timeout.
4cb8ad
 .TP
4cb8ad
+.BI "-U " timeout
4cb8ad
+Timeout, in seconds, for upcall threads.  Threads executing longer than
4cb8ad
+.I timeout
4cb8ad
+seconds will cause an error message to be logged.  The default
4cb8ad
+.I timeout
4cb8ad
+is 30 seconds.  The minimum is 5 seconds.  The maximum is 600 seconds.
4cb8ad
+.TP
4cb8ad
+.B -C
4cb8ad
+In addition to logging an error message for threads that have timed out,
4cb8ad
+the thread will be canceled and an error of -ETIMEDOUT will be reported
4cb8ad
+to the kernel.
4cb8ad
+.TP
4cb8ad
 .B -H
4cb8ad
 Avoids setting $HOME to "/". This allows rpc.gssd to read per user k5identity
4cb8ad
 files versus trying to read /.k5identity for each user.
4cb8ad
@@ -365,6 +381,17 @@ Equivalent to
4cb8ad
 Equivalent to
4cb8ad
 .BR -R .
4cb8ad
 .TP
4cb8ad
+.B upcall-timeout
4cb8ad
+Equivalent to
4cb8ad
+.BR -U .
4cb8ad
+.TP
4cb8ad
+.B cancel-timed-out-upcalls
4cb8ad
+Setting to
4cb8ad
+.B true
4cb8ad
+is equivalent to providing the
4cb8ad
+.B -C
4cb8ad
+flag.
4cb8ad
+.TP
4cb8ad
 .B set-home
4cb8ad
 Setting to
4cb8ad
 .B false
4cb8ad
diff -up nfs-utils-2.3.3/utils/gssd/gssd_proc.c.orig nfs-utils-2.3.3/utils/gssd/gssd_proc.c
4cb8ad
--- nfs-utils-2.3.3/utils/gssd/gssd_proc.c.orig	2021-07-19 09:45:40.449448272 -0400
4cb8ad
+++ nfs-utils-2.3.3/utils/gssd/gssd_proc.c	2021-07-19 12:08:55.316182891 -0400
4cb8ad
@@ -81,11 +81,24 @@
4cb8ad
 #include "gss_names.h"
4cb8ad
 
4cb8ad
 extern pthread_mutex_t clp_lock;
4cb8ad
+extern pthread_mutex_t active_thread_list_lock;
4cb8ad
+extern int upcall_timeout;
4cb8ad
+extern TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list;
4cb8ad
 
4cb8ad
 /* Encryption types supported by the kernel rpcsec_gss code */
4cb8ad
 int num_krb5_enctypes = 0;
4cb8ad
 krb5_enctype *krb5_enctypes = NULL;
4cb8ad
 
4cb8ad
+/* Args for the cleanup_handler() */
4cb8ad
+struct cleanup_args  {
4cb8ad
+	OM_uint32 	*min_stat;
4cb8ad
+	gss_buffer_t	acceptor;
4cb8ad
+	gss_buffer_t	token;
4cb8ad
+	struct authgss_private_data *pd;
4cb8ad
+	AUTH		**auth;
4cb8ad
+	CLIENT		**rpc_clnt;
4cb8ad
+};
4cb8ad
+
4cb8ad
 /*
4cb8ad
  * Parse the supported encryption type information
4cb8ad
  */
4cb8ad
@@ -184,7 +197,7 @@ out_err:
4cb8ad
 	return;
4cb8ad
 }
4cb8ad
 
4cb8ad
-static int
4cb8ad
+int
4cb8ad
 do_error_downcall(int k5_fd, uid_t uid, int err)
4cb8ad
 {
4cb8ad
 	char	buf[1024];
4cb8ad
@@ -604,27 +617,66 @@ out:
4cb8ad
 }
4cb8ad
 
4cb8ad
 /*
4cb8ad
+ * cleanup_handler:
4cb8ad
+ *
4cb8ad
+ * Free any resources allocated by process_krb5_upcall().
4cb8ad
+ *
4cb8ad
+ * Runs upon normal termination of process_krb5_upcall as well as if the
4cb8ad
+ * thread is canceled.
4cb8ad
+ */
4cb8ad
+static void
4cb8ad
+cleanup_handler(void *arg)
4cb8ad
+{
4cb8ad
+	struct cleanup_args *args = (struct cleanup_args *)arg;
4cb8ad
+
4cb8ad
+	gss_release_buffer(args->min_stat, args->acceptor);
4cb8ad
+	if (args->token->value)
4cb8ad
+		free(args->token->value);
4cb8ad
+#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA
4cb8ad
+	if (args->pd->pd_ctx_hndl.length != 0 || args->pd->pd_ctx != 0)
4cb8ad
+		authgss_free_private_data(args->pd);
4cb8ad
+#endif
4cb8ad
+	if (*args->auth)
4cb8ad
+		AUTH_DESTROY(*args->auth);
4cb8ad
+	if (*args->rpc_clnt)
4cb8ad
+		clnt_destroy(*args->rpc_clnt);
4cb8ad
+}
4cb8ad
+
4cb8ad
+/*
4cb8ad
+ * process_krb5_upcall:
4cb8ad
+ *
4cb8ad
  * this code uses the userland rpcsec gss library to create a krb5
4cb8ad
  * context on behalf of the kernel
4cb8ad
+ *
4cb8ad
+ * This is the meat of the upcall thread.  Note that cancelability is disabled
4cb8ad
+ * and enabled at various points to ensure that any resources reserved by the
4cb8ad
+ * lower level libraries are released safely.
4cb8ad
  */
4cb8ad
 static void
4cb8ad
-process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost,
4cb8ad
-		    char *tgtname, char *service)
4cb8ad
+process_krb5_upcall(struct clnt_upcall_info *info)
4cb8ad
 {
4cb8ad
+	struct clnt_info	*clp = info->clp;
4cb8ad
+	uid_t			uid = info->uid;
4cb8ad
+	int			fd = info->fd;
4cb8ad
+	char			*srchost = info->srchost;
4cb8ad
+	char			*tgtname = info->target;
4cb8ad
+	char			*service = info->service;
4cb8ad
 	CLIENT			*rpc_clnt = NULL;
4cb8ad
 	AUTH			*auth = NULL;
4cb8ad
 	struct authgss_private_data pd;
4cb8ad
 	gss_buffer_desc		token;
4cb8ad
-	int			err, downcall_err = -EACCES;
4cb8ad
+	int			err, downcall_err;
4cb8ad
 	OM_uint32		maj_stat, min_stat, lifetime_rec;
4cb8ad
 	gss_name_t		gacceptor = GSS_C_NO_NAME;
4cb8ad
 	gss_OID			mech;
4cb8ad
 	gss_buffer_desc		acceptor  = {0};
4cb8ad
+	struct cleanup_args cleanup_args = {&min_stat, &acceptor, &token, &pd, &auth, &rpc_clnt};
4cb8ad
 
4cb8ad
 	token.length = 0;
4cb8ad
 	token.value = NULL;
4cb8ad
 	memset(&pd, 0, sizeof(struct authgss_private_data));
4cb8ad
 
4cb8ad
+	pthread_cleanup_push(cleanup_handler, &cleanup_args);
4cb8ad
 	/*
4cb8ad
 	 * If "service" is specified, then the kernel is indicating that
4cb8ad
 	 * we must use machine credentials for this request.  (Regardless
4cb8ad
@@ -646,6 +698,8 @@ process_krb5_upcall(struct clnt_info *cl
4cb8ad
 	 * used for this case is not important.
4cb8ad
 	 *
4cb8ad
 	 */
4cb8ad
+	downcall_err = -EACCES;
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
4cb8ad
 	if (uid != 0 || (uid == 0 && root_uses_machine_creds == 0 &&
4cb8ad
 				service == NULL)) {
4cb8ad
 
4cb8ad
@@ -666,15 +720,21 @@ process_krb5_upcall(struct clnt_info *cl
4cb8ad
 			goto out_return_error;
4cb8ad
 		}
4cb8ad
 	}
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
4cb8ad
+	pthread_testcancel();
4cb8ad
 
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
4cb8ad
 	if (!authgss_get_private_data(auth, &pd)) {
4cb8ad
 		printerr(1, "WARNING: Failed to obtain authentication "
4cb8ad
 			    "data for user with uid %d for server %s\n",
4cb8ad
 			 uid, clp->servername);
4cb8ad
 		goto out_return_error;
4cb8ad
 	}
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
4cb8ad
+	pthread_testcancel();
4cb8ad
 
4cb8ad
 	/* Grab the context lifetime and acceptor name out of the ctx. */
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
4cb8ad
 	maj_stat = gss_inquire_context(&min_stat, pd.pd_ctx, NULL, &gacceptor,
4cb8ad
 				       &lifetime_rec, &mech, NULL, NULL, NULL);
4cb8ad
 
4cb8ad
@@ -686,37 +746,35 @@ process_krb5_upcall(struct clnt_info *cl
4cb8ad
 		get_hostbased_client_buffer(gacceptor, mech, &acceptor);
4cb8ad
 		gss_release_name(&min_stat, &gacceptor);
4cb8ad
 	}
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
4cb8ad
+	pthread_testcancel();
4cb8ad
 
4cb8ad
 	/*
4cb8ad
 	 * The serialization can mean turning pd.pd_ctx into a lucid context. If
4cb8ad
 	 * that happens then the pd.pd_ctx will be unusable, so we must never
4cb8ad
 	 * try to use it after this point.
4cb8ad
 	 */
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
4cb8ad
 	if (serialize_context_for_kernel(&pd.pd_ctx, &token, &krb5oid, NULL)) {
4cb8ad
 		printerr(1, "WARNING: Failed to serialize krb5 context for "
4cb8ad
 			    "user with uid %d for server %s\n",
4cb8ad
 			 uid, clp->servername);
4cb8ad
 		goto out_return_error;
4cb8ad
 	}
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
4cb8ad
+	pthread_testcancel();
4cb8ad
 
4cb8ad
 	do_downcall(fd, uid, &pd, &token, lifetime_rec, &acceptor);
4cb8ad
 
4cb8ad
 out:
4cb8ad
-	gss_release_buffer(&min_stat, &acceptor);
4cb8ad
-	if (token.value)
4cb8ad
-		free(token.value);
4cb8ad
-#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA
4cb8ad
-	if (pd.pd_ctx_hndl.length != 0 || pd.pd_ctx != 0)
4cb8ad
-		authgss_free_private_data(&pd);
4cb8ad
-#endif
4cb8ad
-	if (auth)
4cb8ad
-		AUTH_DESTROY(auth);
4cb8ad
-	if (rpc_clnt)
4cb8ad
-		clnt_destroy(rpc_clnt);
4cb8ad
+	pthread_cleanup_pop(1);
4cb8ad
 
4cb8ad
 	return;
4cb8ad
 
4cb8ad
 out_return_error:
4cb8ad
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
4cb8ad
+	pthread_testcancel();
4cb8ad
+
4cb8ad
 	do_error_downcall(fd, uid, downcall_err);
4cb8ad
 	goto out;
4cb8ad
 }
4cb8ad
@@ -782,36 +840,69 @@ void free_upcall_info(struct clnt_upcall
4cb8ad
 }
4cb8ad
 
4cb8ad
 static void
4cb8ad
-gssd_work_thread_fn(struct clnt_upcall_info *info)
4cb8ad
+cleanup_clnt_upcall_info(void *arg)
4cb8ad
 {
4cb8ad
-	process_krb5_upcall(info->clp, info->uid, info->fd, info->srchost, info->target, info->service);
4cb8ad
+	struct clnt_upcall_info *info = (struct clnt_upcall_info *)arg;
4cb8ad
+
4cb8ad
 	free_upcall_info(info);
4cb8ad
 }
4cb8ad
 
4cb8ad
+static void
4cb8ad
+gssd_work_thread_fn(struct clnt_upcall_info *info)
4cb8ad
+{
4cb8ad
+	pthread_cleanup_push(cleanup_clnt_upcall_info, info);
4cb8ad
+	process_krb5_upcall(info);
4cb8ad
+	pthread_cleanup_pop(1);
4cb8ad
+}
4cb8ad
+
4cb8ad
+static struct upcall_thread_info *
4cb8ad
+alloc_upcall_thread_info(void)
4cb8ad
+{
4cb8ad
+	struct upcall_thread_info *info;
4cb8ad
+
4cb8ad
+	info = malloc(sizeof(struct upcall_thread_info));
4cb8ad
+	if (info == NULL)
4cb8ad
+		return NULL;
4cb8ad
+	memset(info, 0, sizeof(*info));
4cb8ad
+	return info;
4cb8ad
+}
4cb8ad
+
4cb8ad
 static int
4cb8ad
-start_upcall_thread(void (*func)(struct clnt_upcall_info *), void *info)
4cb8ad
+start_upcall_thread(void (*func)(struct clnt_upcall_info *), struct clnt_upcall_info *info)
4cb8ad
 {
4cb8ad
 	pthread_attr_t attr;
4cb8ad
 	pthread_t th;
4cb8ad
+	struct upcall_thread_info *tinfo;
4cb8ad
 	int ret;
4cb8ad
 
4cb8ad
+	tinfo = alloc_upcall_thread_info();
4cb8ad
+	if (!tinfo)
4cb8ad
+		return -ENOMEM;
4cb8ad
+	tinfo->fd = info->fd;
4cb8ad
+	tinfo->uid = info->uid;
4cb8ad
+
4cb8ad
 	ret = pthread_attr_init(&attr);
4cb8ad
 	if (ret != 0) {
4cb8ad
 		printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n",
4cb8ad
 			 ret, strerror(errno));
4cb8ad
-		return ret;
4cb8ad
-	}
4cb8ad
-	ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
4cb8ad
-	if (ret != 0) {
4cb8ad
-		printerr(0, "ERROR: failed to create pthread attr: ret %d: "
4cb8ad
-			 "%s\n", ret, strerror(errno));
4cb8ad
+		free(tinfo);
4cb8ad
 		return ret;
4cb8ad
 	}
4cb8ad
 
4cb8ad
 	ret = pthread_create(&th, &attr, (void *)func, (void *)info);
4cb8ad
-	if (ret != 0)
4cb8ad
+	if (ret != 0) {
4cb8ad
 		printerr(0, "ERROR: pthread_create failed: ret %d: %s\n",
4cb8ad
 			 ret, strerror(errno));
4cb8ad
+		free(tinfo);
4cb8ad
+		return ret;
4cb8ad
+	}
4cb8ad
+	tinfo->tid = th;
4cb8ad
+	pthread_mutex_lock(&active_thread_list_lock);
4cb8ad
+	clock_gettime(CLOCK_MONOTONIC, &tinfo->timeout);
4cb8ad
+	tinfo->timeout.tv_sec += upcall_timeout;
4cb8ad
+	TAILQ_INSERT_TAIL(&active_thread_list, tinfo, list);
4cb8ad
+	pthread_mutex_unlock(&active_thread_list_lock);
4cb8ad
+
4cb8ad
 	return ret;
4cb8ad
 }
4cb8ad