Blame SOURCES/nfs-utils-2.3.3-gssd-timeout-thread.patch

8f2508
diff -up nfs-utils-2.3.3/nfs.conf.orig nfs-utils-2.3.3/nfs.conf
8f2508
--- nfs-utils-2.3.3/nfs.conf.orig	2021-07-19 09:45:40.441448059 -0400
8f2508
+++ nfs-utils-2.3.3/nfs.conf	2021-07-19 12:08:55.314182838 -0400
8f2508
@@ -22,6 +22,8 @@ use-gss-proxy=1
8f2508
 # cred-cache-directory=
8f2508
 # preferred-realm=
8f2508
 # set-home=1
8f2508
+# upcall-timeout=30
8f2508
+# cancel-timed-out-upcalls=0
8f2508
 #
8f2508
 [lockd]
8f2508
 # port=0
8f2508
diff -up nfs-utils-2.3.3/utils/gssd/gssd.c.orig nfs-utils-2.3.3/utils/gssd/gssd.c
8f2508
--- nfs-utils-2.3.3/utils/gssd/gssd.c.orig	2021-07-19 09:45:40.448448246 -0400
8f2508
+++ nfs-utils-2.3.3/utils/gssd/gssd.c	2021-07-19 12:08:55.315182865 -0400
8f2508
@@ -96,8 +96,29 @@ pthread_mutex_t clp_lock = PTHREAD_MUTEX
8f2508
 static bool signal_received = false;
8f2508
 static struct event_base *evbase = NULL;
8f2508
 
8f2508
+int upcall_timeout = DEF_UPCALL_TIMEOUT;
8f2508
+static bool cancel_timed_out_upcalls = false;
8f2508
+
8f2508
 TAILQ_HEAD(topdir_list_head, topdir) topdir_list;
8f2508
 
8f2508
+/*
8f2508
+ * active_thread_list:
8f2508
+ *
8f2508
+ * 	used to track upcalls for timeout purposes.
8f2508
+ *
8f2508
+ * 	protected by the active_thread_list_lock mutex.
8f2508
+ *
8f2508
+ * 	upcall_thread_info structures are added to the tail of the list
8f2508
+ * 	by start_upcall_thread(), so entries closer to the head of the list
8f2508
+ * 	will be closer to hitting the upcall timeout.
8f2508
+ *
8f2508
+ * 	upcall_thread_info structures are removed from the list upon a
8f2508
+ * 	successful join of the upcall thread by the watchdog thread (via
8f2508
+ * 	scan_active_thread_list()).
8f2508
+ */
8f2508
+TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list;
8f2508
+pthread_mutex_t active_thread_list_lock = PTHREAD_MUTEX_INITIALIZER;
8f2508
+
8f2508
 struct topdir {
8f2508
 	TAILQ_ENTRY(topdir) list;
8f2508
 	TAILQ_HEAD(clnt_list_head, clnt_info) clnt_list;
8f2508
@@ -436,6 +457,138 @@ gssd_clnt_krb5_cb(int UNUSED(fd), short
8f2508
 	handle_krb5_upcall(clp);
8f2508
 }
8f2508
 
8f2508
+/*
8f2508
+ * scan_active_thread_list:
8f2508
+ *
8f2508
+ * Walks the active_thread_list, trying to join as many upcall threads as
8f2508
+ * possible.  For threads that have terminated, the corresponding
8f2508
+ * upcall_thread_info will be removed from the list and freed.  Threads that
8f2508
+ * are still busy and have exceeded the upcall_timeout will cause an error to
8f2508
+ * be logged and may be canceled (depending on the value of
8f2508
+ * cancel_timed_out_upcalls).
8f2508
+ *
8f2508
+ * Returns the number of seconds that the watchdog thread should wait before
8f2508
+ * calling scan_active_thread_list() again.
8f2508
+ */
8f2508
+static int
8f2508
+scan_active_thread_list(void)
8f2508
+{
8f2508
+	struct upcall_thread_info *info;
8f2508
+	struct timespec now;
8f2508
+	unsigned int sleeptime;
8f2508
+	bool sleeptime_set = false;
8f2508
+	int err;
8f2508
+	void *tret, *saveprev;
8f2508
+
8f2508
+	sleeptime = upcall_timeout;
8f2508
+	pthread_mutex_lock(&active_thread_list_lock);
8f2508
+	clock_gettime(CLOCK_MONOTONIC, &now);
8f2508
+	TAILQ_FOREACH(info, &active_thread_list, list) {
8f2508
+		err = pthread_tryjoin_np(info->tid, &tret);
8f2508
+		switch (err) {
8f2508
+		case 0:
8f2508
+			/*
8f2508
+			 * The upcall thread has either completed successfully, or
8f2508
+			 * has been canceled _and_ has acted on the cancellation request
8f2508
+			 * (i.e. has hit a cancellation point).  We can now remove the
8f2508
+			 * upcall_thread_info from the list and free it.
8f2508
+			 */
8f2508
+			if (tret == PTHREAD_CANCELED)
8f2508
+				printerr(3, "watchdog: thread id 0x%lx cancelled successfully\n",
8f2508
+						info->tid);
8f2508
+			saveprev = info->list.tqe_prev;
8f2508
+			TAILQ_REMOVE(&active_thread_list, info, list);
8f2508
+			free(info);
8f2508
+			info = saveprev;
8f2508
+			break;
8f2508
+		case EBUSY:
8f2508
+			/*
8f2508
+			 * The upcall thread is still running.  If the timeout has expired
8f2508
+			 * then we either cancel the thread, log an error, and do an error
8f2508
+			 * downcall to the kernel (cancel_timed_out_upcalls=true) or simply
8f2508
+			 * log an error (cancel_timed_out_upcalls=false).  In either case,
8f2508
+			 * the error is logged only once.
8f2508
+			 */
8f2508
+			if (now.tv_sec >= info->timeout.tv_sec) {
8f2508
+				if (cancel_timed_out_upcalls && !(info->flags & UPCALL_THREAD_CANCELED)) {
8f2508
+					printerr(0, "watchdog: thread id 0x%lx timed out\n",
8f2508
+							info->tid);
8f2508
+					pthread_cancel(info->tid);
8f2508
+					info->flags |= (UPCALL_THREAD_CANCELED|UPCALL_THREAD_WARNED);
8f2508
+					do_error_downcall(info->fd, info->uid, -ETIMEDOUT);
8f2508
+				} else {
8f2508
+					if (!(info->flags & UPCALL_THREAD_WARNED)) {
8f2508
+						printerr(0, "watchdog: thread id 0x%lx running for %ld seconds\n",
8f2508
+								info->tid,
8f2508
+								now.tv_sec - info->timeout.tv_sec + upcall_timeout);
8f2508
+						info->flags |= UPCALL_THREAD_WARNED;
8f2508
+					}
8f2508
+				}
8f2508
+			} else if (!sleeptime_set) {
8f2508
+			/*
8f2508
+			 * The upcall thread is still running, but the timeout has not yet
8f2508
+			 * expired.  Calculate the time remaining until the timeout will
8f2508
+			 * expire.  This is the amount of time the watchdog thread will
8f2508
+			 * wait before running again.  We only need to do this for the busy
8f2508
+			 * thread closest to the head of the list - entries appearing later
8f2508
+			 * in the list will time out later.
8f2508
+			 */
8f2508
+				sleeptime = info->timeout.tv_sec - now.tv_sec;
8f2508
+				sleeptime_set = true;
8f2508
+			}
8f2508
+			break;
8f2508
+		default:
8f2508
+			/* EDEADLK, EINVAL, and ESRCH... none of which should happen! */
8f2508
+			printerr(0, "watchdog: attempt to join thread id 0x%lx returned %d (%s)!\n",
8f2508
+					info->tid, err, strerror(err));
8f2508
+			break;
8f2508
+		}
8f2508
+	}
8f2508
+	pthread_mutex_unlock(&active_thread_list_lock);
8f2508
+
8f2508
+	return sleeptime;
8f2508
+}
8f2508
+
8f2508
+static void *
8f2508
+watchdog_thread_fn(void *UNUSED(arg))
8f2508
+{
8f2508
+	unsigned int sleeptime;
8f2508
+
8f2508
+	for (;;) {
8f2508
+		sleeptime = scan_active_thread_list();
8f2508
+		printerr(4, "watchdog: sleeping %u secs\n", sleeptime);
8f2508
+		sleep(sleeptime);
8f2508
+	}
8f2508
+	return (void *)0;
8f2508
+}
8f2508
+
8f2508
+static int
8f2508
+start_watchdog_thread(void)
8f2508
+{
8f2508
+	pthread_attr_t attr;
8f2508
+	pthread_t th;
8f2508
+	int ret;
8f2508
+
8f2508
+	ret = pthread_attr_init(&attr);
8f2508
+	if (ret != 0) {
8f2508
+		printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n",
8f2508
+			 ret, strerror(errno));
8f2508
+		return ret;
8f2508
+	}
8f2508
+	ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
8f2508
+	if (ret != 0) {
8f2508
+		printerr(0, "ERROR: failed to create pthread attr: ret %d: %s\n",
8f2508
+			 ret, strerror(errno));
8f2508
+		return ret;
8f2508
+	}
8f2508
+	ret = pthread_create(&th, &attr, watchdog_thread_fn, NULL);
8f2508
+	if (ret != 0) {
8f2508
+		printerr(0, "ERROR: pthread_create failed: ret %d: %s\n",
8f2508
+			 ret, strerror(errno));
8f2508
+	}
8f2508
+	return ret;
8f2508
+}
8f2508
+
8f2508
 static struct clnt_info *
8f2508
 gssd_get_clnt(struct topdir *tdi, const char *name)
8f2508
 {
8f2508
@@ -810,7 +963,7 @@ sig_die(int signal)
8f2508
 static void
8f2508
 usage(char *progname)
8f2508
 {
8f2508
-	fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H]\n",
8f2508
+	fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H] [-U upcall timeout] [-C]\n",
8f2508
 		progname);
8f2508
 	exit(1);
8f2508
 }
8f2508
@@ -831,6 +984,9 @@ read_gss_conf(void)
8f2508
 #endif
8f2508
 	context_timeout = conf_get_num("gssd", "context-timeout", context_timeout);
8f2508
 	rpc_timeout = conf_get_num("gssd", "rpc-timeout", rpc_timeout);
8f2508
+	upcall_timeout = conf_get_num("gssd", "upcall-timeout", upcall_timeout);
8f2508
+	cancel_timed_out_upcalls = conf_get_bool("gssd", "cancel-timed-out-upcalls",
8f2508
+						cancel_timed_out_upcalls);
8f2508
 	s = conf_get_str("gssd", "pipefs-directory");
8f2508
 	if (!s)
8f2508
 		s = conf_get_str("general", "pipefs-directory");
8f2508
@@ -872,7 +1028,7 @@ main(int argc, char *argv[])
8f2508
 	verbosity = conf_get_num("gssd", "verbosity", verbosity);
8f2508
 	rpc_verbosity = conf_get_num("gssd", "rpc-verbosity", rpc_verbosity);
8f2508
 
8f2508
-	while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:")) != -1) {
8f2508
+	while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:U:C")) != -1) {
8f2508
 		switch (opt) {
8f2508
 			case 'f':
8f2508
 				fg = 1;
8f2508
@@ -923,6 +1079,12 @@ main(int argc, char *argv[])
8f2508
 			case 'H':
8f2508
 				set_home = false;
8f2508
 				break;
8f2508
+			case 'U':
8f2508
+				upcall_timeout = atoi(optarg);
8f2508
+				break;
8f2508
+			case 'C':
8f2508
+				cancel_timed_out_upcalls = true;
8f2508
+				break;
8f2508
 			default:
8f2508
 				usage(argv[0]);
8f2508
 				break;
8f2508
@@ -995,6 +1157,11 @@ main(int argc, char *argv[])
8f2508
 	else
8f2508
 		progname = argv[0];
8f2508
 
8f2508
+	if (upcall_timeout > MAX_UPCALL_TIMEOUT)
8f2508
+		upcall_timeout = MAX_UPCALL_TIMEOUT;
8f2508
+	else if (upcall_timeout < MIN_UPCALL_TIMEOUT)
8f2508
+		upcall_timeout = MIN_UPCALL_TIMEOUT;
8f2508
+
8f2508
 	initerr(progname, verbosity, fg);
8f2508
 #ifdef HAVE_LIBTIRPC_SET_DEBUG
8f2508
 	/*
8f2508
@@ -1045,6 +1212,14 @@ main(int argc, char *argv[])
8f2508
 			       gssd_inotify_cb, NULL);
8f2508
 	event_add(inotify_ev, NULL);
8f2508
 
8f2508
+	TAILQ_INIT(&active_thread_list);
8f2508
+
8f2508
+	rc = start_watchdog_thread();
8f2508
+	if (rc != 0) {
8f2508
+		printerr(0, "ERROR: failed to start watchdog thread: %d\n", rc);
8f2508
+		exit(EXIT_FAILURE);
8f2508
+	}
8f2508
+
8f2508
 	TAILQ_INIT(&topdir_list);
8f2508
 	gssd_scan();
8f2508
 	daemon_ready();
8f2508
diff -up nfs-utils-2.3.3/utils/gssd/gssd.h.orig nfs-utils-2.3.3/utils/gssd/gssd.h
8f2508
--- nfs-utils-2.3.3/utils/gssd/gssd.h.orig	2021-07-19 09:45:40.449448272 -0400
8f2508
+++ nfs-utils-2.3.3/utils/gssd/gssd.h	2021-07-19 12:08:55.315182865 -0400
8f2508
@@ -50,6 +50,12 @@
8f2508
 #define GSSD_DEFAULT_KEYTAB_FILE		"/etc/krb5.keytab"
8f2508
 #define GSSD_SERVICE_NAME			"nfs"
8f2508
 #define RPC_CHAN_BUF_SIZE			32768
8f2508
+
8f2508
+/* timeouts are in seconds */
8f2508
+#define MIN_UPCALL_TIMEOUT			5
8f2508
+#define DEF_UPCALL_TIMEOUT			30
8f2508
+#define MAX_UPCALL_TIMEOUT			600
8f2508
+
8f2508
 /*
8f2508
  * The gss mechanisms that we can handle
8f2508
  */
8f2508
@@ -91,10 +97,22 @@ struct clnt_upcall_info {
8f2508
 	char			*service;
8f2508
 };
8f2508
 
8f2508
+struct upcall_thread_info {
8f2508
+	TAILQ_ENTRY(upcall_thread_info) list;
8f2508
+	pthread_t		tid;
8f2508
+	struct timespec		timeout;
8f2508
+	uid_t			uid;
8f2508
+	int			fd;
8f2508
+	unsigned short		flags;
8f2508
+#define UPCALL_THREAD_CANCELED	0x0001
8f2508
+#define UPCALL_THREAD_WARNED	0x0002
8f2508
+};
8f2508
+
8f2508
 void handle_krb5_upcall(struct clnt_info *clp);
8f2508
 void handle_gssd_upcall(struct clnt_info *clp);
8f2508
 void free_upcall_info(struct clnt_upcall_info *info);
8f2508
 void gssd_free_client(struct clnt_info *clp);
8f2508
+int do_error_downcall(int k5_fd, uid_t uid, int err);
8f2508
 
8f2508
 
8f2508
 #endif /* _RPC_GSSD_H_ */
8f2508
diff -up nfs-utils-2.3.3/utils/gssd/gssd.man.orig nfs-utils-2.3.3/utils/gssd/gssd.man
8f2508
--- nfs-utils-2.3.3/utils/gssd/gssd.man.orig	2021-07-19 09:45:40.443448112 -0400
8f2508
+++ nfs-utils-2.3.3/utils/gssd/gssd.man	2021-07-19 12:08:55.315182865 -0400
8f2508
@@ -8,7 +8,7 @@
8f2508
 rpc.gssd \- RPCSEC_GSS daemon
8f2508
 .SH SYNOPSIS
8f2508
 .B rpc.gssd
8f2508
-.RB [ \-DfMnlvrH ]
8f2508
+.RB [ \-DfMnlvrHC ]
8f2508
 .RB [ \-k
8f2508
 .IR keytab ]
8f2508
 .RB [ \-p
8f2508
@@ -17,6 +17,10 @@ rpc.gssd \- RPCSEC_GSS daemon
8f2508
 .IR ccachedir ]
8f2508
 .RB [ \-t
8f2508
 .IR timeout ]
8f2508
+.RB [ \-T
8f2508
+.IR timeout ]
8f2508
+.RB [ \-U
8f2508
+.IR timeout ]
8f2508
 .RB [ \-R
8f2508
 .IR realm ]
8f2508
 .SH INTRODUCTION
8f2508
@@ -290,7 +294,7 @@ seconds, which allows changing Kerberos
8f2508
 The default is no explicit timeout, which means the kernel context will live
8f2508
 the lifetime of the Kerberos service ticket used in its creation.
8f2508
 .TP
8f2508
-.B -T timeout
8f2508
+.BI "-T " timeout
8f2508
 Timeout, in seconds, to create an RPC connection with a server while
8f2508
 establishing an authenticated gss context for a user.
8f2508
 The default timeout is set to 5 seconds.
8f2508
@@ -298,6 +302,18 @@ If you get messages like "WARNING: can't
8f2508
 %servername% for user with uid %uid%: RPC: Remote system error -
8f2508
 Connection timed out", you should consider an increase of this timeout.
8f2508
 .TP
8f2508
+.BI "-U " timeout
8f2508
+Timeout, in seconds, for upcall threads.  Threads executing longer than
8f2508
+.I timeout
8f2508
+seconds will cause an error message to be logged.  The default
8f2508
+.I timeout
8f2508
+is 30 seconds.  The minimum is 5 seconds.  The maximum is 600 seconds.
8f2508
+.TP
8f2508
+.B -C
8f2508
+In addition to logging an error message for threads that have timed out,
8f2508
+the thread will be canceled and an error of -ETIMEDOUT will be reported
8f2508
+to the kernel.
8f2508
+.TP
8f2508
 .B -H
8f2508
 Avoids setting $HOME to "/". This allows rpc.gssd to read per user k5identity
8f2508
 files versus trying to read /.k5identity for each user.
8f2508
@@ -365,6 +381,17 @@ Equivalent to
8f2508
 Equivalent to
8f2508
 .BR -R .
8f2508
 .TP
8f2508
+.B upcall-timeout
8f2508
+Equivalent to
8f2508
+.BR -U .
8f2508
+.TP
8f2508
+.B cancel-timed-out-upcalls
8f2508
+Setting to
8f2508
+.B true
8f2508
+is equivalent to providing the
8f2508
+.B -C
8f2508
+flag.
8f2508
+.TP
8f2508
 .B set-home
8f2508
 Setting to
8f2508
 .B false
8f2508
diff -up nfs-utils-2.3.3/utils/gssd/gssd_proc.c.orig nfs-utils-2.3.3/utils/gssd/gssd_proc.c
8f2508
--- nfs-utils-2.3.3/utils/gssd/gssd_proc.c.orig	2021-07-19 09:45:40.449448272 -0400
8f2508
+++ nfs-utils-2.3.3/utils/gssd/gssd_proc.c	2021-07-19 12:08:55.316182891 -0400
8f2508
@@ -81,11 +81,24 @@
8f2508
 #include "gss_names.h"
8f2508
 
8f2508
 extern pthread_mutex_t clp_lock;
8f2508
+extern pthread_mutex_t active_thread_list_lock;
8f2508
+extern int upcall_timeout;
8f2508
+extern TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list;
8f2508
 
8f2508
 /* Encryption types supported by the kernel rpcsec_gss code */
8f2508
 int num_krb5_enctypes = 0;
8f2508
 krb5_enctype *krb5_enctypes = NULL;
8f2508
 
8f2508
+/* Args for the cleanup_handler() */
8f2508
+struct cleanup_args  {
8f2508
+	OM_uint32 	*min_stat;
8f2508
+	gss_buffer_t	acceptor;
8f2508
+	gss_buffer_t	token;
8f2508
+	struct authgss_private_data *pd;
8f2508
+	AUTH		**auth;
8f2508
+	CLIENT		**rpc_clnt;
8f2508
+};
8f2508
+
8f2508
 /*
8f2508
  * Parse the supported encryption type information
8f2508
  */
8f2508
@@ -184,7 +197,7 @@ out_err:
8f2508
 	return;
8f2508
 }
8f2508
 
8f2508
-static int
8f2508
+int
8f2508
 do_error_downcall(int k5_fd, uid_t uid, int err)
8f2508
 {
8f2508
 	char	buf[1024];
8f2508
@@ -604,27 +617,66 @@ out:
8f2508
 }
8f2508
 
8f2508
 /*
8f2508
+ * cleanup_handler:
8f2508
+ *
8f2508
+ * Free any resources allocated by process_krb5_upcall().
8f2508
+ *
8f2508
+ * Runs upon normal termination of process_krb5_upcall as well as if the
8f2508
+ * thread is canceled.
8f2508
+ */
8f2508
+static void
8f2508
+cleanup_handler(void *arg)
8f2508
+{
8f2508
+	struct cleanup_args *args = (struct cleanup_args *)arg;
8f2508
+
8f2508
+	gss_release_buffer(args->min_stat, args->acceptor);
8f2508
+	if (args->token->value)
8f2508
+		free(args->token->value);
8f2508
+#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA
8f2508
+	if (args->pd->pd_ctx_hndl.length != 0 || args->pd->pd_ctx != 0)
8f2508
+		authgss_free_private_data(args->pd);
8f2508
+#endif
8f2508
+	if (*args->auth)
8f2508
+		AUTH_DESTROY(*args->auth);
8f2508
+	if (*args->rpc_clnt)
8f2508
+		clnt_destroy(*args->rpc_clnt);
8f2508
+}
8f2508
+
8f2508
+/*
8f2508
+ * process_krb5_upcall:
8f2508
+ *
8f2508
  * this code uses the userland rpcsec gss library to create a krb5
8f2508
  * context on behalf of the kernel
8f2508
+ *
8f2508
+ * This is the meat of the upcall thread.  Note that cancelability is disabled
8f2508
+ * and enabled at various points to ensure that any resources reserved by the
8f2508
+ * lower level libraries are released safely.
8f2508
  */
8f2508
 static void
8f2508
-process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost,
8f2508
-		    char *tgtname, char *service)
8f2508
+process_krb5_upcall(struct clnt_upcall_info *info)
8f2508
 {
8f2508
+	struct clnt_info	*clp = info->clp;
8f2508
+	uid_t			uid = info->uid;
8f2508
+	int			fd = info->fd;
8f2508
+	char			*srchost = info->srchost;
8f2508
+	char			*tgtname = info->target;
8f2508
+	char			*service = info->service;
8f2508
 	CLIENT			*rpc_clnt = NULL;
8f2508
 	AUTH			*auth = NULL;
8f2508
 	struct authgss_private_data pd;
8f2508
 	gss_buffer_desc		token;
8f2508
-	int			err, downcall_err = -EACCES;
8f2508
+	int			err, downcall_err;
8f2508
 	OM_uint32		maj_stat, min_stat, lifetime_rec;
8f2508
 	gss_name_t		gacceptor = GSS_C_NO_NAME;
8f2508
 	gss_OID			mech;
8f2508
 	gss_buffer_desc		acceptor  = {0};
8f2508
+	struct cleanup_args cleanup_args = {&min_stat, &acceptor, &token, &pd, &auth, &rpc_clnt};
8f2508
 
8f2508
 	token.length = 0;
8f2508
 	token.value = NULL;
8f2508
 	memset(&pd, 0, sizeof(struct authgss_private_data));
8f2508
 
8f2508
+	pthread_cleanup_push(cleanup_handler, &cleanup_args);
8f2508
 	/*
8f2508
 	 * If "service" is specified, then the kernel is indicating that
8f2508
 	 * we must use machine credentials for this request.  (Regardless
8f2508
@@ -646,6 +698,8 @@ process_krb5_upcall(struct clnt_info *cl
8f2508
 	 * used for this case is not important.
8f2508
 	 *
8f2508
 	 */
8f2508
+	downcall_err = -EACCES;
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
8f2508
 	if (uid != 0 || (uid == 0 && root_uses_machine_creds == 0 &&
8f2508
 				service == NULL)) {
8f2508
 
8f2508
@@ -666,15 +720,21 @@ process_krb5_upcall(struct clnt_info *cl
8f2508
 			goto out_return_error;
8f2508
 		}
8f2508
 	}
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
8f2508
+	pthread_testcancel();
8f2508
 
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
8f2508
 	if (!authgss_get_private_data(auth, &pd)) {
8f2508
 		printerr(1, "WARNING: Failed to obtain authentication "
8f2508
 			    "data for user with uid %d for server %s\n",
8f2508
 			 uid, clp->servername);
8f2508
 		goto out_return_error;
8f2508
 	}
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
8f2508
+	pthread_testcancel();
8f2508
 
8f2508
 	/* Grab the context lifetime and acceptor name out of the ctx. */
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
8f2508
 	maj_stat = gss_inquire_context(&min_stat, pd.pd_ctx, NULL, &gacceptor,
8f2508
 				       &lifetime_rec, &mech, NULL, NULL, NULL);
8f2508
 
8f2508
@@ -686,37 +746,35 @@ process_krb5_upcall(struct clnt_info *cl
8f2508
 		get_hostbased_client_buffer(gacceptor, mech, &acceptor);
8f2508
 		gss_release_name(&min_stat, &gacceptor);
8f2508
 	}
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
8f2508
+	pthread_testcancel();
8f2508
 
8f2508
 	/*
8f2508
 	 * The serialization can mean turning pd.pd_ctx into a lucid context. If
8f2508
 	 * that happens then the pd.pd_ctx will be unusable, so we must never
8f2508
 	 * try to use it after this point.
8f2508
 	 */
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
8f2508
 	if (serialize_context_for_kernel(&pd.pd_ctx, &token, &krb5oid, NULL)) {
8f2508
 		printerr(1, "WARNING: Failed to serialize krb5 context for "
8f2508
 			    "user with uid %d for server %s\n",
8f2508
 			 uid, clp->servername);
8f2508
 		goto out_return_error;
8f2508
 	}
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
8f2508
+	pthread_testcancel();
8f2508
 
8f2508
 	do_downcall(fd, uid, &pd, &token, lifetime_rec, &acceptor);
8f2508
 
8f2508
 out:
8f2508
-	gss_release_buffer(&min_stat, &acceptor);
8f2508
-	if (token.value)
8f2508
-		free(token.value);
8f2508
-#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA
8f2508
-	if (pd.pd_ctx_hndl.length != 0 || pd.pd_ctx != 0)
8f2508
-		authgss_free_private_data(&pd);
8f2508
-#endif
8f2508
-	if (auth)
8f2508
-		AUTH_DESTROY(auth);
8f2508
-	if (rpc_clnt)
8f2508
-		clnt_destroy(rpc_clnt);
8f2508
+	pthread_cleanup_pop(1);
8f2508
 
8f2508
 	return;
8f2508
 
8f2508
 out_return_error:
8f2508
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
8f2508
+	pthread_testcancel();
8f2508
+
8f2508
 	do_error_downcall(fd, uid, downcall_err);
8f2508
 	goto out;
8f2508
 }
8f2508
@@ -782,36 +840,69 @@ void free_upcall_info(struct clnt_upcall
8f2508
 }
8f2508
 
8f2508
 static void
8f2508
-gssd_work_thread_fn(struct clnt_upcall_info *info)
8f2508
+cleanup_clnt_upcall_info(void *arg)
8f2508
 {
8f2508
-	process_krb5_upcall(info->clp, info->uid, info->fd, info->srchost, info->target, info->service);
8f2508
+	struct clnt_upcall_info *info = (struct clnt_upcall_info *)arg;
8f2508
+
8f2508
 	free_upcall_info(info);
8f2508
 }
8f2508
 
8f2508
+static void
8f2508
+gssd_work_thread_fn(struct clnt_upcall_info *info)
8f2508
+{
8f2508
+	pthread_cleanup_push(cleanup_clnt_upcall_info, info);
8f2508
+	process_krb5_upcall(info);
8f2508
+	pthread_cleanup_pop(1);
8f2508
+}
8f2508
+
8f2508
+static struct upcall_thread_info *
8f2508
+alloc_upcall_thread_info(void)
8f2508
+{
8f2508
+	struct upcall_thread_info *info;
8f2508
+
8f2508
+	info = malloc(sizeof(struct upcall_thread_info));
8f2508
+	if (info == NULL)
8f2508
+		return NULL;
8f2508
+	memset(info, 0, sizeof(*info));
8f2508
+	return info;
8f2508
+}
8f2508
+
8f2508
 static int
8f2508
-start_upcall_thread(void (*func)(struct clnt_upcall_info *), void *info)
8f2508
+start_upcall_thread(void (*func)(struct clnt_upcall_info *), struct clnt_upcall_info *info)
8f2508
 {
8f2508
 	pthread_attr_t attr;
8f2508
 	pthread_t th;
8f2508
+	struct upcall_thread_info *tinfo;
8f2508
 	int ret;
8f2508
 
8f2508
+	tinfo = alloc_upcall_thread_info();
8f2508
+	if (!tinfo)
8f2508
+		return -ENOMEM;
8f2508
+	tinfo->fd = info->fd;
8f2508
+	tinfo->uid = info->uid;
8f2508
+
8f2508
 	ret = pthread_attr_init(&attr);
8f2508
 	if (ret != 0) {
8f2508
 		printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n",
8f2508
 			 ret, strerror(errno));
8f2508
-		return ret;
8f2508
-	}
8f2508
-	ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
8f2508
-	if (ret != 0) {
8f2508
-		printerr(0, "ERROR: failed to create pthread attr: ret %d: "
8f2508
-			 "%s\n", ret, strerror(errno));
8f2508
+		free(tinfo);
8f2508
 		return ret;
8f2508
 	}
8f2508
 
8f2508
 	ret = pthread_create(&th, &attr, (void *)func, (void *)info);
8f2508
-	if (ret != 0)
8f2508
+	if (ret != 0) {
8f2508
 		printerr(0, "ERROR: pthread_create failed: ret %d: %s\n",
8f2508
 			 ret, strerror(errno));
8f2508
+		free(tinfo);
8f2508
+		return ret;
8f2508
+	}
8f2508
+	tinfo->tid = th;
8f2508
+	pthread_mutex_lock(&active_thread_list_lock);
8f2508
+	clock_gettime(CLOCK_MONOTONIC, &tinfo->timeout);
8f2508
+	tinfo->timeout.tv_sec += upcall_timeout;
8f2508
+	TAILQ_INSERT_TAIL(&active_thread_list, tinfo, list);
8f2508
+	pthread_mutex_unlock(&active_thread_list_lock);
8f2508
+
8f2508
 	return ret;
8f2508
 }
8f2508