Blame SOURCES/nfs-utils-2.3.3-gssd-timeout-thread.patch

07bda6
diff -up nfs-utils-2.3.3/nfs.conf.orig nfs-utils-2.3.3/nfs.conf
07bda6
--- nfs-utils-2.3.3/nfs.conf.orig	2021-07-19 09:45:40.441448059 -0400
07bda6
+++ nfs-utils-2.3.3/nfs.conf	2021-07-19 12:08:55.314182838 -0400
07bda6
@@ -22,6 +22,8 @@ use-gss-proxy=1
07bda6
 # cred-cache-directory=
07bda6
 # preferred-realm=
07bda6
 # set-home=1
07bda6
+# upcall-timeout=30
07bda6
+# cancel-timed-out-upcalls=0
07bda6
 #
07bda6
 [lockd]
07bda6
 # port=0
07bda6
diff -up nfs-utils-2.3.3/utils/gssd/gssd.c.orig nfs-utils-2.3.3/utils/gssd/gssd.c
07bda6
--- nfs-utils-2.3.3/utils/gssd/gssd.c.orig	2021-07-19 09:45:40.448448246 -0400
07bda6
+++ nfs-utils-2.3.3/utils/gssd/gssd.c	2021-07-19 12:08:55.315182865 -0400
07bda6
@@ -96,8 +96,29 @@ pthread_mutex_t clp_lock = PTHREAD_MUTEX
07bda6
 static bool signal_received = false;
07bda6
 static struct event_base *evbase = NULL;
07bda6
 
07bda6
+int upcall_timeout = DEF_UPCALL_TIMEOUT;
07bda6
+static bool cancel_timed_out_upcalls = false;
07bda6
+
07bda6
 TAILQ_HEAD(topdir_list_head, topdir) topdir_list;
07bda6
 
07bda6
+/*
07bda6
+ * active_thread_list:
07bda6
+ *
07bda6
+ * 	used to track upcalls for timeout purposes.
07bda6
+ *
07bda6
+ * 	protected by the active_thread_list_lock mutex.
07bda6
+ *
07bda6
+ * 	upcall_thread_info structures are added to the tail of the list
07bda6
+ * 	by start_upcall_thread(), so entries closer to the head of the list
07bda6
+ * 	will be closer to hitting the upcall timeout.
07bda6
+ *
07bda6
+ * 	upcall_thread_info structures are removed from the list upon a
07bda6
+ * 	successful join of the upcall thread by the watchdog thread (via
07bda6
+ * 	scan_active_thread_list()).
07bda6
+ */
07bda6
+TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list;
07bda6
+pthread_mutex_t active_thread_list_lock = PTHREAD_MUTEX_INITIALIZER;
07bda6
+
07bda6
 struct topdir {
07bda6
 	TAILQ_ENTRY(topdir) list;
07bda6
 	TAILQ_HEAD(clnt_list_head, clnt_info) clnt_list;
07bda6
@@ -436,6 +457,138 @@ gssd_clnt_krb5_cb(int UNUSED(fd), short
07bda6
 	handle_krb5_upcall(clp);
07bda6
 }
07bda6
 
07bda6
+/*
07bda6
+ * scan_active_thread_list:
07bda6
+ *
07bda6
+ * Walks the active_thread_list, trying to join as many upcall threads as
07bda6
+ * possible.  For threads that have terminated, the corresponding
07bda6
+ * upcall_thread_info will be removed from the list and freed.  Threads that
07bda6
+ * are still busy and have exceeded the upcall_timeout will cause an error to
07bda6
+ * be logged and may be canceled (depending on the value of
07bda6
+ * cancel_timed_out_upcalls).
07bda6
+ *
07bda6
+ * Returns the number of seconds that the watchdog thread should wait before
07bda6
+ * calling scan_active_thread_list() again.
07bda6
+ */
07bda6
+static int
07bda6
+scan_active_thread_list(void)
07bda6
+{
07bda6
+	struct upcall_thread_info *info;
07bda6
+	struct timespec now;
07bda6
+	unsigned int sleeptime;
07bda6
+	bool sleeptime_set = false;
07bda6
+	int err;
07bda6
+	void *tret, *saveprev;
07bda6
+
07bda6
+	sleeptime = upcall_timeout;
07bda6
+	pthread_mutex_lock(&active_thread_list_lock);
07bda6
+	clock_gettime(CLOCK_MONOTONIC, &now);
07bda6
+	TAILQ_FOREACH(info, &active_thread_list, list) {
07bda6
+		err = pthread_tryjoin_np(info->tid, &tret);
07bda6
+		switch (err) {
07bda6
+		case 0:
07bda6
+			/*
07bda6
+			 * The upcall thread has either completed successfully, or
07bda6
+			 * has been canceled _and_ has acted on the cancellation request
07bda6
+			 * (i.e. has hit a cancellation point).  We can now remove the
07bda6
+			 * upcall_thread_info from the list and free it.
07bda6
+			 */
07bda6
+			if (tret == PTHREAD_CANCELED)
07bda6
+				printerr(3, "watchdog: thread id 0x%lx cancelled successfully\n",
07bda6
+						info->tid);
07bda6
+			saveprev = info->list.tqe_prev;
07bda6
+			TAILQ_REMOVE(&active_thread_list, info, list);
07bda6
+			free(info);
07bda6
+			info = saveprev;
07bda6
+			break;
07bda6
+		case EBUSY:
07bda6
+			/*
07bda6
+			 * The upcall thread is still running.  If the timeout has expired
07bda6
+			 * then we either cancel the thread, log an error, and do an error
07bda6
+			 * downcall to the kernel (cancel_timed_out_upcalls=true) or simply
07bda6
+			 * log an error (cancel_timed_out_upcalls=false).  In either case,
07bda6
+			 * the error is logged only once.
07bda6
+			 */
07bda6
+			if (now.tv_sec >= info->timeout.tv_sec) {
07bda6
+				if (cancel_timed_out_upcalls && !(info->flags & UPCALL_THREAD_CANCELED)) {
07bda6
+					printerr(0, "watchdog: thread id 0x%lx timed out\n",
07bda6
+							info->tid);
07bda6
+					pthread_cancel(info->tid);
07bda6
+					info->flags |= (UPCALL_THREAD_CANCELED|UPCALL_THREAD_WARNED);
07bda6
+					do_error_downcall(info->fd, info->uid, -ETIMEDOUT);
07bda6
+				} else {
07bda6
+					if (!(info->flags & UPCALL_THREAD_WARNED)) {
07bda6
+						printerr(0, "watchdog: thread id 0x%lx running for %ld seconds\n",
07bda6
+								info->tid,
07bda6
+								now.tv_sec - info->timeout.tv_sec + upcall_timeout);
07bda6
+						info->flags |= UPCALL_THREAD_WARNED;
07bda6
+					}
07bda6
+				}
07bda6
+			} else if (!sleeptime_set) {
07bda6
+			/*
07bda6
+			 * The upcall thread is still running, but the timeout has not yet
07bda6
+			 * expired.  Calculate the time remaining until the timeout will
07bda6
+			 * expire.  This is the amount of time the watchdog thread will
07bda6
+			 * wait before running again.  We only need to do this for the busy
07bda6
+			 * thread closest to the head of the list - entries appearing later
07bda6
+			 * in the list will time out later.
07bda6
+			 */
07bda6
+				sleeptime = info->timeout.tv_sec - now.tv_sec;
07bda6
+				sleeptime_set = true;
07bda6
+			}
07bda6
+			break;
07bda6
+		default:
07bda6
+			/* EDEADLK, EINVAL, and ESRCH... none of which should happen! */
07bda6
+			printerr(0, "watchdog: attempt to join thread id 0x%lx returned %d (%s)!\n",
07bda6
+					info->tid, err, strerror(err));
07bda6
+			break;
07bda6
+		}
07bda6
+	}
07bda6
+	pthread_mutex_unlock(&active_thread_list_lock);
07bda6
+
07bda6
+	return sleeptime;
07bda6
+}
07bda6
+
07bda6
+static void *
07bda6
+watchdog_thread_fn(void *UNUSED(arg))
07bda6
+{
07bda6
+	unsigned int sleeptime;
07bda6
+
07bda6
+	for (;;) {
07bda6
+		sleeptime = scan_active_thread_list();
07bda6
+		printerr(4, "watchdog: sleeping %u secs\n", sleeptime);
07bda6
+		sleep(sleeptime);
07bda6
+	}
07bda6
+	return (void *)0;
07bda6
+}
07bda6
+
07bda6
+static int
07bda6
+start_watchdog_thread(void)
07bda6
+{
07bda6
+	pthread_attr_t attr;
07bda6
+	pthread_t th;
07bda6
+	int ret;
07bda6
+
07bda6
+	ret = pthread_attr_init(&attr);
07bda6
+	if (ret != 0) {
07bda6
+		printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n",
07bda6
+			 ret, strerror(errno));
07bda6
+		return ret;
07bda6
+	}
07bda6
+	ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
07bda6
+	if (ret != 0) {
07bda6
+		printerr(0, "ERROR: failed to create pthread attr: ret %d: %s\n",
07bda6
+			 ret, strerror(errno));
07bda6
+		return ret;
07bda6
+	}
07bda6
+	ret = pthread_create(&th, &attr, watchdog_thread_fn, NULL);
07bda6
+	if (ret != 0) {
07bda6
+		printerr(0, "ERROR: pthread_create failed: ret %d: %s\n",
07bda6
+			 ret, strerror(errno));
07bda6
+	}
07bda6
+	return ret;
07bda6
+}
07bda6
+
07bda6
 static struct clnt_info *
07bda6
 gssd_get_clnt(struct topdir *tdi, const char *name)
07bda6
 {
07bda6
@@ -810,7 +963,7 @@ sig_die(int signal)
07bda6
 static void
07bda6
 usage(char *progname)
07bda6
 {
07bda6
-	fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H]\n",
07bda6
+	fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H] [-U upcall timeout] [-C]\n",
07bda6
 		progname);
07bda6
 	exit(1);
07bda6
 }
07bda6
@@ -831,6 +984,9 @@ read_gss_conf(void)
07bda6
 #endif
07bda6
 	context_timeout = conf_get_num("gssd", "context-timeout", context_timeout);
07bda6
 	rpc_timeout = conf_get_num("gssd", "rpc-timeout", rpc_timeout);
07bda6
+	upcall_timeout = conf_get_num("gssd", "upcall-timeout", upcall_timeout);
07bda6
+	cancel_timed_out_upcalls = conf_get_bool("gssd", "cancel-timed-out-upcalls",
07bda6
+						cancel_timed_out_upcalls);
07bda6
 	s = conf_get_str("gssd", "pipefs-directory");
07bda6
 	if (!s)
07bda6
 		s = conf_get_str("general", "pipefs-directory");
07bda6
@@ -872,7 +1028,7 @@ main(int argc, char *argv[])
07bda6
 	verbosity = conf_get_num("gssd", "verbosity", verbosity);
07bda6
 	rpc_verbosity = conf_get_num("gssd", "rpc-verbosity", rpc_verbosity);
07bda6
 
07bda6
-	while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:")) != -1) {
07bda6
+	while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:U:C")) != -1) {
07bda6
 		switch (opt) {
07bda6
 			case 'f':
07bda6
 				fg = 1;
07bda6
@@ -923,6 +1079,12 @@ main(int argc, char *argv[])
07bda6
 			case 'H':
07bda6
 				set_home = false;
07bda6
 				break;
07bda6
+			case 'U':
07bda6
+				upcall_timeout = atoi(optarg);
07bda6
+				break;
07bda6
+			case 'C':
07bda6
+				cancel_timed_out_upcalls = true;
07bda6
+				break;
07bda6
 			default:
07bda6
 				usage(argv[0]);
07bda6
 				break;
07bda6
@@ -995,6 +1157,11 @@ main(int argc, char *argv[])
07bda6
 	else
07bda6
 		progname = argv[0];
07bda6
 
07bda6
+	if (upcall_timeout > MAX_UPCALL_TIMEOUT)
07bda6
+		upcall_timeout = MAX_UPCALL_TIMEOUT;
07bda6
+	else if (upcall_timeout < MIN_UPCALL_TIMEOUT)
07bda6
+		upcall_timeout = MIN_UPCALL_TIMEOUT;
07bda6
+
07bda6
 	initerr(progname, verbosity, fg);
07bda6
 #ifdef HAVE_LIBTIRPC_SET_DEBUG
07bda6
 	/*
07bda6
@@ -1045,6 +1212,14 @@ main(int argc, char *argv[])
07bda6
 			       gssd_inotify_cb, NULL);
07bda6
 	event_add(inotify_ev, NULL);
07bda6
 
07bda6
+	TAILQ_INIT(&active_thread_list);
07bda6
+
07bda6
+	rc = start_watchdog_thread();
07bda6
+	if (rc != 0) {
07bda6
+		printerr(0, "ERROR: failed to start watchdog thread: %d\n", rc);
07bda6
+		exit(EXIT_FAILURE);
07bda6
+	}
07bda6
+
07bda6
 	TAILQ_INIT(&topdir_list);
07bda6
 	gssd_scan();
07bda6
 	daemon_ready();
07bda6
diff -up nfs-utils-2.3.3/utils/gssd/gssd.h.orig nfs-utils-2.3.3/utils/gssd/gssd.h
07bda6
--- nfs-utils-2.3.3/utils/gssd/gssd.h.orig	2021-07-19 09:45:40.449448272 -0400
07bda6
+++ nfs-utils-2.3.3/utils/gssd/gssd.h	2021-07-19 12:08:55.315182865 -0400
07bda6
@@ -50,6 +50,12 @@
07bda6
 #define GSSD_DEFAULT_KEYTAB_FILE		"/etc/krb5.keytab"
07bda6
 #define GSSD_SERVICE_NAME			"nfs"
07bda6
 #define RPC_CHAN_BUF_SIZE			32768
07bda6
+
07bda6
+/* timeouts are in seconds */
07bda6
+#define MIN_UPCALL_TIMEOUT			5
07bda6
+#define DEF_UPCALL_TIMEOUT			30
07bda6
+#define MAX_UPCALL_TIMEOUT			600
07bda6
+
07bda6
 /*
07bda6
  * The gss mechanisms that we can handle
07bda6
  */
07bda6
@@ -91,10 +97,22 @@ struct clnt_upcall_info {
07bda6
 	char			*service;
07bda6
 };
07bda6
 
07bda6
+struct upcall_thread_info {
07bda6
+	TAILQ_ENTRY(upcall_thread_info) list;
07bda6
+	pthread_t		tid;
07bda6
+	struct timespec		timeout;
07bda6
+	uid_t			uid;
07bda6
+	int			fd;
07bda6
+	unsigned short		flags;
07bda6
+#define UPCALL_THREAD_CANCELED	0x0001
07bda6
+#define UPCALL_THREAD_WARNED	0x0002
07bda6
+};
07bda6
+
07bda6
 void handle_krb5_upcall(struct clnt_info *clp);
07bda6
 void handle_gssd_upcall(struct clnt_info *clp);
07bda6
 void free_upcall_info(struct clnt_upcall_info *info);
07bda6
 void gssd_free_client(struct clnt_info *clp);
07bda6
+int do_error_downcall(int k5_fd, uid_t uid, int err);
07bda6
 
07bda6
 
07bda6
 #endif /* _RPC_GSSD_H_ */
07bda6
diff -up nfs-utils-2.3.3/utils/gssd/gssd.man.orig nfs-utils-2.3.3/utils/gssd/gssd.man
07bda6
--- nfs-utils-2.3.3/utils/gssd/gssd.man.orig	2021-07-19 09:45:40.443448112 -0400
07bda6
+++ nfs-utils-2.3.3/utils/gssd/gssd.man	2021-07-19 12:08:55.315182865 -0400
07bda6
@@ -8,7 +8,7 @@
07bda6
 rpc.gssd \- RPCSEC_GSS daemon
07bda6
 .SH SYNOPSIS
07bda6
 .B rpc.gssd
07bda6
-.RB [ \-DfMnlvrH ]
07bda6
+.RB [ \-DfMnlvrHC ]
07bda6
 .RB [ \-k
07bda6
 .IR keytab ]
07bda6
 .RB [ \-p
07bda6
@@ -17,6 +17,10 @@ rpc.gssd \- RPCSEC_GSS daemon
07bda6
 .IR ccachedir ]
07bda6
 .RB [ \-t
07bda6
 .IR timeout ]
07bda6
+.RB [ \-T
07bda6
+.IR timeout ]
07bda6
+.RB [ \-U
07bda6
+.IR timeout ]
07bda6
 .RB [ \-R
07bda6
 .IR realm ]
07bda6
 .SH INTRODUCTION
07bda6
@@ -290,7 +294,7 @@ seconds, which allows changing Kerberos
07bda6
 The default is no explicit timeout, which means the kernel context will live
07bda6
 the lifetime of the Kerberos service ticket used in its creation.
07bda6
 .TP
07bda6
-.B -T timeout
07bda6
+.BI "-T " timeout
07bda6
 Timeout, in seconds, to create an RPC connection with a server while
07bda6
 establishing an authenticated gss context for a user.
07bda6
 The default timeout is set to 5 seconds.
07bda6
@@ -298,6 +302,18 @@ If you get messages like "WARNING: can't
07bda6
 %servername% for user with uid %uid%: RPC: Remote system error -
07bda6
 Connection timed out", you should consider an increase of this timeout.
07bda6
 .TP
07bda6
+.BI "-U " timeout
07bda6
+Timeout, in seconds, for upcall threads.  Threads executing longer than
07bda6
+.I timeout
07bda6
+seconds will cause an error message to be logged.  The default
07bda6
+.I timeout
07bda6
+is 30 seconds.  The minimum is 5 seconds.  The maximum is 600 seconds.
07bda6
+.TP
07bda6
+.B -C
07bda6
+In addition to logging an error message for threads that have timed out,
07bda6
+the thread will be canceled and an error of -ETIMEDOUT will be reported
07bda6
+to the kernel.
07bda6
+.TP
07bda6
 .B -H
07bda6
 Avoids setting $HOME to "/". This allows rpc.gssd to read per user k5identity
07bda6
 files versus trying to read /.k5identity for each user.
07bda6
@@ -365,6 +381,17 @@ Equivalent to
07bda6
 Equivalent to
07bda6
 .BR -R .
07bda6
 .TP
07bda6
+.B upcall-timeout
07bda6
+Equivalent to
07bda6
+.BR -U .
07bda6
+.TP
07bda6
+.B cancel-timed-out-upcalls
07bda6
+Setting to
07bda6
+.B true
07bda6
+is equivalent to providing the
07bda6
+.B -C
07bda6
+flag.
07bda6
+.TP
07bda6
 .B set-home
07bda6
 Setting to
07bda6
 .B false
07bda6
diff -up nfs-utils-2.3.3/utils/gssd/gssd_proc.c.orig nfs-utils-2.3.3/utils/gssd/gssd_proc.c
07bda6
--- nfs-utils-2.3.3/utils/gssd/gssd_proc.c.orig	2021-07-19 09:45:40.449448272 -0400
07bda6
+++ nfs-utils-2.3.3/utils/gssd/gssd_proc.c	2021-07-19 12:08:55.316182891 -0400
07bda6
@@ -81,11 +81,24 @@
07bda6
 #include "gss_names.h"
07bda6
 
07bda6
 extern pthread_mutex_t clp_lock;
07bda6
+extern pthread_mutex_t active_thread_list_lock;
07bda6
+extern int upcall_timeout;
07bda6
+extern TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list;
07bda6
 
07bda6
 /* Encryption types supported by the kernel rpcsec_gss code */
07bda6
 int num_krb5_enctypes = 0;
07bda6
 krb5_enctype *krb5_enctypes = NULL;
07bda6
 
07bda6
+/* Args for the cleanup_handler() */
07bda6
+struct cleanup_args  {
07bda6
+	OM_uint32 	*min_stat;
07bda6
+	gss_buffer_t	acceptor;
07bda6
+	gss_buffer_t	token;
07bda6
+	struct authgss_private_data *pd;
07bda6
+	AUTH		**auth;
07bda6
+	CLIENT		**rpc_clnt;
07bda6
+};
07bda6
+
07bda6
 /*
07bda6
  * Parse the supported encryption type information
07bda6
  */
07bda6
@@ -184,7 +197,7 @@ out_err:
07bda6
 	return;
07bda6
 }
07bda6
 
07bda6
-static int
07bda6
+int
07bda6
 do_error_downcall(int k5_fd, uid_t uid, int err)
07bda6
 {
07bda6
 	char	buf[1024];
07bda6
@@ -604,27 +617,66 @@ out:
07bda6
 }
07bda6
 
07bda6
 /*
07bda6
+ * cleanup_handler:
07bda6
+ *
07bda6
+ * Free any resources allocated by process_krb5_upcall().
07bda6
+ *
07bda6
+ * Runs upon normal termination of process_krb5_upcall as well as if the
07bda6
+ * thread is canceled.
07bda6
+ */
07bda6
+static void
07bda6
+cleanup_handler(void *arg)
07bda6
+{
07bda6
+	struct cleanup_args *args = (struct cleanup_args *)arg;
07bda6
+
07bda6
+	gss_release_buffer(args->min_stat, args->acceptor);
07bda6
+	if (args->token->value)
07bda6
+		free(args->token->value);
07bda6
+#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA
07bda6
+	if (args->pd->pd_ctx_hndl.length != 0 || args->pd->pd_ctx != 0)
07bda6
+		authgss_free_private_data(args->pd);
07bda6
+#endif
07bda6
+	if (*args->auth)
07bda6
+		AUTH_DESTROY(*args->auth);
07bda6
+	if (*args->rpc_clnt)
07bda6
+		clnt_destroy(*args->rpc_clnt);
07bda6
+}
07bda6
+
07bda6
+/*
07bda6
+ * process_krb5_upcall:
07bda6
+ *
07bda6
  * this code uses the userland rpcsec gss library to create a krb5
07bda6
  * context on behalf of the kernel
07bda6
+ *
07bda6
+ * This is the meat of the upcall thread.  Note that cancelability is disabled
07bda6
+ * and enabled at various points to ensure that any resources reserved by the
07bda6
+ * lower level libraries are released safely.
07bda6
  */
07bda6
 static void
07bda6
-process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost,
07bda6
-		    char *tgtname, char *service)
07bda6
+process_krb5_upcall(struct clnt_upcall_info *info)
07bda6
 {
07bda6
+	struct clnt_info	*clp = info->clp;
07bda6
+	uid_t			uid = info->uid;
07bda6
+	int			fd = info->fd;
07bda6
+	char			*srchost = info->srchost;
07bda6
+	char			*tgtname = info->target;
07bda6
+	char			*service = info->service;
07bda6
 	CLIENT			*rpc_clnt = NULL;
07bda6
 	AUTH			*auth = NULL;
07bda6
 	struct authgss_private_data pd;
07bda6
 	gss_buffer_desc		token;
07bda6
-	int			err, downcall_err = -EACCES;
07bda6
+	int			err, downcall_err;
07bda6
 	OM_uint32		maj_stat, min_stat, lifetime_rec;
07bda6
 	gss_name_t		gacceptor = GSS_C_NO_NAME;
07bda6
 	gss_OID			mech;
07bda6
 	gss_buffer_desc		acceptor  = {0};
07bda6
+	struct cleanup_args cleanup_args = {&min_stat, &acceptor, &token, &pd, &auth, &rpc_clnt};
07bda6
 
07bda6
 	token.length = 0;
07bda6
 	token.value = NULL;
07bda6
 	memset(&pd, 0, sizeof(struct authgss_private_data));
07bda6
 
07bda6
+	pthread_cleanup_push(cleanup_handler, &cleanup_args);
07bda6
 	/*
07bda6
 	 * If "service" is specified, then the kernel is indicating that
07bda6
 	 * we must use machine credentials for this request.  (Regardless
07bda6
@@ -646,6 +698,8 @@ process_krb5_upcall(struct clnt_info *cl
07bda6
 	 * used for this case is not important.
07bda6
 	 *
07bda6
 	 */
07bda6
+	downcall_err = -EACCES;
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
07bda6
 	if (uid != 0 || (uid == 0 && root_uses_machine_creds == 0 &&
07bda6
 				service == NULL)) {
07bda6
 
07bda6
@@ -666,15 +720,21 @@ process_krb5_upcall(struct clnt_info *cl
07bda6
 			goto out_return_error;
07bda6
 		}
07bda6
 	}
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
07bda6
+	pthread_testcancel();
07bda6
 
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
07bda6
 	if (!authgss_get_private_data(auth, &pd)) {
07bda6
 		printerr(1, "WARNING: Failed to obtain authentication "
07bda6
 			    "data for user with uid %d for server %s\n",
07bda6
 			 uid, clp->servername);
07bda6
 		goto out_return_error;
07bda6
 	}
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
07bda6
+	pthread_testcancel();
07bda6
 
07bda6
 	/* Grab the context lifetime and acceptor name out of the ctx. */
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
07bda6
 	maj_stat = gss_inquire_context(&min_stat, pd.pd_ctx, NULL, &gacceptor,
07bda6
 				       &lifetime_rec, &mech, NULL, NULL, NULL);
07bda6
 
07bda6
@@ -686,37 +746,35 @@ process_krb5_upcall(struct clnt_info *cl
07bda6
 		get_hostbased_client_buffer(gacceptor, mech, &acceptor);
07bda6
 		gss_release_name(&min_stat, &gacceptor);
07bda6
 	}
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
07bda6
+	pthread_testcancel();
07bda6
 
07bda6
 	/*
07bda6
 	 * The serialization can mean turning pd.pd_ctx into a lucid context. If
07bda6
 	 * that happens then the pd.pd_ctx will be unusable, so we must never
07bda6
 	 * try to use it after this point.
07bda6
 	 */
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
07bda6
 	if (serialize_context_for_kernel(&pd.pd_ctx, &token, &krb5oid, NULL)) {
07bda6
 		printerr(1, "WARNING: Failed to serialize krb5 context for "
07bda6
 			    "user with uid %d for server %s\n",
07bda6
 			 uid, clp->servername);
07bda6
 		goto out_return_error;
07bda6
 	}
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
07bda6
+	pthread_testcancel();
07bda6
 
07bda6
 	do_downcall(fd, uid, &pd, &token, lifetime_rec, &acceptor);
07bda6
 
07bda6
 out:
07bda6
-	gss_release_buffer(&min_stat, &acceptor);
07bda6
-	if (token.value)
07bda6
-		free(token.value);
07bda6
-#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA
07bda6
-	if (pd.pd_ctx_hndl.length != 0 || pd.pd_ctx != 0)
07bda6
-		authgss_free_private_data(&pd);
07bda6
-#endif
07bda6
-	if (auth)
07bda6
-		AUTH_DESTROY(auth);
07bda6
-	if (rpc_clnt)
07bda6
-		clnt_destroy(rpc_clnt);
07bda6
+	pthread_cleanup_pop(1);
07bda6
 
07bda6
 	return;
07bda6
 
07bda6
 out_return_error:
07bda6
+	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
07bda6
+	pthread_testcancel();
07bda6
+
07bda6
 	do_error_downcall(fd, uid, downcall_err);
07bda6
 	goto out;
07bda6
 }
07bda6
@@ -782,36 +840,69 @@ void free_upcall_info(struct clnt_upcall
07bda6
 }
07bda6
 
07bda6
 static void
07bda6
-gssd_work_thread_fn(struct clnt_upcall_info *info)
07bda6
+cleanup_clnt_upcall_info(void *arg)
07bda6
 {
07bda6
-	process_krb5_upcall(info->clp, info->uid, info->fd, info->srchost, info->target, info->service);
07bda6
+	struct clnt_upcall_info *info = (struct clnt_upcall_info *)arg;
07bda6
+
07bda6
 	free_upcall_info(info);
07bda6
 }
07bda6
 
07bda6
+static void
07bda6
+gssd_work_thread_fn(struct clnt_upcall_info *info)
07bda6
+{
07bda6
+	pthread_cleanup_push(cleanup_clnt_upcall_info, info);
07bda6
+	process_krb5_upcall(info);
07bda6
+	pthread_cleanup_pop(1);
07bda6
+}
07bda6
+
07bda6
+static struct upcall_thread_info *
07bda6
+alloc_upcall_thread_info(void)
07bda6
+{
07bda6
+	struct upcall_thread_info *info;
07bda6
+
07bda6
+	info = malloc(sizeof(struct upcall_thread_info));
07bda6
+	if (info == NULL)
07bda6
+		return NULL;
07bda6
+	memset(info, 0, sizeof(*info));
07bda6
+	return info;
07bda6
+}
07bda6
+
07bda6
 static int
07bda6
-start_upcall_thread(void (*func)(struct clnt_upcall_info *), void *info)
07bda6
+start_upcall_thread(void (*func)(struct clnt_upcall_info *), struct clnt_upcall_info *info)
07bda6
 {
07bda6
 	pthread_attr_t attr;
07bda6
 	pthread_t th;
07bda6
+	struct upcall_thread_info *tinfo;
07bda6
 	int ret;
07bda6
 
07bda6
+	tinfo = alloc_upcall_thread_info();
07bda6
+	if (!tinfo)
07bda6
+		return -ENOMEM;
07bda6
+	tinfo->fd = info->fd;
07bda6
+	tinfo->uid = info->uid;
07bda6
+
07bda6
 	ret = pthread_attr_init(&attr);
07bda6
 	if (ret != 0) {
07bda6
 		printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n",
07bda6
 			 ret, strerror(errno));
07bda6
-		return ret;
07bda6
-	}
07bda6
-	ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
07bda6
-	if (ret != 0) {
07bda6
-		printerr(0, "ERROR: failed to create pthread attr: ret %d: "
07bda6
-			 "%s\n", ret, strerror(errno));
07bda6
+		free(tinfo);
07bda6
 		return ret;
07bda6
 	}
07bda6
 
07bda6
 	ret = pthread_create(&th, &attr, (void *)func, (void *)info);
07bda6
-	if (ret != 0)
07bda6
+	if (ret != 0) {
07bda6
 		printerr(0, "ERROR: pthread_create failed: ret %d: %s\n",
07bda6
 			 ret, strerror(errno));
07bda6
+		free(tinfo);
07bda6
+		return ret;
07bda6
+	}
07bda6
+	tinfo->tid = th;
07bda6
+	pthread_mutex_lock(&active_thread_list_lock);
07bda6
+	clock_gettime(CLOCK_MONOTONIC, &tinfo->timeout);
07bda6
+	tinfo->timeout.tv_sec += upcall_timeout;
07bda6
+	TAILQ_INSERT_TAIL(&active_thread_list, tinfo, list);
07bda6
+	pthread_mutex_unlock(&active_thread_list_lock);
07bda6
+
07bda6
 	return ret;
07bda6
 }
07bda6