Blame SOURCES/libtirpc-1.1.4-multithr-cleanup.patch

a57358
diff --git a/src/bindresvport.c b/src/bindresvport.c
a57358
index ef9b345..5c0ddcf 100644
a57358
--- a/src/bindresvport.c
a57358
+++ b/src/bindresvport.c
a57358
@@ -164,10 +164,11 @@ bindresvport_sa(sd, sa)
a57358
 	int endport = ENDPORT;
a57358
 	int i;
a57358
 
a57358
+	mutex_lock(&port_lock);
a57358
+
a57358
 	if (!blacklist_read)
a57358
 		load_blacklist();
a57358
 
a57358
-	mutex_lock(&port_lock);
a57358
 	nports = ENDPORT - startport + 1;
a57358
 
a57358
         if (sa == NULL) {
a57358
diff --git a/src/clnt_dg.c b/src/clnt_dg.c
a57358
index d8890b5..166af63 100644
a57358
--- a/src/clnt_dg.c
a57358
+++ b/src/clnt_dg.c
a57358
@@ -53,6 +53,7 @@
a57358
 #include <unistd.h>
a57358
 #include <err.h>
a57358
 #include "rpc_com.h"
a57358
+#include "clnt_fd_locks.h"
a57358
 
a57358
 #ifdef IP_RECVERR
a57358
 #include <asm/types.h>
a57358
@@ -81,24 +82,29 @@ static void clnt_dg_destroy(CLIENT *);
a57358
  *	This machinery implements per-fd locks for MT-safety.  It is not
a57358
  *	sufficient to do per-CLIENT handle locks for MT-safety because a
a57358
  *	user may create more than one CLIENT handle with the same fd behind
a57358
- *	it.  Therfore, we allocate an array of flags (dg_fd_locks), protected
a57358
- *	by the clnt_fd_lock mutex, and an array (dg_cv) of condition variables
a57358
- *	similarly protected.  Dg_fd_lock[fd] == 1 => a call is activte on some
a57358
- *	CLIENT handle created for that fd.
a57358
+ *	it.
a57358
+ *
a57358
+ *	We keep track of a list of per-fd locks, protected by the clnt_fd_lock
a57358
+ *	mutex. Each per-fd lock consists of a predicate indicating whether it is
a57358
+ *	active or not: fd_lock->active == TRUE => a call is active on some
a57358
+ *	CLIENT handle created for that fd. Each fd predicate is guarded by a
a57358
+ *	condition variable so that the global mutex can be unlocked while
a57358
+ *	waiting for the predicate to change.
a57358
+ *
a57358
  *	The current implementation holds locks across the entire RPC and reply,
a57358
  *	including retransmissions.  Yes, this is silly, and as soon as this
a57358
  *	code is proven to work, this should be the first thing fixed.  One step
a57358
  *	at a time.
a57358
  */
a57358
-static int	*dg_fd_locks;
a57358
+static fd_locks_t *dg_fd_locks;
a57358
 extern mutex_t clnt_fd_lock;
a57358
-static cond_t	*dg_cv;
a57358
-#define	release_fd_lock(fd, mask) {		\
a57358
+#define	release_fd_lock(fd_lock, mask) {	\
a57358
 	mutex_lock(&clnt_fd_lock);	\
a57358
-	dg_fd_locks[fd] = 0;		\
a57358
-	mutex_unlock(&clnt_fd_lock);	\
a57358
+	fd_lock->active = FALSE;	\
a57358
+	fd_lock->pending--;		\
a57358
 	thr_sigsetmask(SIG_SETMASK, &(mask), NULL); \
a57358
-	cond_signal(&dg_cv[fd]);	\
a57358
+	cond_signal(&fd_lock->cv);	\
a57358
+	mutex_unlock(&clnt_fd_lock);    \
a57358
 }
a57358
 
a57358
 static const char mem_err_clnt_dg[] = "clnt_dg_create: out of memory";
a57358
@@ -110,6 +116,7 @@ static const char mem_err_clnt_dg[] = "clnt_dg_create: out of memory";
a57358
  */
a57358
 struct cu_data {
a57358
 	int			cu_fd;		/* connections fd */
a57358
+	fd_lock_t 		*cu_fd_lock;
a57358
 	bool_t			cu_closeit;	/* opened by library */
a57358
 	struct sockaddr_storage	cu_raddr;	/* remote address */
a57358
 	int			cu_rlen;
a57358
@@ -158,46 +165,22 @@ clnt_dg_create(fd, svcaddr, program, version, sendsz, recvsz)
a57358
 	sigset_t newmask;
a57358
 	struct __rpc_sockinfo si;
a57358
 	int one = 1;
a57358
+	fd_lock_t *fd_lock;
a57358
 
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	if (dg_fd_locks == (int *) NULL) {
a57358
-		size_t cv_allocsz, fd_allocsz;
a57358
-		unsigned int dtbsize = __rpc_dtbsize();
a57358
-
a57358
-		if ( (size_t) dtbsize > SIZE_MAX/sizeof(cond_t)) {
a57358
+	if (dg_fd_locks == (fd_locks_t *) NULL) {
a57358
+		dg_fd_locks = fd_locks_init();
a57358
+		if (dg_fd_locks == (fd_locks_t *) NULL) {
a57358
 			mutex_unlock(&clnt_fd_lock);
a57358
-			thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
a57358
-			errno = EOVERFLOW;
a57358
 			goto err1;
a57358
 		}
a57358
-
a57358
-		fd_allocsz = dtbsize * sizeof (int);
a57358
-		dg_fd_locks = (int *) mem_alloc(fd_allocsz);
a57358
-		if (dg_fd_locks == (int *) NULL) {
a57358
-			mutex_unlock(&clnt_fd_lock);
a57358
-			thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
a57358
-			errno = ENOMEM;
a57358
-			goto err1;
a57358
-		} else
a57358
-			memset(dg_fd_locks, '\0', fd_allocsz);
a57358
-
a57358
-		cv_allocsz = dtbsize * sizeof (cond_t);
a57358
-		dg_cv = (cond_t *) mem_alloc(cv_allocsz);
a57358
-		if (dg_cv == (cond_t *) NULL) {
a57358
-			mem_free(dg_fd_locks, fd_allocsz);
a57358
-			dg_fd_locks = (int *) NULL;
a57358
-			mutex_unlock(&clnt_fd_lock);
a57358
-			thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
a57358
-			errno = ENOMEM;
a57358
-			goto err1;
a57358
-		} else {
a57358
-			int i;
a57358
-
a57358
-			for (i = 0; i < dtbsize; i++)
a57358
-				cond_init(&dg_cv[i], 0, (void *) 0);
a57358
-		}
a57358
+	}
a57358
+	fd_lock = fd_lock_create(fd, dg_fd_locks);
a57358
+	if (fd_lock == (fd_lock_t *) NULL) {
a57358
+		mutex_unlock(&clnt_fd_lock);
a57358
+		goto err1;
a57358
 	}
a57358
 
a57358
 	mutex_unlock(&clnt_fd_lock);
a57358
@@ -277,6 +260,7 @@ clnt_dg_create(fd, svcaddr, program, version, sendsz, recvsz)
a57358
 	 */
a57358
 	cu->cu_closeit = FALSE;
a57358
 	cu->cu_fd = fd;
a57358
+	cu->cu_fd_lock = fd_lock;
a57358
 	cl->cl_ops = clnt_dg_ops();
a57358
 	cl->cl_private = (caddr_t)(void *)cu;
a57358
 	cl->cl_auth = authnone_create();
a57358
@@ -322,17 +306,16 @@ clnt_dg_call(cl, proc, xargs, argsp, xresults, resultsp, utimeout)
a57358
 	sigset_t newmask;
a57358
 	socklen_t salen;
a57358
 	ssize_t recvlen = 0;
a57358
-	int rpc_lock_value;
a57358
 	u_int32_t xid, inval, outval;
a57358
 
a57358
 	outlen = 0;
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	while (dg_fd_locks[cu->cu_fd])
a57358
-		cond_wait(&dg_cv[cu->cu_fd], &clnt_fd_lock);
a57358
-	rpc_lock_value = 1;
a57358
-	dg_fd_locks[cu->cu_fd] = rpc_lock_value;
a57358
+	cu->cu_fd_lock->pending++;
a57358
+	while (cu->cu_fd_lock->active)
a57358
+		cond_wait(&cu->cu_fd_lock->cv, &clnt_fd_lock);
a57358
+	cu->cu_fd_lock->active = TRUE;
a57358
 	mutex_unlock(&clnt_fd_lock);
a57358
 	if (cu->cu_total.tv_usec == -1) {
a57358
 		timeout = utimeout;	/* use supplied timeout */
a57358
@@ -481,7 +464,7 @@ get_reply:
a57358
 		  e = (struct sock_extended_err *) CMSG_DATA(cmsg);
a57358
 		  cu->cu_error.re_errno = e->ee_errno;
a57358
 		  mem_free(cbuf, (outlen + 256));
a57358
-		  release_fd_lock(cu->cu_fd, mask);
a57358
+		  release_fd_lock(cu->cu_fd_lock, mask);
a57358
 		  return (cu->cu_error.re_status = RPC_CANTRECV);
a57358
 		}
a57358
 	  mem_free(cbuf, (outlen + 256));
a57358
@@ -561,7 +544,7 @@ get_reply:
a57358
 
a57358
 	}
a57358
 out:
a57358
-	release_fd_lock(cu->cu_fd, mask);
a57358
+	release_fd_lock(cu->cu_fd_lock, mask);
a57358
 	return (cu->cu_error.re_status);
a57358
 }
a57358
 
a57358
@@ -590,13 +573,15 @@ clnt_dg_freeres(cl, xdr_res, res_ptr)
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	while (dg_fd_locks[cu->cu_fd])
a57358
-		cond_wait(&dg_cv[cu->cu_fd], &clnt_fd_lock);
a57358
+	cu->cu_fd_lock->pending++;
a57358
+	while (cu->cu_fd_lock->active)
a57358
+		cond_wait(&cu->cu_fd_lock->cv, &clnt_fd_lock);
a57358
 	xdrs->x_op = XDR_FREE;
a57358
 	dummy = (*xdr_res)(xdrs, res_ptr);
a57358
-	mutex_unlock(&clnt_fd_lock);
a57358
+	cu->cu_fd_lock->pending--;
a57358
 	thr_sigsetmask(SIG_SETMASK, &mask, NULL);
a57358
-	cond_signal(&dg_cv[cu->cu_fd]);
a57358
+	cond_signal(&cu->cu_fd_lock->cv);
a57358
+	mutex_unlock(&clnt_fd_lock);
a57358
 	return (dummy);
a57358
 }
a57358
 
a57358
@@ -617,36 +602,35 @@ clnt_dg_control(cl, request, info)
a57358
 	struct netbuf *addr;
a57358
 	sigset_t mask;
a57358
 	sigset_t newmask;
a57358
-	int rpc_lock_value;
a57358
 
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	while (dg_fd_locks[cu->cu_fd])
a57358
-		cond_wait(&dg_cv[cu->cu_fd], &clnt_fd_lock);
a57358
-        rpc_lock_value = 1;
a57358
-	dg_fd_locks[cu->cu_fd] = rpc_lock_value;
a57358
+	cu->cu_fd_lock->pending++;
a57358
+	while (cu->cu_fd_lock->active)
a57358
+		cond_wait(&cu->cu_fd_lock->cv, &clnt_fd_lock);
a57358
+	cu->cu_fd_lock->active = TRUE;
a57358
 	mutex_unlock(&clnt_fd_lock);
a57358
 	switch (request) {
a57358
 	case CLSET_FD_CLOSE:
a57358
 		cu->cu_closeit = TRUE;
a57358
-		release_fd_lock(cu->cu_fd, mask);
a57358
+		release_fd_lock(cu->cu_fd_lock, mask);
a57358
 		return (TRUE);
a57358
 	case CLSET_FD_NCLOSE:
a57358
 		cu->cu_closeit = FALSE;
a57358
-		release_fd_lock(cu->cu_fd, mask);
a57358
+		release_fd_lock(cu->cu_fd_lock, mask);
a57358
 		return (TRUE);
a57358
 	}
a57358
 
a57358
 	/* for other requests which use info */
a57358
 	if (info == NULL) {
a57358
-		release_fd_lock(cu->cu_fd, mask);
a57358
+		release_fd_lock(cu->cu_fd_lock, mask);
a57358
 		return (FALSE);
a57358
 	}
a57358
 	switch (request) {
a57358
 	case CLSET_TIMEOUT:
a57358
 		if (time_not_ok((struct timeval *)info)) {
a57358
-			release_fd_lock(cu->cu_fd, mask);
a57358
+			release_fd_lock(cu->cu_fd_lock, mask);
a57358
 			return (FALSE);
a57358
 		}
a57358
 		cu->cu_total = *(struct timeval *)info;
a57358
@@ -660,7 +644,7 @@ clnt_dg_control(cl, request, info)
a57358
 		break;
a57358
 	case CLSET_RETRY_TIMEOUT:
a57358
 		if (time_not_ok((struct timeval *)info)) {
a57358
-			release_fd_lock(cu->cu_fd, mask);
a57358
+			release_fd_lock(cu->cu_fd_lock, mask);
a57358
 			return (FALSE);
a57358
 		}
a57358
 		cu->cu_wait = *(struct timeval *)info;
a57358
@@ -680,7 +664,7 @@ clnt_dg_control(cl, request, info)
a57358
 	case CLSET_SVC_ADDR:		/* set to new address */
a57358
 		addr = (struct netbuf *)info;
a57358
 		if (addr->len < sizeof cu->cu_raddr) {
a57358
-			release_fd_lock(cu->cu_fd, mask);
a57358
+			release_fd_lock(cu->cu_fd_lock, mask);
a57358
 			return (FALSE);
a57358
 		}
a57358
 		(void) memcpy(&cu->cu_raddr, addr->buf, addr->len);
a57358
@@ -743,10 +727,10 @@ clnt_dg_control(cl, request, info)
a57358
 		cu->cu_connect = *(int *)info;
a57358
 		break;
a57358
 	default:
a57358
-		release_fd_lock(cu->cu_fd, mask);
a57358
+		release_fd_lock(cu->cu_fd_lock, mask);
a57358
 		return (FALSE);
a57358
 	}
a57358
-	release_fd_lock(cu->cu_fd, mask);
a57358
+	release_fd_lock(cu->cu_fd_lock, mask);
a57358
 	return (TRUE);
a57358
 }
a57358
 
a57358
@@ -756,14 +740,21 @@ clnt_dg_destroy(cl)
a57358
 {
a57358
 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
a57358
 	int cu_fd = cu->cu_fd;
a57358
+	fd_lock_t *cu_fd_lock = cu->cu_fd_lock;
a57358
 	sigset_t mask;
a57358
 	sigset_t newmask;
a57358
 
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	while (dg_fd_locks[cu_fd])
a57358
-		cond_wait(&dg_cv[cu_fd], &clnt_fd_lock);
a57358
+	/* wait until all pending operations on client are completed. */
a57358
+	while (cu_fd_lock->pending > 0) {
a57358
+		/* If a blocked operation can be awakened, then do it. */
a57358
+		if (cu_fd_lock->active == FALSE)
a57358
+			cond_signal(&cu_fd_lock->cv);
a57358
+		/* keep waiting... */
a57358
+		cond_wait(&cu_fd_lock->cv, &clnt_fd_lock);
a57358
+	}
a57358
 	if (cu->cu_closeit)
a57358
 		(void)close(cu_fd);
a57358
 	XDR_DESTROY(&(cu->cu_outxdrs));
a57358
@@ -773,9 +764,10 @@ clnt_dg_destroy(cl)
a57358
 	if (cl->cl_tp && cl->cl_tp[0])
a57358
 		mem_free(cl->cl_tp, strlen(cl->cl_tp) +1);
a57358
 	mem_free(cl, sizeof (CLIENT));
a57358
+	cond_signal(&cu_fd_lock->cv);
a57358
+	fd_lock_destroy(cu_fd, cu_fd_lock, dg_fd_locks);
a57358
 	mutex_unlock(&clnt_fd_lock);
a57358
 	thr_sigsetmask(SIG_SETMASK, &mask, NULL);
a57358
-	cond_signal(&dg_cv[cu_fd]);
a57358
 }
a57358
 
a57358
 static struct clnt_ops *
a57358
diff --git a/src/clnt_fd_locks.h b/src/clnt_fd_locks.h
a57358
new file mode 100644
a57358
index 0000000..6ba62cb
a57358
--- /dev/null
a57358
+++ b/src/clnt_fd_locks.h
a57358
@@ -0,0 +1,209 @@
a57358
+/*
a57358
+ * clnt_fd_locks.h -- per-fd client lock management for libtirpc
a57358
+ *
a57358
+ * Copyright (c) 2020 SUSE LINUX GmbH, Nuernberg, Germany.
a57358
+ *
a57358
+ * Redistribution and use in source and binary forms, with or without
a57358
+ * modification, are permitted provided that the following conditions are met:
a57358
+ * - Redistributions of source code must retain the above copyright notice,
a57358
+ *   this list of conditions and the following disclaimer.
a57358
+ * - Redistributions in binary form must reproduce the above copyright notice,
a57358
+ *   this list of conditions and the following disclaimer in the documentation
a57358
+ *   and/or other materials provided with the distribution.
a57358
+ * - Neither the name of Sun Microsystems, Inc. nor the names of its
a57358
+ *   contributors may be used to endorse or promote products derived
a57358
+ *   from this software without specific prior written permission.
a57358
+ *
a57358
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
a57358
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
a57358
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
a57358
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
a57358
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
a57358
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
a57358
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
a57358
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
a57358
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
a57358
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
a57358
+ * POSSIBILITY OF SUCH DAMAGE.
a57358
+ */
a57358
+
a57358
+#ifndef _CLNT_FD_LOCKS_H
a57358
+#define _CLNT_FD_LOCKS_H
a57358
+
a57358
+#include <sys/queue.h>
a57358
+#include <errno.h>
a57358
+#include <reentrant.h>
a57358
+#include <rpc/xdr.h>
a57358
+
a57358
+
a57358
+/*
a57358
+ * This utility manages a list of per-fd locks for the clients.
a57358
+ *
a57358
+ * If MAX_FDLOCKS_PREALLOC is defined, a number of pre-fd locks will be
a57358
+ * pre-allocated. This number is the minimum of MAX_FDLOCKS_PREALLOC or
a57358
+ * the process soft limit of allowed fds.
a57358
+ */
a57358
+#ifdef MAX_FDLOCKS_PREALLOC
a57358
+static unsigned int fd_locks_prealloc = 0;
a57358
+#endif
a57358
+
a57358
+/* per-fd lock */
a57358
+struct fd_lock_t {
a57358
+	bool_t active;
a57358
+	int pending;        /* Number of pending operations on fd */
a57358
+	cond_t cv;
a57358
+};
a57358
+typedef struct fd_lock_t fd_lock_t;
a57358
+
a57358
+
a57358
+/* internal type to store per-fd locks in a list */
a57358
+struct fd_lock_item_t {
a57358
+	/* fd_lock_t first so we can cast to fd_lock_item_t */
a57358
+	fd_lock_t fd_lock;
a57358
+	int fd;
a57358
+	unsigned int refs;
a57358
+	TAILQ_ENTRY(fd_lock_item_t) link;
a57358
+};
a57358
+typedef struct fd_lock_item_t fd_lock_item_t;
a57358
+#define to_fd_lock_item(fdlock_t_ptr) ((fd_lock_item_t*) fdlock_t_ptr)
a57358
+
a57358
+
a57358
+/* internal list of per-fd locks */
a57358
+typedef TAILQ_HEAD(,fd_lock_item_t) fd_lock_list_t;
a57358
+
a57358
+
a57358
+#ifdef MAX_FDLOCKS_PREALLOC
a57358
+
a57358
+/* With pre-allocation, keep track of both an array and a list */
a57358
+struct fd_locks_t {
a57358
+	fd_lock_list_t fd_lock_list;
a57358
+	fd_lock_t *fd_lock_array;
a57358
+};
a57358
+typedef struct fd_locks_t fd_locks_t;
a57358
+#define to_fd_lock_list(fd_locks_t_ptr) (&fd_locks_t_ptr->fd_lock_list)
a57358
+
a57358
+#else
a57358
+
a57358
+/* With no pre-allocation, just keep track of a list */
a57358
+typedef fd_lock_list_t fd_locks_t;
a57358
+#define to_fd_lock_list(fd_locks_t_ptr) ((fd_lock_list_t *) fd_locks_t_ptr)
a57358
+
a57358
+#endif
a57358
+
a57358
+
a57358
+/* allocate fd locks */
a57358
+static inline
a57358
+fd_locks_t* fd_locks_init() {
a57358
+	fd_locks_t *fd_locks;
a57358
+
a57358
+	fd_locks = (fd_locks_t *) mem_alloc(sizeof(fd_locks_t));
a57358
+	if (fd_locks == (fd_locks_t *) NULL) {
a57358
+		errno = ENOMEM;
a57358
+		return (NULL);
a57358
+	}
a57358
+	TAILQ_INIT(to_fd_lock_list(fd_locks));
a57358
+
a57358
+#ifdef MAX_FDLOCKS_PREALLOC
a57358
+	size_t fd_lock_arraysz;
a57358
+
a57358
+	if (fd_locks_prealloc == 0) {
a57358
+		unsigned int dtbsize = __rpc_dtbsize();
a57358
+		if (0 < dtbsize && dtbsize < MAX_FDLOCKS_PREALLOC)
a57358
+			fd_locks_prealloc = dtbsize;
a57358
+		else
a57358
+			fd_locks_prealloc = MAX_FDLOCKS_PREALLOC;
a57358
+	}
a57358
+
a57358
+	if ( (size_t) fd_locks_prealloc > SIZE_MAX/sizeof(fd_lock_t)) {
a57358
+		mem_free(fd_locks, sizeof (*fd_locks));
a57358
+		errno = EOVERFLOW;
a57358
+		return (NULL);
a57358
+	}
a57358
+
a57358
+	fd_lock_arraysz = fd_locks_prealloc * sizeof (fd_lock_t);
a57358
+	fd_locks->fd_lock_array = (fd_lock_t *) mem_alloc(fd_lock_arraysz);
a57358
+	if (fd_locks->fd_lock_array == (fd_lock_t *) NULL) {
a57358
+		mem_free(fd_locks, sizeof (*fd_locks));
a57358
+		errno = ENOMEM;
a57358
+		return (NULL);
a57358
+	}
a57358
+	else {
a57358
+		int i;
a57358
+
a57358
+		for (i = 0; i < fd_locks_prealloc; i++) {
a57358
+			fd_locks->fd_lock_array[i].active = FALSE;
a57358
+			cond_init(&fd_locks->fd_lock_array[i].cv, 0, (void *) 0);
a57358
+		}
a57358
+	}
a57358
+#endif
a57358
+
a57358
+	return fd_locks;
a57358
+}
a57358
+
a57358
+/* de-allocate fd locks */
a57358
+static inline
a57358
+void fd_locks_destroy(fd_locks_t *fd_locks) {
a57358
+#ifdef MAX_FDLOCKS_PREALLOC
a57358
+	fd_lock_t *array = fd_locks->fd_lock_array;
a57358
+	mem_free(array, fd_locks_prealloc * sizeof (fd_lock_t));
a57358
+#endif
a57358
+	fd_lock_item_t *item;
a57358
+	fd_lock_list_t *list = to_fd_lock_list(fd_locks);
a57358
+
a57358
+	TAILQ_FOREACH(item, list, link) {
a57358
+		cond_destroy(&item->fd_lock.cv);
a57358
+		mem_free(item, sizeof (*item));
a57358
+	}
a57358
+	mem_free(fd_locks, sizeof (*fd_locks));
a57358
+}
a57358
+
a57358
+/* allocate per-fd lock */
a57358
+static inline
a57358
+fd_lock_t* fd_lock_create(int fd, fd_locks_t *fd_locks) {
a57358
+#ifdef MAX_FDLOCKS_PREALLOC
a57358
+	if (fd < fd_locks_prealloc) {
a57358
+		return &fd_locks->fd_lock_array[fd];
a57358
+	}
a57358
+#endif
a57358
+	fd_lock_item_t *item;
a57358
+	fd_lock_list_t *list = to_fd_lock_list(fd_locks);
a57358
+
a57358
+	for (item = TAILQ_FIRST(list);
a57358
+	     item != (fd_lock_item_t *) NULL && item->fd != fd;
a57358
+	     item = TAILQ_NEXT(item, link));
a57358
+
a57358
+	if (item == (fd_lock_item_t *) NULL) {
a57358
+		item = (fd_lock_item_t *) mem_alloc(sizeof(fd_lock_item_t));
a57358
+		if (item == (fd_lock_item_t *) NULL) {
a57358
+			errno = ENOMEM;
a57358
+			return (NULL);
a57358
+		}
a57358
+		item->fd = fd;
a57358
+		item->refs = 1;
a57358
+		item->fd_lock.active = FALSE;
a57358
+		item->fd_lock.pending = 0;
a57358
+		cond_init(&item->fd_lock.cv, 0, (void *) 0);
a57358
+		TAILQ_INSERT_HEAD(list, item, link);
a57358
+	} else {
a57358
+		item->refs++;
a57358
+	}
a57358
+	return &item->fd_lock;
a57358
+}
a57358
+
a57358
+/* de-allocate per-fd lock */
a57358
+static inline
a57358
+void fd_lock_destroy(int fd, fd_lock_t *fd_lock, fd_locks_t *fd_locks) {
a57358
+#ifdef MAX_FDLOCKS_PREALLOC
a57358
+	if (fd < fd_locks_prealloc)
a57358
+		return;
a57358
+#endif
a57358
+	fd_lock_item_t* item = to_fd_lock_item(fd_lock);
a57358
+	item->refs--;
a57358
+	if (item->refs <= 0) {
a57358
+		TAILQ_REMOVE(to_fd_lock_list(fd_locks), item, link);
a57358
+		cond_destroy(&item->fd_lock.cv);
a57358
+		mem_free(item, sizeof (*item));
a57358
+	}
a57358
+}
a57358
+
a57358
+#endif /* _CLNT_FD_LOCKS_H */
a57358
diff --git a/src/clnt_vc.c b/src/clnt_vc.c
a57358
index 3ba55de..7fe3016 100644
a57358
--- a/src/clnt_vc.c
a57358
+++ b/src/clnt_vc.c
a57358
@@ -67,6 +67,7 @@
a57358
 
a57358
 #include <rpc/rpc.h>
a57358
 #include "rpc_com.h"
a57358
+#include "clnt_fd_locks.h"
a57358
 
a57358
 #ifdef HAVE_RPCSEC_GSS
a57358
 #include <rpc/auth_gss.h>
a57358
@@ -114,6 +115,7 @@ static int write_vc(void *, void *, int);
a57358
 
a57358
 struct ct_data {
a57358
 	int		ct_fd;		/* connection's fd */
a57358
+	fd_lock_t	*ct_fd_lock;
a57358
 	bool_t		ct_closeit;	/* close it on destroy */
a57358
 	struct timeval	ct_wait;	/* wait interval in milliseconds */
a57358
 	bool_t          ct_waitset;	/* wait set by clnt_control? */
a57358
@@ -128,27 +130,33 @@ struct ct_data {
a57358
 };
a57358
 
a57358
 /*
a57358
- *      This machinery implements per-fd locks for MT-safety.  It is not
a57358
- *      sufficient to do per-CLIENT handle locks for MT-safety because a
a57358
- *      user may create more than one CLIENT handle with the same fd behind
a57358
- *      it.  Therfore, we allocate an array of flags (vc_fd_locks), protected
a57358
- *      by the clnt_fd_lock mutex, and an array (vc_cv) of condition variables
a57358
- *      similarly protected.  Vc_fd_lock[fd] == 1 => a call is active on some
a57358
- *      CLIENT handle created for that fd.
a57358
- *      The current implementation holds locks across the entire RPC and reply.
a57358
- *      Yes, this is silly, and as soon as this code is proven to work, this
a57358
- *      should be the first thing fixed.  One step at a time.
a57358
+ *	This machinery implements per-fd locks for MT-safety.  It is not
a57358
+ *	sufficient to do per-CLIENT handle locks for MT-safety because a
a57358
+ *	user may create more than one CLIENT handle with the same fd behind
a57358
+ *	it.
a57358
+ *
a57358
+ *	We keep track of a list of per-fd locks, protected by the clnt_fd_lock
a57358
+ *	mutex. Each per-fd lock consists of a predicate indicating whether it is
a57358
+ *	active or not: fd_lock->active == TRUE => a call is active on some
a57358
+ *	CLIENT handle created for that fd. Each fd predicate is guarded by a
a57358
+ *	condition variable so that the global mutex can be unlocked while
a57358
+ *	waiting for the predicate to change.
a57358
+ *
a57358
+ *	The current implementation holds locks across the entire RPC and reply,
a57358
+ *	including retransmissions.  Yes, this is silly, and as soon as this
a57358
+ *	code is proven to work, this should be the first thing fixed.  One step
a57358
+ *	at a time.
a57358
  */
a57358
-static int      *vc_fd_locks;
a57358
+static fd_locks_t *vc_fd_locks;
a57358
 extern pthread_mutex_t disrupt_lock;
a57358
 extern mutex_t  clnt_fd_lock;
a57358
-static cond_t   *vc_cv;
a57358
-#define release_fd_lock(fd, mask) {	\
a57358
+#define release_fd_lock(fd_lock, mask) {	\
a57358
 	mutex_lock(&clnt_fd_lock);	\
a57358
-	vc_fd_locks[fd] = 0;		\
a57358
-	mutex_unlock(&clnt_fd_lock);	\
a57358
+	fd_lock->active = FALSE;	\
a57358
+	fd_lock->pending--;		\
a57358
 	thr_sigsetmask(SIG_SETMASK, &(mask), (sigset_t *) NULL);	\
a57358
-	cond_signal(&vc_cv[fd]);	\
a57358
+	cond_signal(&fd_lock->cv);	\
a57358
+	mutex_unlock(&clnt_fd_lock);    \
a57358
 }
a57358
 
a57358
 static const char clnt_vc_errstr[] = "%s : %s";
a57358
@@ -185,6 +193,7 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
a57358
 	struct sockaddr_storage ss;
a57358
 	socklen_t slen;
a57358
 	struct __rpc_sockinfo si;
a57358
+	fd_lock_t *fd_lock;
a57358
 
a57358
 	mutex_lock(&disrupt_lock);
a57358
 	if (disrupt == 0)
a57358
@@ -205,49 +214,26 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	if (vc_fd_locks == (int *) NULL) {
a57358
-		size_t cv_allocsz, fd_allocsz;
a57358
-		unsigned int dtbsize = __rpc_dtbsize();
a57358
-		struct rpc_createerr *ce = &get_rpc_createerr();
a57358
-
a57358
-		if ( (size_t) dtbsize > SIZE_MAX/sizeof(cond_t)) {
a57358
+	if (vc_fd_locks == (fd_locks_t *) NULL) {
a57358
+		vc_fd_locks = fd_locks_init();
a57358
+		if (vc_fd_locks == (fd_locks_t *) NULL) {
a57358
+			struct rpc_createerr *ce;
a57358
 			mutex_unlock(&clnt_fd_lock);
a57358
-			thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
a57358
-			ce->cf_stat = RPC_SYSTEMERROR;
a57358
-			ce->cf_error.re_errno = EOVERFLOW;
a57358
-			goto err;
a57358
-		}
a57358
-
a57358
-		fd_allocsz = dtbsize * sizeof (int);
a57358
-		vc_fd_locks = (int *) mem_alloc(fd_allocsz);
a57358
-		if (vc_fd_locks == (int *) NULL) {
a57358
-			mutex_unlock(&clnt_fd_lock);
a57358
-			thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
a57358
-			ce->cf_stat = RPC_SYSTEMERROR;
a57358
-			ce->cf_error.re_errno = ENOMEM;
a57358
-			goto err;
a57358
-		} else
a57358
-			memset(vc_fd_locks, '\0', fd_allocsz);
a57358
-
a57358
-		assert(vc_cv == (cond_t *) NULL);
a57358
-		cv_allocsz = dtbsize * sizeof (cond_t);
a57358
-		vc_cv = (cond_t *) mem_alloc(cv_allocsz);
a57358
-		if (vc_cv == (cond_t *) NULL) {
a57358
-			mem_free(vc_fd_locks, fd_allocsz);
a57358
-			vc_fd_locks = (int *) NULL;
a57358
-			mutex_unlock(&clnt_fd_lock);
a57358
-			thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
a57358
+			ce = &get_rpc_createerr();
a57358
 			ce->cf_stat = RPC_SYSTEMERROR;
a57358
-			ce->cf_error.re_errno = ENOMEM;
a57358
+			ce->cf_error.re_errno = errno;
a57358
 			goto err;
a57358
-		} else {
a57358
-			int i;
a57358
-
a57358
-			for (i = 0; i < dtbsize; i++)
a57358
-				cond_init(&vc_cv[i], 0, (void *) 0);
a57358
 		}
a57358
-	} else
a57358
-		assert(vc_cv != (cond_t *) NULL);
a57358
+	}
a57358
+	fd_lock = fd_lock_create(fd, vc_fd_locks);
a57358
+	if (fd_lock == (fd_lock_t *) NULL) {
a57358
+		struct rpc_createerr *ce;
a57358
+		mutex_unlock(&clnt_fd_lock);
a57358
+		ce = &get_rpc_createerr();
a57358
+		ce->cf_stat = RPC_SYSTEMERROR;
a57358
+		ce->cf_error.re_errno = errno;
a57358
+		goto err;
a57358
+	}
a57358
 
a57358
 	/*
a57358
 	 * Do not hold mutex during connect
a57358
@@ -283,6 +269,7 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
a57358
 	 * Set up private data struct
a57358
 	 */
a57358
 	ct->ct_fd = fd;
a57358
+	ct->ct_fd_lock = fd_lock;
a57358
 	ct->ct_wait.tv_usec = 0;
a57358
 	ct->ct_waitset = FALSE;
a57358
 	ct->ct_addr.buf = malloc(raddr->maxlen);
a57358
@@ -362,17 +349,16 @@ clnt_vc_call(cl, proc, xdr_args, args_ptr, xdr_results, results_ptr, timeout)
a57358
 	bool_t shipnow;
a57358
 	int refreshes = 2;
a57358
 	sigset_t mask, newmask;
a57358
-	int rpc_lock_value;
a57358
 
a57358
 	assert(cl != NULL);
a57358
 
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	while (vc_fd_locks[ct->ct_fd])
a57358
-		cond_wait(&vc_cv[ct->ct_fd], &clnt_fd_lock);
a57358
-        rpc_lock_value = 1;
a57358
-	vc_fd_locks[ct->ct_fd] = rpc_lock_value;
a57358
+	ct->ct_fd_lock->pending++;
a57358
+	while (ct->ct_fd_lock->active)
a57358
+		cond_wait(&ct->ct_fd_lock->cv, &clnt_fd_lock);
a57358
+	ct->ct_fd_lock->active = TRUE;
a57358
 	mutex_unlock(&clnt_fd_lock);
a57358
 	if (!ct->ct_waitset) {
a57358
 		/* If time is not within limits, we ignore it. */
a57358
@@ -401,22 +387,22 @@ call_again:
a57358
 		if (ct->ct_error.re_status == RPC_SUCCESS)
a57358
 			ct->ct_error.re_status = RPC_CANTENCODEARGS;
a57358
 		(void)xdrrec_endofrecord(xdrs, TRUE);
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return (ct->ct_error.re_status);
a57358
 	}
a57358
 	if (! xdrrec_endofrecord(xdrs, shipnow)) {
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return (ct->ct_error.re_status = RPC_CANTSEND);
a57358
 	}
a57358
 	if (! shipnow) {
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return (RPC_SUCCESS);
a57358
 	}
a57358
 	/*
a57358
 	 * Hack to provide rpc-based message passing
a57358
 	 */
a57358
 	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return(ct->ct_error.re_status = RPC_TIMEDOUT);
a57358
 	}
a57358
 
a57358
@@ -430,14 +416,14 @@ call_again:
a57358
 		reply_msg.acpted_rply.ar_results.where = NULL;
a57358
 		reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
a57358
 		if (! xdrrec_skiprecord(xdrs)) {
a57358
-			release_fd_lock(ct->ct_fd, mask);
a57358
+			release_fd_lock(ct->ct_fd_lock, mask);
a57358
 			return (ct->ct_error.re_status);
a57358
 		}
a57358
 		/* now decode and validate the response header */
a57358
 		if (! xdr_replymsg(xdrs, &reply_msg)) {
a57358
 			if (ct->ct_error.re_status == RPC_SUCCESS)
a57358
 				continue;
a57358
-			release_fd_lock(ct->ct_fd, mask);
a57358
+			release_fd_lock(ct->ct_fd_lock, mask);
a57358
 			return (ct->ct_error.re_status);
a57358
 		}
a57358
 		if (reply_msg.rm_xid == x_id)
a57358
@@ -470,7 +456,7 @@ call_again:
a57358
 		if (refreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg))
a57358
 			goto call_again;
a57358
 	}  /* end of unsuccessful completion */
a57358
-	release_fd_lock(ct->ct_fd, mask);
a57358
+	release_fd_lock(ct->ct_fd_lock, mask);
a57358
 	return (ct->ct_error.re_status);
a57358
 }
a57358
 
a57358
@@ -508,13 +494,15 @@ clnt_vc_freeres(cl, xdr_res, res_ptr)
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	while (vc_fd_locks[ct->ct_fd])
a57358
-		cond_wait(&vc_cv[ct->ct_fd], &clnt_fd_lock);
a57358
+	ct->ct_fd_lock->pending++;
a57358
+	while (ct->ct_fd_lock->active)
a57358
+		cond_wait(&ct->ct_fd_lock->cv, &clnt_fd_lock);
a57358
 	xdrs->x_op = XDR_FREE;
a57358
 	dummy = (*xdr_res)(xdrs, res_ptr);
a57358
-	mutex_unlock(&clnt_fd_lock);
a57358
+	ct->ct_fd_lock->pending--;
a57358
 	thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
a57358
-	cond_signal(&vc_cv[ct->ct_fd]);
a57358
+	cond_signal(&ct->ct_fd_lock->cv);
a57358
+	mutex_unlock(&clnt_fd_lock);
a57358
 
a57358
 	return dummy;
a57358
 }
a57358
@@ -536,7 +524,6 @@ clnt_vc_control(cl, request, info)
a57358
 	void *infop = info;
a57358
 	sigset_t mask;
a57358
 	sigset_t newmask;
a57358
-	int rpc_lock_value;
a57358
 	u_int32_t tmp;
a57358
 	u_int32_t ltmp;
a57358
 
a57358
@@ -547,20 +534,20 @@ clnt_vc_control(cl, request, info)
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	while (vc_fd_locks[ct->ct_fd])
a57358
-		cond_wait(&vc_cv[ct->ct_fd], &clnt_fd_lock);
a57358
-        rpc_lock_value = 1;
a57358
-	vc_fd_locks[ct->ct_fd] = rpc_lock_value;
a57358
+	ct->ct_fd_lock->pending++;
a57358
+	while (ct->ct_fd_lock->active)
a57358
+		cond_wait(&ct->ct_fd_lock->cv, &clnt_fd_lock);
a57358
+	ct->ct_fd_lock->active = TRUE;
a57358
 	mutex_unlock(&clnt_fd_lock);
a57358
 
a57358
 	switch (request) {
a57358
 	case CLSET_FD_CLOSE:
a57358
 		ct->ct_closeit = TRUE;
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return (TRUE);
a57358
 	case CLSET_FD_NCLOSE:
a57358
 		ct->ct_closeit = FALSE;
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return (TRUE);
a57358
 	default:
a57358
 		break;
a57358
@@ -568,13 +555,13 @@ clnt_vc_control(cl, request, info)
a57358
 
a57358
 	/* for other requests which use info */
a57358
 	if (info == NULL) {
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return (FALSE);
a57358
 	}
a57358
 	switch (request) {
a57358
 	case CLSET_TIMEOUT:
a57358
 		if (time_not_ok((struct timeval *)info)) {
a57358
-			release_fd_lock(ct->ct_fd, mask);
a57358
+			release_fd_lock(ct->ct_fd_lock, mask);
a57358
 			return (FALSE);
a57358
 		}
a57358
 		ct->ct_wait = *(struct timeval *)infop;
a57358
@@ -594,7 +581,7 @@ clnt_vc_control(cl, request, info)
a57358
 		*(struct netbuf *)info = ct->ct_addr;
a57358
 		break;
a57358
 	case CLSET_SVC_ADDR:		/* set to new address */
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return (FALSE);
a57358
 	case CLGET_XID:
a57358
 		/*
a57358
@@ -648,10 +635,10 @@ clnt_vc_control(cl, request, info)
a57358
 		break;
a57358
 
a57358
 	default:
a57358
-		release_fd_lock(ct->ct_fd, mask);
a57358
+		release_fd_lock(ct->ct_fd_lock, mask);
a57358
 		return (FALSE);
a57358
 	}
a57358
-	release_fd_lock(ct->ct_fd, mask);
a57358
+	release_fd_lock(ct->ct_fd_lock, mask);
a57358
 	return (TRUE);
a57358
 }
a57358
 
a57358
@@ -660,20 +647,24 @@ static void
a57358
 clnt_vc_destroy(cl)
a57358
 	CLIENT *cl;
a57358
 {
a57358
+	assert(cl != NULL);
a57358
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
a57358
 	int ct_fd = ct->ct_fd;
a57358
+	fd_lock_t *ct_fd_lock = ct->ct_fd_lock;
a57358
 	sigset_t mask;
a57358
 	sigset_t newmask;
a57358
 
a57358
-	assert(cl != NULL);
a57358
-
a57358
-	ct = (struct ct_data *) cl->cl_private;
a57358
-
a57358
 	sigfillset(&newmask);
a57358
 	thr_sigsetmask(SIG_SETMASK, &newmask, &mask);
a57358
 	mutex_lock(&clnt_fd_lock);
a57358
-	while (vc_fd_locks[ct_fd])
a57358
-		cond_wait(&vc_cv[ct_fd], &clnt_fd_lock);
a57358
+	/* wait until all pending operations on client are completed. */
a57358
+	while (ct_fd_lock->pending > 0) {
a57358
+		/* If a blocked operation can be awakened, then do it. */
a57358
+		if (ct_fd_lock->active == FALSE)
a57358
+			cond_signal(&ct_fd_lock->cv);
a57358
+		/* keep waiting... */
a57358
+		cond_wait(&ct_fd_lock->cv, &clnt_fd_lock);
a57358
+	}
a57358
 	if (ct->ct_closeit && ct->ct_fd != -1) {
a57358
 		(void)close(ct->ct_fd);
a57358
 	}
a57358
@@ -686,9 +677,10 @@ clnt_vc_destroy(cl)
a57358
 	if (cl->cl_tp && cl->cl_tp[0])
a57358
 		mem_free(cl->cl_tp, strlen(cl->cl_tp) +1);
a57358
 	mem_free(cl, sizeof(CLIENT));
a57358
+	cond_signal(&ct_fd_lock->cv);
a57358
+	fd_lock_destroy(ct_fd, ct_fd_lock, vc_fd_locks);
a57358
 	mutex_unlock(&clnt_fd_lock);
a57358
 	thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
a57358
-	cond_signal(&vc_cv[ct_fd]);
a57358
 }
a57358
 
a57358
 /*
a57358
diff --git a/src/getpublickey.c b/src/getpublickey.c
a57358
index 8cf4dc2..be37a24 100644
a57358
--- a/src/getpublickey.c
a57358
+++ b/src/getpublickey.c
a57358
@@ -74,7 +74,7 @@ __getpublickey_real(netname, publickey)
a57358
 		return (0);
a57358
 	}
a57358
 	*p = '\0';
a57358
-	(void) strncpy(publickey, lookup, HEXKEYBYTES);
a57358
+	memcpy(publickey, lookup, HEXKEYBYTES);
a57358
 	publickey[HEXKEYBYTES] = '\0';
a57358
 	return (1);
a57358
 }
a57358
diff --git a/src/mt_misc.c b/src/mt_misc.c
a57358
index 5a49b78..3a2bc51 100644
a57358
--- a/src/mt_misc.c
a57358
+++ b/src/mt_misc.c
a57358
@@ -13,7 +13,7 @@ pthread_rwlock_t	svc_lock = PTHREAD_RWLOCK_INITIALIZER;
a57358
 pthread_rwlock_t	svc_fd_lock = PTHREAD_RWLOCK_INITIALIZER;
a57358
 
a57358
 /* protects the RPCBIND address cache */
a57358
-pthread_rwlock_t	rpcbaddr_cache_lock = PTHREAD_RWLOCK_INITIALIZER;
a57358
+pthread_mutex_t	rpcbaddr_cache_lock = PTHREAD_MUTEX_INITIALIZER;
a57358
 
a57358
 /* protects authdes cache (svcauth_des.c) */
a57358
 pthread_mutex_t	authdes_lock = PTHREAD_MUTEX_INITIALIZER;
a57358
diff --git a/src/rpcb_clnt.c b/src/rpcb_clnt.c
a57358
index a5923cb..65ff43e 100644
a57358
--- a/src/rpcb_clnt.c
a57358
+++ b/src/rpcb_clnt.c
a57358
@@ -85,7 +85,7 @@ static int cachesize;
a57358
 
a57358
 extern int __rpc_lowvers;
a57358
 
a57358
-static struct address_cache *check_cache(const char *, const char *);
a57358
+static struct address_cache *copy_of_cached(const char *, char *);
a57358
 static void delete_cache(struct netbuf *);
a57358
 static void add_cache(const char *, const char *, struct netbuf *, char *);
a57358
 static CLIENT *getclnthandle(const char *, const struct netconfig *, char **);
a57358
@@ -94,6 +94,82 @@ static CLIENT *local_rpcb(void);
a57358
 static struct netbuf *got_entry(rpcb_entry_list_ptr, const struct netconfig *);
a57358
 #endif
a57358
 
a57358
+/*
a57358
+ * Destroys a cached address entry structure.
a57358
+ * NOTE(review): this frees ac_taddr->buf but never ac_taddr itself — leak; verify.
a57358
+ */
a57358
+static void
a57358
+destroy_addr(addr)
a57358
+	struct address_cache *addr;
a57358
+{
a57358
+	if (addr == NULL)
a57358
+		return;
a57358
+	if(addr->ac_host != NULL)
a57358
+		free(addr->ac_host);
a57358
+	if(addr->ac_netid != NULL)
a57358
+		free(addr->ac_netid);
a57358
+	if(addr->ac_uaddr != NULL)
a57358
+		free(addr->ac_uaddr);
a57358
+	if(addr->ac_taddr != NULL) {
a57358
+		if(addr->ac_taddr->buf != NULL)
a57358
+			free(addr->ac_taddr->buf);
a57358
+	}
a57358
+	free(addr);
a57358
+}
a57358
+
a57358
+/*
a57358
+ * Creates an unlinked copy of an address cache entry. If the argument is NULL
a57358
+ * or the new entry cannot be allocated then NULL is returned.
a57358
+ */
a57358
+static struct address_cache *
a57358
+copy_addr(addr)
a57358
+	const struct address_cache *addr;
a57358
+{
a57358
+	struct address_cache *copy;
a57358
+
a57358
+	if (addr == NULL)
a57358
+		return (NULL);
a57358
+
a57358
+	copy = calloc(1, sizeof(*addr));
a57358
+	if (copy == NULL)
a57358
+		return (NULL);
a57358
+
a57358
+	if (addr->ac_host != NULL) {
a57358
+		copy->ac_host = strdup(addr->ac_host);
a57358
+		if (copy->ac_host == NULL)
a57358
+			goto err;
a57358
+	}
a57358
+	if (addr->ac_netid != NULL) {
a57358
+		copy->ac_netid = strdup(addr->ac_netid);
a57358
+		if (copy->ac_netid == NULL)
a57358
+			goto err;
a57358
+	}
a57358
+	if (addr->ac_uaddr != NULL) {
a57358
+		copy->ac_uaddr = strdup(addr->ac_uaddr);
a57358
+		if (copy->ac_uaddr == NULL)
a57358
+			goto err;
a57358
+	}
a57358
+
a57358
+	if (addr->ac_taddr == NULL)
a57358
+		return (copy);
a57358
+
a57358
+	copy->ac_taddr = calloc(1, sizeof(*addr->ac_taddr));
a57358
+	if (copy->ac_taddr == NULL)
a57358
+		goto err;
a57358
+
a57358
+	memcpy(copy->ac_taddr, addr->ac_taddr, sizeof(*addr->ac_taddr));
a57358
+	copy->ac_taddr->buf = malloc(addr->ac_taddr->len);
a57358
+	if (copy->ac_taddr->buf == NULL)
a57358
+		goto err;
a57358
+
a57358
+	memcpy(copy->ac_taddr->buf, addr->ac_taddr->buf, addr->ac_taddr->len);
a57358
+	return (copy);
a57358
+
a57358
+err:
a57358
+	destroy_addr(copy);
a57358
+	return (NULL);
a57358
+}
a57358
+
a57358
 /*
a57358
  * This routine adjusts the timeout used for calls to the remote rpcbind.
a57358
  * Also, this routine can be used to set the use of portmapper version 2
a57358
@@ -125,17 +201,18 @@ __rpc_control(request, info)
a57358
 }
a57358
 
a57358
 /*
a57358
- *	It might seem that a reader/writer lock would be more reasonable here.
a57358
- *	However because getclnthandle(), the only user of the cache functions,
a57358
- *	may do a delete_cache() operation if a check_cache() fails to return an
a57358
- *	address useful to clnt_tli_create(), we may as well use a mutex.
a57358
- */
a57358
-/*
a57358
- * As it turns out, if the cache lock is *not* a reader/writer lock, we will
a57358
- * block all clnt_create's if we are trying to connect to a host that's down,
a57358
- * since the lock will be held all during that time.
a57358
+ * Protect against concurrent access to the address cache and modifications
a57358
+ * (esp. deletions) of cache entries.
a57358
+ *
a57358
+ * Previously a bidirectional R/W lock was used. However, R/W locking is
a57358
+ * dangerous as it allows concurrent modification (e.g. deletion with write
a57358
+ * lock) at the same time as the deleted element is accessed (via check_cache()
a57358
+ * and a read lock). We absolutely need a single mutex for all access to
a57358
+ * prevent cache corruption. If the mutexing is restricted to only the
a57358
+ * relevant code sections, deadlocking should be avoided even with recursed
a57358
+ * client creation.
a57358
  */
a57358
-extern rwlock_t	rpcbaddr_cache_lock;
a57358
+extern pthread_mutex_t	rpcbaddr_cache_lock;
a57358
 
a57358
 /*
a57358
  * The routines check_cache(), add_cache(), delete_cache() manage the
a57358
@@ -143,49 +220,52 @@ extern rwlock_t	rpcbaddr_cache_lock;
a57358
  */
a57358
 
a57358
 static struct address_cache *
a57358
-check_cache(host, netid)
a57358
-	const char *host, *netid;
a57358
+copy_of_cached(host, netid)
a57358
+	const char *host; 
a57358
+	char *netid;
a57358
 {
a57358
-	struct address_cache *cptr;
a57358
-
a57358
-	/* READ LOCK HELD ON ENTRY: rpcbaddr_cache_lock */
a57358
+	struct address_cache *cptr, *copy = NULL;
a57358
 
a57358
+	mutex_lock(&rpcbaddr_cache_lock);
a57358
 	for (cptr = front; cptr != NULL; cptr = cptr->ac_next) {
a57358
 		if (!strcmp(cptr->ac_host, host) &&
a57358
 		    !strcmp(cptr->ac_netid, netid)) {
a57358
 			LIBTIRPC_DEBUG(3, ("check_cache: Found cache entry for %s: %s\n", 
a57358
 				host, netid));
a57358
-			return (cptr);
a57358
+			copy = copy_addr(cptr);
a57358
+			break;
a57358
 		}
a57358
 	}
a57358
-	return ((struct address_cache *) NULL);
a57358
+	mutex_unlock(&rpcbaddr_cache_lock);
a57358
+	return copy;
a57358
 }
a57358
 
a57358
 static void
a57358
 delete_cache(addr)
a57358
 	struct netbuf *addr;
a57358
 {
a57358
-	struct address_cache *cptr, *prevptr = NULL;
a57358
+	struct address_cache *cptr = NULL, *prevptr = NULL;
a57358
+
a57358
+	/* Take the cache lock here; callers must NOT already hold it. */
a57358
+	mutex_lock(&rpcbaddr_cache_lock);
a57358
 
a57358
-	/* WRITE LOCK HELD ON ENTRY: rpcbaddr_cache_lock */
a57358
 	for (cptr = front; cptr != NULL; cptr = cptr->ac_next) {
a57358
 		if (!memcmp(cptr->ac_taddr->buf, addr->buf, addr->len)) {
a57358
-			free(cptr->ac_host);
a57358
-			free(cptr->ac_netid);
a57358
-			free(cptr->ac_taddr->buf);
a57358
-			free(cptr->ac_taddr);
a57358
+			/* Unlink from cache. We'll destroy it after releasing the mutex. NOTE(review): ac_uaddr is freed just below but not reset to NULL, so destroy_addr() will free it again — double free; verify. */
a57358
 			if (cptr->ac_uaddr)
a57358
 				free(cptr->ac_uaddr);
a57358
 			if (prevptr)
a57358
 				prevptr->ac_next = cptr->ac_next;
a57358
 			else
a57358
 				front = cptr->ac_next;
a57358
-			free(cptr);
a57358
 			cachesize--;
a57358
 			break;
a57358
 		}
a57358
 		prevptr = cptr;
a57358
 	}
a57358
+
a57358
+	mutex_unlock(&rpcbaddr_cache_lock);
a57358
+	destroy_addr(cptr);
a57358
 }
a57358
 
a57358
 static void
a57358
@@ -217,7 +297,7 @@ add_cache(host, netid, taddr, uaddr)
a57358
 
a57358
 /* VARIABLES PROTECTED BY rpcbaddr_cache_lock:  cptr */
a57358
 
a57358
-	rwlock_wrlock(&rpcbaddr_cache_lock);
a57358
+	mutex_lock(&rpcbaddr_cache_lock);
a57358
 	if (cachesize < CACHESIZE) {
a57358
 		ad_cache->ac_next = front;
a57358
 		front = ad_cache;
a57358
@@ -250,7 +330,7 @@ add_cache(host, netid, taddr, uaddr)
a57358
 		}
a57358
 		free(cptr);
a57358
 	}
a57358
-	rwlock_unlock(&rpcbaddr_cache_lock);
a57358
+	mutex_unlock(&rpcbaddr_cache_lock);
a57358
 	return;
a57358
 
a57358
 out_free:
a57358
@@ -261,6 +341,7 @@ out_free:
a57358
 	free(ad_cache);
a57358
 }
a57358
 
a57358
+
a57358
 /*
a57358
  * This routine will return a client handle that is connected to the
a57358
  * rpcbind. If targaddr is non-NULL, the "universal address" of the
a57358
@@ -275,11 +356,9 @@ getclnthandle(host, nconf, targaddr)
a57358
 	char **targaddr;
a57358
 {
a57358
 	CLIENT *client;
a57358
-	struct netbuf *addr, taddr;
a57358
-	struct netbuf addr_to_delete;
a57358
+	struct netbuf taddr;
a57358
 	struct __rpc_sockinfo si;
a57358
 	struct addrinfo hints, *res, *tres;
a57358
-	struct address_cache *ad_cache;
a57358
 	char *tmpaddr;
a57358
 
a57358
 	if (nconf == NULL) {
a57358
@@ -294,47 +373,35 @@ getclnthandle(host, nconf, targaddr)
a57358
 		return NULL;
a57358
 	}
a57358
 
a57358
-/* VARIABLES PROTECTED BY rpcbaddr_cache_lock:  ad_cache */
a57358
+
a57358
 
a57358
 	/* Get the address of the rpcbind.  Check cache first */
a57358
 	client = NULL;
a57358
 	if (targaddr)
a57358
 		*targaddr = NULL;
a57358
-	addr_to_delete.len = 0;
a57358
-	rwlock_rdlock(&rpcbaddr_cache_lock);
a57358
-	ad_cache = NULL;
a57358
-
a57358
-	if (host != NULL)
a57358
-		ad_cache = check_cache(host, nconf->nc_netid);
a57358
-	if (ad_cache != NULL) {
a57358
-		addr = ad_cache->ac_taddr;
a57358
-		client = clnt_tli_create(RPC_ANYFD, nconf, addr,
a57358
-		    (rpcprog_t)RPCBPROG, (rpcvers_t)RPCBVERS4, 0, 0);
a57358
-		if (client != NULL) {
a57358
-			if (targaddr && ad_cache->ac_uaddr)
a57358
-				*targaddr = strdup(ad_cache->ac_uaddr);
a57358
-			rwlock_unlock(&rpcbaddr_cache_lock);
a57358
-			return (client);
a57358
-		}
a57358
-		addr_to_delete.len = addr->len;
a57358
-		addr_to_delete.buf = (char *)malloc(addr->len);
a57358
-		if (addr_to_delete.buf == NULL) {
a57358
-			addr_to_delete.len = 0;
a57358
-		} else {
a57358
-			memcpy(addr_to_delete.buf, addr->buf, addr->len);
a57358
+
a57358
+	if (host != NULL)  {
a57358
+		struct address_cache *ad_cache;
a57358
+
a57358
+		/* Get an MT-safe copy of the cached address (if any) */
a57358
+		ad_cache = copy_of_cached(host, nconf->nc_netid);
a57358
+		if (ad_cache != NULL) {
a57358
+			client = clnt_tli_create(RPC_ANYFD, nconf, ad_cache->ac_taddr,
a57358
+							(rpcprog_t)RPCBPROG, (rpcvers_t)RPCBVERS4, 0, 0);
a57358
+			if (client != NULL) {
a57358
+				if (targaddr && ad_cache->ac_uaddr) {
a57358
+					*targaddr = ad_cache->ac_uaddr;
a57358
+					ad_cache->ac_uaddr = NULL; /* Ownership moved to caller; keep destroy_addr() from freeing it */
a57358
+				}
a57358
+				destroy_addr(ad_cache);
a57358
+				return (client);
a57358
+			}
a57358
+
a57358
+			delete_cache(ad_cache->ac_taddr);
a57358
+			destroy_addr(ad_cache);
a57358
 		}
a57358
 	}
a57358
-	rwlock_unlock(&rpcbaddr_cache_lock);
a57358
-	if (addr_to_delete.len != 0) {
a57358
-		/*
a57358
-		 * Assume this may be due to cache data being
a57358
-		 *  outdated
a57358
-		 */
a57358
-		rwlock_wrlock(&rpcbaddr_cache_lock);
a57358
-		delete_cache(&addr_to_delete);
a57358
-		rwlock_unlock(&rpcbaddr_cache_lock);
a57358
-		free(addr_to_delete.buf);
a57358
-	}
a57358
+
a57358
 	if (!__rpc_nconf2sockinfo(nconf, &si)) {
a57358
 		rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
a57358
 		assert(client == NULL);
a57358
diff --git a/tirpc/reentrant.h b/tirpc/reentrant.h
a57358
index 5f5c96e..5bb581a 100644
a57358
--- a/tirpc/reentrant.h
a57358
+++ b/tirpc/reentrant.h
a57358
@@ -57,6 +57,7 @@
a57358
 #define mutex_unlock(m)		pthread_mutex_unlock(m)
a57358
 
a57358
 #define cond_init(c, a, p)	pthread_cond_init(c, a)
a57358
+#define cond_destroy(c)		pthread_cond_destroy(c)
a57358
 #define cond_signal(m)		pthread_cond_signal(m)
a57358
 #define cond_broadcast(m)	pthread_cond_broadcast(m)
a57358
 #define cond_wait(c, m)		pthread_cond_wait(c, m)