Blob Blame History Raw
diff --git a/.gitignore b/.gitignore
index e91e7a25..e97b31f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,6 +54,7 @@ utils/rquotad/rquotad
 utils/rquotad/rquota.h
 utils/rquotad/rquota_xdr.c
 utils/showmount/showmount
+utils/nfsdcld/nfsdcld
 utils/nfsdcltrack/nfsdcltrack
 utils/statd/statd
 tools/locktest/testlk
diff --git a/aclocal/ax_gcc_func_attribute.m4 b/aclocal/ax_gcc_func_attribute.m4
new file mode 100644
index 00000000..098c9aad
--- /dev/null
+++ b/aclocal/ax_gcc_func_attribute.m4
@@ -0,0 +1,238 @@
+# ===========================================================================
+#  https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
+#
+# DESCRIPTION
+#
+#   This macro checks if the compiler supports one of GCC's function
+#   attributes; many other compilers also provide function attributes with
+#   the same syntax. Compiler warnings are used to detect supported
+#   attributes as unsupported ones are ignored by default so quieting
+#   warnings when using this macro will yield false positives.
+#
+#   The ATTRIBUTE parameter holds the name of the attribute to be checked.
+#
+#   If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE>.
+#
+#   The macro caches its result in the ax_cv_have_func_attribute_<attribute>
+#   variable.
+#
+#   The macro currently supports the following function attributes:
+#
+#    alias
+#    aligned
+#    alloc_size
+#    always_inline
+#    artificial
+#    cold
+#    const
+#    constructor
+#    constructor_priority for constructor attribute with priority
+#    deprecated
+#    destructor
+#    dllexport
+#    dllimport
+#    error
+#    externally_visible
+#    fallthrough
+#    flatten
+#    format
+#    format_arg
+#    gnu_inline
+#    hot
+#    ifunc
+#    leaf
+#    malloc
+#    noclone
+#    noinline
+#    nonnull
+#    noreturn
+#    nothrow
+#    optimize
+#    pure
+#    sentinel
+#    sentinel_position
+#    unused
+#    used
+#    visibility
+#    warning
+#    warn_unused_result
+#    weak
+#    weakref
+#
+#   Unsupported function attributes will be tested with a prototype
+#   returning an int and not accepting any arguments and the result of the
+#   check might be wrong or meaningless so use with care.
+#
+# LICENSE
+#
+#   Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved.  This file is offered as-is, without any
+#   warranty.
+
+#serial 9
+
+AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [
+    AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1])
+
+    AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([
+            m4_case([$1],
+                [alias], [
+                    int foo( void ) { return 0; }
+                    int bar( void ) __attribute__(($1("foo")));
+                ],
+                [aligned], [
+                    int foo( void ) __attribute__(($1(32)));
+                ],
+                [alloc_size], [
+                    void *foo(int a) __attribute__(($1(1)));
+                ],
+                [always_inline], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [artificial], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [cold], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [const], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [constructor_priority], [
+                    int foo( void ) __attribute__((__constructor__(65535/2)));
+                ],
+                [constructor], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [deprecated], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [destructor], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [dllexport], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [dllimport], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [error], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [externally_visible], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [fallthrough], [
+                    int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }};
+                ],
+                [flatten], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [format], [
+                    int foo(const char *p, ...) __attribute__(($1(printf, 1, 2)));
+                ],
+                [format_arg], [
+                    char *foo(const char *p) __attribute__(($1(1)));
+                ],
+                [gnu_inline], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [hot], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [ifunc], [
+                    int my_foo( void ) { return 0; }
+                    static int (*resolve_foo(void))(void) { return my_foo; }
+                    int foo( void ) __attribute__(($1("resolve_foo")));
+                ],
+                [leaf], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [malloc], [
+                    void *foo( void ) __attribute__(($1));
+                ],
+                [noclone], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [noinline], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [nonnull], [
+                    int foo(char *p) __attribute__(($1(1)));
+                ],
+                [noreturn], [
+                    void foo( void ) __attribute__(($1));
+                ],
+                [nothrow], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [optimize], [
+                    __attribute__(($1(3))) int foo( void ) { return 0; }
+                ],
+                [pure], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [sentinel], [
+                    int foo(void *p, ...) __attribute__(($1));
+                ],
+                [sentinel_position], [
+                    int foo(void *p, ...) __attribute__(($1(1)));
+                ],
+                [returns_nonnull], [
+                    void *foo( void ) __attribute__(($1));
+                ],
+                [unused], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [used], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [visibility], [
+                    int foo_def( void ) __attribute__(($1("default")));
+                    int foo_hid( void ) __attribute__(($1("hidden")));
+                    int foo_int( void ) __attribute__(($1("internal")));
+                    int foo_pro( void ) __attribute__(($1("protected")));
+                ],
+                [warning], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [warn_unused_result], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [weak], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [weakref], [
+                    static int foo( void ) { return 0; }
+                    static int bar( void ) __attribute__(($1("foo")));
+                ],
+                [
+                 m4_warn([syntax], [Unsupported attribute $1, the test may fail])
+                 int foo( void ) __attribute__(($1));
+                ]
+            )], [])
+            ],
+            dnl GCC doesn't exit with an error if an unknown attribute is
+            dnl provided but only outputs a warning, so accept the attribute
+            dnl only if no warning were issued.
+            [AS_IF([test -s conftest.err],
+                [AS_VAR_SET([ac_var], [no])],
+                [AS_VAR_SET([ac_var], [yes])])],
+            [AS_VAR_SET([ac_var], [no])])
+    ])
+
+    AS_IF([test yes = AS_VAR_GET([ac_var])],
+        [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1,
+            [Define to 1 if the system has the `$1' function attribute])], [])
+
+    AS_VAR_POPDEF([ac_var])
+])
diff --git a/configure.ac b/configure.ac
index 48eb9eb6..13ea957f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -238,6 +238,12 @@ else
 	AM_CONDITIONAL(MOUNT_CONFIG, [test "$enable_mount" = "yes"])
 fi
 
+AC_ARG_ENABLE(nfsdcld,
+	[AC_HELP_STRING([--disable-nfsdcld],
+			[disable NFSv4 clientid tracking daemon @<:@default=no@:>@])],
+	enable_nfsdcld=$enableval,
+	enable_nfsdcld="yes")
+
 AC_ARG_ENABLE(nfsdcltrack,
 	[AC_HELP_STRING([--disable-nfsdcltrack],
 			[disable NFSv4 clientid tracking programs @<:@default=no@:>@])],
@@ -317,6 +323,20 @@ if test "$enable_nfsv4" = yes; then
   dnl Check for sqlite3
   AC_SQLITE3_VERS
 
+  if test "$enable_nfsdcld" = "yes"; then
+	AC_CHECK_HEADERS([libgen.h sys/inotify.h], ,
+		AC_MSG_ERROR([Cannot find header needed for nfsdcld]))
+
+    case $libsqlite3_cv_is_recent in
+    yes) ;;
+    unknown)
+      dnl do not fail when cross-compiling
+      AC_MSG_WARN([assuming sqlite is at least v3.3]) ;;
+    *)
+      AC_MSG_ERROR([nfsdcld requires sqlite-devel]) ;;
+    esac
+  fi
+
   if test "$enable_nfsdcltrack" = "yes"; then
 	AC_CHECK_HEADERS([libgen.h sys/inotify.h], ,
 		AC_MSG_ERROR([Cannot find header needed for nfsdcltrack]))
@@ -332,6 +352,7 @@ if test "$enable_nfsv4" = yes; then
   fi
 
 else
+  enable_nfsdcld="no"
   enable_nfsdcltrack="no"
 fi
 
@@ -342,6 +363,7 @@ if test "$enable_nfsv41" = yes; then
 fi
 
 dnl enable nfsidmap when its support by libnfsidmap
+AM_CONDITIONAL(CONFIG_NFSDCLD, [test "$enable_nfsdcld" = "yes" ])
 AM_CONDITIONAL(CONFIG_NFSDCLTRACK, [test "$enable_nfsdcltrack" = "yes" ])
 
 
@@ -581,6 +603,7 @@ CHECK_CCSUPPORT([-Werror=format-overflow=2], [flg1])
 CHECK_CCSUPPORT([-Werror=int-conversion], [flg2])
 CHECK_CCSUPPORT([-Werror=incompatible-pointer-types], [flg3])
 CHECK_CCSUPPORT([-Werror=misleading-indentation], [flg4])
+AX_GCC_FUNC_ATTRIBUTE([format])
 
 AC_SUBST([AM_CFLAGS], ["$my_am_cflags $flg1 $flg2 $flg3 $flg4"])
 
@@ -617,8 +640,10 @@ AC_CONFIG_FILES([
 	tools/mountstats/Makefile
 	tools/nfs-iostat/Makefile
 	tools/nfsconf/Makefile
+	tools/clddb-tool/Makefile
 	utils/Makefile
 	utils/blkmapd/Makefile
+	utils/nfsdcld/Makefile
 	utils/nfsdcltrack/Makefile
 	utils/exportfs/Makefile
 	utils/gssd/Makefile
diff --git a/nfs.conf b/nfs.conf
index d48a4e55..56172c49 100644
--- a/nfs.conf
+++ b/nfs.conf
@@ -36,6 +36,10 @@ use-gss-proxy=1
 # state-directory-path=/var/lib/nfs
 # ha-callout=
 #
+[nfsdcld]
+# debug=0
+# storagedir=/var/lib/nfs/nfsdcld
+#
 [nfsdcltrack]
 # debug=0
 # storagedir=/var/lib/nfs/nfsdcltrack
diff --git a/support/include/cld.h b/support/include/cld.h
index f14a9ab0..88d3b63e 100644
--- a/support/include/cld.h
+++ b/support/include/cld.h
@@ -23,16 +23,22 @@
 #define _NFSD_CLD_H
 
 /* latest upcall version available */
-#define CLD_UPCALL_VERSION 1
+#define CLD_UPCALL_VERSION 2
 
 /* defined by RFC3530 */
 #define NFS4_OPAQUE_LIMIT 1024
 
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE      32
+#endif
+
 enum cld_command {
 	Cld_Create,		/* create a record for this cm_id */
 	Cld_Remove,		/* remove record of this cm_id */
 	Cld_Check,		/* is this cm_id allowed? */
 	Cld_GraceDone,		/* grace period is complete */
+	Cld_GraceStart,		/* grace start (upload client records) */
+	Cld_GetVersion,		/* query max supported upcall version */
 };
 
 /* representation of long-form NFSv4 client ID */
@@ -41,6 +47,17 @@ struct cld_name {
 	unsigned char	cn_id[NFS4_OPAQUE_LIMIT];	/* client-provided */
 } __attribute__((packed));
 
+/* sha256 hash of the kerberos principal */
+struct cld_princhash {
+	uint8_t		cp_len;				/* length of cp_data */
+	unsigned char	cp_data[SHA256_DIGEST_SIZE];	/* hash of principal */
+} __attribute__((packed));
+
+struct cld_clntinfo {
+	struct cld_name		cc_name;
+	struct cld_princhash	cc_princhash;
+} __attribute__((packed));
+
 /* message struct for communication with userspace */
 struct cld_msg {
 	uint8_t		cm_vers;		/* upcall version */
@@ -50,7 +67,28 @@ struct cld_msg {
 	union {
 		int64_t		cm_gracetime;	/* grace period start time */
 		struct cld_name	cm_name;
+		uint8_t		cm_version;	/* for getting max version */
+	} __attribute__((packed)) cm_u;
+} __attribute__((packed));
+
+/* version 2 message can include hash of kerberos principal */
+struct cld_msg_v2 {
+	uint8_t		cm_vers;		/* upcall version */
+	uint8_t		cm_cmd;			/* upcall command */
+	int16_t		cm_status;		/* return code */
+	uint32_t	cm_xid;			/* transaction id */
+	union {
+		struct cld_name	cm_name;
+		uint8_t		cm_version;	/* for getting max version */
+		struct cld_clntinfo cm_clntinfo; /* name & princ hash */
 	} __attribute__((packed)) cm_u;
 } __attribute__((packed));
 
+struct cld_msg_hdr {
+	uint8_t		cm_vers;		/* upcall version */
+	uint8_t		cm_cmd;			/* upcall command */
+	int16_t		cm_status;		/* return code */
+	uint32_t	cm_xid;			/* transaction id */
+} __attribute__((packed));
+
 #endif /* !_NFSD_CLD_H */
diff --git a/support/include/xcommon.h b/support/include/xcommon.h
index 23c9a135..30b0403b 100644
--- a/support/include/xcommon.h
+++ b/support/include/xcommon.h
@@ -9,6 +9,10 @@
 #ifndef _XMALLOC_H
 #define _MALLOC_H
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <sys/types.h>
 #include <fcntl.h>
 #include <limits.h>
@@ -25,9 +29,15 @@
 
 #define streq(s, t)	(strcmp ((s), (t)) == 0)
 
-/* Functions in sundries.c that are used in mount.c and umount.c  */ 
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
+#define X_FORMAT(_x) __attribute__((__format__ _x))
+#else
+#define X_FORMAT(_x)
+#endif
+
+/* Functions in sundries.c that are used in mount.c and umount.c  */
 char *canonicalize (const char *path);
-void nfs_error (const char *fmt, ...);
+void nfs_error (const char *fmt, ...) X_FORMAT((printf, 1, 2));
 void *xmalloc (size_t size);
 void *xrealloc(void *p, size_t size);
 void xfree(void *);
@@ -36,9 +46,9 @@ char *xstrndup (const char *s, int n);
 char *xstrconcat2 (const char *, const char *);
 char *xstrconcat3 (const char *, const char *, const char *);
 char *xstrconcat4 (const char *, const char *, const char *, const char *);
-void die (int errcode, const char *fmt, ...);
+void die (int errcode, const char *fmt, ...) X_FORMAT((printf, 2, 3));
 
-extern void die(int err, const char *fmt, ...);
+extern void die(int err, const char *fmt, ...) X_FORMAT((printf, 2, 3));
 extern void (*at_die)(void);
 
 /* exit status - bits below are ORed */
diff --git a/support/include/xlog.h b/support/include/xlog.h
index a11463ed..32ff5a1b 100644
--- a/support/include/xlog.h
+++ b/support/include/xlog.h
@@ -7,6 +7,10 @@
 #ifndef XLOG_H
 #define XLOG_H
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <stdarg.h>
 
 /* These are logged always. L_FATAL also does exit(1) */
@@ -35,6 +39,12 @@ struct xlog_debugfac {
 	int		df_fac;
 };
 
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
+#define XLOG_FORMAT(_x) __attribute__((__format__ _x))
+#else
+#define XLOG_FORMAT(_x)
+#endif
+
 extern int export_errno;
 void			xlog_open(char *progname);
 void			xlog_stderr(int on);
@@ -43,10 +53,10 @@ void			xlog_config(int fac, int on);
 void			xlog_sconfig(char *, int on);
 void			xlog_from_conffile(char *);
 int			xlog_enabled(int fac);
-void			xlog(int fac, const char *fmt, ...);
-void			xlog_warn(const char *fmt, ...);
-void			xlog_err(const char *fmt, ...);
-void			xlog_errno(int err, const char *fmt, ...);
-void			xlog_backend(int fac, const char *fmt, va_list args);
+void			xlog(int fac, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3));
+void			xlog_warn(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2));
+void			xlog_err(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2));
+void			xlog_errno(int err, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3));
+void			xlog_backend(int fac, const char *fmt, va_list args) XLOG_FORMAT((printf, 2, 0));
 
 #endif /* XLOG_H */
diff --git a/support/junction/junction.c b/support/junction/junction.c
index ab6caa61..41cce261 100644
--- a/support/junction/junction.c
+++ b/support/junction/junction.c
@@ -23,6 +23,10 @@
  *	http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <sys/types.h>
 #include <sys/stat.h>
 
diff --git a/support/misc/file.c b/support/misc/file.c
index 4065376e..74973169 100644
--- a/support/misc/file.c
+++ b/support/misc/file.c
@@ -18,6 +18,10 @@
  * along with nfs-utils.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <sys/stat.h>
 
 #include <string.h>
diff --git a/support/misc/mountpoint.c b/support/misc/mountpoint.c
index 9f9ce44e..4205b41c 100644
--- a/support/misc/mountpoint.c
+++ b/support/misc/mountpoint.c
@@ -3,6 +3,10 @@
  * check if a given path is a mountpoint 
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <string.h>
 #include "xcommon.h"
 #include <sys/stat.h>
diff --git a/support/nfs/cacheio.c b/support/nfs/cacheio.c
index 9dc4cf1c..7c4cf373 100644
--- a/support/nfs/cacheio.c
+++ b/support/nfs/cacheio.c
@@ -15,6 +15,10 @@
  *
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <nfslib.h>
 #include <stdio.h>
 #include <stdio_ext.h>
diff --git a/support/nfs/svc_create.c b/support/nfs/svc_create.c
index ef7ff05f..7b595f89 100644
--- a/support/nfs/svc_create.c
+++ b/support/nfs/svc_create.c
@@ -184,7 +184,7 @@ svc_create_sock(const struct sockaddr *sap, socklen_t salen,
 		type = SOCK_STREAM;
 		break;
 	default:
-		xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %u",
+		xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %lu",
 			__func__, nconf->nc_semantics);
 		return -1;
 	}
diff --git a/support/nsm/rpc.c b/support/nsm/rpc.c
index ae49006c..08b4746f 100644
--- a/support/nsm/rpc.c
+++ b/support/nsm/rpc.c
@@ -182,7 +182,7 @@ nsm_xmit_getport(const int sock, const struct sockaddr_in *sin,
 	uint32_t xid;
 	XDR xdr;
 
-	xlog(D_CALL, "Sending PMAP_GETPORT for %u, %u, udp", program, version);
+	xlog(D_CALL, "Sending PMAP_GETPORT for %lu, %lu, udp", program, version);
 
 	nsm_init_xdrmem(msgbuf, NSM_MAXMSGSIZE, &xdr);
 	xid = nsm_init_rpc_header(PMAPPROG, PMAPVERS,
diff --git a/systemd/Makefile.am b/systemd/Makefile.am
index d54518bc..53458c62 100644
--- a/systemd/Makefile.am
+++ b/systemd/Makefile.am
@@ -36,6 +36,11 @@ unit_files += \
 endif
 endif
 
+if CONFIG_NFSDCLD
+unit_files += \
+    nfsdcld.service
+endif
+
 man5_MANS	= nfs.conf.man
 man7_MANS	= nfs.systemd.man
 EXTRA_DIST = $(unit_files) $(man5_MANS) $(man7_MANS)
diff --git a/systemd/nfs-server.service b/systemd/nfs-server.service
index 136552b5..24118d69 100644
--- a/systemd/nfs-server.service
+++ b/systemd/nfs-server.service
@@ -6,10 +6,12 @@ Requires= nfs-mountd.service
 Wants=rpcbind.socket network-online.target
 Wants=rpc-statd.service nfs-idmapd.service
 Wants=rpc-statd-notify.service
+Wants=nfsdcld.service
 
 After= network-online.target local-fs.target
 After= proc-fs-nfsd.mount rpcbind.socket nfs-mountd.service
 After= nfs-idmapd.service rpc-statd.service
+After= nfsdcld.service
 Before= rpc-statd-notify.service
 
 # GSS services dependencies and ordering
diff --git a/systemd/nfsdcld.service b/systemd/nfsdcld.service
new file mode 100644
index 00000000..a32d2430
--- /dev/null
+++ b/systemd/nfsdcld.service
@@ -0,0 +1,10 @@
+[Unit]
+Description=NFSv4 Client Tracking Daemon
+DefaultDependencies=no
+Conflicts=umount.target
+Requires=rpc_pipefs.target proc-fs-nfsd.mount
+After=rpc_pipefs.target proc-fs-nfsd.mount
+
+[Service]
+Type=forking
+ExecStart=/usr/sbin/nfsdcld
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 4266da49..53e61170 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -8,6 +8,10 @@ endif
 
 OPTDIRS += nfsconf
 
+if CONFIG_NFSDCLD
+OPTDIRS += clddb-tool
+endif
+
 SUBDIRS = locktest rpcdebug nlmtest mountstats nfs-iostat $(OPTDIRS)
 
 MAINTAINERCLEANFILES = Makefile.in
diff --git a/tools/clddb-tool/Makefile.am b/tools/clddb-tool/Makefile.am
new file mode 100644
index 00000000..15a8fd47
--- /dev/null
+++ b/tools/clddb-tool/Makefile.am
@@ -0,0 +1,13 @@
+## Process this file with automake to produce Makefile.in
+PYTHON_FILES =  clddb-tool.py
+
+man8_MANS	= clddb-tool.man
+
+EXTRA_DIST	= $(man8_MANS) $(PYTHON_FILES)
+
+all-local: $(PYTHON_FILES)
+
+install-data-hook:
+	$(INSTALL) -m 755 clddb-tool.py $(DESTDIR)$(sbindir)/clddb-tool
+
+MAINTAINERCLEANFILES=Makefile.in
diff --git a/tools/clddb-tool/clddb-tool.man b/tools/clddb-tool/clddb-tool.man
new file mode 100644
index 00000000..e80b2c05
--- /dev/null
+++ b/tools/clddb-tool/clddb-tool.man
@@ -0,0 +1,83 @@
+.\"
+.\" clddb-tool(8)
+.\"
+.TH clddb-tool 8 "07 Aug 2019"
+.SH NAME
+clddb-tool \- Tool for manipulating the nfsdcld sqlite database
+.SH SYNOPSIS
+.B clddb-tool
+.RB [ \-h | \-\-help ]
+.P
+.B clddb-tool
+.RB [ \-p | \-\-path
+.IR dbpath ]
+.B fix-table-names
+.RB [ \-h | \-\-help ]
+.P
+.B clddb-tool
+.RB [ \-p | \-\-path
+.IR dbpath ]
+.B downgrade-schema
+.RB [ \-h | \-\-help ]
+.RB [ \-v | \-\-version
+.IR to-version ]
+.P
+.B clddb-tool
+.RB [ \-p | \-\-path
+.IR dbpath ]
+.B print
+.RB [ \-h | \-\-help ]
+.RB [ \-s | \-\-summary ]
+.P
+
+.SH DESCRIPTION
+.RB "The " clddb-tool " command is provided to perform some manipulation of the nfsdcld sqlite database schema and to print the contents of the database."
+.SS Sub-commands
+Valid
+.B clddb-tool
+subcommands are:
+.IP "\fBfix-table-names\fP"
+.RB "A previous version of " nfsdcld "(8) contained a bug that corrupted the reboot epoch table names.  This sub-command will fix those table names."
+.IP "\fBdowngrade-schema\fP"
+Downgrade the database schema.  Currently the schema can only be downgraded from version 4 to version 3.
+.IP "\fBprint\fP"
+Display the contents of the database.  Prints the schema version and the values of the current and recovery epochs.  If the
+.BR \-s | \-\-summary
+option is not given, also prints the clients in the reboot epoch tables.
+.SH OPTIONS
+.SS Options valid for all sub-commands
+.TP
+.B \-h, \-\-help
+Show the help message and exit
+.TP
+\fB\-p \fIdbpath\fR, \fB\-\-path \fIdbpath\fR
+Open the sqlite database located at
+.I dbpath
+instead of
+.IR /var/lib/nfs/nfsdcld/main.sqlite ".  "
+This is mainly for testing purposes.
+.SS Options specific to the downgrade-schema sub-command
+.TP
+\fB\-v \fIto-version\fR, \fB\-\-version \fIto-version\fR
+The schema version to downgrade to.  Currently the schema can only be downgraded to version 3.
+.SS Options specific to the print sub-command
+.TP
+.B \-s, \-\-summary
+Do not list the clients in the reboot epoch tables in the output.
+.SH NOTES
+The
+.B clddb-tool
+command will not allow the
+.B fix-table-names
+or
+.B downgrade-schema
+subcommands to be used if
+.BR nfsdcld (8)
+is running.
+.SH FILES
+.TP
+.B /var/lib/nfs/nfsdcld/main.sqlite
+.SH SEE ALSO
+.BR nfsdcld (8)
+.SH AUTHOR
+Scott Mayhew <smayhew@redhat.com>
diff --git a/tools/clddb-tool/clddb-tool.py b/tools/clddb-tool/clddb-tool.py
new file mode 100644
index 00000000..8a661318
--- /dev/null
+++ b/tools/clddb-tool/clddb-tool.py
@@ -0,0 +1,266 @@
+#!/usr/bin/python3
+"""Tool for manipulating the nfsdcld sqlite database
+"""
+
+__copyright__ = """
+Copyright (C) 2019 Scott Mayhew <smayhew@redhat.com>
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+MA  02110-1301, USA.
+"""
+
+import argparse
+import os
+import sqlite3
+import sys
+
+
+class CldDb():
+    def __init__(self, path):
+        self.con = sqlite3.connect(path)
+        self.con.row_factory = sqlite3.Row
+        for row in self.con.execute('select value from parameters '
+                                    'where key = "version"'):
+            self.version = int(row['value'])
+        for row in self.con.execute('select * from grace'):
+            self.current = int(row['current'])
+            self.recovery = int(row['recovery'])
+
+    def __del__(self):
+        self.con.close()
+
+    def __str__(self):
+        return ('Schema version: {self.version} '
+                'current epoch: {self.current} '
+                'recovery epoch: {self.recovery}'.format(self=self))
+
+    def _print_clients(self, epoch):
+        if epoch:
+            for row in self.con.execute('select * from "rec-{:016x}"'
+                                        .format(epoch)):
+                if self.version >= 4:
+                    if row['princhash'] is not None:
+                        princhash = row['princhash'].hex()
+                    else:
+                        princhash = "(null)"
+                    print('id = {}, princhash = {}'
+                          .format(row['id'].decode(), princhash))
+                else:
+                    print('id = {}'.format(row['id'].decode()))
+
+    def print_current_clients(self):
+        print('Clients in current epoch:')
+        self._print_clients(self.current)
+
+    def print_recovery_clients(self):
+        if self.recovery:
+            print('Clients in recovery epoch:')
+            self._print_clients(self.recovery)
+
+    def check_bad_table_names(self):
+        bad_names = []
+        for row in self.con.execute('select name from sqlite_master '
+                                    'where type = "table" '
+                                    'and name like "%rec-%" '
+                                    'and length(name) < 20'):
+            bad_names.append(row['name'])
+        return bad_names
+
+    def fix_bad_table_names(self):
+        """Rename mis-named current/recovery epoch tables and drop other
+        mis-named tables in one transaction; report and roll back on error."""
+        try:
+            self.con.execute('begin exclusive transaction')
+            for bad_name in self.check_bad_table_names():
+                epoch = int(bad_name.split('-')[1], base=16)
+                if epoch in (self.current, self.recovery):
+                    which = 'current' if epoch == self.current else 'recovery'
+                    print('found invalid table name {} for {} epoch'
+                          .format(bad_name, which))
+                    self.con.execute('alter table "{}" '
+                                     'rename to "rec-{:016x}"'
+                                     .format(bad_name, epoch))
+                    print('renamed to rec-{:016x}'.format(epoch))
+                else:
+                    print('found invalid table name {} for unknown epoch {}'
+                          .format(bad_name, epoch))
+                    self.con.execute('drop table "{}"'.format(bad_name))
+                    print('dropped table {}'.format(bad_name))
+        except sqlite3.Error as err:
+            # Any rename/drop messages printed above have been undone.
+            self.con.rollback()
+            print('Fixing table names failed: {}'.format(err))
+        else:
+            self.con.commit()
+
+    def has_princ_data(self):
+        """Return True if any client record carries a principal hash.
+
+        Looks at the current epoch table and, when a recovery epoch is set,
+        at the recovery epoch table as well.  Schema versions below 4 are
+        reported as having no principal data without querying.
+        """
+        if self.version < 4:
+            return False
+        # The recovery table is only consulted when a recovery epoch is set.
+        epochs = [self.current] + ([self.recovery] if self.recovery else [])
+        count = 0
+        for epoch in epochs:
+            query = ('select count(*) from "rec-{:016x}" '
+                     'where princhash not null'.format(epoch))
+            count += self.con.execute(query).fetchone()[0]
+        return count > 0
+
+    def _downgrade_table_v4_to_v3(self, epoch):
+        """Rebuild one epoch table so that it keeps only the id column;
+        raises sqlite3.Error if no transaction is open or a statement fails.
+        """
+        if not self.con.in_transaction:
+            raise sqlite3.Error
+        self.con.execute('create table "new_rec-{:016x}" '
+                         '(id blob primary key)'.format(epoch))
+        self.con.execute('insert into "new_rec-{:016x}" '
+                         'select id from "rec-{:016x}"'
+                         .format(epoch, epoch))
+        self.con.execute('drop table "rec-{:016x}"'.format(epoch))
+        self.con.execute('alter table "new_rec-{:016x}" '
+                         'rename to "rec-{:016x}"'
+                         .format(epoch, epoch))
+
+    def downgrade_schema_v4_to_v3(self):
+        try:
+            self.con.execute('begin exclusive transaction')
+            for row in self.con.execute('select value from parameters '
+                                        'where key = "version"'):
+                version = int(row['value'])
+            if version != self.version:
+                raise sqlite3.Error
+            for row in self.con.execute('select * from grace'):
+                current = int(row['current'])
+                recovery = int(row['recovery'])
+            if current != self.current:
+                raise sqlite3.Error
+            if recovery != self.recovery:
+                raise sqlite3.Error
+            self._downgrade_table_v4_to_v3(current)
+            if recovery:
+                self._downgrade_table_v4_to_v3(recovery)
+            self.con.execute('update parameters '
+                             'set value = "3" '
+                             'where key = "version"')
+            self.version = 3
+        except sqlite3.Error:
+            self.con.rollback()
+            print('Downgrade failed')
+        else:
+            self.con.commit()
+            print('Downgrade successful')
+
+
+def nfsdcld_active():
+    """Return True when the `ps -C nfsdcld` shell command reports success."""
+    # Output is discarded; only the status returned by os.system() matters.
+    status = os.system('ps -C nfsdcld >/dev/null 2>/dev/null')
+    return status == 0
+
+
+def fix_table_names_command(db, args):
+    if nfsdcld_active():
+        print('Warning: nfsdcld is running!')
+        ans = input('Continue? ')
+        if ans.lower() not in ['y', 'yes']:
+            print('Operation canceled.')
+            return
+    bad_names = db.check_bad_table_names()
+    if not bad_names:
+        print('No invalid table names found.')
+        return
+    db.fix_bad_table_names()
+
+
+def downgrade_schema_command(db, args):
+    if nfsdcld_active():
+        print('Warning: nfsdcld is running!')
+        ans = input('Continue? ')
+        if ans.lower() not in ['y', 'yes']:
+            print('Operation canceled')
+            return
+    if db.version != 4:
+        print('Cannot downgrade database from schema version {}.'
+              .format(db.version))
+        return
+    if args.version != 3:
+        print('Cannot downgrade to version {}.'.format(args.version))
+        return
+    bad_names = db.check_bad_table_names()
+    if bad_names:
+        print('Invalid table names detected.')
+        print('Please run "{} fix-table-names" before downgrading the schema.'
+              .format(sys.argv[0]))
+        return
+    if db.has_princ_data():
+        print('Warning: database has principal data, which will be erased.')
+        ans = input('Continue? ')
+        if ans.lower() not in ['y', 'yes']:
+            print('Operation canceled')
+            return
+    db.downgrade_schema_v4_to_v3()
+
+
+def print_command(db, args):
+    print(str(db))
+    if not args.summary:
+        bad_names = db.check_bad_table_names()
+        if bad_names:
+            print('Invalid table names detected.')
+            print('Please run "{} fix-table-names".'.format(sys.argv[0]))
+            return
+        db.print_current_clients()
+        db.print_recovery_clients()
+
+
+def main():
+    """Parse the command line and run the selected sub-command."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-p', '--path',
+                        default='/var/lib/nfs/nfsdcld/main.sqlite',
+                        help='path to the database '
+                        '(default: /var/lib/nfs/nfsdcld/main.sqlite)')
+    subparsers = parser.add_subparsers(help='sub-command help')
+    fix_parser = subparsers.add_parser('fix-table-names',
+                                       help='fix invalid table names')
+    fix_parser.set_defaults(func=fix_table_names_command)
+    downgrade_parser = subparsers.add_parser('downgrade-schema',
+                                             help='downgrade database schema')
+    downgrade_parser.add_argument('-v', '--version', type=int, choices=[3],
+                                  default=3, help='version to downgrade to')
+    downgrade_parser.set_defaults(func=downgrade_schema_command)
+    print_parser = subparsers.add_parser('print',
+                                         help='print database info')
+    print_parser.add_argument('-s', '--summary', default=False,
+                              action='store_true', help='print summary only')
+    print_parser.set_defaults(func=print_command)
+    args = parser.parse_args()
+    # No sub-command given (args has no func) or no database: show usage.
+    if not hasattr(args, 'func') or not os.path.exists(args.path):
+        return parser.print_usage()
+    clddb = CldDb(args.path)
+    return args.func(clddb, args)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) == 1:
+        sys.argv.extend(['print', '--summary'])
+    main()
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 0a5b062c..4c930a4b 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -19,6 +19,10 @@ if CONFIG_MOUNT
 OPTDIRS += mount
 endif
 
+if CONFIG_NFSDCLD
+OPTDIRS += nfsdcld
+endif
+
 if CONFIG_NFSDCLTRACK
 OPTDIRS += nfsdcltrack
 endif
diff --git a/utils/exportfs/exportfs.c b/utils/exportfs/exportfs.c
index cd3c979d..4b9634b7 100644
--- a/utils/exportfs/exportfs.c
+++ b/utils/exportfs/exportfs.c
@@ -644,6 +644,9 @@ out:
 	return result;
 }
 
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
+__attribute__((format (printf, 2, 3)))
+#endif
 static char
 dumpopt(char c, char *fmt, ...)
 {
diff --git a/utils/mount/fstab.c b/utils/mount/fstab.c
index eedbddab..8b0aaf1a 100644
--- a/utils/mount/fstab.c
+++ b/utils/mount/fstab.c
@@ -7,6 +7,10 @@
  * - Moved code to nfs-utils/support/nfs from util-linux/mount.
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <errno.h>
 #include <stdio.h>
 #include <fcntl.h>
diff --git a/utils/mountd/cache.c b/utils/mountd/cache.c
index a054ce6f..c73e29be 100644
--- a/utils/mountd/cache.c
+++ b/utils/mountd/cache.c
@@ -967,8 +967,7 @@ lookup_export(char *dom, char *path, struct addrinfo *ai)
 			} else if (found_type == i && found->m_warned == 0) {
 				xlog(L_WARNING, "%s exported to both %s and %s, "
 				     "arbitrarily choosing options from first",
-				     path, found->m_client->m_hostname, exp->m_client->m_hostname,
-				     dom);
+				     path, found->m_client->m_hostname, exp->m_client->m_hostname);
 				found->m_warned = 1;
 			}
 		}
diff --git a/utils/mountd/mountd.c b/utils/mountd/mountd.c
index 086c39bf..0b891121 100644
--- a/utils/mountd/mountd.c
+++ b/utils/mountd/mountd.c
@@ -209,10 +209,10 @@ killer (int sig)
 }
 
 static void
-sig_hup (int sig)
+sig_hup (int UNUSED(sig))
 {
 	/* don't exit on SIGHUP */
-	xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n", sig);
+	xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n");
 	return;
 }
 
diff --git a/utils/nfsdcld/Makefile.am b/utils/nfsdcld/Makefile.am
new file mode 100644
index 00000000..273d64f1
--- /dev/null
+++ b/utils/nfsdcld/Makefile.am
@@ -0,0 +1,15 @@
+## Process this file with automake to produce Makefile.in
+
+man8_MANS	= nfsdcld.man
+EXTRA_DIST	= $(man8_MANS)
+
+AM_CFLAGS	+= -D_LARGEFILE64_SOURCE
+sbin_PROGRAMS	= nfsdcld
+
+nfsdcld_SOURCES = nfsdcld.c sqlite.c legacy.c
+nfsdcld_LDADD = ../../support/nfs/libnfs.la $(LIBEVENT) $(LIBSQLITE) $(LIBCAP)
+
+noinst_HEADERS	= sqlite.h cld-internal.h legacy.h
+
+MAINTAINERCLEANFILES = Makefile.in
+
diff --git a/utils/nfsdcld/cld-internal.h b/utils/nfsdcld/cld-internal.h
new file mode 100644
index 00000000..05f01be2
--- /dev/null
+++ b/utils/nfsdcld/cld-internal.h
@@ -0,0 +1,44 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _CLD_INTERNAL_H_
+#define _CLD_INTERNAL_H_
+
+#if CLD_UPCALL_VERSION >= 2
+#define UPCALL_VERSION		2
+#else
+#define UPCALL_VERSION		1
+#endif
+
+struct cld_client {
+	int			cl_fd;		/* upcall pipe fd, -1 if none */
+	struct event		cl_event;	/* read event set on cl_fd */
+	union {
+		struct cld_msg		cl_msg;
+#if UPCALL_VERSION >= 2
+		struct cld_msg_v2	cl_msg_v2;
+#endif
+	} cl_u;				/* current upcall/downcall message */
+};
+
+uint64_t current_epoch;		/* NOTE(review): not "extern"; needs -fcommon? */
+uint64_t recovery_epoch;	/* 0 is treated as "not in grace" by nfsdcld.c */
+int first_time;			/* when set, gracedone runs one-time cleanup */
+int num_cltrack_records;	/* > 0: gracedone deletes cltrack records */
+int num_legacy_records;		/* > 0: gracedone clears the v4recovery dir */
+
+#endif /* _CLD_INTERNAL_H_ */
diff --git a/utils/nfsdcld/legacy.c b/utils/nfsdcld/legacy.c
new file mode 100644
index 00000000..3c6bea6c
--- /dev/null
+++ b/utils/nfsdcld/legacy.c
@@ -0,0 +1,185 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <dirent.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include "cld.h"
+#include "sqlite.h"
+#include "xlog.h"
+#include "legacy.h"
+
+#define NFSD_RECDIR_FILE "/proc/fs/nfsd/nfsv4recoverydir"
+
+/*
+ * Loads client records from the v4recovery directory into the database.
+ * Records are prefixed with the string "hash:" and include the '\0' byte.
+ * *num_records is bumped for each record inserted; failures are only logged.
+ *
+ * Called during database initialization as part of a one-time "upgrade".
+ */
+void
+legacy_load_clients_from_recdir(int *num_records)
+{
+	int fd;
+	ssize_t len;
+	DIR *v4recovery;
+	struct dirent *entry;
+	char recdirname[PATH_MAX];
+	char buf[NFS4_OPAQUE_LIMIT];
+	struct stat st;
+	char *nl;
+
+	fd = open(NFSD_RECDIR_FILE, O_RDONLY);
+	if (fd < 0) {
+		xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE);
+		return;
+	}
+	/* leave room for the terminator that is added below */
+	len = read(fd, recdirname, sizeof(recdirname) - 1);
+	if (len < 0) {
+		xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE);
+		close(fd);
+		return;
+	}
+	close(fd);
+	/* the output from the proc file isn't null-terminated */
+	recdirname[len] = '\0';
+	nl = strchr(recdirname, '\n');
+	if (!nl)
+		return;
+	*nl = '\0';
+	if (stat(recdirname, &st) < 0) {
+		xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno);
+		return;
+	}
+	if (!S_ISDIR(st.st_mode)) {
+		xlog(D_GENERAL, "%s is not a directory: mode=0%o",
+		     recdirname, st.st_mode);
+		return;
+	}
+	v4recovery = opendir(recdirname);
+	if (!v4recovery)
+		return;
+	while ((entry = readdir(v4recovery))) {
+		int ret;
+
+		/* skip "." and ".." */
+		if (!strcmp(entry->d_name, ".") ||
+		    !strcmp(entry->d_name, ".."))
+			continue;
+		/* prefix legacy records with the string "hash:" */
+		ret = snprintf(buf, sizeof(buf), "hash:%s", entry->d_name);
+		/* if there's a problem, then skip this entry */
+		if (ret < 0 || (size_t)ret >= sizeof(buf)) {
+			xlog(L_WARNING, "%s: unable to build client string for %s!",
+				__func__, entry->d_name);
+			continue;
+		}
+		/* legacy client records need to include the null terminator */
+		ret = sqlite_insert_client((unsigned char *)buf, strlen(buf) + 1);
+		if (ret)
+			xlog(L_WARNING, "%s: unable to insert %s: %d", __func__,
+				entry->d_name, ret);
+		else
+			(*num_records)++;
+	}
+	closedir(v4recovery);
+}
+
+/*
+ * Cleans out the v4recovery directory by calling rmdir() on every entry.
+ * Failures are logged and the remaining entries are still processed.
+ *
+ * Called upon receipt of the first "GraceDone" upcall only.
+ */
+void
+legacy_clear_recdir(void)
+{
+	int fd;
+	ssize_t rlen;
+	DIR *v4recovery;
+	struct dirent *entry;
+	char recdirname[PATH_MAX];
+	char dirname[PATH_MAX];
+	struct stat st;
+	char *nl;
+
+	fd = open(NFSD_RECDIR_FILE, O_RDONLY);
+	if (fd < 0) {
+		xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE);
+		return;
+	}
+	/* leave room for the terminator that is added below */
+	rlen = read(fd, recdirname, sizeof(recdirname) - 1);
+	if (rlen < 0) {
+		xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE);
+		close(fd);
+		return;
+	}
+	close(fd);
+	/* the output from the proc file isn't null-terminated */
+	recdirname[rlen] = '\0';
+	nl = strchr(recdirname, '\n');
+	if (!nl)
+		return;
+	*nl = '\0';
+	if (stat(recdirname, &st) < 0) {
+		xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno);
+		return;
+	}
+	if (!S_ISDIR(st.st_mode)) {
+		xlog(D_GENERAL, "%s is not a directory: mode=0%o",
+		     recdirname, st.st_mode);
+		return;
+	}
+	v4recovery = opendir(recdirname);
+	if (!v4recovery)
+		return;
+	while ((entry = readdir(v4recovery))) {
+		int len;
+
+		/* skip "." and ".." */
+		if (!strcmp(entry->d_name, ".") ||
+		    !strcmp(entry->d_name, ".."))
+			continue;
+		len = snprintf(dirname, sizeof(dirname), "%s/%s", recdirname,
+				entry->d_name);
+		/* if there's a problem, then skip this entry */
+		if (len < 0 || (size_t)len >= sizeof(dirname)) {
+			xlog(L_WARNING, "%s: unable to build filename for %s!",
+				__func__, entry->d_name);
+			continue;
+		}
+		/* report errno via %m: rmdir() itself only returns -1 */
+		if (rmdir(dirname))
+			xlog(L_WARNING, "%s: unable to rmdir %s: %m", __func__,
+				dirname);
+	}
+	closedir(v4recovery);
+}
diff --git a/utils/nfsdcld/legacy.h b/utils/nfsdcld/legacy.h
new file mode 100644
index 00000000..8988f6e8
--- /dev/null
+++ b/utils/nfsdcld/legacy.h
@@ -0,0 +1,24 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LEGACY_H_
+#define _LEGACY_H_
+
+void legacy_load_clients_from_recdir(int *);
+void legacy_clear_recdir(void);
+
+#endif /* _LEGACY_H_ */
diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c
new file mode 100644
index 00000000..2ad10019
--- /dev/null
+++ b/utils/nfsdcld/nfsdcld.c
@@ -0,0 +1,866 @@
+/*
+ * nfsdcld.c -- NFSv4 client name tracking daemon
+ *
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <errno.h>
+#include <event.h>
+#include <stdbool.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/inotify.h>
+#ifdef HAVE_SYS_CAPABILITY_H
+#include <sys/prctl.h>
+#include <sys/capability.h>
+#endif
+
+#include "xlog.h"
+#include "nfslib.h"
+#include "cld.h"
+#include "cld-internal.h"
+#include "sqlite.h"
+#include "../mount/version.h"
+#include "conffile.h"
+#include "legacy.h"
+
+#ifndef DEFAULT_PIPEFS_DIR
+#define DEFAULT_PIPEFS_DIR NFS_STATEDIR "/rpc_pipefs"
+#endif
+
+#define DEFAULT_CLD_PATH	"/nfsd/cld"
+
+#ifndef CLD_DEFAULT_STORAGEDIR
+#define CLD_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcld"
+#endif
+
+#define NFSD_END_GRACE_FILE "/proc/fs/nfsd/v4_end_grace"
+
+/* private data structures */
+
+/* global variables (pipefs_dir can be replaced by the config file or -p) */
+static char pipefs_dir[PATH_MAX] = DEFAULT_PIPEFS_DIR;
+static char pipepath[PATH_MAX];		/* pipefs_dir + DEFAULT_CLD_PATH */
+static int 		inotify_fd = -1;	/* watches pipepath's directory */
+static struct event	pipedir_event;	/* read event on inotify_fd */
+static bool old_kernel = false;		/* set when kernel is < 4.20.0 */
+
+static struct option longopts[] =
+{
+	{ "help", 0, NULL, 'h' },
+	{ "foreground", 0, NULL, 'F' },
+	{ "debug", 0, NULL, 'd' },
+	{ "pipefsdir", 1, NULL, 'p' },
+	{ "storagedir", 1, NULL, 's' },
+	{ NULL, 0, 0, 0 },		/* terminator */
+};
+
+/* forward declarations */
+static void cldcb(int UNUSED(fd), short which, void *data);
+
+static void
+usage(char *progname)	/* prints the option synopsis to stdout */
+{
+	printf("%s [ -hFd ] [ -p pipefsdir ] [ -s storagedir ]\n", progname);
+}
+
+static int
+cld_set_caps(void)
+{
+	int ret = 0;	/* stays 0 if built without HAVE_SYS_CAPABILITY_H */
+#ifdef HAVE_SYS_CAPABILITY_H
+	unsigned long i;
+	cap_t caps;
+
+	if (getuid() != 0) {
+		xlog(L_ERROR, "Not running as root. Daemon won't be able to "
+			      "open the pipe after dropping capabilities!");
+		return -EINVAL;
+	}
+
+	/* prune the bounding set; the loop ends when PR_CAPBSET_READ fails */
+	for (i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0 ; ++i) {
+		ret = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
+		if (ret) {
+			xlog(L_ERROR, "Unable to prune capability %lu from "
+				      "bounding set: %m", i);
+			return -errno;	/* NOTE(review): errno read after xlog() */
+		}
+	}
+
+	/* get a blank capset (released by cap_free() below) */
+	caps = cap_init();
+	if (caps == NULL) {
+		xlog(L_ERROR, "Unable to get blank capability set: %m");
+		return -errno;
+	}
+
+	/* reset the process capabilities to that empty set */
+	if (cap_set_proc(caps) != 0) {
+		xlog(L_ERROR, "Unable to set process capabilities: %m");
+		ret = -errno;
+	}
+	cap_free(caps);
+#endif	/* HAVE_SYS_CAPABILITY_H */
+	return ret;
+}
+/* one event plus a maximal name and its NUL, as described in inotify(7) */
+#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX + 1)
+
+static int
+cld_pipe_open(struct cld_client *clnt)
+{
+	int fd;		/* new descriptor; installed only if open() succeeds */
+
+	xlog(D_GENERAL, "%s: opening upcall pipe %s", __func__, pipepath);
+	fd = open(pipepath, O_RDWR, 0);
+	if (fd < 0) {
+		xlog(D_GENERAL, "%s: open of %s failed: %m", __func__, pipepath);
+		return -errno;	/* old fd and event are left untouched */
+	}
+
+	if (event_initialized(&clnt->cl_event))
+		event_del(&clnt->cl_event);
+	if (clnt->cl_fd >= 0)
+		close(clnt->cl_fd);
+
+	clnt->cl_fd = fd;
+	event_set(&clnt->cl_event, clnt->cl_fd, EV_READ, cldcb, clnt);
+	/* event_add is done by the caller; 0 here, -errno on open failure */
+	return 0;
+}
+
+static void
+cld_inotify_cb(int UNUSED(fd), short which, void *data)
+{
+	int ret;
+	size_t elen;
+	ssize_t rret;
+	char evbuf[INOTIFY_EVENT_MAX];
+	char *dirc = NULL, *pname;	/* dirc: copy of pipepath for basename() */
+	struct inotify_event *event = (struct inotify_event *)evbuf;
+	struct cld_client *clnt = data;
+
+	if (which != EV_READ)
+		return;
+
+	xlog(D_GENERAL, "%s: called for EV_READ", __func__);
+
+	dirc = strndup(pipepath, PATH_MAX);
+	if (!dirc) {
+		xlog(L_ERROR, "%s: unable to allocate memory", __func__);
+		goto out;
+	}
+
+	rret = read(inotify_fd, evbuf, INOTIFY_EVENT_MAX);
+	if (rret < 0) {
+		xlog(L_ERROR, "%s: read from inotify fd failed: %m", __func__);
+		goto out;
+	}
+
+	/* any filename in evbuf? NOTE(review): short reads are not checked */
+	if (!event->len) {
+		xlog(D_GENERAL, "%s: no filename in inotify event", __func__);
+		goto out;
+	}
+
+	pname = basename(dirc);
+	elen = strnlen(event->name, event->len);
+
+	/* does the filename match our pipe? */
+	if (strlen(pname) != elen || memcmp(pname, event->name, elen)) {
+		xlog(D_GENERAL, "%s: wrong filename (%s)", __func__,
+				event->name);
+		goto out;
+	}
+
+	ret = cld_pipe_open(clnt);
+	switch (ret) {
+	case 0:
+		/* readd the event for the cl_event pipe */
+		event_add(&clnt->cl_event, NULL);
+		break;
+	case -ENOENT:
+		/* pipe must have disappeared, wait for it to come back */
+		goto out;
+	default:
+		/* anything else is fatal */
+		xlog(L_FATAL, "%s: unable to open new pipe (%d). Aborting.",
+			__func__, ret);
+		exit(ret);
+	}
+
+out:	/* re-arm the inotify event, then drop the pathname copy */
+	event_add(&pipedir_event, NULL);
+	free(dirc);
+}
+
+/*
+ * Create the inotify descriptor and watch the directory holding the upcall
+ * pipe for IN_CREATE.  Returns 0 on success or a negative errno value.
+ */
+static int
+cld_inotify_setup(void)
+{
+	int ret = 0;
+	char *dirc;
+
+	/* dirname() may modify its argument, so work on a copy */
+	dirc = strndup(pipepath, PATH_MAX);
+	if (!dirc) {
+		xlog_err("%s: unable to allocate memory", __func__);
+		return -ENOMEM;
+	}
+
+	inotify_fd = inotify_init();
+	if (inotify_fd < 0) {
+		ret = -errno;	/* save before logging can clobber errno */
+		xlog_err("%s: inotify_init failed: %m", __func__);
+		goto out_free;
+	}
+
+	/* only failure matters; the returned watch descriptor is not kept */
+	if (inotify_add_watch(inotify_fd, dirname(dirc), IN_CREATE) < 0) {
+		ret = -errno;
+		xlog_err("%s: inotify_add_watch failed: %m", __func__);
+		close(inotify_fd);
+		inotify_fd = -1;
+	}
+
+out_free:
+	free(dirc);
+	return ret;
+}
+
+/*
+ * Set an inotify watch on the directory that should contain the pipe, and then
+ * try to open it. If the open fails with anything but -ENOENT, close the
+ * inotify descriptor and return the error immediately.
+ *
+ * If it succeeds, add the pipe event. In both the success and -ENOENT cases
+ * the inotify event handler is registered and 0 is returned.
+ */
+static int
+cld_pipe_init(struct cld_client *clnt)
+{
+	int ret;
+
+	xlog(D_GENERAL, "%s: init pipe handlers", __func__);
+
+	ret = cld_inotify_setup();
+	if (ret != 0)
+		goto out;
+
+	clnt->cl_fd = -1;	/* no previous pipe fd to close */
+	ret = cld_pipe_open(clnt);
+	switch (ret) {
+	case 0:
+		/* add the event and we're good to go */
+		event_add(&clnt->cl_event, NULL);
+		break;
+	case -ENOENT:
+		/* ignore this error -- cld_inotify_cb will handle it */
+		ret = 0;
+		break;
+	default:
+		/* anything else is fatal */
+		close(inotify_fd);
+		goto out;
+	}
+
+	/* set event for inotify read; clnt is handed to cld_inotify_cb */
+	event_set(&pipedir_event, inotify_fd, EV_READ, cld_inotify_cb, clnt);
+	event_add(&pipedir_event, NULL);
+out:
+	return ret;
+}
+
+/*
+ * Older kernels will not tell nfsdcld when a grace period has started.
+ * Therefore we have to peek at the /proc/fs/nfsd/v4_end_grace file to
+ * see if nfsd is in grace.  We have to do this for create and remove
+ * upcalls to ensure that the correct table is being updated - otherwise
+ * we could lose client records when the grace period is lifted.
+ * Returns 0 when no action was needed, 1 if the file cannot be opened or
+ * read, otherwise the result of sqlite_grace_start().
+ */
+static int
+cld_check_grace_period(void)
+{
+	int fd, ret = 0;
+	char c = '\0';	/* stays '\0' if the read hits EOF */
+
+	if (!old_kernel || recovery_epoch != 0)
+		return 0;
+	fd = open(NFSD_END_GRACE_FILE, O_RDONLY);
+	if (fd < 0) {
+		xlog(L_WARNING, "Unable to open %s: %m", NFSD_END_GRACE_FILE);
+		return 1;
+	}
+	if (read(fd, &c, 1) < 0) {
+		xlog(L_WARNING, "Unable to read from %s: %m",
+			NFSD_END_GRACE_FILE);
+		close(fd);
+		return 1;
+	}
+	close(fd);
+	if (c == 'N') {
+		xlog(L_WARNING, "nfsd is in grace but didn't send a gracestart upcall, "
+			"please update the kernel");
+		ret = sqlite_grace_start();
+	}
+	return ret;
+}
+
+#if UPCALL_VERSION >= 2	/* size depends on the version in the header */
+static ssize_t cld_message_size(void *msg)
+{
+	struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)msg;
+
+	switch (hdr->cm_vers) {
+	case 1:
+		return sizeof(struct cld_msg);
+	case 2:
+		return sizeof(struct cld_msg_v2);
+	default:	/* no known size for this version: give up */
+		xlog(L_FATAL, "%s invalid upcall version %d", __func__,
+		     hdr->cm_vers);
+		exit(-EINVAL);
+	}
+}
+#else	/* v1 only: the size is fixed and msg is not inspected */
+static ssize_t cld_message_size(void *UNUSED(msg))
+{
+	return sizeof(struct cld_msg);
+}
+#endif	/* UPCALL_VERSION >= 2 */
+
+static void
+cld_not_implemented(struct cld_client *clnt)
+{
+	int ret;
+	ssize_t bsize, wsize;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	xlog(D_GENERAL, "%s: downcalling with not implemented error", __func__);
+
+	/* set up reply: the received message is written back with an error */
+	cmsg->cm_status = -EOPNOTSUPP;
+
+	bsize = cld_message_size(cmsg);
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+	if (wsize != bsize)
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+			 __func__, wsize);
+
+	/* reopen pipe, just to be sure; failing to reopen is fatal */
+	ret = cld_pipe_open(clnt);
+	if (ret) {
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d", __func__, ret);
+		exit(ret);
+	}
+}
+
+static void
+cld_get_version(struct cld_client *clnt)
+{
+	int ret;
+	ssize_t bsize, wsize;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	xlog(D_GENERAL, "%s: version = %u.", __func__, UPCALL_VERSION);
+
+	cmsg->cm_u.cm_version = UPCALL_VERSION;	/* version built into nfsdcld */
+	cmsg->cm_status = 0;
+
+	bsize = cld_message_size(cmsg);
+	xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+	if (wsize != bsize) {	/* short write: reopen the pipe or exit */
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+			 __func__, wsize);
+		ret = cld_pipe_open(clnt);
+		if (ret) {
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+					__func__, ret);
+			exit(ret);
+		}
+	}
+}
+
+static void
+cld_create(struct cld_client *clnt)
+{
+	int ret;
+	ssize_t bsize, wsize;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	ret = cld_check_grace_period();
+	if (ret)
+		goto reply;
+
+	xlog(D_GENERAL, "%s: create client record.", __func__);
+
+#if UPCALL_VERSION >= 2
+	if (cmsg->cm_vers >= 2)	/* v2 upcalls also carry a principal hash */
+		ret = sqlite_insert_client_and_princhash(
+					cmsg->cm_u.cm_clntinfo.cc_name.cn_id,
+					cmsg->cm_u.cm_clntinfo.cc_name.cn_len,
+					cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+					cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len);
+	else
+		ret = sqlite_insert_client(cmsg->cm_u.cm_name.cn_id,
+					   cmsg->cm_u.cm_name.cn_len);
+#else
+	ret = sqlite_insert_client(cmsg->cm_u.cm_name.cn_id,
+				   cmsg->cm_u.cm_name.cn_len);
+#endif
+
+reply:	/* every failure is reported to the kernel as -EREMOTEIO */
+	cmsg->cm_status = ret ? -EREMOTEIO : ret;
+
+	bsize = cld_message_size(cmsg);
+	xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+	if (wsize != bsize) {	/* short write: reopen the pipe or exit */
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+			 __func__, wsize);
+		ret = cld_pipe_open(clnt);
+		if (ret) {
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+					__func__, ret);
+			exit(ret);
+		}
+	}
+}
+
+static void
+cld_remove(struct cld_client *clnt)
+{
+	int ret;
+	ssize_t bsize, wsize;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	ret = cld_check_grace_period();
+	if (ret)
+		goto reply;
+
+	xlog(D_GENERAL, "%s: remove client record.", __func__);
+
+	ret = sqlite_remove_client(cmsg->cm_u.cm_name.cn_id,
+				   cmsg->cm_u.cm_name.cn_len);
+
+reply:	/* every failure is reported to the kernel as -EREMOTEIO */
+	cmsg->cm_status = ret ? -EREMOTEIO : ret;
+
+	bsize = cld_message_size(cmsg);
+	xlog(D_GENERAL, "%s: downcall with status %d", __func__,
+			cmsg->cm_status);
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+	if (wsize != bsize) {	/* short write: reopen the pipe or exit */
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+			 __func__, wsize);
+		ret = cld_pipe_open(clnt);
+		if (ret) {
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+					__func__, ret);
+			exit(ret);
+		}
+	}
+}
+
+static void
+cld_check(struct cld_client *clnt)
+{
+	int ret;
+	ssize_t bsize, wsize;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	/*
+	 * If we get a check upcall at all, it means we're talking to an old
+	 * kernel.  Furthermore, if we're not in grace it means this is the
+	 * first client to do a reclaim.  Log a message and use
+	 * sqlite_grace_start() to advance the epoch numbers.
+	 */
+	if (recovery_epoch == 0) {
+		xlog(D_GENERAL, "%s: received a check upcall, please update the kernel",
+			__func__);
+		ret = sqlite_grace_start();
+		if (ret)
+			goto reply;
+	}
+
+	xlog(D_GENERAL, "%s: check client record", __func__);
+
+	ret = sqlite_check_client(cmsg->cm_u.cm_name.cn_id,
+				  cmsg->cm_u.cm_name.cn_len);
+
+reply:
+	/* set up reply: any failure above is reported as -EACCES */
+	cmsg->cm_status = ret ? -EACCES : ret;
+
+	bsize = cld_message_size(cmsg);
+	xlog(D_GENERAL, "%s: downcall with status %d", __func__,
+			cmsg->cm_status);
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+	if (wsize != bsize) {	/* short write: reopen the pipe or exit */
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+			 __func__, wsize);
+		ret = cld_pipe_open(clnt);
+		if (ret) {
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+					__func__, ret);
+			exit(ret);
+		}
+	}
+}
+
+static void
+cld_gracedone(struct cld_client *clnt)
+{
+	int ret;
+	ssize_t bsize, wsize;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	/*
+	 * If we got a "gracedone" upcall while we're not in grace, then
+	 * 1) we must be talking to an old kernel
+	 * 2) no clients attempted to reclaim
+	 * In that case, log a message and use sqlite_grace_start() to
+	 * advance the epoch numbers, and then proceed as normal.
+	 */
+	if (recovery_epoch == 0) {
+		xlog(D_GENERAL, "%s: received gracedone upcall "
+			"while not in grace, please update the kernel",
+			__func__);
+		ret = sqlite_grace_start();
+		if (ret)
+			goto reply;
+	}
+
+	xlog(D_GENERAL, "%s: grace done.", __func__);
+
+	ret = sqlite_grace_done();
+
+	if (first_time) {	/* NOTE(review): runs even if grace_done failed */
+		if (num_cltrack_records > 0)
+			sqlite_delete_cltrack_records();
+		if (num_legacy_records > 0)
+			legacy_clear_recdir();
+		sqlite_first_time_done();
+		first_time = 0;
+	}
+
+reply:
+	/* set up reply: status is 0 on success, -EREMOTEIO on any failure */
+	cmsg->cm_status = ret ? -EREMOTEIO : ret;
+
+	bsize = cld_message_size(cmsg);
+	xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+	if (wsize != bsize) {	/* short write: reopen the pipe or exit */
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+			 __func__, wsize);
+		ret = cld_pipe_open(clnt);
+		if (ret) {
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+					__func__, ret);
+			exit(ret);
+		}
+	}
+}
+
+static int
+gracestart_callback(struct cld_client *clnt) {
+	ssize_t bsize, wsize;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	/* presumably run per record by sqlite_iterate_recovery() -- confirm */
+	cmsg->cm_status = -EINPROGRESS;
+
+	bsize = cld_message_size(cmsg);
+	xlog(D_GENERAL, "Sending client %.*s",
+			cmsg->cm_u.cm_name.cn_len, cmsg->cm_u.cm_name.cn_id);
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+	if (wsize != bsize)	/* short write: report -EIO to the caller */
+		return -EIO;
+	return 0;
+}
+
+static void
+cld_gracestart(struct cld_client *clnt)
+{
+	int ret;
+	ssize_t bsize, wsize;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	xlog(D_GENERAL, "%s: updating grace epochs", __func__);
+
+	ret = sqlite_grace_start();
+	if (ret)
+		goto reply;
+
+	xlog(D_GENERAL, "%s: sending client records to the kernel", __func__);
+
+	ret = sqlite_iterate_recovery(&gracestart_callback, clnt);
+
+reply:
+	/* set up reply: status is 0 on success, -EREMOTEIO on any failure */
+	cmsg->cm_status = ret ? -EREMOTEIO : ret;
+
+	bsize = cld_message_size(cmsg);
+	xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
+	if (wsize != bsize) {	/* short write: reopen the pipe or exit */
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+			 __func__, wsize);
+		ret = cld_pipe_open(clnt);
+		if (ret) {
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+					__func__, ret);
+			exit(ret);
+		}
+	}
+}
+
+static void
+cldcb(int UNUSED(fd), short which, void *data)
+{
+	ssize_t len;
+	struct cld_client *clnt = data;	/* pointer given to event_set() */
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	if (which != EV_READ)
+		goto out;
+
+	len = atomicio(read, clnt->cl_fd, cmsg, sizeof(*cmsg));
+	if (len <= 0) {
+		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
+		cld_pipe_open(clnt);	/* NOTE(review): result is ignored */
+		goto out;
+	}
+
+	if (cmsg->cm_vers > UPCALL_VERSION) {
+		xlog(L_ERROR, "%s: unsupported upcall version: %hu",
+				__func__, cmsg->cm_vers);
+		cld_pipe_open(clnt);	/* NOTE(review): result is ignored */
+		goto out;
+	}
+
+	switch(cmsg->cm_cmd) {	/* each handler writes its own downcall */
+	case Cld_Create:
+		cld_create(clnt);
+		break;
+	case Cld_Remove:
+		cld_remove(clnt);
+		break;
+	case Cld_Check:
+		cld_check(clnt);
+		break;
+	case Cld_GraceDone:
+		cld_gracedone(clnt);
+		break;
+	case Cld_GraceStart:
+		cld_gracestart(clnt);
+		break;
+	case Cld_GetVersion:
+		cld_get_version(clnt);
+		break;
+	default:
+		xlog(L_WARNING, "%s: command %u is not yet implemented",
+				__func__, cmsg->cm_cmd);
+		cld_not_implemented(clnt);
+	}
+out:	/* re-arm the pipe read event for the next upcall */
+	event_add(&clnt->cl_event, NULL);
+}
+
+int
+main(int argc, char **argv)
+{
+	int arg;
+	int rc = 0;
+	bool foreground = false;
+	char *progname;
+	char *storagedir = CLD_DEFAULT_STORAGEDIR;
+	struct cld_client clnt;
+	char *s;	/* scratch for config file string lookups */
+	first_time = 0;
+	num_cltrack_records = 0;
+	num_legacy_records = 0;
+
+	memset(&clnt, 0, sizeof(clnt));
+
+	progname = strdup(basename(argv[0]));
+	if (!progname) {
+		fprintf(stderr, "%s: unable to allocate memory.\n", argv[0]);
+		return 1;
+	}
+
+	event_init();
+	xlog_syslog(0);
+	xlog_stderr(1);
+
+	conf_init_file(NFS_CONFFILE);
+	s = conf_get_str("general", "pipefs-directory");
+	if (s)
+		strlcpy(pipefs_dir, s, sizeof(pipefs_dir));
+	s = conf_get_str("nfsdcld", "storagedir");
+	if (s)
+		storagedir = s;
+	rc = conf_get_num("nfsdcld", "debug", 0);
+	if (rc > 0)
+		xlog_config(D_ALL, 1);
+
+	/* process command-line options; they override the config file */
+	while ((arg = getopt_long(argc, argv, "hdFp:s:", longopts,
+				  NULL)) != EOF) {
+		switch (arg) {
+		case 'd':
+			xlog_config(D_ALL, 1);
+			break;
+		case 'F':
+			foreground = true;
+			break;
+		case 'p':
+			strlcpy(pipefs_dir, optarg, sizeof(pipefs_dir));
+			break;
+		case 's':
+			storagedir = optarg;
+			break;
+		default:	/* -h and unrecognized options both land here */
+			usage(progname);
+			return 0;	/* NOTE(review): 0 even for bad options */
+		}
+	}
+
+	strlcpy(pipepath, pipefs_dir, sizeof(pipepath));
+	strlcat(pipepath, DEFAULT_CLD_PATH, sizeof(pipepath));
+
+	xlog_open(progname);
+	if (!foreground) {
+		xlog_syslog(1);
+		xlog_stderr(0);
+		rc = daemon(0, 0);
+		if (rc) {
+			xlog(L_ERROR, "Unable to daemonize: %m");
+			goto out;
+		}
+	}
+
+	/* drop all capabilities */
+	rc = cld_set_caps();
+	if (rc)
+		goto out;
+
+	/*
+	 * now see if the storagedir is writable by root w/o CAP_DAC_OVERRIDE.
+	 * If it isn't then give the user a warning but proceed as if
+	 * everything is OK. If the DB has already been created, then
+	 * everything might still work. If it doesn't exist at all, then
+	 * assume that the maindb init will be able to create it. Fail on
+	 * anything else.
+	 */
+	if (access(storagedir, W_OK) == -1) {
+		switch (errno) {
+		case EACCES:
+			xlog(L_WARNING, "Storage directory %s is not writable. "
+					"Should be owned by root and writable "
+					"by owner!", storagedir);
+			break;
+		case ENOENT:
+			/* ignore and assume that we can create dir as root */
+			break;
+		default:
+			xlog(L_ERROR, "Unexpected error when checking access "
+				      "on %s: %m", storagedir);
+			rc = -errno;
+			goto out;
+		}
+	}
+
+	if (linux_version_code() < MAKE_VERSION(4, 20, 0))
+		old_kernel = true;
+
+	/* set up storage db */
+	rc = sqlite_prepare_dbh(storagedir);
+	if (rc) {
+		xlog(L_ERROR, "Failed to open main database: %d", rc);
+		goto out;
+	}
+
+	/* set up event handler: the upcall pipe and its inotify watch */
+	rc = cld_pipe_init(&clnt);
+	if (rc)
+		goto out;
+
+	xlog(D_GENERAL, "%s: Starting event dispatch handler.", __func__);
+	rc = event_dispatch();
+	if (rc < 0)
+		xlog(L_ERROR, "%s: event_dispatch failed: %m", __func__);
+
+	close(clnt.cl_fd);
+	close(inotify_fd);
+out:
+	free(progname);
+	return rc;
+}
diff --git a/utils/nfsdcld/nfsdcld.man b/utils/nfsdcld/nfsdcld.man
new file mode 100644
index 00000000..4c2b1e80
--- /dev/null
+++ b/utils/nfsdcld/nfsdcld.man
@@ -0,0 +1,221 @@
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.13)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings.  \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
+.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+.    ds -- \(*W-
+.    ds PI pi
+.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
+.    ds L" ""
+.    ds R" ""
+.    ds C` ""
+.    ds C' ""
+'br\}
+.el\{\
+.    ds -- \|\(em\|
+.    ds PI \(*p
+.    ds L" ``
+.    ds R" ''
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD.  Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.ie \nF \{\
+.    de IX
+.    tm Index:\\$1\t\\n%\t"\\$2"
+..
+.    nr % 0
+.    rr F
+.\}
+.el \{\
+.    de IX
+..
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
+.    \" fudge factors for nroff and troff
+.if n \{\
+.    ds #H 0
+.    ds #V .8m
+.    ds #F .3m
+.    ds #[ \f1
+.    ds #] \fP
+.\}
+.if t \{\
+.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+.    ds #V .6m
+.    ds #F 0
+.    ds #[ \&
+.    ds #] \&
+.\}
+.    \" simple accents for nroff and troff
+.if n \{\
+.    ds ' \&
+.    ds ` \&
+.    ds ^ \&
+.    ds , \&
+.    ds ~ ~
+.    ds /
+.\}
+.if t \{\
+.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+.    \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+.    \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+.    \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+.    ds : e
+.    ds 8 ss
+.    ds o a
+.    ds d- d\h'-1'\(ga
+.    ds D- D\h'-1'\(hy
+.    ds th \o'bp'
+.    ds Th \o'LP'
+.    ds ae ae
+.    ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "NFSDCLD 8"
+.TH NFSDCLD 8 "2011-12-21" "" ""
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+nfsdcld \- NFSv4 Client Tracking Daemon
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+nfsdcld [\-d] [\-F] [\-p path] [\-s stable storage dir]
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+nfsdcld is the NFSv4 client tracking daemon. It is not necessary to run
+this daemon on machines that are not acting as NFSv4 servers.
+.PP
+When a network partition is combined with a server reboot, there are
+edge conditions that can cause the server to grant lock reclaims when
+other clients have taken conflicting locks in the interim. A more detailed
+explanation of this issue is described in \s-1RFC\s0 3530, section 8.6.3.
+.PP
+In order to prevent these problems, the server must track a small amount
+of per-client information on stable storage. This daemon provides the
+userspace piece of that functionality.
+.SH "OPTIONS"
+.IX Header "OPTIONS"
+.IP "\fB\-d\fR, \fB\-\-debug\fR" 4
+.IX Item "-d, --debug"
+Enable debug level logging.
+.IP "\fB\-F\fR, \fB\-\-foreground\fR" 4
+.IX Item "-F, --foreground"
+Runs the daemon in the foreground and prints all output to stderr.
+.IP "\fB\-p\fR \fIpath\fR, \fB\-\-pipefsdir\fR=\fIpath\fR" 4
+.IX Item "-p path, --pipefsdir=path"
+Location of the rpc_pipefs filesystem. The default value is
+\&\fI/var/lib/nfs/rpc_pipefs\fR.
+.IP "\fB\-s\fR \fIstorage_dir\fR, \fB\-\-storagedir\fR=\fIstorage_dir\fR" 4
+.IX Item "-s storagedir, --storagedir=storage_dir"
+Directory where stable storage information should be kept. The default
+value is \fI/var/lib/nfs/nfsdcld\fR.
+.SH "CONFIGURATION FILE"
+.IX Header "CONFIGURATION FILE"
+The following values are recognized in the \fB[nfsdcld]\fR section
+of the \fI/etc/nfs.conf\fR configuration file:
+.IP "\fBstoragedir\fR" 4
+.IX Item "storagedir"
+Equivalent to \fB\-s\fR/\fB\-\-storagedir\fR.
+.IP "\fBdebug\fR" 4
+.IX Item "debug"
+Setting "debug = 1" is equivalent to \fB\-d\fR/\fB\-\-debug\fR.
+.LP
+In addition, the following value is recognized from the \fB[general]\fR section:
+.IP "\fBpipefs\-directory\fR" 4
+.IX Item "pipefs-directory"
+Equivalent to \fB\-p\fR/\fB\-\-pipefsdir\fR.
+.SH "NOTES"
+.IX Header "NOTES"
+The Linux kernel NFSv4 server has historically tracked this information
+on stable storage by manipulating information on the filesystem
+directly, in the directory to which \fI/proc/fs/nfsd/nfsv4recoverydir\fR
+points.
+.PP
+This changed with the original introduction of the \fBnfsdcld\fR upcall in kernel version 3.4,
+which was later deprecated in favor of the \fBnfsdcltrack\fR(8) usermodehelper
+program, support for which was added in kernel version 3.8.  However, since the
+usermodehelper upcall does not work in containers, support for a new version of
+the \fBnfsdcld\fR upcall was added in kernel version 5.2.
+.PP
+This daemon requires a kernel that supports the \fBnfsdcld\fR upcall. On older kernels, if
+the legacy client name tracking code was in use, then the kernel would not create the
+pipe that \fBnfsdcld\fR uses to talk to the kernel.  On newer kernels, nfsd attempts to
+initialize client tracking in the following order:  First, the \fBnfsdcld\fR upcall.  Second,
+the \fBnfsdcltrack\fR usermodehelper upcall.  Finally, the legacy client tracking.
+.PP
+This daemon should be run as root, as the pipe that it uses to communicate
+with the kernel is only accessible by root. The daemon however does drop all
+superuser capabilities after starting. Because of this, the \fIstoragedir\fR
+should be owned by root, and be readable and writable by owner.
+.PP
+The daemon now supports different upcall versions to allow the kernel to pass additional
+data to be stored in the on-disk database.  The kernel will query the supported upcall
+version from \fBnfsdcld\fR during client tracking initialization.  A restart of \fBnfsd\fR is
+not necessary after upgrading \fBnfsdcld\fR, however \fBnfsd\fR will not use a later upcall
+version until restart.  A restart of \fBnfsd\fR is necessary after downgrading \fBnfsdcld\fR,
+to ensure that \fBnfsd\fR does not use an upcall version that \fBnfsdcld\fR does not support.
+Additionally, a downgrade of \fBnfsdcld\fR requires the schema of the on-disk database to
+be downgraded as well.  That can be accomplished using the \fBclddb-tool\fR(8) utility.
+.SH FILES
+.TP
+.B /var/lib/nfs/nfsdcld/main.sqlite
+.SH SEE ALSO
+.BR nfsdcltrack "(8), " clddb-tool (8)
+.SH "AUTHORS"
+.IX Header "AUTHORS"
+The nfsdcld daemon was developed by Jeff Layton <jlayton@redhat.com>
+with modifications from Scott Mayhew <smayhew@redhat.com>.
diff --git a/utils/nfsdcld/sqlite.c b/utils/nfsdcld/sqlite.c
new file mode 100644
index 00000000..6666c867
--- /dev/null
+++ b/utils/nfsdcld/sqlite.c
@@ -0,0 +1,1406 @@
+/*
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * Explanation:
+ *
+ * This file contains the code to manage the sqlite backend database for the
+ * nfsdcld client tracking daemon.
+ *
+ * The main database is called main.sqlite and contains the following tables:
+ *
+ * parameters: simple key/value pairs for storing database info
+ *
+ * grace: a "current" column containing an INTEGER representing the current
+ *        epoch (where should new values be stored) and a "recovery" column
+ *        containing an INTEGER representing the recovery epoch (from what
+ *        epoch are we allowed to recover).  A recovery epoch of 0 means
+ *        normal operation (grace period not in force).  Note: sqlite stores
+ *        integers as signed values, so these must be cast to a uint64_t when
+ *        retrieving them from the database and back to an int64_t when storing
+ *        them in the database.
+ *
+ * rec-CCCCCCCCCCCCCCCC (where C is the hex representation of the epoch value):
+ *        an "id" column containing a BLOB with the long-form clientid
+ *        as sent by the client, and a "princhash" column containing a BLOB
+ *        with the sha256 hash of the kerberos principal (if available).
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <dirent.h>
+#include <errno.h>
+#include <event.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <sqlite3.h>
+#include <linux/limits.h>
+#include <inttypes.h>
+
+#include "xlog.h"
+#include "sqlite.h"
+#include "cld.h"
+#include "cld-internal.h"
+#include "conffile.h"
+#include "legacy.h"
+#include "nfslib.h"
+
+#define CLD_SQLITE_LATEST_SCHEMA_VERSION 4
+#define CLTRACK_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcltrack"
+
+/* in milliseconds */
+#define CLD_SQLITE_BUSY_TIMEOUT 10000
+
+/* private data structures */
+
+/* global variables */
+static char *cltrack_storagedir = CLTRACK_DEFAULT_STORAGEDIR;
+
+/* reusable pathname and sql command buffer */
+static char buf[PATH_MAX];
+
+/* global database handle */
+static sqlite3 *dbh;
+
+/* forward declarations */
+
+/* make a directory, ignoring EEXIST errors unless it's not a directory */
+static int
+mkdir_if_not_exist(const char *dirname)
+{
+	int ret;
+	struct stat statbuf;
+
+	ret = mkdir(dirname, S_IRWXU);
+	if (ret && errno != EEXIST)
+		return -errno;
+
+	ret = stat(dirname, &statbuf);
+	if (ret)
+		return -errno;
+
+	if (!S_ISDIR(statbuf.st_mode))
+		ret = -ENOTDIR;
+
+	return ret;
+}
+
+static int
+sqlite_query_schema_version(void)
+{
+	int ret;
+	sqlite3_stmt *stmt = NULL;
+
+	/* prepare select query */
+	ret = sqlite3_prepare_v2(dbh,
+		"SELECT value FROM parameters WHERE key == \"version\";",
+		 -1, &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
+			sqlite3_errmsg(dbh));
+		ret = 0;
+		goto out;
+	}
+
+	/* query schema version */
+	ret = sqlite3_step(stmt);
+	if (ret != SQLITE_ROW) {
+		xlog(D_GENERAL, "Select statement execution failed: %s",
+				sqlite3_errmsg(dbh));
+		ret = 0;
+		goto out;
+	}
+
+	ret = sqlite3_column_int(stmt, 0);
+out:
+	sqlite3_finalize(stmt);
+	return ret;
+}
+
+static int
+sqlite_query_first_time(int *first_time)
+{
+	int ret;
+	sqlite3_stmt *stmt = NULL;
+
+	/* prepare select query */
+	ret = sqlite3_prepare_v2(dbh,
+		"SELECT value FROM parameters WHERE key == \"first_time\";",
+		 -1, &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
+			sqlite3_errmsg(dbh));
+		goto out;
+	}
+
+	/* query first_time */
+	ret = sqlite3_step(stmt);
+	if (ret != SQLITE_ROW) {
+		xlog(D_GENERAL, "Select statement execution failed: %s",
+				sqlite3_errmsg(dbh));
+		goto out;
+	}
+
+	*first_time = sqlite3_column_int(stmt, 0);
+	ret = 0;
+out:
+	sqlite3_finalize(stmt);
+	return ret;
+}
+
+static int
+sqlite_add_princ_col_cb(void *UNUSED(arg), int ncols, char **cols,
+			    char **UNUSED(colnames))
+{
+	int ret;
+	char *err;
+
+	if (ncols > 1)
+		return -EINVAL;
+	ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
+			"ADD COLUMN princhash BLOB;", cols[0]);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return -EINVAL;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		return -EINVAL;
+	}
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to add princhash column to table %s: %s",
+		     cols[0], err);
+		goto out;
+	}
+	xlog(D_GENERAL, "Added princhash column to table %s", cols[0]);
+out:
+	sqlite3_free(err);
+	return ret;
+}
+
+static int
+sqlite_maindb_update_v3_to_v4(void)
+{
+	int ret;
+	char *err;
+
+	ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
+			   "WHERE type=\"table\" AND name LIKE \"%rec-%\";",
+			   sqlite_add_princ_col_cb, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "%s: Failed to update tables!: %s", __func__, err);
+	}
+	sqlite3_free(err);
+	return ret;
+}
+
+static int
+sqlite_maindb_update_v1v2_to_v4(void)
+{
+	int ret;
+	char *err;
+
+	/* create grace table */
+	ret = sqlite3_exec(dbh, "CREATE TABLE grace "
+				"(current INTEGER , recovery INTEGER);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to create grace table: %s", err);
+		goto out;
+	}
+
+	/* insert initial epochs into grace table */
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
+				"values (1, 0);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to set initial epochs: %s", err);
+		goto out;
+	}
+
+	/* create recovery table for current epoch */
+	ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
+				"(id BLOB PRIMARY KEY, princhash BLOB);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to create recovery table "
+				"for current epoch: %s", err);
+		goto out;
+	}
+
+	/* copy records from old clients table */
+	ret = sqlite3_exec(dbh, "INSERT INTO \"rec-0000000000000001\" (id) "
+				"SELECT id FROM clients;",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to copy client records: %s", err);
+		goto out;
+	}
+
+	/* drop the old clients table */
+	ret = sqlite3_exec(dbh, "DROP TABLE clients;",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to drop old clients table: %s", err);
+	}
+out:
+	sqlite3_free(err);
+	return ret;
+}
+
+static int
+sqlite_maindb_update_schema(int oldversion)
+{
+	int ret, ret2;
+	char *err;
+
+	/* begin transaction */
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+				&err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto rollback;
+	}
+
+	/*
+	 * Check schema version again. This time, under an exclusive
+	 * transaction to guard against racing DB setup attempts
+	 */
+	ret = sqlite_query_schema_version();
+	if (ret != oldversion) {
+		if (ret == CLD_SQLITE_LATEST_SCHEMA_VERSION)
+			/* Someone else raced in and set it up */
+			ret = 0;
+		else
+			/* Something went wrong -- fail! */
+			ret = -EINVAL;
+		goto rollback;
+	}
+
+	/* Still at old version -- do conversion */
+
+	switch (oldversion) {
+	case 3:
+	case 2:
+		ret = sqlite_maindb_update_v3_to_v4();
+		break;
+	case 1:
+		ret = sqlite_maindb_update_v1v2_to_v4();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	if (ret != SQLITE_OK)
+		goto rollback;
+
+	ret = snprintf(buf, sizeof(buf), "UPDATE parameters SET value = %d "
+			"WHERE key = \"version\";",
+			CLD_SQLITE_LATEST_SCHEMA_VERSION);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		goto rollback;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		ret = -EINVAL;
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to update schema version: %s", err);
+		goto rollback;
+	}
+
+	ret = sqlite_query_first_time(&first_time);
+	if (ret != SQLITE_OK) {
+		/* insert first_time into parameters table */
+		ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters "
+					"values (\"first_time\", \"1\");",
+					NULL, NULL, &err);
+		if (ret != SQLITE_OK) {
+			xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
+			goto rollback;
+		}
+	}
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+out:
+	sqlite3_free(err);
+	return ret;
+rollback:
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto out;
+}
+
+/*
+ * Start an exclusive transaction and recheck the DB schema version. If it's
+ * still zero (indicating a new database) then set it up. If that all works,
+ * then insert schema version into the parameters table and commit the
+ * transaction. On any error, rollback the transaction.
+ */
+static int
+sqlite_maindb_init_v4(void)
+{
+	int ret, ret2;
+	char *err = NULL;
+
+	/* Start a transaction */
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+				&err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		return ret;
+	}
+
+	/*
+	 * Check schema version again. This time, under an exclusive
+	 * transaction to guard against racing DB setup attempts
+	 */
+	ret = sqlite_query_schema_version();
+	switch (ret) {
+	case 0:
+		/* Query failed again -- set up DB */
+		break;
+	case CLD_SQLITE_LATEST_SCHEMA_VERSION:
+		/* Someone else raced in and set it up */
+		ret = 0;
+		goto rollback;
+	default:
+		/* Something went wrong -- fail! */
+		ret = -EINVAL;
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, "CREATE TABLE parameters "
+				"(key TEXT PRIMARY KEY, value TEXT);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to create parameter table: %s", err);
+		goto rollback;
+	}
+
+	/* create grace table */
+	ret = sqlite3_exec(dbh, "CREATE TABLE grace "
+				"(current INTEGER , recovery INTEGER);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to create grace table: %s", err);
+		goto rollback;
+	}
+
+	/* insert initial epochs into grace table */
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
+				"values (1, 0);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to set initial epochs: %s", err);
+		goto rollback;
+	}
+
+	/* create recovery table for current epoch */
+	ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
+				"(id BLOB PRIMARY KEY, princhash BLOB);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to create recovery table "
+				"for current epoch: %s", err);
+		goto rollback;
+	}
+
+	/* insert version into parameters table */
+	ret = snprintf(buf, sizeof(buf), "INSERT OR FAIL INTO parameters "
+			"values (\"version\", \"%d\");",
+			CLD_SQLITE_LATEST_SCHEMA_VERSION);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		goto rollback;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		ret = -EINVAL;
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
+		goto rollback;
+	}
+
+	/* insert first_time into parameters table */
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters "
+				"values (\"first_time\", \"1\");",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+out:
+	sqlite3_free(err);
+	return ret;
+
+rollback:
+	/* Attempt to rollback the transaction */
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto out;
+}
+
+static int
+sqlite_startup_query_grace(void)
+{
+	int ret;
+	uint64_t tcur;
+	uint64_t trec;
+	sqlite3_stmt *stmt = NULL;
+
+	/* prepare select query */
+	ret = sqlite3_prepare_v2(dbh, "SELECT * FROM grace;", -1, &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
+			sqlite3_errmsg(dbh));
+		goto out;
+	}
+
+	ret = sqlite3_step(stmt);
+	if (ret != SQLITE_ROW) {
+		xlog(D_GENERAL, "Select statement execution failed: %s",
+				sqlite3_errmsg(dbh));
+		goto out;
+	}
+
+	tcur = (uint64_t)sqlite3_column_int64(stmt, 0);
+	trec = (uint64_t)sqlite3_column_int64(stmt, 1);
+
+	current_epoch = tcur;
+	recovery_epoch = trec;
+	ret = 0;
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
+		__func__, current_epoch, recovery_epoch);
+out:
+	sqlite3_finalize(stmt);
+	return ret;
+}
+
+/*
+ * Helper for renaming a recovery table to fix the padding.
+ */
+static int
+sqlite_fix_table_name(const char *name)
+{
+	int ret;
+	uint64_t val;
+	char *err;
+
+	if (sscanf(name, "rec-%" PRIx64, &val) != 1)
+		return -EINVAL;
+	ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
+			"RENAME TO \"rec-%016" PRIx64 "\";",
+			name, val);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return -EINVAL;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		return -EINVAL;
+	}
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to fix table for epoch %"PRIu64": %s",
+		     val, err);
+		goto out;
+	}
+	xlog(D_GENERAL, "Renamed table %s to rec-%016" PRIx64, name, val);
+out:
+	sqlite3_free(err);
+	return ret;
+}
+
+/*
+ * Callback for the sqlite_exec statement in sqlite_check_table_names.
+ * If the epoch encoded in the table name matches either the current
+ * epoch or the recovery epoch, then try to fix the padding.  Otherwise,
+ * we bail.
+ */
+static int
+sqlite_check_table_names_cb(void *UNUSED(arg), int ncols, char **cols,
+			    char **UNUSED(colnames))
+{
+	uint64_t val;
+
+	/* each row is expected to carry just the table name in cols[0] */
+	if (ncols > 1)
+		return -EINVAL;
+	if (sscanf(cols[0], "rec-%" PRIx64, &val) != 1)
+		return -EINVAL;
+	/* only tables belonging to a known epoch are renamed */
+	if (val != current_epoch && val != recovery_epoch) {
+		/* val is unsigned, so it has to be printed with PRIu64 */
+		xlog(L_ERROR, "found invalid table name %s for unknown epoch %"
+		     PRIu64, cols[0], val);
+		return -EINVAL;
+	}
+	xlog(D_GENERAL, "found invalid table name %s for %s epoch",
+	     cols[0], val == current_epoch ? "current" : "recovery");
+	return sqlite_fix_table_name(cols[0]);
+}
+
+/*
+ * Look for recovery table names where the epoch isn't zero-padded
+ */
+static int
+sqlite_check_table_names(void)
+{
+	int ret;
+	char *err;
+
+	ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
+			   "WHERE type=\"table\" AND name LIKE \"%rec-%\" "
+			   "AND length(name) < 20;",
+			   sqlite_check_table_names_cb, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Table names check failed: %s", err);
+	}
+	sqlite3_free(err);
+	return ret;
+}
+
+/*
+ * Simple db health check.  For now we're just making sure that the recovery
+ * table names are of the format "rec-CCCCCCCCCCCCCCCC" (where C is the hex
+ * representation of the epoch value) and that epoch value matches either
+ * the current epoch or the recovery epoch.
+ */
+static int
+sqlite_check_db_health(void)
+{
+	int ret, ret2;
+	char *err;
+
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+				&err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto rollback;
+	}
+
+	ret = sqlite_check_table_names();
+	if (ret != SQLITE_OK)
+		goto rollback;
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+
+cleanup:
+	sqlite3_free(err);
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
+	return ret;
+rollback:
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto cleanup;
+}
+
+static int
+sqlite_attach_db(const char *path)
+{
+	int ret;
+	char dbpath[PATH_MAX];
+	struct stat stb;
+	sqlite3_stmt *stmt = NULL;
+	/* Attach <path>/main.sqlite to dbh under the schema name "attached". */
+	ret = snprintf(dbpath, PATH_MAX - 1, "%s/main.sqlite", path);
+	if (ret < 0)
+		return ret;
+	/* bail out early unless the database file can be stat()ed */
+	dbpath[PATH_MAX - 1] = '\0';
+	ret = stat(dbpath, &stb);
+	if (ret < 0)
+		return ret;
+
+	xlog(D_GENERAL, "attaching %s", dbpath);
+	ret = sqlite3_prepare_v2(dbh, "ATTACH DATABASE ? AS attached;",
+			-1, &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "%s: unable to prepare attach statement: %s",
+				__func__, sqlite3_errmsg(dbh));
+		return ret;
+	}
+	/* dbpath outlives the statement, so SQLITE_STATIC is sufficient */
+	ret = sqlite3_bind_text(stmt, 1, dbpath, strlen(dbpath), SQLITE_STATIC);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "%s: bind text failed: %s",
+				__func__, sqlite3_errmsg(dbh));
+		goto out;
+	}
+
+	ret = sqlite3_step(stmt);
+	if (ret == SQLITE_DONE)
+		ret = SQLITE_OK;
+	else
+		xlog(L_ERROR, "%s: unexpected return code from attach: %s",
+				__func__, sqlite3_errmsg(dbh));
+out:
+	/* release the statement on the bind-failure path as well */
+	sqlite3_finalize(stmt);
+	return ret;
+}
+
+static int
+sqlite_detach_db(void)
+{
+	int ret;
+	char *err = NULL;
+
+	xlog(D_GENERAL, "detaching database");
+	ret = sqlite3_exec(dbh, "DETACH DATABASE attached;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to detach attached db: %s", err);
+	}
+
+	sqlite3_free(err);
+	return ret;
+}
+
+/*
+ * Copies client records from the nfsdcltrack database as part of a one-time
+ * "upgrade".
+ *
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0).
+ * Returns the number of records copied via "num_rec".
+ */
+static int
+sqlite_copy_cltrack_records(int *num_rec)
+{
+	int ret, ret2;
+	char *s;
+	char *err = NULL;
+	sqlite3_stmt *stmt = NULL;
+
+	s = conf_get_str("nfsdcltrack", "storagedir");
+	if (s)
+		cltrack_storagedir = s;
+	ret = sqlite_attach_db(cltrack_storagedir);
+	if (ret)
+		goto out;
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+				&err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto rollback;
+	}
+	ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";",
+			current_epoch);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		goto rollback;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		ret = -EINVAL;
+		goto rollback;
+	}
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to clear records from current epoch: %s", err);
+		goto rollback;
+	}
+	ret = snprintf(buf, sizeof(buf), "INSERT INTO \"rec-%016" PRIx64 "\" (id) "
+				"SELECT id FROM attached.clients;",
+				current_epoch);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		goto rollback;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		ret = -EINVAL;
+		goto rollback;
+	}
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		goto rollback;
+	}
+	ret = sqlite3_step(stmt);
+	if (ret != SQLITE_DONE) {
+		xlog(L_ERROR, "%s: unexpected return code from insert: %s",
+				__func__, sqlite3_errmsg(dbh));
+		goto rollback;
+	}
+	*num_rec = sqlite3_changes(dbh);
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+cleanup:
+	sqlite3_finalize(stmt);
+	sqlite3_free(err);
+	sqlite_detach_db();
+out:
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
+	return ret;
+rollback:
+	*num_rec = 0;
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto cleanup;
+}
+
+/*
+ * Open the database and set up the database handle for it.
+ *
+ * Creates topdir if needed, initializes or upgrades the schema, loads the
+ * epochs and the first_time flag, checks the table names and, on first
+ * start, imports records from the older client tracking methods.
+ * Returns 0 on success; on failure the handle is closed and non-zero returned.
+ */
+int
+sqlite_prepare_dbh(const char *topdir)
+{
+	int ret;
+
+	/* Do nothing if the database handle is already set up */
+	if (dbh)
+		return 0;
+
+	ret = snprintf(buf, PATH_MAX - 1, "%s/main.sqlite", topdir);
+	if (ret < 0)
+		return ret;
+
+	buf[PATH_MAX - 1] = '\0';
+
+	/* open a new DB handle */
+	ret = sqlite3_open(buf, &dbh);
+	if (ret != SQLITE_OK) {
+		/* a failed open may still hand back a handle; release it */
+		sqlite3_close(dbh);
+		dbh = NULL;
+		/* try to create the dir */
+		ret = mkdir_if_not_exist(topdir);
+		if (ret)
+			goto out_close;
+
+		/* retry open */
+		ret = sqlite3_open(buf, &dbh);
+		if (ret != SQLITE_OK)
+			goto out_close;
+	}
+
+	/* set busy timeout */
+	ret = sqlite3_busy_timeout(dbh, CLD_SQLITE_BUSY_TIMEOUT);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to set sqlite busy timeout: %s",
+				sqlite3_errmsg(dbh));
+		goto out_close;
+	}
+
+	ret = sqlite_query_schema_version();
+	switch (ret) {
+	case CLD_SQLITE_LATEST_SCHEMA_VERSION:
+		/* DB is already set up. Do nothing */
+		ret = 0;
+		break;
+	case 3:
+	case 2:
+	case 1:
+		/* Old DB -- update to new schema */
+		ret = sqlite_maindb_update_schema(ret);
+		if (ret)
+			goto out_close;
+		break;
+	case 0:
+		/* Query failed -- try to set up new DB */
+		ret = sqlite_maindb_init_v4();
+		if (ret)
+			goto out_close;
+		break;
+	default:
+		/* Unknown DB version -- downgrade? Fail */
+		xlog(L_ERROR, "Unsupported database schema version! "
+			"Expected %d, got %d.",
+			CLD_SQLITE_LATEST_SCHEMA_VERSION, ret);
+		ret = -EINVAL;
+		goto out_close;
+	}
+
+	ret = sqlite_startup_query_grace();
+	if (!ret)
+		ret = sqlite_query_first_time(&first_time);
+	if (ret)
+		goto out_close;
+
+	ret = sqlite_check_db_health();
+	if (ret) {
+		xlog(L_ERROR, "Database health check failed! "
+			"Database must be fixed manually.");
+		goto out_close;
+	}
+
+	/* one-time "upgrade" from older client tracking methods */
+	if (first_time) {
+		sqlite_copy_cltrack_records(&num_cltrack_records);
+		xlog(D_GENERAL, "%s: num_cltrack_records = %d\n",
+			__func__, num_cltrack_records);
+		legacy_load_clients_from_recdir(&num_legacy_records);
+		xlog(D_GENERAL, "%s: num_legacy_records = %d\n",
+			__func__, num_legacy_records);
+		if (num_cltrack_records > 0 && num_legacy_records > 0)
+			xlog(L_WARNING, "%s: first-time upgrade detected "
+				"both cltrack and legacy records!\n", __func__);
+	}
+
+	return ret;
+out_close:
+	sqlite3_close(dbh);
+	dbh = NULL;
+	return ret;
+}
+
+/*
+ * Record a client in the table for the current epoch
+ *
+ * Returns SQLITE_OK (aka 0) on success, or a non-zero error code
+ */
+int
+sqlite_insert_client(const unsigned char *clname, const size_t namelen)
+{
+	sqlite3_stmt *insert = NULL;
+	int rc;
+
+	rc = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64
+			"\" (id) VALUES (?);", current_epoch);
+	if (rc < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return rc;
+	}
+	if ((size_t)rc >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
+		return -EINVAL;
+	}
+
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &insert, NULL);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		return rc;
+	}
+
+	rc = sqlite3_bind_blob(insert, 1, (const void *)clname, namelen, SQLITE_STATIC);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: bind blob failed: %s",
+				__func__, sqlite3_errmsg(dbh));
+		goto out_err;
+	}
+
+	rc = sqlite3_step(insert);
+	if (rc != SQLITE_DONE)
+		xlog(L_ERROR, "%s: unexpected return code from insert: %s",
+				__func__, sqlite3_errmsg(dbh));
+	else
+		rc = SQLITE_OK;
+
+out_err:
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
+	sqlite3_finalize(insert);
+	return rc;
+}
+
+#if UPCALL_VERSION >= 2
+/*
+ * Record a client together with the hash of its kerberos principal
+ *
+ * Returns SQLITE_OK (aka 0) on success, or a non-zero error code
+ */
+int
+sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
+		const unsigned char *clprinchash, const size_t princhashlen)
+{
+	sqlite3_stmt *insert = NULL;
+	int rc;
+
+	/* without a hash only the id column is written */
+	if (princhashlen == 0)
+		rc = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64
+				"\" (id) VALUES (?);", current_epoch);
+	else
+		rc = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64
+				"\" VALUES (?, ?);", current_epoch);
+	if (rc < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return rc;
+	} else if ((size_t)rc >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
+		return -EINVAL;
+	}
+
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &insert, NULL);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		return rc;
+	}
+
+	rc = sqlite3_bind_blob(insert, 1, (const void *)clname, namelen, SQLITE_STATIC);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: bind blob failed: %s",
+				__func__, sqlite3_errmsg(dbh));
+		goto out_err;
+	}
+
+	if (princhashlen > 0) {
+		rc = sqlite3_bind_blob(insert, 2, (const void *)clprinchash,
+					princhashlen, SQLITE_STATIC);
+		if (rc != SQLITE_OK) {
+			xlog(L_ERROR, "%s: bind blob failed: %s",
+					__func__, sqlite3_errmsg(dbh));
+			goto out_err;
+		}
+	}
+
+	rc = sqlite3_step(insert);
+	if (rc != SQLITE_DONE)
+		xlog(L_ERROR, "%s: unexpected return code from insert: %s",
+				__func__, sqlite3_errmsg(dbh));
+	else
+		rc = SQLITE_OK;
+
+out_err:
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
+	sqlite3_finalize(insert);
+	return rc;
+}
+#else /* UPCALL_VERSION < 2 */
+int
+sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
+		const unsigned char *clprinchash, const size_t princhashlen)
+{
+	return -EINVAL;
+}
+#endif /* UPCALL_VERSION >= 2 */
+
+/* Delete a client's record from the table for the current epoch */
+int
+sqlite_remove_client(const unsigned char *clname, const size_t namelen)
+{
+	sqlite3_stmt *del_stmt = NULL;
+	int rc;
+
+	rc = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64
+			"\" WHERE id==?;", current_epoch);
+	if (rc < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return rc;
+	}
+	if ((size_t)rc >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
+		return -EINVAL;
+	}
+
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &del_stmt, NULL);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: statement prepare failed: %s",
+				__func__, sqlite3_errmsg(dbh));
+		goto out_err;
+	}
+
+	rc = sqlite3_bind_blob(del_stmt, 1, (const void *)clname, namelen,
+				SQLITE_STATIC);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: bind blob failed: %s",
+				__func__, sqlite3_errmsg(dbh));
+		goto out_err;
+	}
+
+	rc = sqlite3_step(del_stmt);
+	if (rc != SQLITE_DONE)
+		xlog(L_ERROR, "%s: unexpected return code from delete: %d",
+				__func__, rc);
+	else
+		rc = SQLITE_OK;
+
+out_err:
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
+	sqlite3_finalize(del_stmt);
+	return rc;
+}
+
+/*
+ * Is the given clname in the table for the recovery epoch? If so, then
+ * insert it into the table for the current epoch and return the result of
+ * that insert. If the record isn't present, or the lookup fails, then
+ * return an error.
+ */
+int
+sqlite_check_client(const unsigned char *clname, const size_t namelen)
+{
+	sqlite3_stmt *lookup = NULL;
+	int rc, found;
+
+	rc = snprintf(buf, sizeof(buf), "SELECT count(*) FROM  \"rec-%016" PRIx64
+			"\" WHERE id==?;", recovery_epoch);
+	if (rc < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return rc;
+	}
+	if ((size_t)rc >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
+		return -EINVAL;
+	}
+
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &lookup, NULL);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: select statement prepare failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		return rc;
+	}
+
+	rc = sqlite3_bind_blob(lookup, 1, (const void *)clname, namelen,
+				SQLITE_STATIC);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: bind blob failed: %s",
+				__func__, sqlite3_errmsg(dbh));
+		goto out_err;
+	}
+
+	rc = sqlite3_step(lookup);
+	if (rc != SQLITE_ROW) {
+		xlog(L_ERROR, "%s: unexpected return code from select: %d",
+				__func__, rc);
+		goto out_err;
+	}
+
+	found = sqlite3_column_int(lookup, 0);
+	xlog(D_GENERAL, "%s: select returned %d rows", __func__, found);
+	if (found == 1) {
+		/* Now insert the client into the table for the current epoch */
+		sqlite3_finalize(lookup);
+		return sqlite_insert_client(clname, namelen);
+	}
+	/* anything but exactly one matching record is a refusal */
+	rc = -EACCES;
+
+out_err:
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
+	sqlite3_finalize(lookup);
+	return rc;
+}
+
+/*
+ * Start a grace period: normally advance the epochs and create the table
+ * for the new current epoch; if a recovery epoch is already set, just clear
+ * the current epoch's table. Rolls back and returns non-zero on failure.
+ */
+int
+sqlite_grace_start(void)
+{
+	int ret, ret2;
+	char *err = NULL;
+	uint64_t tcur = current_epoch;
+	uint64_t trec = recovery_epoch;
+
+	/* begin transaction */
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto rollback;
+	}
+
+	if (trec == 0) {
+		/* A normal grace start - update the epoch values in the grace
+		 * table and create a new table for the current reboot epoch.
+		 */
+		trec = tcur;
+		tcur++;
+
+		ret = snprintf(buf, sizeof(buf), "UPDATE grace "
+				"SET current = %" PRId64 ", recovery = %" PRId64 ";",
+				(int64_t)tcur, (int64_t)trec);
+		if (ret < 0) {
+			xlog(L_ERROR, "sprintf failed!");
+			goto rollback;
+		} else if ((size_t)ret >= sizeof(buf)) {
+			xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+			ret = -EINVAL;
+			goto rollback;
+		}
+
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+		if (ret != SQLITE_OK) {
+			xlog(L_ERROR, "Unable to update epochs: %s", err);
+			goto rollback;
+		}
+
+		ret = snprintf(buf, sizeof(buf), "CREATE TABLE \"rec-%016" PRIx64 "\" "
+				"(id BLOB PRIMARY KEY, princhash blob);", tcur);
+		if (ret < 0) {
+			xlog(L_ERROR, "sprintf failed!");
+			goto rollback;
+		} else if ((size_t)ret >= sizeof(buf)) {
+			xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+			ret = -EINVAL;
+			goto rollback;
+		}
+
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+		if (ret != SQLITE_OK) {
+			xlog(L_ERROR, "Unable to create table for current epoch: %s", err);
+			goto rollback;
+		}
+	} else {
+		/* Server restarted while in grace - don't update the epoch
+		 * values in the grace table, just clear out the records for
+		 * the current reboot epoch.
+		 */
+		ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";",
+				tcur);
+		if (ret < 0) {
+			xlog(L_ERROR, "sprintf failed!");
+			goto rollback;
+		} else if ((size_t)ret >= sizeof(buf)) {
+			xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+			ret = -EINVAL;
+			goto rollback;
+		}
+
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+		if (ret != SQLITE_OK) {
+			xlog(L_ERROR, "Unable to clear table for current epoch: %s", err);
+			goto rollback;
+		}
+	}
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+
+	current_epoch = tcur;
+	recovery_epoch = trec;
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
+		__func__, current_epoch, recovery_epoch);
+
+out:
+	sqlite3_free(err);
+	return ret;
+rollback:
+	sqlite3_free(err);	/* the exec below overwrites err; don't leak it */
+	err = NULL;
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto out;
+}
+
+/* End the grace period: clear the recovery epoch and drop that epoch's table. */
+int
+sqlite_grace_done(void)
+{
+	int ret, ret2;
+	char *err = NULL;
+
+	/* begin transaction */
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, "UPDATE grace SET recovery = '0';", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to clear recovery epoch: %s", err);
+		goto rollback;
+	}
+
+	ret = snprintf(buf, sizeof(buf), "DROP TABLE \"rec-%016" PRIx64 "\";",
+		recovery_epoch);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		goto rollback;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		ret = -EINVAL;
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to drop table for recovery epoch: %s", err);
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+
+	recovery_epoch = 0;
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
+		__func__, current_epoch, recovery_epoch);
+
+out:
+	sqlite3_free(err);
+	return ret;
+rollback:
+	sqlite3_free(err);	/* the exec below overwrites err; don't leak it */
+	err = NULL;
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto out;
+}
+
+/* Call cb(clnt) for each client recorded in the recovery epoch's table. */
+int
+sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt)
+{
+	int ret, len;
+	sqlite3_stmt *stmt = NULL;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	if (recovery_epoch == 0) {
+		xlog(D_GENERAL, "%s: not in grace!", __func__);
+		return -EINVAL;
+	}
+
+	ret = snprintf(buf, sizeof(buf), "SELECT * FROM \"rec-%016" PRIx64 "\";", recovery_epoch);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return ret;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		return -EINVAL;
+	}
+
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "%s: select statement prepare failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		return ret;
+	}
+	while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
+		memset(&cmsg->cm_u, 0, sizeof(cmsg->cm_u));
+		len = sqlite3_column_bytes(stmt, 0);	/* copy only what the row holds */
+		len = len > NFS4_OPAQUE_LIMIT ? NFS4_OPAQUE_LIMIT : len;
+#if UPCALL_VERSION >= 2
+		if (len > 0)
+			memcpy(&cmsg->cm_u.cm_clntinfo.cc_name.cn_id,
+				sqlite3_column_blob(stmt, 0), len);
+		cmsg->cm_u.cm_clntinfo.cc_name.cn_len = len;
+		len = sqlite3_column_bytes(stmt, 1);
+		len = len > SHA256_DIGEST_SIZE ? SHA256_DIGEST_SIZE : len;
+		if (len > 0)
+			memcpy(&cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+				sqlite3_column_blob(stmt, 1), len);
+		cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = len;
+#else
+		if (len > 0)
+			memcpy(&cmsg->cm_u.cm_name.cn_id, sqlite3_column_blob(stmt, 0), len);
+		cmsg->cm_u.cm_name.cn_len = len;
+#endif
+		cb(clnt);
+	}
+	sqlite3_finalize(stmt);
+	return ret == SQLITE_DONE ? 0 : ret;
+}
+
+/*
+ * Empty the clients table of the old nfsdcltrack database.
+ *
+ * Called upon receipt of the first "GraceDone" upcall only.
+ */
+int
+sqlite_delete_cltrack_records(void)
+{
+	char *errmsg = NULL;
+	char *dir = conf_get_str("nfsdcltrack", "storagedir");
+	int rc;
+
+	/* a configured storagedir overrides the current one */
+	if (dir != NULL)
+		cltrack_storagedir = dir;
+
+	rc = sqlite_attach_db(cltrack_storagedir);
+	if (rc == 0) {
+		rc = sqlite3_exec(dbh, "DELETE FROM attached.clients;",
+					NULL, NULL, &errmsg);
+		if (rc != SQLITE_OK)
+			xlog(L_ERROR, "Unable to clear records from cltrack db: %s",
+					errmsg);
+		sqlite_detach_db();
+	}
+
+	sqlite3_free(errmsg);
+	return rc;
+}
+
+/*
+ * Sets first_time to 0 in the parameters table to ensure we only
+ * copy old client tracking records into the database one time.
+ *
+ * Called upon receipt of the first "GraceDone" upcall only.
+ */
+int
+sqlite_first_time_done(void)
+{
+	int ret;
+	char *err = NULL;
+
+	/* SQL string literals take single quotes; double quotes name identifiers */
+	ret = sqlite3_exec(dbh, "UPDATE parameters SET value = '0' "
+				"WHERE key = 'first_time';", NULL, NULL, &err);
+	if (ret != SQLITE_OK)
+		xlog(L_ERROR, "Unable to clear first_time: %s", err);
+
+	sqlite3_free(err);
+	return ret;
+}
diff --git a/utils/nfsdcld/sqlite.h b/utils/nfsdcld/sqlite.h
new file mode 100644
index 00000000..0a26ad67
--- /dev/null
+++ b/utils/nfsdcld/sqlite.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _SQLITE_H_
+#define _SQLITE_H_
+
+struct cld_client;
+
+int sqlite_prepare_dbh(const char *topdir);
+int sqlite_insert_client(const unsigned char *clname, const size_t namelen);
+int sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
+		const unsigned char *clprinchash, const size_t princhashlen);
+int sqlite_remove_client(const unsigned char *clname, const size_t namelen);
+int sqlite_check_client(const unsigned char *clname, const size_t namelen);
+int sqlite_grace_start(void);
+int sqlite_grace_done(void);
+int sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt);
+int sqlite_delete_cltrack_records(void);
+int sqlite_first_time_done(void);
+
+#endif /* _SQLITE_H_ */
diff --git a/utils/nfsidmap/nfsidmap.c b/utils/nfsidmap/nfsidmap.c
index d3967a3a..4d219ef5 100644
--- a/utils/nfsidmap/nfsidmap.c
+++ b/utils/nfsidmap/nfsidmap.c
@@ -18,7 +18,7 @@
 #include "xcommon.h"
 
 int verbose = 0;
-char *usage = "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]";
+#define USAGE "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]"
 
 #define MAX_ID_LEN   11
 #define IDMAP_NAMESZ 128
@@ -401,7 +401,7 @@ int main(int argc, char **argv)
 			break;
 		case 'h':
 		default:
-			xlog_warn(usage, progname);
+			xlog_warn(USAGE, progname);
 			exit(opt == 'h' ? 0 : 1);
 		}
 	}
@@ -433,7 +433,7 @@ int main(int argc, char **argv)
 	xlog_stderr(verbose);
 	if ((argc - optind) != 2) {
 		xlog_warn("Bad arg count. Check /etc/request-key.conf");
-		xlog_warn(usage, progname);
+		xlog_warn(USAGE, progname);
 		return EXIT_FAILURE;
 	}
 
@@ -451,7 +451,7 @@ int main(int argc, char **argv)
 		return EXIT_FAILURE;
 	}
 	if (verbose) {
-		xlog_warn("key: 0x%lx type: %s value: %s timeout %ld",
+		xlog_warn("key: 0x%x type: %s value: %s timeout %d",
 			key, type, value, timeout);
 	}
 
diff --git a/utils/statd/rmtcall.c b/utils/statd/rmtcall.c
index c4f6364f..5b261480 100644
--- a/utils/statd/rmtcall.c
+++ b/utils/statd/rmtcall.c
@@ -247,7 +247,7 @@ process_reply(FD_SET_TYPE *rfds)
 		xlog_warn("%s: service %d not registered on localhost",
 			__func__, NL_MY_PROG(lp));
 	} else {
-		xlog(D_GENERAL, "%s: Callback to %s (for %d) succeeded",
+		xlog(D_GENERAL, "%s: Callback to %s (for %s) succeeded",
 			__func__, NL_MY_NAME(lp), NL_MON_NAME(lp));
 	}
 	nlist_free(&notify, lp);
diff --git a/utils/statd/statd.c b/utils/statd/statd.c
index 14673800..8eef2ff2 100644
--- a/utils/statd/statd.c
+++ b/utils/statd/statd.c
@@ -136,7 +136,7 @@ static void log_modes(void)
 	strcat(buf, "TI-RPC ");
 #endif
 
-	xlog_warn(buf);
+	xlog_warn("%s", buf);
 }
 
 /*
diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c
index d1dbd74a..e343c768 100644
--- a/utils/statd/svc_run.c
+++ b/utils/statd/svc_run.c
@@ -53,6 +53,7 @@
 
 #include <errno.h>
 #include <time.h>
+#include <inttypes.h>
 #include "statd.h"
 #include "notlist.h"
 
@@ -104,8 +105,8 @@ my_svc_run(int sockfd)
 
 			tv.tv_sec  = NL_WHEN(notify) - now;
 			tv.tv_usec = 0;
-			xlog(D_GENERAL, "Waiting for reply... (timeo %d)",
-							tv.tv_sec);
+			xlog(D_GENERAL, "Waiting for reply... (timeo %jd)",
+							(intmax_t)tv.tv_sec);
 			selret = select(FD_SETSIZE, &readfds,
 				(void *) 0, (void *) 0, &tv);
 		} else {