Blame SOURCES/nfs-utils-2.3.3-nfsdcld-upstream-update.patch

777f42
diff --git a/.gitignore b/.gitignore
777f42
index e91e7a25..e97b31f5 100644
777f42
--- a/.gitignore
777f42
+++ b/.gitignore
777f42
@@ -54,6 +54,7 @@ utils/rquotad/rquotad
777f42
 utils/rquotad/rquota.h
777f42
 utils/rquotad/rquota_xdr.c
777f42
 utils/showmount/showmount
777f42
+utils/nfsdcld/nfsdcld
777f42
 utils/nfsdcltrack/nfsdcltrack
777f42
 utils/statd/statd
777f42
 tools/locktest/testlk
777f42
diff --git a/aclocal/ax_gcc_func_attribute.m4 b/aclocal/ax_gcc_func_attribute.m4
777f42
new file mode 100644
777f42
index 00000000..098c9aad
777f42
--- /dev/null
777f42
+++ b/aclocal/ax_gcc_func_attribute.m4
777f42
@@ -0,0 +1,238 @@
777f42
+# ===========================================================================
777f42
+#  https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html
777f42
+# ===========================================================================
777f42
+#
777f42
+# SYNOPSIS
777f42
+#
777f42
+#   AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
777f42
+#
777f42
+# DESCRIPTION
777f42
+#
777f42
+#   This macro checks if the compiler supports one of GCC's function
777f42
+#   attributes; many other compilers also provide function attributes with
777f42
+#   the same syntax. Compiler warnings are used to detect supported
777f42
+#   attributes as unsupported ones are ignored by default so quieting
777f42
+#   warnings when using this macro will yield false positives.
777f42
+#
777f42
+#   The ATTRIBUTE parameter holds the name of the attribute to be checked.
777f42
+#
777f42
+#   If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE>.
777f42
+#
777f42
+#   The macro caches its result in the ax_cv_have_func_attribute_<attribute>
777f42
+#   variable.
777f42
+#
777f42
+#   The macro currently supports the following function attributes:
777f42
+#
777f42
+#    alias
777f42
+#    aligned
777f42
+#    alloc_size
777f42
+#    always_inline
777f42
+#    artificial
777f42
+#    cold
777f42
+#    const
777f42
+#    constructor
777f42
+#    constructor_priority for constructor attribute with priority
777f42
+#    deprecated
777f42
+#    destructor
777f42
+#    dllexport
777f42
+#    dllimport
777f42
+#    error
777f42
+#    externally_visible
777f42
+#    fallthrough
777f42
+#    flatten
777f42
+#    format
777f42
+#    format_arg
777f42
+#    gnu_inline
777f42
+#    hot
777f42
+#    ifunc
777f42
+#    leaf
777f42
+#    malloc
777f42
+#    noclone
777f42
+#    noinline
777f42
+#    nonnull
777f42
+#    noreturn
777f42
+#    nothrow
777f42
+#    optimize
777f42
+#    pure
777f42
+#    sentinel
777f42
+#    sentinel_position
777f42
+#    unused
777f42
+#    used
777f42
+#    visibility
777f42
+#    warning
777f42
+#    warn_unused_result
777f42
+#    weak
777f42
+#    weakref
777f42
+#
777f42
+#   Unsupported function attributes will be tested with a prototype
777f42
+#   returning an int and not accepting any arguments and the result of the
777f42
+#   check might be wrong or meaningless so use with care.
777f42
+#
777f42
+# LICENSE
777f42
+#
777f42
+#   Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
777f42
+#
777f42
+#   Copying and distribution of this file, with or without modification, are
777f42
+#   permitted in any medium without royalty provided the copyright notice
777f42
+#   and this notice are preserved.  This file is offered as-is, without any
777f42
+#   warranty.
777f42
+
777f42
+#serial 9
777f42
+
777f42
+AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [
777f42
+    AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1])
777f42
+
777f42
+    AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [
777f42
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([
777f42
+            m4_case([$1],
777f42
+                [alias], [
777f42
+                    int foo( void ) { return 0; }
777f42
+                    int bar( void ) __attribute__(($1("foo")));
777f42
+                ],
777f42
+                [aligned], [
777f42
+                    int foo( void ) __attribute__(($1(32)));
777f42
+                ],
777f42
+                [alloc_size], [
777f42
+                    void *foo(int a) __attribute__(($1(1)));
777f42
+                ],
777f42
+                [always_inline], [
777f42
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
777f42
+                ],
777f42
+                [artificial], [
777f42
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
777f42
+                ],
777f42
+                [cold], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [const], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [constructor_priority], [
777f42
+                    int foo( void ) __attribute__((__constructor__(65535/2)));
777f42
+                ],
777f42
+                [constructor], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [deprecated], [
777f42
+                    int foo( void ) __attribute__(($1("")));
777f42
+                ],
777f42
+                [destructor], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [dllexport], [
777f42
+                    __attribute__(($1)) int foo( void ) { return 0; }
777f42
+                ],
777f42
+                [dllimport], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [error], [
777f42
+                    int foo( void ) __attribute__(($1("")));
777f42
+                ],
777f42
+                [externally_visible], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [fallthrough], [
777f42
+                    int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }};
777f42
+                ],
777f42
+                [flatten], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [format], [
777f42
+                    int foo(const char *p, ...) __attribute__(($1(printf, 1, 2)));
777f42
+                ],
777f42
+                [format_arg], [
777f42
+                    char *foo(const char *p) __attribute__(($1(1)));
777f42
+                ],
777f42
+                [gnu_inline], [
777f42
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
777f42
+                ],
777f42
+                [hot], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [ifunc], [
777f42
+                    int my_foo( void ) { return 0; }
777f42
+                    static int (*resolve_foo(void))(void) { return my_foo; }
777f42
+                    int foo( void ) __attribute__(($1("resolve_foo")));
777f42
+                ],
777f42
+                [leaf], [
777f42
+                    __attribute__(($1)) int foo( void ) { return 0; }
777f42
+                ],
777f42
+                [malloc], [
777f42
+                    void *foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [noclone], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [noinline], [
777f42
+                    __attribute__(($1)) int foo( void ) { return 0; }
777f42
+                ],
777f42
+                [nonnull], [
777f42
+                    int foo(char *p) __attribute__(($1(1)));
777f42
+                ],
777f42
+                [noreturn], [
777f42
+                    void foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [nothrow], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [optimize], [
777f42
+                    __attribute__(($1(3))) int foo( void ) { return 0; }
777f42
+                ],
777f42
+                [pure], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [sentinel], [
777f42
+                    int foo(void *p, ...) __attribute__(($1));
777f42
+                ],
777f42
+                [sentinel_position], [
777f42
+                    int foo(void *p, ...) __attribute__(($1(1)));
777f42
+                ],
777f42
+                [returns_nonnull], [
777f42
+                    void *foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [unused], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [used], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [visibility], [
777f42
+                    int foo_def( void ) __attribute__(($1("default")));
777f42
+                    int foo_hid( void ) __attribute__(($1("hidden")));
777f42
+                    int foo_int( void ) __attribute__(($1("internal")));
777f42
+                    int foo_pro( void ) __attribute__(($1("protected")));
777f42
+                ],
777f42
+                [warning], [
777f42
+                    int foo( void ) __attribute__(($1("")));
777f42
+                ],
777f42
+                [warn_unused_result], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [weak], [
777f42
+                    int foo( void ) __attribute__(($1));
777f42
+                ],
777f42
+                [weakref], [
777f42
+                    static int foo( void ) { return 0; }
777f42
+                    static int bar( void ) __attribute__(($1("foo")));
777f42
+                ],
777f42
+                [
777f42
+                 m4_warn([syntax], [Unsupported attribute $1, the test may fail])
777f42
+                 int foo( void ) __attribute__(($1));
777f42
+                ]
777f42
+            )], [])
777f42
+            ],
777f42
+            dnl GCC doesn't exit with an error if an unknown attribute is
777f42
+            dnl provided but only outputs a warning, so accept the attribute
777f42
+            dnl only if no warnings were issued.
777f42
+            [AS_IF([test -s conftest.err],
777f42
+                [AS_VAR_SET([ac_var], [no])],
777f42
+                [AS_VAR_SET([ac_var], [yes])])],
777f42
+            [AS_VAR_SET([ac_var], [no])])
777f42
+    ])
777f42
+
777f42
+    AS_IF([test yes = AS_VAR_GET([ac_var])],
777f42
+        [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1,
777f42
+            [Define to 1 if the system has the `$1' function attribute])], [])
777f42
+
777f42
+    AS_VAR_POPDEF([ac_var])
777f42
+])
777f42
diff --git a/configure.ac b/configure.ac
777f42
index 48eb9eb6..13ea957f 100644
777f42
--- a/configure.ac
777f42
+++ b/configure.ac
777f42
@@ -238,6 +238,12 @@ else
777f42
 	AM_CONDITIONAL(MOUNT_CONFIG, [test "$enable_mount" = "yes"])
777f42
 fi
777f42
 
777f42
+AC_ARG_ENABLE(nfsdcld,
777f42
+	[AC_HELP_STRING([--disable-nfsdcld],
777f42
+			[disable NFSv4 clientid tracking daemon @<:@default=no@:>@])],
777f42
+	enable_nfsdcld=$enableval,
777f42
+	enable_nfsdcld="yes")
777f42
+
777f42
 AC_ARG_ENABLE(nfsdcltrack,
777f42
 	[AC_HELP_STRING([--disable-nfsdcltrack],
777f42
 			[disable NFSv4 clientid tracking programs @<:@default=no@:>@])],
777f42
@@ -317,6 +323,20 @@ if test "$enable_nfsv4" = yes; then
777f42
   dnl Check for sqlite3
777f42
   AC_SQLITE3_VERS
777f42
 
777f42
+  if test "$enable_nfsdcld" = "yes"; then
777f42
+	AC_CHECK_HEADERS([libgen.h sys/inotify.h], ,
777f42
+		AC_MSG_ERROR([Cannot find header needed for nfsdcld]))
777f42
+
777f42
+    case $libsqlite3_cv_is_recent in
777f42
+    yes) ;;
777f42
+    unknown)
777f42
+      dnl do not fail when cross-compiling
777f42
+      AC_MSG_WARN([assuming sqlite is at least v3.3]) ;;
777f42
+    *)
777f42
+      AC_MSG_ERROR([nfsdcld requires sqlite-devel]) ;;
777f42
+    esac
777f42
+  fi
777f42
+
777f42
   if test "$enable_nfsdcltrack" = "yes"; then
777f42
 	AC_CHECK_HEADERS([libgen.h sys/inotify.h], ,
777f42
 		AC_MSG_ERROR([Cannot find header needed for nfsdcltrack]))
777f42
@@ -332,6 +352,7 @@ if test "$enable_nfsv4" = yes; then
777f42
   fi
777f42
 
777f42
 else
777f42
+  enable_nfsdcld="no"
777f42
   enable_nfsdcltrack="no"
777f42
 fi
777f42
 
777f42
@@ -342,6 +363,7 @@ if test "$enable_nfsv41" = yes; then
777f42
 fi
777f42
 
777f42
 dnl enable nfsidmap when its support by libnfsidmap
777f42
+AM_CONDITIONAL(CONFIG_NFSDCLD, [test "$enable_nfsdcld" = "yes" ])
777f42
 AM_CONDITIONAL(CONFIG_NFSDCLTRACK, [test "$enable_nfsdcltrack" = "yes" ])
777f42
 
777f42
 
777f42
@@ -581,6 +603,7 @@ CHECK_CCSUPPORT([-Werror=format-overflow=2], [flg1])
777f42
 CHECK_CCSUPPORT([-Werror=int-conversion], [flg2])
777f42
 CHECK_CCSUPPORT([-Werror=incompatible-pointer-types], [flg3])
777f42
 CHECK_CCSUPPORT([-Werror=misleading-indentation], [flg4])
777f42
+AX_GCC_FUNC_ATTRIBUTE([format])
777f42
 
777f42
 AC_SUBST([AM_CFLAGS], ["$my_am_cflags $flg1 $flg2 $flg3 $flg4"])
777f42
 
777f42
@@ -617,8 +640,10 @@ AC_CONFIG_FILES([
777f42
 	tools/mountstats/Makefile
777f42
 	tools/nfs-iostat/Makefile
777f42
 	tools/nfsconf/Makefile
777f42
+	tools/clddb-tool/Makefile
777f42
 	utils/Makefile
777f42
 	utils/blkmapd/Makefile
777f42
+	utils/nfsdcld/Makefile
777f42
 	utils/nfsdcltrack/Makefile
777f42
 	utils/exportfs/Makefile
777f42
 	utils/gssd/Makefile
777f42
diff --git a/nfs.conf b/nfs.conf
777f42
index d48a4e55..56172c49 100644
777f42
--- a/nfs.conf
777f42
+++ b/nfs.conf
777f42
@@ -36,6 +36,10 @@ use-gss-proxy=1
777f42
 # state-directory-path=/var/lib/nfs
777f42
 # ha-callout=
777f42
 #
777f42
+[nfsdcld]
777f42
+# debug=0
777f42
+# storagedir=/var/lib/nfs/nfsdcld
777f42
+#
777f42
 [nfsdcltrack]
777f42
 # debug=0
777f42
 # storagedir=/var/lib/nfs/nfsdcltrack
777f42
diff --git a/support/include/cld.h b/support/include/cld.h
777f42
index f14a9ab0..88d3b63e 100644
777f42
--- a/support/include/cld.h
777f42
+++ b/support/include/cld.h
777f42
@@ -23,16 +23,22 @@
777f42
 #define _NFSD_CLD_H
777f42
 
777f42
 /* latest upcall version available */
777f42
-#define CLD_UPCALL_VERSION 1
777f42
+#define CLD_UPCALL_VERSION 2
777f42
 
777f42
 /* defined by RFC3530 */
777f42
 #define NFS4_OPAQUE_LIMIT 1024
777f42
 
777f42
+#ifndef SHA256_DIGEST_SIZE
777f42
+#define SHA256_DIGEST_SIZE      32
777f42
+#endif
777f42
+
777f42
 enum cld_command {
777f42
 	Cld_Create,		/* create a record for this cm_id */
777f42
 	Cld_Remove,		/* remove record of this cm_id */
777f42
 	Cld_Check,		/* is this cm_id allowed? */
777f42
 	Cld_GraceDone,		/* grace period is complete */
777f42
+	Cld_GraceStart,		/* grace start (upload client records) */
777f42
+	Cld_GetVersion,		/* query max supported upcall version */
777f42
 };
777f42
 
777f42
 /* representation of long-form NFSv4 client ID */
777f42
@@ -41,6 +47,17 @@ struct cld_name {
777f42
 	unsigned char	cn_id[NFS4_OPAQUE_LIMIT];	/* client-provided */
777f42
 } __attribute__((packed));
777f42
 
777f42
+/* sha256 hash of the kerberos principal */
777f42
+struct cld_princhash {
777f42
+	uint8_t		cp_len;				/* length of cp_data */
777f42
+	unsigned char	cp_data[SHA256_DIGEST_SIZE];	/* hash of principal */
777f42
+} __attribute__((packed));
777f42
+
777f42
+struct cld_clntinfo {
777f42
+	struct cld_name		cc_name;
777f42
+	struct cld_princhash	cc_princhash;
777f42
+} __attribute__((packed));
777f42
+
777f42
 /* message struct for communication with userspace */
777f42
 struct cld_msg {
777f42
 	uint8_t		cm_vers;		/* upcall version */
777f42
@@ -50,7 +67,28 @@ struct cld_msg {
777f42
 	union {
777f42
 		int64_t		cm_gracetime;	/* grace period start time */
777f42
 		struct cld_name	cm_name;
777f42
+		uint8_t		cm_version;	/* for getting max version */
777f42
+	} __attribute__((packed)) cm_u;
777f42
+} __attribute__((packed));
777f42
+
777f42
+/* version 2 message can include hash of kerberos principal */
777f42
+struct cld_msg_v2 {
777f42
+	uint8_t		cm_vers;		/* upcall version */
777f42
+	uint8_t		cm_cmd;			/* upcall command */
777f42
+	int16_t		cm_status;		/* return code */
777f42
+	uint32_t	cm_xid;			/* transaction id */
777f42
+	union {
777f42
+		struct cld_name	cm_name;
777f42
+		uint8_t		cm_version;	/* for getting max version */
777f42
+		struct cld_clntinfo cm_clntinfo; /* name & princ hash */
777f42
 	} __attribute__((packed)) cm_u;
777f42
 } __attribute__((packed));
777f42
 
777f42
+struct cld_msg_hdr {
777f42
+	uint8_t		cm_vers;		/* upcall version */
777f42
+	uint8_t		cm_cmd;			/* upcall command */
777f42
+	int16_t		cm_status;		/* return code */
777f42
+	uint32_t	cm_xid;			/* transaction id */
777f42
+} __attribute__((packed));
777f42
+
777f42
 #endif /* !_NFSD_CLD_H */
777f42
diff --git a/support/include/xcommon.h b/support/include/xcommon.h
777f42
index 23c9a135..30b0403b 100644
777f42
--- a/support/include/xcommon.h
777f42
+++ b/support/include/xcommon.h
777f42
@@ -9,6 +9,10 @@
777f42
 #ifndef _XMALLOC_H
777f42
 #define _MALLOC_H
777f42
 
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include <config.h>
777f42
+#endif
777f42
+
777f42
 #include <sys/types.h>
777f42
 #include <fcntl.h>
777f42
 #include <limits.h>
777f42
@@ -25,9 +29,15 @@
777f42
 
777f42
 #define streq(s, t)	(strcmp ((s), (t)) == 0)
777f42
 
777f42
-/* Functions in sundries.c that are used in mount.c and umount.c  */ 
777f42
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
777f42
+#define X_FORMAT(_x) __attribute__((__format__ _x))
777f42
+#else
777f42
+#define X_FORMAT(_x)
777f42
+#endif
777f42
+
777f42
+/* Functions in sundries.c that are used in mount.c and umount.c  */
777f42
 char *canonicalize (const char *path);
777f42
-void nfs_error (const char *fmt, ...);
777f42
+void nfs_error (const char *fmt, ...) X_FORMAT((printf, 1, 2));
777f42
 void *xmalloc (size_t size);
777f42
 void *xrealloc(void *p, size_t size);
777f42
 void xfree(void *);
777f42
@@ -36,9 +46,9 @@ char *xstrndup (const char *s, int n);
777f42
 char *xstrconcat2 (const char *, const char *);
777f42
 char *xstrconcat3 (const char *, const char *, const char *);
777f42
 char *xstrconcat4 (const char *, const char *, const char *, const char *);
777f42
-void die (int errcode, const char *fmt, ...);
777f42
+void die (int errcode, const char *fmt, ...) X_FORMAT((printf, 2, 3));
777f42
 
777f42
-extern void die(int err, const char *fmt, ...);
777f42
+extern void die(int err, const char *fmt, ...) X_FORMAT((printf, 2, 3));
777f42
 extern void (*at_die)(void);
777f42
 
777f42
 /* exit status - bits below are ORed */
777f42
diff --git a/support/include/xlog.h b/support/include/xlog.h
777f42
index a11463ed..32ff5a1b 100644
777f42
--- a/support/include/xlog.h
777f42
+++ b/support/include/xlog.h
777f42
@@ -7,6 +7,10 @@
777f42
 #ifndef XLOG_H
777f42
 #define XLOG_H
777f42
 
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include <config.h>
777f42
+#endif
777f42
+
777f42
 #include <stdarg.h>
777f42
 
777f42
 /* These are logged always. L_FATAL also does exit(1) */
777f42
@@ -35,6 +39,12 @@ struct xlog_debugfac {
777f42
 	int		df_fac;
777f42
 };
777f42
 
777f42
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
777f42
+#define XLOG_FORMAT(_x) __attribute__((__format__ _x))
777f42
+#else
777f42
+#define XLOG_FORMAT(_x)
777f42
+#endif
777f42
+
777f42
 extern int export_errno;
777f42
 void			xlog_open(char *progname);
777f42
 void			xlog_stderr(int on);
777f42
@@ -43,10 +53,10 @@ void			xlog_config(int fac, int on);
777f42
 void			xlog_sconfig(char *, int on);
777f42
 void			xlog_from_conffile(char *);
777f42
 int			xlog_enabled(int fac);
777f42
-void			xlog(int fac, const char *fmt, ...);
777f42
-void			xlog_warn(const char *fmt, ...);
777f42
-void			xlog_err(const char *fmt, ...);
777f42
-void			xlog_errno(int err, const char *fmt, ...);
777f42
-void			xlog_backend(int fac, const char *fmt, va_list args);
777f42
+void			xlog(int fac, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3));
777f42
+void			xlog_warn(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2));
777f42
+void			xlog_err(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2));
777f42
+void			xlog_errno(int err, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3));
777f42
+void			xlog_backend(int fac, const char *fmt, va_list args) XLOG_FORMAT((printf, 2, 0));
777f42
 
777f42
 #endif /* XLOG_H */
777f42
diff --git a/support/junction/junction.c b/support/junction/junction.c
777f42
index ab6caa61..41cce261 100644
777f42
--- a/support/junction/junction.c
777f42
+++ b/support/junction/junction.c
777f42
@@ -23,6 +23,10 @@
777f42
  *	http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
777f42
  */
777f42
 
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include <config.h>
777f42
+#endif
777f42
+
777f42
 #include <sys/types.h>
777f42
 #include <sys/stat.h>
777f42
 
777f42
diff --git a/support/misc/file.c b/support/misc/file.c
777f42
index 4065376e..74973169 100644
777f42
--- a/support/misc/file.c
777f42
+++ b/support/misc/file.c
777f42
@@ -18,6 +18,10 @@
777f42
  * along with nfs-utils.  If not, see <http://www.gnu.org/licenses/>.
777f42
  */
777f42
 
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include <config.h>
777f42
+#endif
777f42
+
777f42
 #include <sys/stat.h>
777f42
 
777f42
 #include <string.h>
777f42
diff --git a/support/misc/mountpoint.c b/support/misc/mountpoint.c
777f42
index 9f9ce44e..4205b41c 100644
777f42
--- a/support/misc/mountpoint.c
777f42
+++ b/support/misc/mountpoint.c
777f42
@@ -3,6 +3,10 @@
777f42
  * check if a given path is a mountpoint 
777f42
  */
777f42
 
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include <config.h>
777f42
+#endif
777f42
+
777f42
 #include <string.h>
777f42
 #include "xcommon.h"
777f42
 #include <sys/stat.h>
777f42
diff --git a/support/nfs/cacheio.c b/support/nfs/cacheio.c
777f42
index 9dc4cf1c..7c4cf373 100644
777f42
--- a/support/nfs/cacheio.c
777f42
+++ b/support/nfs/cacheio.c
777f42
@@ -15,6 +15,10 @@
777f42
  *
777f42
  */
777f42
 
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include <config.h>
777f42
+#endif
777f42
+
777f42
 #include <nfslib.h>
777f42
 #include <stdio.h>
777f42
 #include <stdio_ext.h>
777f42
diff --git a/support/nfs/svc_create.c b/support/nfs/svc_create.c
777f42
index ef7ff05f..7b595f89 100644
777f42
--- a/support/nfs/svc_create.c
777f42
+++ b/support/nfs/svc_create.c
777f42
@@ -184,7 +184,7 @@ svc_create_sock(const struct sockaddr *sap, socklen_t salen,
777f42
 		type = SOCK_STREAM;
777f42
 		break;
777f42
 	default:
777f42
-		xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %u",
777f42
+		xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %lu",
777f42
 			__func__, nconf->nc_semantics);
777f42
 		return -1;
777f42
 	}
777f42
diff --git a/support/nsm/rpc.c b/support/nsm/rpc.c
777f42
index ae49006c..08b4746f 100644
777f42
--- a/support/nsm/rpc.c
777f42
+++ b/support/nsm/rpc.c
777f42
@@ -182,7 +182,7 @@ nsm_xmit_getport(const int sock, const struct sockaddr_in *sin,
777f42
 	uint32_t xid;
777f42
 	XDR xdr;
777f42
 
777f42
-	xlog(D_CALL, "Sending PMAP_GETPORT for %u, %u, udp", program, version);
777f42
+	xlog(D_CALL, "Sending PMAP_GETPORT for %lu, %lu, udp", program, version);
777f42
 
777f42
 	nsm_init_xdrmem(msgbuf, NSM_MAXMSGSIZE, &xdr);
777f42
 	xid = nsm_init_rpc_header(PMAPPROG, PMAPVERS,
777f42
diff --git a/systemd/Makefile.am b/systemd/Makefile.am
777f42
index d54518bc..53458c62 100644
777f42
--- a/systemd/Makefile.am
777f42
+++ b/systemd/Makefile.am
777f42
@@ -36,6 +36,11 @@ unit_files += \
777f42
 endif
777f42
 endif
777f42
 
777f42
+if CONFIG_NFSDCLD
777f42
+unit_files += \
777f42
+    nfsdcld.service
777f42
+endif
777f42
+
777f42
 man5_MANS	= nfs.conf.man
777f42
 man7_MANS	= nfs.systemd.man
777f42
 EXTRA_DIST = $(unit_files) $(man5_MANS) $(man7_MANS)
777f42
diff --git a/systemd/nfs-server.service b/systemd/nfs-server.service
777f42
index 136552b5..24118d69 100644
777f42
--- a/systemd/nfs-server.service
777f42
+++ b/systemd/nfs-server.service
777f42
@@ -6,10 +6,12 @@ Requires= nfs-mountd.service
777f42
 Wants=rpcbind.socket network-online.target
777f42
 Wants=rpc-statd.service nfs-idmapd.service
777f42
 Wants=rpc-statd-notify.service
777f42
+Wants=nfsdcld.service
777f42
 
777f42
 After= network-online.target local-fs.target
777f42
 After= proc-fs-nfsd.mount rpcbind.socket nfs-mountd.service
777f42
 After= nfs-idmapd.service rpc-statd.service
777f42
+After= nfsdcld.service
777f42
 Before= rpc-statd-notify.service
777f42
 
777f42
 # GSS services dependencies and ordering
777f42
diff --git a/systemd/nfsdcld.service b/systemd/nfsdcld.service
777f42
new file mode 100644
777f42
index 00000000..a32d2430
777f42
--- /dev/null
777f42
+++ b/systemd/nfsdcld.service
777f42
@@ -0,0 +1,10 @@
777f42
+[Unit]
777f42
+Description=NFSv4 Client Tracking Daemon
777f42
+DefaultDependencies=no
777f42
+Conflicts=umount.target
777f42
+Requires=rpc_pipefs.target proc-fs-nfsd.mount
777f42
+After=rpc_pipefs.target proc-fs-nfsd.mount
777f42
+
777f42
+[Service]
777f42
+Type=forking
777f42
+ExecStart=/usr/sbin/nfsdcld
777f42
diff --git a/tools/Makefile.am b/tools/Makefile.am
777f42
index 4266da49..53e61170 100644
777f42
--- a/tools/Makefile.am
777f42
+++ b/tools/Makefile.am
777f42
@@ -8,6 +8,10 @@ endif
777f42
 
777f42
 OPTDIRS += nfsconf
777f42
 
777f42
+if CONFIG_NFSDCLD
777f42
+OPTDIRS += clddb-tool
777f42
+endif
777f42
+
777f42
 SUBDIRS = locktest rpcdebug nlmtest mountstats nfs-iostat $(OPTDIRS)
777f42
 
777f42
 MAINTAINERCLEANFILES = Makefile.in
777f42
diff --git a/tools/clddb-tool/Makefile.am b/tools/clddb-tool/Makefile.am
777f42
new file mode 100644
777f42
index 00000000..15a8fd47
777f42
--- /dev/null
777f42
+++ b/tools/clddb-tool/Makefile.am
777f42
@@ -0,0 +1,13 @@
777f42
+## Process this file with automake to produce Makefile.in
777f42
+PYTHON_FILES =  clddb-tool.py
777f42
+
777f42
+man8_MANS	= clddb-tool.man
777f42
+
777f42
+EXTRA_DIST	= $(man8_MANS) $(PYTHON_FILES)
777f42
+
777f42
+all-local: $(PYTHON_FILES)
777f42
+
777f42
+install-data-hook:
777f42
+	$(INSTALL) -m 755 clddb-tool.py $(DESTDIR)$(sbindir)/clddb-tool
777f42
+
777f42
+MAINTAINERCLEANFILES=Makefile.in
777f42
diff --git a/tools/clddb-tool/clddb-tool.man b/tools/clddb-tool/clddb-tool.man
777f42
new file mode 100644
777f42
index 00000000..e80b2c05
777f42
--- /dev/null
777f42
+++ b/tools/clddb-tool/clddb-tool.man
777f42
@@ -0,0 +1,83 @@
777f42
+.\"
777f42
+.\" clddb-tool(8)
777f42
+.\"
777f42
+.TH clddb-tool 8 "07 Aug 2019"
777f42
+.SH NAME
777f42
+clddb-tool \- Tool for manipulating the nfsdcld sqlite database
777f42
+.SH SYNOPSIS
777f42
+.B clddb-tool
777f42
+.RB [ \-h | \-\-help ]
777f42
+.P
777f42
+.B clddb-tool
777f42
+.RB [ \-p | \-\-path
777f42
+.IR dbpath ]
777f42
+.B fix-table-names
777f42
+.RB [ \-h | \-\-help ]
777f42
+.P
777f42
+.B clddb-tool
777f42
+.RB [ \-p | \-\-path
777f42
+.IR dbpath ]
777f42
+.B downgrade-schema
777f42
+.RB [ \-h | \-\-help ]
777f42
+.RB [ \-v | \-\-version
777f42
+.IR to-version ]
777f42
+.P
777f42
+.B clddb-tool
777f42
+.RB [ \-p | \-\-path
777f42
+.IR dbpath ]
777f42
+.B print
777f42
+.RB [ \-h | \-\-help ]
777f42
+.RB [ \-s | \-\-summary ]
777f42
+.P
777f42
+
777f42
+.SH DESCRIPTION
777f42
+.RB "The " clddb-tool " command is provided to perform some manipulation of the nfsdcld sqlite database schema and to print the contents of the database."
777f42
+.SS Sub-commands
777f42
+Valid
777f42
+.B clddb-tool
777f42
+subcommands are:
777f42
+.IP "\fBfix-table-names\fP"
777f42
+.RB "A previous version of " nfsdcld "(8) contained a bug that corrupted the reboot epoch table names.  This sub-command will fix those table names."
777f42
+.IP "\fBdowngrade-schema\fP"
777f42
+Downgrade the database schema.  Currently the schema can only be downgraded from version 4 to version 3.
777f42
+.IP "\fBprint\fP"
777f42
+Display the contents of the database.  Prints the schema version and the values of the current and recovery epochs.  If the
777f42
+.BR \-s | \-\-summary
777f42
+option is not given, also prints the clients in the reboot epoch tables.
777f42
+.SH OPTIONS
777f42
+.SS Options valid for all sub-commands
777f42
+.TP
777f42
+.B \-h, \-\-help
777f42
+Show the help message and exit
777f42
+.TP
777f42
+\fB\-p \fIdbpath\fR, \fB\-\-path \fIdbpath\fR
777f42
+Open the sqlite database located at
777f42
+.I dbpath
777f42
+instead of
777f42
+.IR /var/lib/nfs/nfsdcld/main.sqlite ".  "
777f42
+This is mainly for testing purposes.
777f42
+.SS Options specific to the downgrade-schema sub-command
777f42
+.TP
777f42
+\fB\-v \fIto-version\fR, \fB\-\-version \fIto-version\fR
777f42
+The schema version to downgrade to.  Currently the schema can only be downgraded to version 3.
777f42
+.SS Options specific to the print sub-command
777f42
+.TP
777f42
+.B \-s, \-\-summary
777f42
+Do not list the clients in the reboot epoch tables in the output.
777f42
+.SH NOTES
777f42
+The
777f42
+.B clddb-tool
777f42
+command will not allow the
777f42
+.B fix-table-names
777f42
+or
777f42
+.B downgrade-schema
777f42
+subcommands to be used if
777f42
+.BR nfsdcld (8)
777f42
+is running.
777f42
+.SH FILES
777f42
+.TP
777f42
+.B /var/lib/nfs/nfsdcld/main.sqlite
777f42
+.SH SEE ALSO
777f42
+.BR nfsdcld (8)
777f42
+.SH AUTHOR
777f42
+Scott Mayhew <smayhew@redhat.com>
777f42
diff --git a/tools/clddb-tool/clddb-tool.py b/tools/clddb-tool/clddb-tool.py
777f42
new file mode 100644
777f42
index 00000000..8a661318
777f42
--- /dev/null
777f42
+++ b/tools/clddb-tool/clddb-tool.py
777f42
@@ -0,0 +1,266 @@
777f42
+#!/usr/bin/python3
777f42
+"""Tool for manipulating the nfsdcld sqlite database
777f42
+"""
777f42
+
777f42
+__copyright__ = """
777f42
+Copyright (C) 2019 Scott Mayhew <smayhew@redhat.com>
777f42
+
777f42
+This program is free software; you can redistribute it and/or
777f42
+modify it under the terms of the GNU General Public License
777f42
+as published by the Free Software Foundation; either version 2
777f42
+of the License, or (at your option) any later version.
777f42
+
777f42
+This program is distributed in the hope that it will be useful,
777f42
+but WITHOUT ANY WARRANTY; without even the implied warranty of
777f42
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
777f42
+GNU General Public License for more details.
777f42
+
777f42
+You should have received a copy of the GNU General Public License
777f42
+along with this program; if not, write to the Free Software
777f42
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
777f42
+MA  02110-1301, USA.
777f42
+"""
777f42
+
777f42
+import argparse
777f42
+import os
777f42
+import sqlite3
777f42
+import sys
777f42
+
777f42
+
777f42
+class CldDb():
777f42
+    def __init__(self, path):
777f42
+        self.con = sqlite3.connect(path)
777f42
+        self.con.row_factory = sqlite3.Row
777f42
+        for row in self.con.execute('select value from parameters '
777f42
+                                    'where key = "version"'):
777f42
+            self.version = int(row['value'])
777f42
+        for row in self.con.execute('select * from grace'):
777f42
+            self.current = int(row['current'])
777f42
+            self.recovery = int(row['recovery'])
777f42
+
777f42
+    def __del__(self):
777f42
+        self.con.close()
777f42
+
777f42
+    def __str__(self):
777f42
+        return ('Schema version: {self.version} '
777f42
+                'current epoch: {self.current} '
777f42
+                'recovery epoch: {self.recovery}'.format(self=self))
777f42
+
777f42
+    def _print_clients(self, epoch):
777f42
+        if epoch:
777f42
+            for row in self.con.execute('select * from "rec-{:016x}"'
777f42
+                                        .format(epoch)):
777f42
+                if self.version >= 4:
777f42
+                    if row['princhash'] is not None:
777f42
+                        princhash = row['princhash'].hex()
777f42
+                    else:
777f42
+                        princhash = "(null)"
777f42
+                    print('id = {}, princhash = {}'
777f42
+                          .format(row['id'].decode(), princhash))
777f42
+                else:
777f42
+                    print('id = {}'.format(row['id'].decode()))
777f42
+
777f42
+    def print_current_clients(self):
777f42
+        print('Clients in current epoch:')
777f42
+        self._print_clients(self.current)
777f42
+
777f42
+    def print_recovery_clients(self):
777f42
+        if self.recovery:
777f42
+            print('Clients in recovery epoch:')
777f42
+            self._print_clients(self.recovery)
777f42
+
777f42
+    def check_bad_table_names(self):
777f42
+        bad_names = []
777f42
+        for row in self.con.execute('select name from sqlite_master '
777f42
+                                    'where type = "table" '
777f42
+                                    'and name like "%rec-%" '
777f42
+                                    'and length(name) < 20'):
777f42
+            bad_names.append(row['name'])
777f42
+        return bad_names
777f42
+
777f42
+    def fix_bad_table_names(self):
777f42
+        try:
777f42
+            self.con.execute('begin exclusive transaction')
777f42
+            bad_names = self.check_bad_table_names()
777f42
+            for bad_name in bad_names:
777f42
+                epoch = int(bad_name.split('-')[1], base=16)
777f42
+                if epoch == self.current or epoch == self.recovery:
777f42
+                    if epoch == self.current:
777f42
+                        which = 'current'
777f42
+                    else:
777f42
+                        which = 'recovery'
777f42
+                    print('found invalid table name {} for {} epoch'
777f42
+                          .format(bad_name, which))
777f42
+                    self.con.execute('alter table "{}" '
777f42
+                                     'rename to "rec-{:016x}"'
777f42
+                                     .format(bad_name, epoch))
777f42
+                    print('renamed to rec-{:016x}'.format(epoch))
777f42
+                else:
777f42
+                    print('found invalid table name {} for unknown epoch {}'
777f42
+                          .format(bad_name, epoch))
777f42
+                    self.con.execute('drop table "{}"'.format(bad_name))
777f42
+                    print('dropped table {}'.format(bad_name))
777f42
+        except sqlite3.Error:
777f42
+            self.con.rollback()
777f42
+        else:
777f42
+            self.con.commit()
777f42
+
777f42
+    def has_princ_data(self):  # any client row with a principal hash?
777f42
+        if self.version < 4:
777f42
+            return False
777f42
+        for row in self.con.execute('select count(*) '
777f42
+                                    'from "rec-{:016x}" '
777f42
+                                    'where princhash not null'
777f42
+                                    .format(self.current)):
777f42
+            count = row[0]
777f42
+        if self.recovery:  # also count the recovery epoch's own table
777f42
+            for row in self.con.execute('select count(*) '
777f42
+                                        'from "rec-{:016x}" '
777f42
+                                        'where princhash not null'
777f42
+                                        .format(self.recovery)):
777f42
+                count = count + row[0]
777f42
+        if count:
777f42
+            return True
777f42
+        return False
777f42
+
777f42
+    def _downgrade_table_v4_to_v3(self, epoch):
777f42
+        if not self.con.in_transaction:
777f42
+            raise sqlite3.Error
777f42
+        try:
777f42
+            self.con.execute('create table "new_rec-{:016x}" '
777f42
+                             '(id blob primary key)'.format(epoch))
777f42
+            self.con.execute('insert into "new_rec-{:016x}" '
777f42
+                             'select id from "rec-{:016x}"'
777f42
+                             .format(epoch, epoch))
777f42
+            self.con.execute('drop table "rec-{:016x}"'.format(epoch))
777f42
+            self.con.execute('alter table "new_rec-{:016x}" '
777f42
+                             'rename to "rec-{:016x}"'
777f42
+                             .format(epoch, epoch))
777f42
+        except sqlite3.Error:
777f42
+            raise
777f42
+
777f42
+    def downgrade_schema_v4_to_v3(self):
777f42
+        try:
777f42
+            self.con.execute('begin exclusive transaction')
777f42
+            for row in self.con.execute('select value from parameters '
777f42
+                                        'where key = "version"'):
777f42
+                version = int(row['value'])
777f42
+            if version != self.version:
777f42
+                raise sqlite3.Error
777f42
+            for row in self.con.execute('select * from grace'):
777f42
+                current = int(row['current'])
777f42
+                recovery = int(row['recovery'])
777f42
+            if current != self.current:
777f42
+                raise sqlite3.Error
777f42
+            if recovery != self.recovery:
777f42
+                raise sqlite3.Error
777f42
+            self._downgrade_table_v4_to_v3(current)
777f42
+            if recovery:
777f42
+                self._downgrade_table_v4_to_v3(recovery)
777f42
+            self.con.execute('update parameters '
777f42
+                             'set value = "3" '
777f42
+                             'where key = "version"')
777f42
+            self.version = 3
777f42
+        except sqlite3.Error:
777f42
+            self.con.rollback()
777f42
+            print('Downgrade failed')
777f42
+        else:
777f42
+            self.con.commit()
777f42
+            print('Downgrade successful')
777f42
+
777f42
+
777f42
+def nfsdcld_active():
777f42
+    rc = os.system('ps -C nfsdcld >/dev/null 2>/dev/null')
777f42
+    if rc == 0:
777f42
+        return True
777f42
+    return False
777f42
+
777f42
+
777f42
+def fix_table_names_command(db, args):
777f42
+    if nfsdcld_active():
777f42
+        print('Warning: nfsdcld is running!')
777f42
+        ans = input('Continue? ')
777f42
+        if ans.lower() not in ['y', 'yes']:
777f42
+            print('Operation canceled.')
777f42
+            return
777f42
+    bad_names = db.check_bad_table_names()
777f42
+    if not bad_names:
777f42
+        print('No invalid table names found.')
777f42
+        return
777f42
+    db.fix_bad_table_names()
777f42
+
777f42
+
777f42
+def downgrade_schema_command(db, args):
777f42
+    if nfsdcld_active():
777f42
+        print('Warning: nfsdcld is running!')
777f42
+        ans = input('Continue? ')
777f42
+        if ans.lower() not in ['y', 'yes']:
777f42
+            print('Operation canceled')
777f42
+            return
777f42
+    if db.version != 4:
777f42
+        print('Cannot downgrade database from schema version {}.'
777f42
+              .format(db.version))
777f42
+        return
777f42
+    if args.version != 3:
777f42
+        print('Cannot downgrade to version {}.'.format(args.version))
777f42
+        return
777f42
+    bad_names = db.check_bad_table_names()
777f42
+    if bad_names:
777f42
+        print('Invalid table names detected.')
777f42
+        print('Please run "{} fix-table-names" before downgrading the schema.'
777f42
+              .format(sys.argv[0]))
777f42
+        return
777f42
+    if db.has_princ_data():
777f42
+        print('Warning: database has principal data, which will be erased.')
777f42
+        ans = input('Continue? ')
777f42
+        if ans.lower() not in ['y', 'yes']:
777f42
+            print('Operation canceled')
777f42
+            return
777f42
+    db.downgrade_schema_v4_to_v3()
777f42
+
777f42
+
777f42
+def print_command(db, args):
777f42
+    print(str(db))
777f42
+    if not args.summary:
777f42
+        bad_names = db.check_bad_table_names()
777f42
+        if bad_names:
777f42
+            print('Invalid table names detected.')
777f42
+            print('Please run "{} fix-table-names".'.format(sys.argv[0]))
777f42
+            return
777f42
+        db.print_current_clients()
777f42
+        db.print_recovery_clients()
777f42
+
777f42
+
777f42
+def main():
777f42
+    parser = argparse.ArgumentParser()
777f42
+    parser.add_argument('-p', '--path',
777f42
+                        default='/var/lib/nfs/nfsdcld/main.sqlite',
777f42
+                        help='path to the database '
777f42
+                        '(default: /var/lib/nfs/nfsdcld/main.sqlite)')
777f42
+    subparsers = parser.add_subparsers(help='sub-command help')
777f42
+    fix_parser = subparsers.add_parser('fix-table-names',
777f42
+                                       help='fix invalid table names')
777f42
+    fix_parser.set_defaults(func=fix_table_names_command)
777f42
+    downgrade_parser = subparsers.add_parser('downgrade-schema',
777f42
+                                             help='downgrade database schema')
777f42
+    downgrade_parser.add_argument('-v', '--version', type=int, choices=[3],
777f42
+                                  default=3,
777f42
+                                  help='version to downgrade to')
777f42
+    downgrade_parser.set_defaults(func=downgrade_schema_command)
777f42
+    print_parser = subparsers.add_parser('print',
777f42
+                                         help='print database info')
777f42
+    print_parser.add_argument('-s', '--summary', default=False,
777f42
+                              action='store_true',
777f42
+                              help='print summary only')
777f42
+    print_parser.set_defaults(func=print_command)
777f42
+    args = parser.parse_args()
777f42
+    if not os.path.exists(args.path):
777f42
+        return parser.print_usage()
777f42
+    clddb = CldDb(args.path)
777f42
+    return args.func(clddb, args)
777f42
+
777f42
+
777f42
+if __name__ == '__main__':
777f42
+    if len(sys.argv) == 1:
777f42
+        sys.argv.extend(['print', '--summary'])
777f42
+    main()
777f42
diff --git a/utils/Makefile.am b/utils/Makefile.am
777f42
index 0a5b062c..4c930a4b 100644
777f42
--- a/utils/Makefile.am
777f42
+++ b/utils/Makefile.am
777f42
@@ -19,6 +19,10 @@ if CONFIG_MOUNT
777f42
 OPTDIRS += mount
777f42
 endif
777f42
 
777f42
+if CONFIG_NFSDCLD
777f42
+OPTDIRS += nfsdcld
777f42
+endif
777f42
+
777f42
 if CONFIG_NFSDCLTRACK
777f42
 OPTDIRS += nfsdcltrack
777f42
 endif
777f42
diff --git a/utils/exportfs/exportfs.c b/utils/exportfs/exportfs.c
777f42
index cd3c979d..4b9634b7 100644
777f42
--- a/utils/exportfs/exportfs.c
777f42
+++ b/utils/exportfs/exportfs.c
777f42
@@ -644,6 +644,9 @@ out:
777f42
 	return result;
777f42
 }
777f42
 
777f42
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
777f42
+__attribute__((format (printf, 2, 3)))
777f42
+#endif
777f42
 static char
777f42
 dumpopt(char c, char *fmt, ...)
777f42
 {
777f42
diff --git a/utils/mount/fstab.c b/utils/mount/fstab.c
777f42
index eedbddab..8b0aaf1a 100644
777f42
--- a/utils/mount/fstab.c
777f42
+++ b/utils/mount/fstab.c
777f42
@@ -7,6 +7,10 @@
777f42
  * - Moved code to nfs-utils/support/nfs from util-linux/mount.
777f42
  */
777f42
 
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include <config.h>
777f42
+#endif
777f42
+
777f42
 #include <errno.h>
777f42
 #include <stdio.h>
777f42
 #include <fcntl.h>
777f42
diff --git a/utils/mountd/cache.c b/utils/mountd/cache.c
777f42
index a054ce6f..c73e29be 100644
777f42
--- a/utils/mountd/cache.c
777f42
+++ b/utils/mountd/cache.c
777f42
@@ -967,8 +967,7 @@ lookup_export(char *dom, char *path, struct addrinfo *ai)
777f42
 			} else if (found_type == i && found->m_warned == 0) {
777f42
 				xlog(L_WARNING, "%s exported to both %s and %s, "
777f42
 				     "arbitrarily choosing options from first",
777f42
-				     path, found->m_client->m_hostname, exp->m_client->m_hostname,
777f42
-				     dom);
777f42
+				     path, found->m_client->m_hostname, exp->m_client->m_hostname);
777f42
 				found->m_warned = 1;
777f42
 			}
777f42
 		}
777f42
diff --git a/utils/mountd/mountd.c b/utils/mountd/mountd.c
777f42
index 086c39bf..0b891121 100644
777f42
--- a/utils/mountd/mountd.c
777f42
+++ b/utils/mountd/mountd.c
777f42
@@ -209,10 +209,10 @@ killer (int sig)
777f42
 }
777f42
 
777f42
 static void
777f42
-sig_hup (int sig)
777f42
+sig_hup (int UNUSED(sig))
777f42
 {
777f42
 	/* don't exit on SIGHUP */
777f42
-	xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n", sig);
777f42
+	xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n");
777f42
 	return;
777f42
 }
777f42
 
777f42
diff --git a/utils/nfsdcld/Makefile.am b/utils/nfsdcld/Makefile.am
777f42
new file mode 100644
777f42
index 00000000..273d64f1
777f42
--- /dev/null
777f42
+++ b/utils/nfsdcld/Makefile.am
777f42
@@ -0,0 +1,15 @@
777f42
+## Process this file with automake to produce Makefile.in
777f42
+
777f42
+man8_MANS	= nfsdcld.man
777f42
+EXTRA_DIST	= $(man8_MANS)
777f42
+
777f42
+AM_CFLAGS	+= -D_LARGEFILE64_SOURCE
777f42
+sbin_PROGRAMS	= nfsdcld
777f42
+
777f42
+nfsdcld_SOURCES = nfsdcld.c sqlite.c legacy.c
777f42
+nfsdcld_LDADD = ../../support/nfs/libnfs.la $(LIBEVENT) $(LIBSQLITE) $(LIBCAP)
777f42
+
777f42
+noinst_HEADERS	= sqlite.h cld-internal.h legacy.h
777f42
+
777f42
+MAINTAINERCLEANFILES = Makefile.in
777f42
+
777f42
diff --git a/utils/nfsdcld/cld-internal.h b/utils/nfsdcld/cld-internal.h
777f42
new file mode 100644
777f42
index 00000000..05f01be2
777f42
--- /dev/null
777f42
+++ b/utils/nfsdcld/cld-internal.h
777f42
@@ -0,0 +1,44 @@
777f42
+/*
777f42
+ * This program is free software; you can redistribute it and/or
777f42
+ * modify it under the terms of the GNU General Public License
777f42
+ * as published by the Free Software Foundation; either version 2
777f42
+ * of the License, or (at your option) any later version.
777f42
+ *
777f42
+ * This program is distributed in the hope that it will be useful,
777f42
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
777f42
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
777f42
+ * GNU General Public License for more details.
777f42
+ *
777f42
+ * You should have received a copy of the GNU General Public License
777f42
+ * along with this program; if not, write to the Free Software
777f42
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
777f42
+ * Boston, MA 02110-1301, USA.
777f42
+ */
777f42
+
777f42
+#ifndef _CLD_INTERNAL_H_
777f42
+#define _CLD_INTERNAL_H_
777f42
+
777f42
+#if CLD_UPCALL_VERSION >= 2
777f42
+#define UPCALL_VERSION		2
777f42
+#else
777f42
+#define UPCALL_VERSION		1
777f42
+#endif
777f42
+
777f42
+struct cld_client {
777f42
+	int			cl_fd;
777f42
+	struct event		cl_event;
777f42
+	union {
777f42
+		struct cld_msg		cl_msg;
777f42
+#if UPCALL_VERSION >= 2
777f42
+		struct cld_msg_v2	cl_msg_v2;
777f42
+#endif
777f42
+	} cl_u;
777f42
+};
777f42
+
777f42
+uint64_t current_epoch;
777f42
+uint64_t recovery_epoch;
777f42
+int first_time;
777f42
+int num_cltrack_records;
777f42
+int num_legacy_records;
777f42
+
777f42
+#endif /* _CLD_INTERNAL_H_ */
777f42
diff --git a/utils/nfsdcld/legacy.c b/utils/nfsdcld/legacy.c
777f42
new file mode 100644
777f42
index 00000000..3c6bea6c
777f42
--- /dev/null
777f42
+++ b/utils/nfsdcld/legacy.c
777f42
@@ -0,0 +1,185 @@
777f42
+/*
777f42
+ * This program is free software; you can redistribute it and/or
777f42
+ * modify it under the terms of the GNU General Public License
777f42
+ * as published by the Free Software Foundation; either version 2
777f42
+ * of the License, or (at your option) any later version.
777f42
+ *
777f42
+ * This program is distributed in the hope that it will be useful,
777f42
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
777f42
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
777f42
+ * GNU General Public License for more details.
777f42
+ *
777f42
+ * You should have received a copy of the GNU General Public License
777f42
+ * along with this program; if not, write to the Free Software
777f42
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
777f42
+ * Boston, MA 02110-1301, USA.
777f42
+ */
777f42
+
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include <config.h>
777f42
+#endif
777f42
+
777f42
+#include <stdio.h>
777f42
+#include <dirent.h>
777f42
+#include <string.h>
777f42
+#include <unistd.h>
777f42
+#include <stdint.h>
777f42
+#include <fcntl.h>
777f42
+#include <errno.h>
777f42
+#include <sys/types.h>
777f42
+#include <sys/stat.h>
777f42
+#include <limits.h>
777f42
+#include "cld.h"
777f42
+#include "sqlite.h"
777f42
+#include "xlog.h"
777f42
+#include "legacy.h"
777f42
+
777f42
+#define NFSD_RECDIR_FILE "/proc/fs/nfsd/nfsv4recoverydir"
777f42
+
777f42
+/*
777f42
+ * Loads client records from the v4recovery directory into the database.
777f42
+ * Records are prefixed with the string "hash:" and include the '\0' byte.
777f42
+ *
777f42
+ * Called during database initialization as part of a one-time "upgrade".
777f42
+ */
777f42
+void
777f42
+legacy_load_clients_from_recdir(int *num_records)
777f42
+{
777f42
+	int fd;
777f42
+	DIR *v4recovery;
777f42
+	struct dirent *entry;
777f42
+	char recdirname[PATH_MAX + 1] = { 0 };
777f42
+	char buf[NFS4_OPAQUE_LIMIT];
777f42
+	struct stat st;
777f42
+	char *nl;
777f42
+
777f42
+	fd = open(NFSD_RECDIR_FILE, O_RDONLY);
777f42
+	if (fd < 0) {
777f42
+		xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE);
777f42
+		return;
777f42
+	}
777f42
+	if (read(fd, recdirname, PATH_MAX) < 0) {
777f42
+		xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE);
777f42
+		close(fd);	/* don't leak the descriptor on a failed read */
777f42
+		return;
777f42
+	}
777f42
+	close(fd);
777f42
+	/* not null-terminated by read(); the zeroed spare byte stops strchr */
777f42
+	nl = strchr(recdirname, '\n');
777f42
+	if (!nl)
777f42
+		return;
777f42
+	*nl = '\0';
777f42
+	if (stat(recdirname, &st) < 0) {
777f42
+		xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno);
777f42
+		return;
777f42
+	}
777f42
+	if (!S_ISDIR(st.st_mode)) {
777f42
+		xlog(D_GENERAL, "%s is not a directory: mode=0%o", recdirname, st.st_mode);
777f42
+		return;
777f42
+	}
777f42
+	v4recovery = opendir(recdirname);
777f42
+	if (!v4recovery)
777f42
+		return;
777f42
+	while ((entry = readdir(v4recovery))) {
777f42
+		int ret;
777f42
+
777f42
+		/* skip "." and ".." */
777f42
+		if (entry->d_name[0] == '.') {
777f42
+			switch (entry->d_name[1]) {
777f42
+			case '\0':
777f42
+				continue;
777f42
+			case '.':
777f42
+				if (entry->d_name[2] == '\0')
777f42
+					continue;
777f42
+			}
777f42
+		}
777f42
+		/* prefix legacy records with the string "hash:" */
777f42
+		ret = snprintf(buf, sizeof(buf), "hash:%s", entry->d_name);
777f42
+		/* if there's a problem, then skip this entry */
777f42
+		if (ret < 0 || (size_t)ret >= sizeof(buf)) {
777f42
+			xlog(L_WARNING, "%s: unable to build client string for %s!",
777f42
+				__func__, entry->d_name);
777f42
+			continue;
777f42
+		}
777f42
+		/* legacy client records need to include the null terminator */
777f42
+		ret = sqlite_insert_client((unsigned char *)buf, strlen(buf) + 1);
777f42
+		if (ret)
777f42
+			xlog(L_WARNING, "%s: unable to insert %s: %d", __func__,
777f42
+				entry->d_name, ret);
777f42
+		else
777f42
+			(*num_records)++;
777f42
+	}
777f42
+	closedir(v4recovery);
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Cleans out the v4recovery directory.
777f42
+ *
777f42
+ * Called upon receipt of the first "GraceDone" upcall only.
777f42
+ */
777f42
+void
777f42
+legacy_clear_recdir(void)
777f42
+{
777f42
+	int fd;
777f42
+	DIR *v4recovery;
777f42
+	struct dirent *entry;
777f42
+	char recdirname[PATH_MAX + 1] = { 0 };
777f42
+	char dirname[PATH_MAX];
777f42
+	struct stat st;
777f42
+	char *nl;
777f42
+
777f42
+	fd = open(NFSD_RECDIR_FILE, O_RDONLY);
777f42
+	if (fd < 0) {
777f42
+		xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE);
777f42
+		return;
777f42
+	}
777f42
+	if (read(fd, recdirname, PATH_MAX) < 0) {
777f42
+		xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE);
777f42
+		close(fd);	/* don't leak the descriptor on a failed read */
777f42
+		return;
777f42
+	}
777f42
+	close(fd);
777f42
+	/* not null-terminated by read(); the zeroed spare byte stops strchr */
777f42
+	nl = strchr(recdirname, '\n');
777f42
+	if (!nl)
777f42
+		return;
777f42
+	*nl = '\0';
777f42
+	if (stat(recdirname, &st) < 0) {
777f42
+		xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno);
777f42
+		return;
777f42
+	}
777f42
+	if (!S_ISDIR(st.st_mode)) {
777f42
+		xlog(D_GENERAL, "%s is not a directory: mode=0%o", recdirname, st.st_mode);
777f42
+		return;
777f42
+	}
777f42
+	v4recovery = opendir(recdirname);
777f42
+	if (!v4recovery)
777f42
+		return;
777f42
+	while ((entry = readdir(v4recovery))) {
777f42
+		int len;
777f42
+
777f42
+		/* skip "." and ".." */
777f42
+		if (entry->d_name[0] == '.') {
777f42
+			switch (entry->d_name[1]) {
777f42
+			case '\0':
777f42
+				continue;
777f42
+			case '.':
777f42
+				if (entry->d_name[2] == '\0')
777f42
+					continue;
777f42
+			}
777f42
+		}
777f42
+		len = snprintf(dirname, sizeof(dirname), "%s/%s", recdirname,
777f42
+				entry->d_name);
777f42
+		/* if there's a problem, then skip this entry */
777f42
+		if (len < 0 || (size_t)len >= sizeof(dirname)) {
777f42
+			xlog(L_WARNING, "%s: unable to build filename for %s!",
777f42
+				__func__, entry->d_name);
777f42
+			continue;
777f42
+		}
777f42
+		len = rmdir(dirname);
777f42
+		if (len)
777f42
+			xlog(L_WARNING, "%s: unable to rmdir %s: %d", __func__,
777f42
+				dirname, errno);
777f42
+	}
777f42
+	closedir(v4recovery);
777f42
+}
777f42
diff --git a/utils/nfsdcld/legacy.h b/utils/nfsdcld/legacy.h
777f42
new file mode 100644
777f42
index 00000000..8988f6e8
777f42
--- /dev/null
777f42
+++ b/utils/nfsdcld/legacy.h
777f42
@@ -0,0 +1,24 @@
777f42
+/*
777f42
+ * This program is free software; you can redistribute it and/or
777f42
+ * modify it under the terms of the GNU General Public License
777f42
+ * as published by the Free Software Foundation; either version 2
777f42
+ * of the License, or (at your option) any later version.
777f42
+ *
777f42
+ * This program is distributed in the hope that it will be useful,
777f42
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
777f42
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
777f42
+ * GNU General Public License for more details.
777f42
+ *
777f42
+ * You should have received a copy of the GNU General Public License
777f42
+ * along with this program; if not, write to the Free Software
777f42
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
777f42
+ * Boston, MA 02110-1301, USA.
777f42
+ */
777f42
+
777f42
+#ifndef _LEGACY_H_
777f42
+#define _LEGACY_H_
777f42
+
777f42
+void legacy_load_clients_from_recdir(int *);
777f42
+void legacy_clear_recdir(void);
777f42
+
777f42
+#endif /* _LEGACY_H_ */
777f42
diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c
777f42
new file mode 100644
777f42
index 00000000..2ad10019
777f42
--- /dev/null
777f42
+++ b/utils/nfsdcld/nfsdcld.c
777f42
@@ -0,0 +1,866 @@
777f42
+/*
777f42
+ * nfsdcld.c -- NFSv4 client name tracking daemon
777f42
+ *
777f42
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
777f42
+ *
777f42
+ * This program is free software; you can redistribute it and/or
777f42
+ * modify it under the terms of the GNU General Public License
777f42
+ * as published by the Free Software Foundation; either version 2
777f42
+ * of the License, or (at your option) any later version.
777f42
+ *
777f42
+ * This program is distributed in the hope that it will be useful,
777f42
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
777f42
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
777f42
+ * GNU General Public License for more details.
777f42
+ *
777f42
+ * You should have received a copy of the GNU General Public License
777f42
+ * along with this program; if not, write to the Free Software
777f42
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
777f42
+ * Boston, MA 02110-1301, USA.
777f42
+ */
777f42
+
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include "config.h"
777f42
+#endif /* HAVE_CONFIG_H */
777f42
+
777f42
+#include <errno.h>
777f42
+#include <event.h>
777f42
+#include <stdbool.h>
777f42
+#include <getopt.h>
777f42
+#include <string.h>
777f42
+#include <sys/stat.h>
777f42
+#include <sys/types.h>
777f42
+#include <fcntl.h>
777f42
+#include <unistd.h>
777f42
+#include <libgen.h>
777f42
+#include <sys/inotify.h>
777f42
+#ifdef HAVE_SYS_CAPABILITY_H
777f42
+#include <sys/prctl.h>
777f42
+#include <sys/capability.h>
777f42
+#endif
777f42
+
777f42
+#include "xlog.h"
777f42
+#include "nfslib.h"
777f42
+#include "cld.h"
777f42
+#include "cld-internal.h"
777f42
+#include "sqlite.h"
777f42
+#include "../mount/version.h"
777f42
+#include "conffile.h"
777f42
+#include "legacy.h"
777f42
+
777f42
+#ifndef DEFAULT_PIPEFS_DIR
777f42
+#define DEFAULT_PIPEFS_DIR NFS_STATEDIR "/rpc_pipefs"
777f42
+#endif
777f42
+
777f42
+#define DEFAULT_CLD_PATH	"/nfsd/cld"
777f42
+
777f42
+#ifndef CLD_DEFAULT_STORAGEDIR
777f42
+#define CLD_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcld"
777f42
+#endif
777f42
+
777f42
+#define NFSD_END_GRACE_FILE "/proc/fs/nfsd/v4_end_grace"
777f42
+
777f42
+/* private data structures */
777f42
+
777f42
+/* global variables */
777f42
+static char pipefs_dir[PATH_MAX] = DEFAULT_PIPEFS_DIR;
777f42
+static char pipepath[PATH_MAX];
777f42
+static int 		inotify_fd = -1;
777f42
+static struct event	pipedir_event;
777f42
+static bool old_kernel = false;
777f42
+
777f42
+static struct option longopts[] =
777f42
+{
777f42
+	{ "help", 0, NULL, 'h' },
777f42
+	{ "foreground", 0, NULL, 'F' },
777f42
+	{ "debug", 0, NULL, 'd' },
777f42
+	{ "pipefsdir", 1, NULL, 'p' },
777f42
+	{ "storagedir", 1, NULL, 's' },
777f42
+	{ NULL, 0, 0, 0 },
777f42
+};
777f42
+
777f42
+/* forward declarations */
777f42
+static void cldcb(int UNUSED(fd), short which, void *data);
777f42
+
777f42
+static void
777f42
+usage(char *progname)
777f42
+{
777f42
+	printf("%s [ -hFd ] [ -p pipefsdir ] [ -s storagedir ]\n", progname);
777f42
+}
777f42
+
777f42
+static int
777f42
+cld_set_caps(void)
777f42
+{
777f42
+	int ret = 0;
777f42
+#ifdef HAVE_SYS_CAPABILITY_H
777f42
+	unsigned long i;
777f42
+	cap_t caps;
777f42
+
777f42
+	if (getuid() != 0) {
777f42
+		xlog(L_ERROR, "Not running as root. Daemon won't be able to "
777f42
+			      "open the pipe after dropping capabilities!");
777f42
+		return -EINVAL;
777f42
+	}
777f42
+
777f42
+	/* prune the bounding set to nothing */
777f42
+	for (i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0 ; ++i) {
777f42
+		ret = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
777f42
+		if (ret) {
777f42
+			xlog(L_ERROR, "Unable to prune capability %lu from "
777f42
+				      "bounding set: %m", i);
777f42
+			return -errno;
777f42
+		}
777f42
+	}
777f42
+
777f42
+	/* get a blank capset */
777f42
+	caps = cap_init();
777f42
+	if (caps == NULL) {
777f42
+		xlog(L_ERROR, "Unable to get blank capability set: %m");
777f42
+		return -errno;
777f42
+	}
777f42
+
777f42
+	/* reset the process capabilities */
777f42
+	if (cap_set_proc(caps) != 0) {
777f42
+		xlog(L_ERROR, "Unable to set process capabilities: %m");
777f42
+		ret = -errno;
777f42
+	}
777f42
+	cap_free(caps);
777f42
+#endif
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX + 1) /* +1: name's NUL */
777f42
+
777f42
+static int
777f42
+cld_pipe_open(struct cld_client *clnt)
777f42
+{
777f42
+	int fd;
777f42
+
777f42
+	xlog(D_GENERAL, "%s: opening upcall pipe %s", __func__, pipepath);
777f42
+	fd = open(pipepath, O_RDWR, 0);
777f42
+	if (fd < 0) {
777f42
+		xlog(D_GENERAL, "%s: open of %s failed: %m", __func__, pipepath);
777f42
+		return -errno;
777f42
+	}
777f42
+
777f42
+	if (event_initialized(&clnt->cl_event))
777f42
+		event_del(&clnt->cl_event);
777f42
+	if (clnt->cl_fd >= 0)
777f42
+		close(clnt->cl_fd);
777f42
+
777f42
+	clnt->cl_fd = fd;
777f42
+	event_set(&clnt->cl_event, clnt->cl_fd, EV_READ, cldcb, clnt);
777f42
+	/* event_add is done by the caller */
777f42
+	return 0;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * libevent callback for the inotify descriptor that watches the directory
777f42
+ * holding the upcall pipe.  Reads the pending inotify events and, when one
777f42
+ * of them names our pipe, (re)opens the pipe and re-arms its read event.
777f42
+ *
777f42
+ * A single read() can return several packed events, so every complete event
777f42
+ * in the buffer is examined rather than only the first one.  The inotify
777f42
+ * event itself is always re-added before returning.
777f42
+ */
777f42
+static void
777f42
+cld_inotify_cb(int UNUSED(fd), short which, void *data)
777f42
+{
777f42
+	int ret;
777f42
+	size_t elen, plen;
777f42
+	ssize_t rret;
777f42
+	char evbuf[INOTIFY_EVENT_MAX];
777f42
+	char *dirc = NULL, *pname, *ptr, *end;
777f42
+	struct inotify_event *event;
777f42
+	struct cld_client *clnt = data;
777f42
+
777f42
+	if (which != EV_READ)
777f42
+		return;
777f42
+
777f42
+	xlog(D_GENERAL, "%s: called for EV_READ", __func__);
777f42
+
777f42
+	dirc = strndup(pipepath, PATH_MAX);
777f42
+	if (!dirc) {
777f42
+		xlog(L_ERROR, "%s: unable to allocate memory", __func__);
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	rret = read(inotify_fd, evbuf, INOTIFY_EVENT_MAX);
777f42
+	if (rret < 0) {
777f42
+		xlog(L_ERROR, "%s: read from inotify fd failed: %m", __func__);
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	pname = basename(dirc);
777f42
+	plen = strlen(pname);
777f42
+	end = evbuf + rret;
777f42
+
777f42
+	/* walk every event header that fits in the data actually read */
777f42
+	for (ptr = evbuf; (size_t)(end - ptr) >= sizeof(*event);
777f42
+	     ptr += sizeof(*event) + event->len) {
777f42
+		event = (struct inotify_event *)ptr;
777f42
+
777f42
+		/* never trust a name that would run past the end of the read */
777f42
+		if (event->len > (size_t)(end - ptr) - sizeof(*event)) {
777f42
+			xlog(L_ERROR, "%s: truncated inotify event", __func__);
777f42
+			break;
777f42
+		}
777f42
+
777f42
+		/* check to see if we have a filename in the event */
777f42
+		if (!event->len) {
777f42
+			xlog(D_GENERAL, "%s: no filename in inotify event",
777f42
+				__func__);
777f42
+			continue;
777f42
+		}
777f42
+
777f42
+		elen = strnlen(event->name, event->len);
777f42
+
777f42
+		/* does the filename match our pipe? */
777f42
+		if (plen != elen || memcmp(pname, event->name, elen)) {
777f42
+			xlog(D_GENERAL, "%s: wrong filename (%s)", __func__,
777f42
+					event->name);
777f42
+			continue;
777f42
+		}
777f42
+
777f42
+		ret = cld_pipe_open(clnt);
777f42
+		if (ret == 0) {
777f42
+			/* readd the event for the cl_event pipe */
777f42
+			event_add(&clnt->cl_event, NULL);
777f42
+		} else if (ret != -ENOENT) {
777f42
+			/* anything else is fatal */
777f42
+			xlog(L_FATAL, "%s: unable to open new pipe (%d). Aborting.",
777f42
+				__func__, ret);
777f42
+			exit(ret);
777f42
+		}
777f42
+		/* on -ENOENT the pipe disappeared again; wait for it to come back */
777f42
+		break;
777f42
+	}
777f42
+
777f42
+out:
777f42
+	event_add(&pipedir_event, NULL);
777f42
+	free(dirc);
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Create the inotify descriptor (stored in the file-scope inotify_fd) and
777f42
+ * watch the directory that contains the upcall pipe for IN_CREATE events.
777f42
+ *
777f42
+ * Returns 0 on success or a negative errno value on failure.  On failure of
777f42
+ * the watch the inotify descriptor is closed again.
777f42
+ */
777f42
+static int
777f42
+cld_inotify_setup(void)
777f42
+{
777f42
+	int ret = 0;
777f42
+	char *dirc, *dname;
777f42
+
777f42
+	dirc = strndup(pipepath, PATH_MAX);
777f42
+	if (!dirc) {
777f42
+		ret = -ENOMEM;
777f42
+		xlog_err("%s: unable to allocate memory", __func__);
777f42
+		goto out_free;
777f42
+	}
777f42
+
777f42
+	dname = dirname(dirc);
777f42
+
777f42
+	inotify_fd = inotify_init();
777f42
+	if (inotify_fd < 0) {
777f42
+		/* capture errno before the logging call can disturb it */
777f42
+		ret = -errno;
777f42
+		xlog_err("%s: inotify_init failed: %m", __func__);
777f42
+		goto out_free;
777f42
+	}
777f42
+
777f42
+	/*
777f42
+	 * A successful inotify_add_watch() returns a non-negative watch
777f42
+	 * descriptor, so its value must not be handed back as the status.
777f42
+	 */
777f42
+	if (inotify_add_watch(inotify_fd, dname, IN_CREATE) < 0) {
777f42
+		ret = -errno;
777f42
+		xlog_err("%s: inotify_add_watch failed: %m", __func__);
777f42
+		close(inotify_fd);
777f42
+	}
777f42
+
777f42
+out_free:
777f42
+	free(dirc);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Install the inotify watch on the pipe's parent directory, then attempt to
777f42
+ * open the pipe itself.
777f42
+ *
777f42
+ * A missing pipe (-ENOENT) is not an error: the inotify callback opens it
777f42
+ * once it shows up.  Any other open failure closes the inotify descriptor
777f42
+ * and is returned to the caller.  Otherwise the inotify read event is
777f42
+ * registered and 0 is returned.
777f42
+ */
777f42
+static int
777f42
+cld_pipe_init(struct cld_client *clnt)
777f42
+{
777f42
+	int rc;
777f42
+
777f42
+	xlog(D_GENERAL, "%s: init pipe handlers", __func__);
777f42
+
777f42
+	rc = cld_inotify_setup();
777f42
+	if (rc != 0)
777f42
+		return rc;
777f42
+
777f42
+	clnt->cl_fd = -1;
777f42
+	rc = cld_pipe_open(clnt);
777f42
+	if (rc == 0) {
777f42
+		/* pipe is open: arm its read event */
777f42
+		event_add(&clnt->cl_event, NULL);
777f42
+	} else if (rc == -ENOENT) {
777f42
+		/* not there yet -- cld_inotify_cb will pick it up later */
777f42
+		rc = 0;
777f42
+	} else {
777f42
+		/* every other failure is fatal for initialization */
777f42
+		close(inotify_fd);
777f42
+		return rc;
777f42
+	}
777f42
+
777f42
+	/* arm the read event on the inotify descriptor */
777f42
+	event_set(&pipedir_event, inotify_fd, EV_READ, cld_inotify_cb, clnt);
777f42
+	event_add(&pipedir_event, NULL);
777f42
+	return rc;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Older kernels will not tell nfsdcld when a grace period has started.
777f42
+ * Therefore we have to peek at the /proc/fs/nfsd/v4_end_grace file to
777f42
+ * see if nfsd is in grace.  We have to do this for create and remove
777f42
+ * upcalls to ensure that the correct table is being updated - otherwise
777f42
+ * we could lose client records when the grace period is lifted.
777f42
+ *
777f42
+ * Returns 0 when nothing needs to be done or the grace start succeeded,
777f42
+ * 1 when the file cannot be opened or read, and otherwise whatever
777f42
+ * sqlite_grace_start() returns.
777f42
+ */
777f42
+static int
777f42
+cld_check_grace_period(void)
777f42
+{
777f42
+	int fd, ret = 0;
777f42
+	/* initialized so that an empty read is never mistaken for 'N' */
777f42
+	char c = '\0';
777f42
+
777f42
+	if (!old_kernel)
777f42
+		return 0;
777f42
+	if (recovery_epoch != 0)
777f42
+		return 0;
777f42
+	fd = open(NFSD_END_GRACE_FILE, O_RDONLY);
777f42
+	if (fd < 0) {
777f42
+		xlog(L_WARNING, "Unable to open %s: %m",
777f42
+			NFSD_END_GRACE_FILE);
777f42
+		return 1;
777f42
+	}
777f42
+	if (read(fd, &c, 1) < 0) {
777f42
+		/* log first so %m still reflects the read failure */
777f42
+		xlog(L_WARNING, "Unable to read from %s: %m",
777f42
+			NFSD_END_GRACE_FILE);
777f42
+		close(fd);
777f42
+		return 1;
777f42
+	}
777f42
+	close(fd);
777f42
+	if (c == 'N') {
777f42
+		xlog(L_WARNING, "nfsd is in grace but didn't send a gracestart upcall, "
777f42
+			"please update the kernel");
777f42
+		ret = sqlite_grace_start();
777f42
+	}
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+#if UPCALL_VERSION >= 2
777f42
+/*
777f42
+ * Return the size of the message structure that matches the upcall version
777f42
+ * recorded in the message header.  An unknown version is logged as fatal
777f42
+ * and terminates the process.
777f42
+ */
777f42
+static ssize_t cld_message_size(void *msg)
777f42
+{
777f42
+	struct cld_msg_hdr *header = msg;
777f42
+
777f42
+	if (header->cm_vers == 1)
777f42
+		return sizeof(struct cld_msg);
777f42
+	if (header->cm_vers == 2)
777f42
+		return sizeof(struct cld_msg_v2);
777f42
+
777f42
+	xlog(L_FATAL, "%s invalid upcall version %d", __func__,
777f42
+	     header->cm_vers);
777f42
+	exit(-EINVAL);
777f42
+}
777f42
+#else
777f42
+/* Only one message layout exists here, so the argument is not inspected. */
777f42
+static ssize_t cld_message_size(void *UNUSED(msg))
777f42
+{
777f42
+	return sizeof(struct cld_msg);
777f42
+}
777f42
+#endif
777f42
+
777f42
+/*
777f42
+ * Answer an unrecognized upcall with -EOPNOTSUPP, then reopen the pipe
777f42
+ * regardless of whether the write went through.  Failure to reopen the
777f42
+ * pipe terminates the process.
777f42
+ */
777f42
+static void
777f42
+cld_not_implemented(struct cld_client *clnt)
777f42
+{
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *reply = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *reply = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t msglen, written;
777f42
+	int err;
777f42
+
777f42
+	xlog(D_GENERAL, "%s: downcalling with not implemented error", __func__);
777f42
+
777f42
+	reply->cm_status = -EOPNOTSUPP;
777f42
+	msglen = cld_message_size(reply);
777f42
+
777f42
+	written = atomicio((void *)write, clnt->cl_fd, reply, msglen);
777f42
+	if (written != msglen)
777f42
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
777f42
+			 __func__, written);
777f42
+
777f42
+	/* reopen pipe, just to be sure */
777f42
+	err = cld_pipe_open(clnt);
777f42
+	if (err == 0)
777f42
+		return;
777f42
+
777f42
+	xlog(L_FATAL, "%s: unable to reopen pipe: %d", __func__, err);
777f42
+	exit(err);
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Reply to a version query: store UPCALL_VERSION in the message, set a zero
777f42
+ * status and write the message back.  If the write comes up short the pipe
777f42
+ * is reopened; failing that, the process exits.
777f42
+ */
777f42
+static void
777f42
+cld_get_version(struct cld_client *clnt)
777f42
+{
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *reply = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *reply = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t msglen, written;
777f42
+	int err;
777f42
+
777f42
+	xlog(D_GENERAL, "%s: version = %u.", __func__, UPCALL_VERSION);
777f42
+
777f42
+	reply->cm_u.cm_version = UPCALL_VERSION;
777f42
+	reply->cm_status = 0;
777f42
+
777f42
+	msglen = cld_message_size(reply);
777f42
+	xlog(D_GENERAL, "Doing downcall with status %d", reply->cm_status);
777f42
+	written = atomicio((void *)write, clnt->cl_fd, reply, msglen);
777f42
+	if (written == msglen)
777f42
+		return;
777f42
+
777f42
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
777f42
+		 __func__, written);
777f42
+	err = cld_pipe_open(clnt);
777f42
+	if (err) {
777f42
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
777f42
+				__func__, err);
777f42
+		exit(err);
777f42
+	}
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Handle a create upcall: after the grace-period check, insert the client
777f42
+ * record (with the principal hash for version 2 and later messages) and
777f42
+ * downcall with 0 on success or -EREMOTEIO on any failure.
777f42
+ */
777f42
+static void
777f42
+cld_create(struct cld_client *clnt)
777f42
+{
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t msglen, written;
777f42
+	int err;
777f42
+
777f42
+	err = cld_check_grace_period();
777f42
+	if (err == 0) {
777f42
+		xlog(D_GENERAL, "%s: create client record.", __func__);
777f42
+
777f42
+#if UPCALL_VERSION >= 2
777f42
+		if (msg->cm_vers < 2)
777f42
+			err = sqlite_insert_client(msg->cm_u.cm_name.cn_id,
777f42
+						   msg->cm_u.cm_name.cn_len);
777f42
+		else
777f42
+			err = sqlite_insert_client_and_princhash(
777f42
+					msg->cm_u.cm_clntinfo.cc_name.cn_id,
777f42
+					msg->cm_u.cm_clntinfo.cc_name.cn_len,
777f42
+					msg->cm_u.cm_clntinfo.cc_princhash.cp_data,
777f42
+					msg->cm_u.cm_clntinfo.cc_princhash.cp_len);
777f42
+#else
777f42
+		err = sqlite_insert_client(msg->cm_u.cm_name.cn_id,
777f42
+					   msg->cm_u.cm_name.cn_len);
777f42
+#endif
777f42
+	}
777f42
+
777f42
+	/* every failure is reported to the kernel as -EREMOTEIO */
777f42
+	if (err)
777f42
+		msg->cm_status = -EREMOTEIO;
777f42
+	else
777f42
+		msg->cm_status = 0;
777f42
+
777f42
+	msglen = cld_message_size(msg);
777f42
+	xlog(D_GENERAL, "Doing downcall with status %d", msg->cm_status);
777f42
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
777f42
+	if (written == msglen)
777f42
+		return;
777f42
+
777f42
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
777f42
+		 __func__, written);
777f42
+	err = cld_pipe_open(clnt);
777f42
+	if (err) {
777f42
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
777f42
+				__func__, err);
777f42
+		exit(err);
777f42
+	}
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Handle a remove upcall: after the grace-period check, delete the client
777f42
+ * record and downcall with 0 on success or -EREMOTEIO on any failure.
777f42
+ */
777f42
+static void
777f42
+cld_remove(struct cld_client *clnt)
777f42
+{
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t msglen, written;
777f42
+	int err;
777f42
+
777f42
+	err = cld_check_grace_period();
777f42
+	if (err == 0) {
777f42
+		xlog(D_GENERAL, "%s: remove client record.", __func__);
777f42
+
777f42
+		err = sqlite_remove_client(msg->cm_u.cm_name.cn_id,
777f42
+					   msg->cm_u.cm_name.cn_len);
777f42
+	}
777f42
+
777f42
+	/* every failure is reported to the kernel as -EREMOTEIO */
777f42
+	if (err)
777f42
+		msg->cm_status = -EREMOTEIO;
777f42
+	else
777f42
+		msg->cm_status = 0;
777f42
+
777f42
+	msglen = cld_message_size(msg);
777f42
+	xlog(D_GENERAL, "%s: downcall with status %d", __func__,
777f42
+			msg->cm_status);
777f42
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
777f42
+	if (written == msglen)
777f42
+		return;
777f42
+
777f42
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
777f42
+		 __func__, written);
777f42
+	err = cld_pipe_open(clnt);
777f42
+	if (err) {
777f42
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
777f42
+				__func__, err);
777f42
+		exit(err);
777f42
+	}
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Handle a check upcall: look the client up in the database and downcall
777f42
+ * with 0 when that succeeds or -EACCES on any failure.
777f42
+ */
777f42
+static void
777f42
+cld_check(struct cld_client *clnt)
777f42
+{
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t msglen, written;
777f42
+	int err = 0;
777f42
+
777f42
+	/*
777f42
+	 * A check upcall only ever comes from an old kernel.  If we are not
777f42
+	 * in grace yet, this is the first client attempting a reclaim, so
777f42
+	 * note it in the log and let sqlite_grace_start() advance the epoch
777f42
+	 * numbers before doing the lookup.
777f42
+	 */
777f42
+	if (recovery_epoch == 0) {
777f42
+		xlog(D_GENERAL, "%s: received a check upcall, please update the kernel",
777f42
+			__func__);
777f42
+		err = sqlite_grace_start();
777f42
+	}
777f42
+
777f42
+	if (err == 0) {
777f42
+		xlog(D_GENERAL, "%s: check client record", __func__);
777f42
+
777f42
+		err = sqlite_check_client(msg->cm_u.cm_name.cn_id,
777f42
+					  msg->cm_u.cm_name.cn_len);
777f42
+	}
777f42
+
777f42
+	/* every failure is reported to the kernel as -EACCES */
777f42
+	if (err)
777f42
+		msg->cm_status = -EACCES;
777f42
+	else
777f42
+		msg->cm_status = 0;
777f42
+
777f42
+	msglen = cld_message_size(msg);
777f42
+	xlog(D_GENERAL, "%s: downcall with status %d", __func__,
777f42
+			msg->cm_status);
777f42
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
777f42
+	if (written == msglen)
777f42
+		return;
777f42
+
777f42
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
777f42
+		 __func__, written);
777f42
+	err = cld_pipe_open(clnt);
777f42
+	if (err) {
777f42
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
777f42
+				__func__, err);
777f42
+		exit(err);
777f42
+	}
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Handle a gracedone upcall: end the grace period in the database, perform
777f42
+ * the one-off cleanup of imported records when first_time is set, and
777f42
+ * downcall with 0 on success or -EREMOTEIO on failure.
777f42
+ */
777f42
+static void
777f42
+cld_gracedone(struct cld_client *clnt)
777f42
+{
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t msglen, written;
777f42
+	int err = 0;
777f42
+
777f42
+	/*
777f42
+	 * A "gracedone" upcall arriving while we are not in grace means
777f42
+	 * 1) we must be talking to an old kernel
777f42
+	 * 2) no clients attempted to reclaim
777f42
+	 * Log that, let sqlite_grace_start() advance the epoch numbers, and
777f42
+	 * then carry on as usual.
777f42
+	 */
777f42
+	if (recovery_epoch == 0) {
777f42
+		xlog(D_GENERAL, "%s: received gracedone upcall "
777f42
+			"while not in grace, please update the kernel",
777f42
+			__func__);
777f42
+		err = sqlite_grace_start();
777f42
+	}
777f42
+
777f42
+	if (err == 0) {
777f42
+		xlog(D_GENERAL, "%s: grace done.", __func__);
777f42
+
777f42
+		err = sqlite_grace_done();
777f42
+
777f42
+		/* the first-time cleanup runs whatever sqlite_grace_done() returned */
777f42
+		if (first_time) {
777f42
+			if (num_cltrack_records > 0)
777f42
+				sqlite_delete_cltrack_records();
777f42
+			if (num_legacy_records > 0)
777f42
+				legacy_clear_recdir();
777f42
+			sqlite_first_time_done();
777f42
+			first_time = 0;
777f42
+		}
777f42
+	}
777f42
+
777f42
+	/* every failure is reported to the kernel as -EREMOTEIO */
777f42
+	if (err)
777f42
+		msg->cm_status = -EREMOTEIO;
777f42
+	else
777f42
+		msg->cm_status = 0;
777f42
+
777f42
+	msglen = cld_message_size(msg);
777f42
+	xlog(D_GENERAL, "Doing downcall with status %d", msg->cm_status);
777f42
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
777f42
+	if (written == msglen)
777f42
+		return;
777f42
+
777f42
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
777f42
+		 __func__, written);
777f42
+	err = cld_pipe_open(clnt);
777f42
+	if (err) {
777f42
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
777f42
+				__func__, err);
777f42
+		exit(err);
777f42
+	}
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Callback handed to sqlite_iterate_recovery(): writes the client message
777f42
+ * currently held in clnt to the pipe with status -EINPROGRESS.
777f42
+ * Returns 0 when the whole message was written, -EIO otherwise.
777f42
+ */
777f42
+static int
777f42
+gracestart_callback(struct cld_client *clnt) {
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t msglen, written;
777f42
+
777f42
+	msg->cm_status = -EINPROGRESS;
777f42
+	msglen = cld_message_size(msg);
777f42
+
777f42
+	xlog(D_GENERAL, "Sending client %.*s",
777f42
+			msg->cm_u.cm_name.cn_len, msg->cm_u.cm_name.cn_id);
777f42
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
777f42
+
777f42
+	return (written == msglen) ? 0 : -EIO;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Handle a gracestart upcall: advance the grace epochs, send each recovery
777f42
+ * record to the kernel through gracestart_callback(), and finish with a
777f42
+ * downcall carrying 0 on success or -EREMOTEIO on failure.
777f42
+ */
777f42
+static void
777f42
+cld_gracestart(struct cld_client *clnt)
777f42
+{
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t msglen, written;
777f42
+	int err;
777f42
+
777f42
+	xlog(D_GENERAL, "%s: updating grace epochs", __func__);
777f42
+
777f42
+	err = sqlite_grace_start();
777f42
+	if (err == 0) {
777f42
+		xlog(D_GENERAL, "%s: sending client records to the kernel", __func__);
777f42
+
777f42
+		err = sqlite_iterate_recovery(&gracestart_callback, clnt);
777f42
+	}
777f42
+
777f42
+	/* every failure is reported to the kernel as -EREMOTEIO */
777f42
+	if (err)
777f42
+		msg->cm_status = -EREMOTEIO;
777f42
+	else
777f42
+		msg->cm_status = 0;
777f42
+
777f42
+	msglen = cld_message_size(msg);
777f42
+	xlog(D_GENERAL, "Doing downcall with status %d", msg->cm_status);
777f42
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
777f42
+	if (written == msglen)
777f42
+		return;
777f42
+
777f42
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
777f42
+		 __func__, written);
777f42
+	err = cld_pipe_open(clnt);
777f42
+	if (err) {
777f42
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
777f42
+				__func__, err);
777f42
+		exit(err);
777f42
+	}
777f42
+}
777f42
+
777f42
+/*
777f42
+ * libevent callback for the upcall pipe.  Reads one message into the
777f42
+ * client's buffer, rejects upcall versions newer than UPCALL_VERSION, and
777f42
+ * dispatches on the command.  The pipe event is re-added on every path
777f42
+ * that returns.
777f42
+ */
777f42
+static void
777f42
+cldcb(int UNUSED(fd), short which, void *data)
777f42
+{
777f42
+	struct cld_client *clnt = data;
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+	ssize_t nread;
777f42
+
777f42
+	if (which != EV_READ)
777f42
+		goto rearm;
777f42
+
777f42
+	nread = atomicio(read, clnt->cl_fd, msg, sizeof(*msg));
777f42
+	if (nread <= 0) {
777f42
+		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
777f42
+		cld_pipe_open(clnt);
777f42
+		goto rearm;
777f42
+	}
777f42
+
777f42
+	if (msg->cm_vers > UPCALL_VERSION) {
777f42
+		xlog(L_ERROR, "%s: unsupported upcall version: %hu",
777f42
+				__func__, msg->cm_vers);
777f42
+		cld_pipe_open(clnt);
777f42
+		goto rearm;
777f42
+	}
777f42
+
777f42
+	switch (msg->cm_cmd) {
777f42
+	case Cld_GetVersion:
777f42
+		cld_get_version(clnt);
777f42
+		break;
777f42
+	case Cld_GraceStart:
777f42
+		cld_gracestart(clnt);
777f42
+		break;
777f42
+	case Cld_GraceDone:
777f42
+		cld_gracedone(clnt);
777f42
+		break;
777f42
+	case Cld_Create:
777f42
+		cld_create(clnt);
777f42
+		break;
777f42
+	case Cld_Remove:
777f42
+		cld_remove(clnt);
777f42
+		break;
777f42
+	case Cld_Check:
777f42
+		cld_check(clnt);
777f42
+		break;
777f42
+	default:
777f42
+		xlog(L_WARNING, "%s: command %u is not yet implemented",
777f42
+				__func__, msg->cm_cmd);
777f42
+		cld_not_implemented(clnt);
777f42
+		break;
777f42
+	}
777f42
+
777f42
+rearm:
777f42
+	event_add(&clnt->cl_event, NULL);
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Daemon entry point: load settings from the configuration file and the
777f42
+ * command line, optionally daemonize, call cld_set_caps(), open the
777f42
+ * database, install the pipe and inotify handlers and run the event loop.
777f42
+ */
777f42
+int
777f42
+main(int argc, char **argv)
777f42
+{
777f42
+	struct cld_client clnt;
777f42
+	char *storagedir = CLD_DEFAULT_STORAGEDIR;
777f42
+	char *progname;
777f42
+	char *val;
777f42
+	bool foreground = false;
777f42
+	int opt;
777f42
+	int rc = 0;
777f42
+
777f42
+	first_time = 0;
777f42
+	num_cltrack_records = 0;
777f42
+	num_legacy_records = 0;
777f42
+
777f42
+	memset(&clnt, 0, sizeof(clnt));
777f42
+
777f42
+	progname = strdup(basename(argv[0]));
777f42
+	if (progname == NULL) {
777f42
+		fprintf(stderr, "%s: unable to allocate memory.\n", argv[0]);
777f42
+		return 1;
777f42
+	}
777f42
+
777f42
+	event_init();
777f42
+	xlog_syslog(0);
777f42
+	xlog_stderr(1);
777f42
+
777f42
+	/* configuration file first, so command-line options can override it */
777f42
+	conf_init_file(NFS_CONFFILE);
777f42
+	val = conf_get_str("general", "pipefs-directory");
777f42
+	if (val != NULL)
777f42
+		strlcpy(pipefs_dir, val, sizeof(pipefs_dir));
777f42
+	val = conf_get_str("nfsdcld", "storagedir");
777f42
+	if (val != NULL)
777f42
+		storagedir = val;
777f42
+	rc = conf_get_num("nfsdcld", "debug", 0);
777f42
+	if (rc > 0)
777f42
+		xlog_config(D_ALL, 1);
777f42
+
777f42
+	/* process command-line options */
777f42
+	for (;;) {
777f42
+		opt = getopt_long(argc, argv, "hdFp:s:", longopts, NULL);
777f42
+		if (opt == EOF)
777f42
+			break;
777f42
+
777f42
+		if (opt == 'd') {
777f42
+			xlog_config(D_ALL, 1);
777f42
+		} else if (opt == 'F') {
777f42
+			foreground = true;
777f42
+		} else if (opt == 'p') {
777f42
+			strlcpy(pipefs_dir, optarg, sizeof(pipefs_dir));
777f42
+		} else if (opt == 's') {
777f42
+			storagedir = optarg;
777f42
+		} else {
777f42
+			/* -h and anything unrecognized: print usage and leave */
777f42
+			usage(progname);
777f42
+			return 0;
777f42
+		}
777f42
+	}
777f42
+
777f42
+	strlcpy(pipepath, pipefs_dir, sizeof(pipepath));
777f42
+	strlcat(pipepath, DEFAULT_CLD_PATH, sizeof(pipepath));
777f42
+
777f42
+	xlog_open(progname);
777f42
+	if (!foreground) {
777f42
+		xlog_syslog(1);
777f42
+		xlog_stderr(0);
777f42
+		rc = daemon(0, 0);
777f42
+		if (rc != 0) {
777f42
+			xlog(L_ERROR, "Unable to daemonize: %m");
777f42
+			goto out;
777f42
+		}
777f42
+	}
777f42
+
777f42
+	/* drop all capabilities */
777f42
+	rc = cld_set_caps();
777f42
+	if (rc != 0)
777f42
+		goto out;
777f42
+
777f42
+	/*
777f42
+	 * Check whether root can write to the storagedir without
777f42
+	 * CAP_DAC_OVERRIDE.  EACCES only earns a warning, since an already
777f42
+	 * existing DB might still work.  ENOENT is ignored on the assumption
777f42
+	 * that the maindb init can create the directory.  Any other error
777f42
+	 * is a failure.
777f42
+	 */
777f42
+	if (access(storagedir, W_OK) == -1) {
777f42
+		if (errno == EACCES) {
777f42
+			xlog(L_WARNING, "Storage directory %s is not writable. "
777f42
+					"Should be owned by root and writable "
777f42
+					"by owner!", storagedir);
777f42
+		} else if (errno != ENOENT) {
777f42
+			xlog(L_ERROR, "Unexpected error when checking access "
777f42
+				      "on %s: %m", storagedir);
777f42
+			rc = -errno;
777f42
+			goto out;
777f42
+		}
777f42
+	}
777f42
+
777f42
+	if (linux_version_code() < MAKE_VERSION(4, 20, 0))
777f42
+		old_kernel = true;
777f42
+
777f42
+	/* set up storage db */
777f42
+	rc = sqlite_prepare_dbh(storagedir);
777f42
+	if (rc != 0) {
777f42
+		xlog(L_ERROR, "Failed to open main database: %d", rc);
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	/* set up event handler */
777f42
+	rc = cld_pipe_init(&clnt);
777f42
+	if (rc != 0)
777f42
+		goto out;
777f42
+
777f42
+	xlog(D_GENERAL, "%s: Starting event dispatch handler.", __func__);
777f42
+	rc = event_dispatch();
777f42
+	if (rc < 0)
777f42
+		xlog(L_ERROR, "%s: event_dispatch failed: %m", __func__);
777f42
+
777f42
+	close(clnt.cl_fd);
777f42
+	close(inotify_fd);
777f42
+out:
777f42
+	free(progname);
777f42
+	return rc;
777f42
+}
777f42
diff --git a/utils/nfsdcld/nfsdcld.man b/utils/nfsdcld/nfsdcld.man
777f42
new file mode 100644
777f42
index 00000000..4c2b1e80
777f42
--- /dev/null
777f42
+++ b/utils/nfsdcld/nfsdcld.man
777f42
@@ -0,0 +1,221 @@
777f42
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.13)
777f42
+.\"
777f42
+.\" Standard preamble:
777f42
+.\" ========================================================================
777f42
+.de Sp \" Vertical space (when we can't use .PP)
777f42
+.if t .sp .5v
777f42
+.if n .sp
777f42
+..
777f42
+.de Vb \" Begin verbatim text
777f42
+.ft CW
777f42
+.nf
777f42
+.ne \\$1
777f42
+..
777f42
+.de Ve \" End verbatim text
777f42
+.ft R
777f42
+.fi
777f42
+..
777f42
+.\" Set up some character translations and predefined strings.  \*(-- will
777f42
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
777f42
+.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
777f42
+.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
777f42
+.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
777f42
+.\" nothing in troff, for use with C<>.
777f42
+.tr \(*W-
777f42
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
777f42
+.ie n \{\
777f42
+.    ds -- \(*W-
777f42
+.    ds PI pi
777f42
+.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
777f42
+.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
777f42
+.    ds L" ""
777f42
+.    ds R" ""
777f42
+.    ds C` ""
777f42
+.    ds C' ""
777f42
+'br\}
777f42
+.el\{\
777f42
+.    ds -- \|\(em\|
777f42
+.    ds PI \(*p
777f42
+.    ds L" ``
777f42
+.    ds R" ''
777f42
+'br\}
777f42
+.\"
777f42
+.\" Escape single quotes in literal strings from groff's Unicode transform.
777f42
+.ie \n(.g .ds Aq \(aq
777f42
+.el       .ds Aq '
777f42
+.\"
777f42
+.\" If the F register is turned on, we'll generate index entries on stderr for
777f42
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
777f42
+.\" entries marked with X<> in POD.  Of course, you'll have to process the
777f42
+.\" output yourself in some meaningful fashion.
777f42
+.ie \nF \{\
777f42
+.    de IX
777f42
+.    tm Index:\\$1\t\\n%\t"\\$2"
777f42
+..
777f42
+.    nr % 0
777f42
+.    rr F
777f42
+.\}
777f42
+.el \{\
777f42
+.    de IX
777f42
+..
777f42
+.\}
777f42
+.\"
777f42
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
777f42
+.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
777f42
+.    \" fudge factors for nroff and troff
777f42
+.if n \{\
777f42
+.    ds #H 0
777f42
+.    ds #V .8m
777f42
+.    ds #F .3m
777f42
+.    ds #[ \f1
777f42
+.    ds #] \fP
777f42
+.\}
777f42
+.if t \{\
777f42
+.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
777f42
+.    ds #V .6m
777f42
+.    ds #F 0
777f42
+.    ds #[ \&
777f42
+.    ds #] \&
777f42
+.\}
777f42
+.    \" simple accents for nroff and troff
777f42
+.if n \{\
777f42
+.    ds ' \&
777f42
+.    ds ` \&
777f42
+.    ds ^ \&
777f42
+.    ds , \&
777f42
+.    ds ~ ~
777f42
+.    ds /
777f42
+.\}
777f42
+.if t \{\
777f42
+.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
777f42
+.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
777f42
+.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
777f42
+.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
777f42
+.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
777f42
+.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
777f42
+.\}
777f42
+.    \" troff and (daisy-wheel) nroff accents
777f42
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
777f42
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
777f42
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
777f42
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
777f42
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
777f42
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
777f42
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
777f42
+.ds ae a\h'-(\w'a'u*4/10)'e
777f42
+.ds Ae A\h'-(\w'A'u*4/10)'E
777f42
+.    \" corrections for vroff
777f42
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
777f42
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
777f42
+.    \" for low resolution devices (crt and lpr)
777f42
+.if \n(.H>23 .if \n(.V>19 \
777f42
+\{\
777f42
+.    ds : e
777f42
+.    ds 8 ss
777f42
+.    ds o a
777f42
+.    ds d- d\h'-1'\(ga
777f42
+.    ds D- D\h'-1'\(hy
777f42
+.    ds th \o'bp'
777f42
+.    ds Th \o'LP'
777f42
+.    ds ae ae
777f42
+.    ds Ae AE
777f42
+.\}
777f42
+.rm #[ #] #H #V #F C
777f42
+.\" ========================================================================
777f42
+.\"
777f42
+.IX Title "NFSDCLD 8"
777f42
+.TH NFSDCLD 8 "2011-12-21" "" ""
777f42
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
777f42
+.\" way too many mistakes in technical documents.
777f42
+.if n .ad l
777f42
+.nh
777f42
+.SH "NAME"
777f42
+nfsdcld \- NFSv4 Client Tracking Daemon
777f42
+.SH "SYNOPSIS"
777f42
+.IX Header "SYNOPSIS"
777f42
+nfsdcld [\-d] [\-F] [\-p path] [\-s stable storage dir]
777f42
+.SH "DESCRIPTION"
777f42
+.IX Header "DESCRIPTION"
777f42
+nfsdcld is the NFSv4 client tracking daemon. It is not necessary to run
777f42
+this daemon on machines that are not acting as NFSv4 servers.
777f42
+.PP
777f42
+When a network partition is combined with a server reboot, there are
777f42
+edge conditions that can cause the server to grant lock reclaims when
777f42
+other clients have taken conflicting locks in the interim. A more detailed
777f42
+explanation of this issue is described in \s-1RFC\s0 3530, section 8.6.3.
777f42
+.PP
777f42
+In order to prevent these problems, the server must track a small amount
777f42
+of per-client information on stable storage. This daemon provides the
777f42
+userspace piece of that functionality.
777f42
+.SH "OPTIONS"
777f42
+.IX Header "OPTIONS"
777f42
+.IP "\fB\-d\fR, \fB\-\-debug\fR" 4
777f42
+.IX Item "-d, --debug"
777f42
+Enable debug level logging.
777f42
+.IP "\fB\-F\fR, \fB\-\-foreground\fR" 4
777f42
+.IX Item "-F, --foreground"
777f42
+Runs the daemon in the foreground and prints all output to stderr.
777f42
+.IP "\fB\-p\fR \fIpath\fR, \fB\-\-pipefsdir\fR=\fIpath\fR" 4
777f42
+.IX Item "-p path, --pipefsdir=path"
777f42
+Location of the rpc_pipefs filesystem. The default value is
777f42
+\&\fI/var/lib/nfs/rpc_pipefs\fR.
777f42
+.IP "\fB\-s\fR \fIstorage_dir\fR, \fB\-\-storagedir\fR=\fIstorage_dir\fR" 4
777f42
+.IX Item "-s storagedir, --storagedir=storage_dir"
777f42
+Directory where stable storage information should be kept. The default
777f42
+value is \fI/var/lib/nfs/nfsdcld\fR.
777f42
+.SH "CONFIGURATION FILE"
777f42
+.IX Header "CONFIGURATION FILE"
777f42
+The following values are recognized in the \fB[nfsdcld]\fR section
777f42
+of the \fI/etc/nfs.conf\fR configuration file:
777f42
+.IP "\fBstoragedir\fR" 4
777f42
+.IX Item "storagedir"
777f42
+Equivalent to \fB\-s\fR/\fB\-\-storagedir\fR.
777f42
+.IP "\fBdebug\fR" 4
777f42
+.IX Item "debug"
777f42
+Setting "debug = 1" is equivalent to \fB\-d\fR/\fB\-\-debug\fR.
777f42
+.LP
777f42
+In addition, the following value is recognized from the \fB[general]\fR section:
777f42
+.IP "\fBpipefs\-directory\fR" 4
777f42
+.IX Item "pipefs-directory"
777f42
+Equivalent to \fB\-p\fR/\fB\-\-pipefsdir\fR.
777f42
+.SH "NOTES"
777f42
+.IX Header "NOTES"
777f42
+The Linux kernel NFSv4 server has historically tracked this information
777f42
+on stable storage by manipulating information on the filesystem
777f42
+directly, in the directory to which \fI/proc/fs/nfsd/nfsv4recoverydir\fR
777f42
+points.
777f42
+.PP
777f42
+This changed with the original introduction of \fBnfsdcld\fR upcall in kernel version 3.4,
777f42
+which was later deprecated in favor of the \fBnfsdcltrack\fR(8) usermodehelper
777f42
+program, support for which was added in kernel version 3.8.  However, since the
777f42
+usermodehelper upcall does not work in containers, support for a new version of
777f42
+the \fBnfsdcld\fR upcall was added in kernel version 5.2.
777f42
+.PP
777f42
+This daemon requires a kernel that supports the \fBnfsdcld\fR upcall. On older kernels, if
777f42
+the legacy client name tracking code was in use, then the kernel would not create the
777f42
+pipe that \fBnfsdcld\fR uses to talk to the kernel.  On newer kernels, nfsd attempts to
777f42
+initialize client tracking in the following order:  First, the \fBnfsdcld\fR upcall.  Second,
777f42
+the \fBnfsdcltrack\fR usermodehelper upcall.  Finally, the legacy client tracking.
777f42
+.PP
777f42
+This daemon should be run as root, as the pipe that it uses to communicate
777f42
+with the kernel is only accessible by root. The daemon however does drop all
777f42
+superuser capabilities after starting. Because of this, the \fIstoragedir\fR
777f42
+should be owned by root, and be readable and writable by owner.
777f42
+.PP
777f42
+The daemon now supports different upcall versions to allow the kernel to pass additional
777f42
+data to be stored in the on-disk database.  The kernel will query the supported upcall
777f42
+version from \fBnfsdcld\fR during client tracking initialization.  A restart of \fBnfsd\fR is
777f42
+not necessary after upgrading \fBnfsdcld\fR, however \fBnfsd\fR will not use a later upcall
777f42
+version until restart.  A restart of \fBnfsd\fR is necessary after downgrading \fBnfsdcld\fR,
777f42
+to ensure that \fBnfsd\fR does not use an upcall version that \fBnfsdcld\fR does not support.
777f42
+Additionally, a downgrade of \fBnfsdcld\fR requires the schema of the on-disk database to
777f42
+be downgraded as well.  That can be accomplished using the \fBclddb-tool\fR(8) utility.
777f42
+.SH FILES
777f42
+.TP
777f42
+.B /var/lib/nfs/nfsdcld/main.sqlite
777f42
+.SH SEE ALSO
777f42
+.BR nfsdcltrack "(8), " clddb-tool (8)
777f42
+.SH "AUTHORS"
777f42
+.IX Header "AUTHORS"
777f42
+The nfsdcld daemon was developed by Jeff Layton <jlayton@redhat.com>
777f42
+with modifications from Scott Mayhew <smayhew@redhat.com>.
777f42
diff --git a/utils/nfsdcld/sqlite.c b/utils/nfsdcld/sqlite.c
777f42
new file mode 100644
777f42
index 00000000..6666c867
777f42
--- /dev/null
777f42
+++ b/utils/nfsdcld/sqlite.c
777f42
@@ -0,0 +1,1406 @@
777f42
+/*
777f42
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
777f42
+ *
777f42
+ * This program is free software; you can redistribute it and/or
777f42
+ * modify it under the terms of the GNU General Public License
777f42
+ * as published by the Free Software Foundation; either version 2
777f42
+ * of the License, or (at your option) any later version.
777f42
+ *
777f42
+ * This program is distributed in the hope that it will be useful,
777f42
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
777f42
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
777f42
+ * GNU General Public License for more details.
777f42
+ *
777f42
+ * You should have received a copy of the GNU General Public License
777f42
+ * along with this program; if not, write to the Free Software
777f42
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
777f42
+ * Boston, MA 02110-1301, USA.
777f42
+ */
777f42
+
777f42
+/*
777f42
+ * Explanation:
777f42
+ *
777f42
+ * This file contains the code to manage the sqlite backend database for the
777f42
+ * nfsdcld client tracking daemon.
777f42
+ *
777f42
+ * The main database is called main.sqlite and contains the following tables:
777f42
+ *
777f42
+ * parameters: simple key/value pairs for storing database info
777f42
+ *
777f42
+ * grace: a "current" column containing an INTEGER representing the current
777f42
+ *        epoch (where should new values be stored) and a "recovery" column
777f42
+ *        containing an INTEGER representing the recovery epoch (from what
777f42
+ *        epoch are we allowed to recover).  A recovery epoch of 0 means
777f42
+ *        normal operation (grace period not in force).  Note: sqlite stores
777f42
+ *        integers as signed values, so these must be cast to a uint64_t when
777f42
+ *        retrieving them from the database and back to an int64_t when storing
777f42
+ *        them in the database.
777f42
+ *
777f42
+ * rec-CCCCCCCCCCCCCCCC (where C is the hex representation of the epoch value):
777f42
+ *        an "id" column containing a BLOB with the long-form clientid
777f42
+ *        as sent by the client, and a "princhash" column containing a BLOB
777f42
+ *        with the sha256 hash of the kerberos principal (if available).
777f42
+ */
777f42
+
777f42
+#ifdef HAVE_CONFIG_H
777f42
+#include "config.h"
777f42
+#endif /* HAVE_CONFIG_H */
777f42
+
777f42
+#include <dirent.h>
777f42
+#include <errno.h>
777f42
+#include <event.h>
777f42
+#include <stdbool.h>
777f42
+#include <string.h>
777f42
+#include <sys/stat.h>
777f42
+#include <sys/types.h>
777f42
+#include <fcntl.h>
777f42
+#include <unistd.h>
777f42
+#include <stdlib.h>
777f42
+#include <stdint.h>
777f42
+#include <limits.h>
777f42
+#include <sqlite3.h>
777f42
+#include <linux/limits.h>
777f42
+#include <inttypes.h>
777f42
+
777f42
+#include "xlog.h"
777f42
+#include "sqlite.h"
777f42
+#include "cld.h"
777f42
+#include "cld-internal.h"
777f42
+#include "conffile.h"
777f42
+#include "legacy.h"
777f42
+#include "nfslib.h"
777f42
+
777f42
+#define CLD_SQLITE_LATEST_SCHEMA_VERSION 4
777f42
+#define CLTRACK_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcltrack"
777f42
+
777f42
+/* in milliseconds */
777f42
+#define CLD_SQLITE_BUSY_TIMEOUT 10000
777f42
+
777f42
+/* private data structures */
777f42
+
777f42
+/* global variables */
777f42
+static char *cltrack_storagedir = CLTRACK_DEFAULT_STORAGEDIR;
777f42
+
777f42
+/* reusable pathname and sql command buffer */
777f42
+static char buf[PATH_MAX];
777f42
+
777f42
+/* global database handle */
777f42
+static sqlite3 *dbh;
777f42
+
777f42
+/* forward declarations */
777f42
+
777f42
+/*
777f42
+ * Make sure that "dirname" exists and is a directory.
777f42
+ *
777f42
+ * An EEXIST failure from mkdir() is tolerated; the path is then stat()ed
777f42
+ * to confirm that whatever is there really is a directory.
777f42
+ *
777f42
+ * Returns 0 on success or a negative errno value (-ENOTDIR when the path
777f42
+ * exists but is not a directory).
777f42
+ */
777f42
+static int
777f42
+mkdir_if_not_exist(const char *dirname)
777f42
+{
777f42
+	struct stat st;
777f42
+
777f42
+	if (mkdir(dirname, S_IRWXU) != 0 && errno != EEXIST)
777f42
+		return -errno;
777f42
+
777f42
+	if (stat(dirname, &st) != 0)
777f42
+		return -errno;
777f42
+
777f42
+	return S_ISDIR(st.st_mode) ? 0 : -ENOTDIR;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Read the schema version recorded in the "parameters" table.
777f42
+ *
777f42
+ * Returns the stored version, or 0 when the select statement cannot be
777f42
+ * prepared or does not produce a row.
777f42
+ */
777f42
+static int
777f42
+sqlite_query_schema_version(void)
777f42
+{
777f42
+	sqlite3_stmt *stmt = NULL;
777f42
+	int version = 0;
777f42
+
777f42
+	if (sqlite3_prepare_v2(dbh,
777f42
+		"SELECT value FROM parameters WHERE key == \"version\";",
777f42
+		 -1, &stmt, NULL) != SQLITE_OK) {
777f42
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
777f42
+			sqlite3_errmsg(dbh));
777f42
+	} else if (sqlite3_step(stmt) != SQLITE_ROW) {
777f42
+		xlog(D_GENERAL, "Select statement execution failed: %s",
777f42
+			sqlite3_errmsg(dbh));
777f42
+	} else {
777f42
+		version = sqlite3_column_int(stmt, 0);
777f42
+	}
777f42
+
777f42
+	/* finalizing a NULL statement is harmless, so one exit path suffices */
777f42
+	sqlite3_finalize(stmt);
777f42
+	return version;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Read the "first_time" flag from the "parameters" table.
777f42
+ *
777f42
+ * On success stores the value in *first_time and returns 0.  Otherwise
777f42
+ * *first_time is left untouched and the sqlite result code of the failing
777f42
+ * prepare/step call is returned.
777f42
+ */
777f42
+static int
777f42
+sqlite_query_first_time(int *first_time)
777f42
+{
777f42
+	sqlite3_stmt *stmt = NULL;
777f42
+	int rc;
777f42
+
777f42
+	rc = sqlite3_prepare_v2(dbh,
777f42
+		"SELECT value FROM parameters WHERE key == \"first_time\";",
777f42
+		 -1, &stmt, NULL);
777f42
+	if (rc != SQLITE_OK) {
777f42
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
777f42
+			sqlite3_errmsg(dbh));
777f42
+		goto done;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_step(stmt);
777f42
+	if (rc == SQLITE_ROW) {
777f42
+		*first_time = sqlite3_column_int(stmt, 0);
777f42
+		rc = 0;
777f42
+	} else {
777f42
+		xlog(D_GENERAL, "Select statement execution failed: %s",
777f42
+				sqlite3_errmsg(dbh));
777f42
+	}
777f42
+done:
777f42
+	sqlite3_finalize(stmt);
777f42
+	return rc;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * sqlite3_exec() row callback used by sqlite_maindb_update_v3_to_v4().
777f42
+ *
777f42
+ * Each row is expected to carry a single column holding a recovery table
777f42
+ * name; a "princhash" BLOB column is added to that table.
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) on success, -EINVAL on a malformed row or if the
777f42
+ * statement does not fit in the shared buffer, or the sqlite error code
777f42
+ * of the failed ALTER TABLE.
777f42
+ */
777f42
+static int
777f42
+sqlite_add_princ_col_cb(void *UNUSED(arg), int ncols, char **cols,
777f42
+			    char **UNUSED(colnames))
777f42
+{
777f42
+	int ret;
777f42
+	char *err = NULL;
777f42
+
777f42
+	if (ncols > 1)
777f42
+		return -EINVAL;
777f42
+	ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
777f42
+			"ADD COLUMN princhash BLOB;", cols[0]);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		return -EINVAL;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to add princhash column to table %s: %s",
777f42
+		     cols[0], err);
777f42
+		goto out;
777f42
+	}
777f42
+	xlog(D_GENERAL, "Added princhash column to table %s", cols[0]);
777f42
+out:
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Schema conversion step: add the "princhash" column to every table whose
777f42
+ * name matches "%rec-%" by running sqlite_add_princ_col_cb() on each.
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) on success, otherwise the sqlite error code.
777f42
+ */
777f42
+static int
777f42
+sqlite_maindb_update_v3_to_v4(void)
777f42
+{
777f42
+	int ret;
777f42
+	char *err = NULL;
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
777f42
+			   "WHERE type=\"table\" AND name LIKE \"%rec-%\";",
777f42
+			   sqlite_add_princ_col_cb, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: Failed to update tables!: %s", __func__, err);
777f42
+	}
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Schema conversion step: create the grace table and the recovery table
777f42
+ * for epoch 1, copy the ids from the old "clients" table into it, and
777f42
+ * then drop "clients".
777f42
+ *
777f42
+ * Does not start or end a transaction itself.
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) on success, otherwise the sqlite error code of the
777f42
+ * first statement that failed.
777f42
+ */
777f42
+static int
777f42
+sqlite_maindb_update_v1v2_to_v4(void)
777f42
+{
777f42
+	int ret;
777f42
+	char *err = NULL;
777f42
+
777f42
+	/* create grace table */
777f42
+	ret = sqlite3_exec(dbh, "CREATE TABLE grace "
777f42
+				"(current INTEGER , recovery INTEGER);",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to create grace table: %s", err);
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	/* insert initial epochs into grace table */
777f42
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
777f42
+				"values (1, 0);",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to set initial epochs: %s", err);
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	/* create recovery table for current epoch */
777f42
+	ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
777f42
+				"(id BLOB PRIMARY KEY, princhash BLOB);",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to create recovery table "
777f42
+				"for current epoch: %s", err);
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	/* copy records from old clients table */
777f42
+	ret = sqlite3_exec(dbh, "INSERT INTO \"rec-0000000000000001\" (id) "
777f42
+				"SELECT id FROM clients;",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to copy client records: %s", err);
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	/* drop the old clients table */
777f42
+	ret = sqlite3_exec(dbh, "DROP TABLE clients;",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to drop old clients table: %s", err);
777f42
+	}
777f42
+out:
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Upgrade the main database from schema version "oldversion" to
777f42
+ * CLD_SQLITE_LATEST_SCHEMA_VERSION inside one exclusive transaction.
777f42
+ *
777f42
+ * Returns 0 on success (including the case where the re-check finds the
777f42
+ * database already at the latest version), otherwise a non-zero sqlite
777f42
+ * error code or a negative value.  The transaction is rolled back on
777f42
+ * every path that does not commit.
777f42
+ */
777f42
+static int
777f42
+sqlite_maindb_update_schema(int oldversion)
777f42
+{
777f42
+	int ret, ret2;
777f42
+	char *err = NULL;
777f42
+
777f42
+	/* begin transaction */
777f42
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
777f42
+				&err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/*
777f42
+	 * Check schema version again. This time, under an exclusive
777f42
+	 * transaction to guard against racing DB setup attempts
777f42
+	 */
777f42
+	ret = sqlite_query_schema_version();
777f42
+	if (ret != oldversion) {
777f42
+		if (ret == CLD_SQLITE_LATEST_SCHEMA_VERSION)
777f42
+			/* Someone else raced in and set it up */
777f42
+			ret = 0;
777f42
+		else
777f42
+			/* Something went wrong -- fail! */
777f42
+			ret = -EINVAL;
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/* Still at old version -- do conversion */
777f42
+
777f42
+	switch (oldversion) {
777f42
+	case 3:
777f42
+	case 2:
777f42
+		ret = sqlite_maindb_update_v3_to_v4();
777f42
+		break;
777f42
+	case 1:
777f42
+		ret = sqlite_maindb_update_v1v2_to_v4();
777f42
+		break;
777f42
+	default:
777f42
+		ret = -EINVAL;
777f42
+	}
777f42
+	if (ret != SQLITE_OK)
777f42
+		goto rollback;
777f42
+
777f42
+	ret = snprintf(buf, sizeof(buf), "UPDATE parameters SET value = %d "
777f42
+			"WHERE key = \"version\";",
777f42
+			CLD_SQLITE_LATEST_SCHEMA_VERSION);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		goto rollback;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		ret = -EINVAL;
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to update schema version: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/* add the first_time parameter only if it cannot be read back */
777f42
+	ret = sqlite_query_first_time(&first_time);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		/* insert first_time into parameters table */
777f42
+		ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters "
777f42
+					"values (\"first_time\", \"1\");",
777f42
+					NULL, NULL, &err);
777f42
+		if (ret != SQLITE_OK) {
777f42
+			xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
777f42
+			goto rollback;
777f42
+		}
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+out:
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+rollback:
777f42
+	/* release the message of the failed step before err is reused */
777f42
+	sqlite3_free(err);
777f42
+	err = NULL;
777f42
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret2 != SQLITE_OK)
777f42
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
777f42
+	goto out;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Start an exclusive transaction and recheck the DB schema version. If it's
777f42
+ * still zero (indicating a new database) then set it up. If that all works,
777f42
+ * then insert schema version into the parameters table and commit the
777f42
+ * transaction. On any error, rollback the transaction.
777f42
+ *
777f42
+ * Returns 0 on success (including the case where the re-check finds the
777f42
+ * database already at the latest version), otherwise a non-zero sqlite
777f42
+ * error code or a negative value.
777f42
+ */
777f42
+static int
777f42
+sqlite_maindb_init_v4(void)
777f42
+{
777f42
+	int ret, ret2;
777f42
+	char *err = NULL;
777f42
+
777f42
+	/* Start a transaction */
777f42
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
777f42
+				&err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
777f42
+		/* no transaction to roll back, but the message must go */
777f42
+		sqlite3_free(err);
777f42
+		return ret;
777f42
+	}
777f42
+
777f42
+	/*
777f42
+	 * Check schema version again. This time, under an exclusive
777f42
+	 * transaction to guard against racing DB setup attempts
777f42
+	 */
777f42
+	ret = sqlite_query_schema_version();
777f42
+	switch (ret) {
777f42
+	case 0:
777f42
+		/* Query failed again -- set up DB */
777f42
+		break;
777f42
+	case CLD_SQLITE_LATEST_SCHEMA_VERSION:
777f42
+		/* Someone else raced in and set it up */
777f42
+		ret = 0;
777f42
+		goto rollback;
777f42
+	default:
777f42
+		/* Something went wrong -- fail! */
777f42
+		ret = -EINVAL;
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "CREATE TABLE parameters "
777f42
+				"(key TEXT PRIMARY KEY, value TEXT);",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to create parameter table: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/* create grace table */
777f42
+	ret = sqlite3_exec(dbh, "CREATE TABLE grace "
777f42
+				"(current INTEGER , recovery INTEGER);",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to create grace table: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/* insert initial epochs into grace table */
777f42
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
777f42
+				"values (1, 0);",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to set initial epochs: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/* create recovery table for current epoch */
777f42
+	ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
777f42
+				"(id BLOB PRIMARY KEY, princhash BLOB);",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to create recovery table "
777f42
+				"for current epoch: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/* insert version into parameters table */
777f42
+	ret = snprintf(buf, sizeof(buf), "INSERT OR FAIL INTO parameters "
777f42
+			"values (\"version\", \"%d\");",
777f42
+			CLD_SQLITE_LATEST_SCHEMA_VERSION);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		goto rollback;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		ret = -EINVAL;
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/* insert first_time into parameters table */
777f42
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters "
777f42
+				"values (\"first_time\", \"1\");",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+out:
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+
777f42
+rollback:
777f42
+	/* release the message of the failed step before err is reused */
777f42
+	sqlite3_free(err);
777f42
+	err = NULL;
777f42
+	/* Attempt to rollback the transaction */
777f42
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret2 != SQLITE_OK)
777f42
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
777f42
+	goto out;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Load the current and recovery epochs from the first row of the grace
777f42
+ * table into the current_epoch and recovery_epoch variables.
777f42
+ *
777f42
+ * Returns 0 on success; otherwise the sqlite result code of the failing
777f42
+ * prepare/step call, in which case neither epoch is modified.
777f42
+ */
777f42
+static int
777f42
+sqlite_startup_query_grace(void)
777f42
+{
777f42
+	sqlite3_stmt *stmt = NULL;
777f42
+	int rc;
777f42
+
777f42
+	rc = sqlite3_prepare_v2(dbh, "SELECT * FROM grace;", -1, &stmt, NULL);
777f42
+	if (rc != SQLITE_OK) {
777f42
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
777f42
+			sqlite3_errmsg(dbh));
777f42
+		goto done;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_step(stmt);
777f42
+	if (rc != SQLITE_ROW) {
777f42
+		xlog(D_GENERAL, "Select statement execution failed: %s",
777f42
+				sqlite3_errmsg(dbh));
777f42
+		goto done;
777f42
+	}
777f42
+
777f42
+	/* sqlite hands back signed 64-bit values; epochs are unsigned */
777f42
+	current_epoch = (uint64_t)sqlite3_column_int64(stmt, 0);
777f42
+	recovery_epoch = (uint64_t)sqlite3_column_int64(stmt, 1);
777f42
+	rc = 0;
777f42
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
777f42
+		__func__, current_epoch, recovery_epoch);
777f42
+done:
777f42
+	sqlite3_finalize(stmt);
777f42
+	return rc;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Helper for renaming a recovery table to fix the padding.
777f42
+ *
777f42
+ * Parses the hex epoch out of "name" ("rec-<hex>") and renames the table
777f42
+ * to the zero-padded 16-digit form "rec-%016x".
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) on success, -EINVAL if the name cannot be parsed
777f42
+ * or the statement does not fit in the shared buffer, or the sqlite error
777f42
+ * code of the failed ALTER TABLE.
777f42
+ */
777f42
+static int
777f42
+sqlite_fix_table_name(const char *name)
777f42
+{
777f42
+	int ret;
777f42
+	uint64_t val;
777f42
+	char *err = NULL;
777f42
+
777f42
+	if (sscanf(name, "rec-%" PRIx64, &val) != 1)
777f42
+		return -EINVAL;
777f42
+	ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
777f42
+			"RENAME TO \"rec-%016" PRIx64 "\";",
777f42
+			name, val);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		return -EINVAL;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to fix table for epoch %"PRIu64": %s",
777f42
+		     val, err);
777f42
+		goto out;
777f42
+	}
777f42
+	xlog(D_GENERAL, "Renamed table %s to rec-%016" PRIx64, name, val);
777f42
+out:
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Callback for the sqlite_exec statement in sqlite_check_table_names.
777f42
+ * If the epoch encoded in the table name matches either the current
777f42
+ * epoch or the recovery epoch, then try to fix the padding.  Otherwise,
777f42
+ * we bail.
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) when the table was renamed, -EINVAL for a
777f42
+ * malformed row, an unparsable name or an unknown epoch, or whatever
777f42
+ * sqlite_fix_table_name() returned.
777f42
+ */
777f42
+static int
777f42
+sqlite_check_table_names_cb(void *UNUSED(arg), int ncols, char **cols,
777f42
+			    char **UNUSED(colnames))
777f42
+{
777f42
+	int ret = SQLITE_OK;
777f42
+	uint64_t val;
777f42
+
777f42
+	if (ncols > 1)
777f42
+		return -EINVAL;
777f42
+	if (sscanf(cols[0], "rec-%" PRIx64, &val) != 1)
777f42
+		return -EINVAL;
777f42
+	if (val == current_epoch || val == recovery_epoch) {
777f42
+		xlog(D_GENERAL, "found invalid table name %s for %s epoch",
777f42
+		     cols[0], val == current_epoch ? "current" : "recovery");
777f42
+		ret = sqlite_fix_table_name(cols[0]);
777f42
+	} else {
777f42
+		/* val is unsigned, so it must be printed with PRIu64 */
777f42
+		xlog(L_ERROR, "found invalid table name %s for unknown epoch %"
777f42
+		     PRIu64, cols[0], val);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Look for recovery table names where the epoch isn't zero-padded
777f42
+ * (tables matching "%rec-%" whose name is shorter than 20 characters) and
777f42
+ * run sqlite_check_table_names_cb() on each of them.
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) on success, otherwise the sqlite error code.
777f42
+ */
777f42
+static int
777f42
+sqlite_check_table_names(void)
777f42
+{
777f42
+	int ret;
777f42
+	char *err = NULL;
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
777f42
+			   "WHERE type=\"table\" AND name LIKE \"%rec-%\" "
777f42
+			   "AND length(name) < 20;",
777f42
+			   sqlite_check_table_names_cb, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Table names check failed: %s", err);
777f42
+	}
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Simple db health check.  For now we're just making sure that the recovery
777f42
+ * table names are of the format "rec-CCCCCCCCCCCCCCCC" (where C is the hex
777f42
+ * representation of the epoch value) and that epoch value matches either
777f42
+ * the current epoch or the recovery epoch.
777f42
+ *
777f42
+ * The check (and any renames it performs) runs inside one exclusive
777f42
+ * transaction that is rolled back on failure.
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) on success, otherwise a non-zero error code.
777f42
+ */
777f42
+static int
777f42
+sqlite_check_db_health(void)
777f42
+{
777f42
+	int ret, ret2;
777f42
+	char *err = NULL;
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
777f42
+				&err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	ret = sqlite_check_table_names();
777f42
+	if (ret != SQLITE_OK)
777f42
+		goto rollback;
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+cleanup:
777f42
+	sqlite3_free(err);
777f42
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
777f42
+	return ret;
777f42
+rollback:
777f42
+	/* release the message of the failed step before err is reused */
777f42
+	sqlite3_free(err);
777f42
+	err = NULL;
777f42
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret2 != SQLITE_OK)
777f42
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
777f42
+	goto cleanup;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Attach the database file "<path>/main.sqlite" to the global handle
777f42
+ * under the schema name "attached".
777f42
+ *
777f42
+ * The file must already exist; it is stat()ed first so that ATTACH is not
777f42
+ * even attempted on a missing file.
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) on success, a negative value if the path could
777f42
+ * not be built or stat()ed, or a sqlite error code.
777f42
+ */
777f42
+static int
777f42
+sqlite_attach_db(const char *path)
777f42
+{
777f42
+	int ret;
777f42
+	char dbpath[PATH_MAX];
777f42
+	struct stat stb;
777f42
+	sqlite3_stmt *stmt = NULL;
777f42
+
777f42
+	ret = snprintf(dbpath, PATH_MAX - 1, "%s/main.sqlite", path);
777f42
+	if (ret < 0)
777f42
+		return ret;
777f42
+
777f42
+	dbpath[PATH_MAX - 1] = '\0';
777f42
+	ret = stat(dbpath, &stb);
777f42
+	if (ret < 0)
777f42
+		return ret;
777f42
+
777f42
+	xlog(D_GENERAL, "attaching %s", dbpath);
777f42
+	ret = sqlite3_prepare_v2(dbh, "ATTACH DATABASE ? AS attached;",
777f42
+			-1, &stmt, NULL);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: unable to prepare attach statement: %s",
777f42
+				__func__, sqlite3_errmsg(dbh));
777f42
+		return ret;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_bind_text(stmt, 1, dbpath, strlen(dbpath), SQLITE_STATIC);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: bind text failed: %s",
777f42
+				__func__, sqlite3_errmsg(dbh));
777f42
+		/* the statement was prepared, so it still has to be released */
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_step(stmt);
777f42
+	if (ret == SQLITE_DONE)
777f42
+		ret = SQLITE_OK;
777f42
+	else
777f42
+		xlog(L_ERROR, "%s: unexpected return code from attach: %s",
777f42
+				__func__, sqlite3_errmsg(dbh));
777f42
+
777f42
+out:
777f42
+	sqlite3_finalize(stmt);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Detach the database previously attached under the name "attached".
777f42
+ *
777f42
+ * Returns SQLITE_OK (0) on success, otherwise the sqlite error code.
777f42
+ */
777f42
+static int
777f42
+sqlite_detach_db(void)
777f42
+{
777f42
+	int ret;
777f42
+	char *err = NULL;
777f42
+
777f42
+	xlog(D_GENERAL, "detaching database");
777f42
+	ret = sqlite3_exec(dbh, "DETACH DATABASE attached;", NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to detach attached db: %s", err);
777f42
+	}
777f42
+
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Copies client records from the nfsdcltrack database as part of a one-time
777f42
+ * "upgrade".
777f42
+ *
777f42
+ * The nfsdcltrack database is attached, the recovery table of the current
777f42
+ * epoch is emptied and refilled with the ids from attached.clients inside
777f42
+ * one exclusive transaction, and the database is detached again.
777f42
+ *
777f42
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0).
777f42
+ * Returns the number of records copied via "num_rec" (0 if the
777f42
+ * transaction was rolled back; untouched if the attach itself failed).
777f42
+ */
777f42
+static int
777f42
+sqlite_copy_cltrack_records(int *num_rec)
777f42
+{
777f42
+	int ret, ret2;
777f42
+	char *s;
777f42
+	char *err = NULL;
777f42
+	sqlite3_stmt *stmt = NULL;
777f42
+
777f42
+	/* a configured nfsdcltrack storagedir overrides the built-in default */
777f42
+	s = conf_get_str("nfsdcltrack", "storagedir");
777f42
+	if (s)
777f42
+		cltrack_storagedir = s;
777f42
+	ret = sqlite_attach_db(cltrack_storagedir);
777f42
+	if (ret)
777f42
+		goto out;
777f42
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
777f42
+				&err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+	ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";",
777f42
+			current_epoch);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		goto rollback;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		ret = -EINVAL;
777f42
+		goto rollback;
777f42
+	}
777f42
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to clear records from current epoch: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+	ret = snprintf(buf, sizeof(buf), "INSERT INTO \"rec-%016" PRIx64 "\" (id) "
777f42
+				"SELECT id FROM attached.clients;",
777f42
+				current_epoch);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		goto rollback;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		ret = -EINVAL;
777f42
+		goto rollback;
777f42
+	}
777f42
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
777f42
+			__func__, sqlite3_errmsg(dbh));
777f42
+		goto rollback;
777f42
+	}
777f42
+	ret = sqlite3_step(stmt);
777f42
+	if (ret != SQLITE_DONE) {
777f42
+		xlog(L_ERROR, "%s: unexpected return code from insert: %s",
777f42
+				__func__, sqlite3_errmsg(dbh));
777f42
+		goto rollback;
777f42
+	}
777f42
+	*num_rec = sqlite3_changes(dbh);
777f42
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+cleanup:
777f42
+	sqlite3_finalize(stmt);
777f42
+	sqlite3_free(err);
777f42
+	sqlite_detach_db();
777f42
+out:
777f42
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
777f42
+	return ret;
777f42
+rollback:
777f42
+	*num_rec = 0;
777f42
+	/* release the message of the failed step before err is reused */
777f42
+	sqlite3_free(err);
777f42
+	err = NULL;
777f42
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret2 != SQLITE_OK)
777f42
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
777f42
+	goto cleanup;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Open the database and set up the database handle for it.
777f42
+ *
777f42
+ * Opens "<topdir>/main.sqlite" (creating topdir if the first open fails),
777f42
+ * brings the schema up to CLD_SQLITE_LATEST_SCHEMA_VERSION, loads the
777f42
+ * grace epochs and the first_time flag, runs the health check and, on
777f42
+ * first-time startup, imports nfsdcltrack and legacy client records.
777f42
+ *
777f42
+ * Returns 0 on success or if the handle is already set up.  On failure a
777f42
+ * non-zero value is returned and the handle is closed and reset to NULL.
777f42
+ */
777f42
+int
777f42
+sqlite_prepare_dbh(const char *topdir)
777f42
+{
777f42
+	int ret;
777f42
+
777f42
+	/* Do nothing if the database handle is already set up */
777f42
+	if (dbh)
777f42
+		return 0;
777f42
+
777f42
+	ret = snprintf(buf, PATH_MAX - 1, "%s/main.sqlite", topdir);
777f42
+	if (ret < 0)
777f42
+		return ret;
777f42
+
777f42
+	buf[PATH_MAX - 1] = '\0';
777f42
+
777f42
+	/* open a new DB handle */
777f42
+	ret = sqlite3_open(buf, &dbh);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		/*
777f42
+		 * sqlite3_open() normally hands back a handle even when it
777f42
+		 * fails; release it so that the retry does not leak it.
777f42
+		 */
777f42
+		sqlite3_close(dbh);
777f42
+		dbh = NULL;
777f42
+
777f42
+		/* try to create the dir */
777f42
+		ret = mkdir_if_not_exist(topdir);
777f42
+		if (ret)
777f42
+			goto out_close;
777f42
+
777f42
+		/* retry open */
777f42
+		ret = sqlite3_open(buf, &dbh);
777f42
+		if (ret != SQLITE_OK)
777f42
+			goto out_close;
777f42
+	}
777f42
+
777f42
+	/* set busy timeout */
777f42
+	ret = sqlite3_busy_timeout(dbh, CLD_SQLITE_BUSY_TIMEOUT);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to set sqlite busy timeout: %s",
777f42
+				sqlite3_errmsg(dbh));
777f42
+		goto out_close;
777f42
+	}
777f42
+
777f42
+	ret = sqlite_query_schema_version();
777f42
+	switch (ret) {
777f42
+	case CLD_SQLITE_LATEST_SCHEMA_VERSION:
777f42
+		/* DB is already set up. Do nothing */
777f42
+		ret = 0;
777f42
+		break;
777f42
+	case 3:
777f42
+		/* Old DB -- update to new schema */
777f42
+		ret = sqlite_maindb_update_schema(3);
777f42
+		if (ret)
777f42
+			goto out_close;
777f42
+		break;
777f42
+	case 2:
777f42
+		/* Old DB -- update to new schema */
777f42
+		ret = sqlite_maindb_update_schema(2);
777f42
+		if (ret)
777f42
+			goto out_close;
777f42
+		break;
777f42
+
777f42
+	case 1:
777f42
+		/* Old DB -- update to new schema */
777f42
+		ret = sqlite_maindb_update_schema(1);
777f42
+		if (ret)
777f42
+			goto out_close;
777f42
+		break;
777f42
+	case 0:
777f42
+		/* Query failed -- try to set up new DB */
777f42
+		ret = sqlite_maindb_init_v4();
777f42
+		if (ret)
777f42
+			goto out_close;
777f42
+		break;
777f42
+	default:
777f42
+		/* Unknown DB version -- downgrade? Fail */
777f42
+		xlog(L_ERROR, "Unsupported database schema version! "
777f42
+			"Expected %d, got %d.",
777f42
+			CLD_SQLITE_LATEST_SCHEMA_VERSION, ret);
777f42
+		ret = -EINVAL;
777f42
+		goto out_close;
777f42
+	}
777f42
+
777f42
+	/*
777f42
+	 * Without the epochs nothing below can address the right recovery
777f42
+	 * table, so a failure here must not be silently overwritten.
777f42
+	 */
777f42
+	ret = sqlite_startup_query_grace();
777f42
+	if (ret)
777f42
+		goto out_close;
777f42
+
777f42
+	ret = sqlite_query_first_time(&first_time);
777f42
+	if (ret)
777f42
+		goto out_close;
777f42
+
777f42
+	ret = sqlite_check_db_health();
777f42
+	if (ret) {
777f42
+		xlog(L_ERROR, "Database health check failed! "
777f42
+			"Database must be fixed manually.");
777f42
+		goto out_close;
777f42
+	}
777f42
+
777f42
+	/* one-time "upgrade" from older client tracking methods */
777f42
+	if (first_time) {
777f42
+		sqlite_copy_cltrack_records(&num_cltrack_records);
777f42
+		xlog(D_GENERAL, "%s: num_cltrack_records = %d\n",
777f42
+			__func__, num_cltrack_records);
777f42
+		legacy_load_clients_from_recdir(&num_legacy_records);
777f42
+		xlog(D_GENERAL, "%s: num_legacy_records = %d\n",
777f42
+			__func__, num_legacy_records);
777f42
+		if (num_cltrack_records > 0 && num_legacy_records > 0)
777f42
+			xlog(L_WARNING, "%s: first-time upgrade detected "
777f42
+				"both cltrack and legacy records!\n", __func__);
777f42
+	}
777f42
+
777f42
+	return ret;
777f42
+out_close:
777f42
+	sqlite3_close(dbh);
777f42
+	dbh = NULL;
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Store a client id in the recovery table of the current epoch,
777f42
+ * replacing any existing row with the same id.
777f42
+ *
777f42
+ * Returns SQLITE_OK (aka 0) on success; otherwise a non-zero sqlite error
777f42
+ * code, or a negative value if the statement text could not be built.
777f42
+ */
777f42
+int
777f42
+sqlite_insert_client(const unsigned char *clname, const size_t namelen)
777f42
+{
777f42
+	sqlite3_stmt *stmt = NULL;
777f42
+	int rc;
777f42
+
777f42
+	rc = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" (id) "
777f42
+				"VALUES (?);", current_epoch);
777f42
+	if (rc < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		return rc;
777f42
+	}
777f42
+	if ((size_t)rc >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
777f42
+	if (rc != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
777f42
+			__func__, sqlite3_errmsg(dbh));
777f42
+		return rc;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
777f42
+				SQLITE_STATIC);
777f42
+	if (rc != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
777f42
+				sqlite3_errmsg(dbh));
777f42
+	} else {
777f42
+		/* a completed insert reports SQLITE_DONE; map it to success */
777f42
+		rc = sqlite3_step(stmt);
777f42
+		if (rc == SQLITE_DONE)
777f42
+			rc = SQLITE_OK;
777f42
+		else
777f42
+			xlog(L_ERROR, "%s: unexpected return code from insert: %s",
777f42
+					__func__, sqlite3_errmsg(dbh));
777f42
+	}
777f42
+
777f42
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
777f42
+	sqlite3_finalize(stmt);
777f42
+	return rc;
777f42
+}
777f42
+
777f42
+#if UPCALL_VERSION >= 2
777f42
+/*
777f42
+ * Create a client record including the hash of the kerberos principal
777f42
+ *
777f42
+ * When princhashlen is 0 only the id column is written; otherwise both
777f42
+ * the id and the princhash columns are supplied.  An existing row with
777f42
+ * the same id is replaced.
777f42
+ *
777f42
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0)
777f42
+ */
777f42
+int
777f42
+sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
777f42
+		const unsigned char *clprinchash, const size_t princhashlen)
777f42
+{
777f42
+	int ret;
777f42
+	sqlite3_stmt *stmt = NULL;
777f42
+
777f42
+	/* pick the one- or two-parameter statement for the current epoch's table */
777f42
+	if (princhashlen > 0)
777f42
+		ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" "
777f42
+				"VALUES (?, ?);", current_epoch);
777f42
+	else
777f42
+		ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" (id) "
777f42
+				"VALUES (?);", current_epoch);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		return ret;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
777f42
+			__func__, sqlite3_errmsg(dbh));
777f42
+		return ret;
777f42
+	}
777f42
+
777f42
+	/* parameter 1: the client id */
777f42
+	ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
777f42
+				SQLITE_STATIC);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
777f42
+				sqlite3_errmsg(dbh));
777f42
+		goto out_err;
777f42
+	}
777f42
+
777f42
+	/* parameter 2 exists only in the two-parameter statement */
777f42
+	if (princhashlen > 0) {
777f42
+		ret = sqlite3_bind_blob(stmt, 2, (const void *)clprinchash, princhashlen,
777f42
+					SQLITE_STATIC);
777f42
+		if (ret != SQLITE_OK) {
777f42
+			xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
777f42
+					sqlite3_errmsg(dbh));
777f42
+			goto out_err;
777f42
+		}
777f42
+	}
777f42
+
777f42
+	/* a completed insert reports SQLITE_DONE; map it to SQLITE_OK */
777f42
+	ret = sqlite3_step(stmt);
777f42
+	if (ret == SQLITE_DONE)
777f42
+		ret = SQLITE_OK;
777f42
+	else
777f42
+		xlog(L_ERROR, "%s: unexpected return code from insert: %s",
777f42
+				__func__, sqlite3_errmsg(dbh));
777f42
+
777f42
+out_err:
777f42
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
777f42
+	sqlite3_finalize(stmt);
777f42
+	return ret;
777f42
+}
777f42
+#else
777f42
+int
777f42
+sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
777f42
+		const unsigned char *clprinchash, const size_t princhashlen)
777f42
+{
777f42
+	return -EINVAL;
777f42
+}
777f42
+#endif
777f42
+
777f42
+/*
777f42
+ * Delete the record whose id equals clname (namelen bytes) from the
777f42
+ * table for the current epoch.
777f42
+ *
777f42
+ * Returns SQLITE_OK (aka 0) on success. On failure returns the sqlite
777f42
+ * error code, the negative snprintf() result, or -EINVAL when the
777f42
+ * statement text does not fit in buf.
777f42
+ */
777f42
+int
777f42
+sqlite_remove_client(const unsigned char *clname, const size_t namelen)
777f42
+{
777f42
+	sqlite3_stmt *del = NULL;
777f42
+	int rc;
777f42
+
777f42
+	rc = snprintf(buf, sizeof(buf),
777f42
+			"DELETE FROM \"rec-%016" PRIx64 "\" WHERE id==?;",
777f42
+			current_epoch);
777f42
+	if (rc < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		return rc;
777f42
+	}
777f42
+	if ((size_t)rc >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+
777f42
+	/* on failure del is still NULL, which sqlite3_finalize() accepts */
777f42
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &del, NULL);
777f42
+	if (rc != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: statement prepare failed: %s",
777f42
+				__func__, sqlite3_errmsg(dbh));
777f42
+		goto done;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_bind_blob(del, 1, (const void *)clname, namelen,
777f42
+				SQLITE_STATIC);
777f42
+	if (rc != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
777f42
+				sqlite3_errmsg(dbh));
777f42
+		goto done;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_step(del);
777f42
+	if (rc != SQLITE_DONE)
777f42
+		xlog(L_ERROR, "%s: unexpected return code from delete: %d",
777f42
+				__func__, rc);
777f42
+	else
777f42
+		rc = SQLITE_OK;
777f42
+
777f42
+done:
777f42
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
777f42
+	sqlite3_finalize(del);
777f42
+	return rc;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Is the given clname recorded in the table for the recovery epoch? If
777f42
+ * exactly one matching record exists, insert the client into the table
777f42
+ * for the current epoch and return the result of that insert. If the
777f42
+ * count is anything else return -EACCES; if the lookup itself fails
777f42
+ * return the error it produced.
777f42
+ */
777f42
+int
777f42
+sqlite_check_client(const unsigned char *clname, const size_t namelen)
777f42
+{
777f42
+	sqlite3_stmt *query = NULL;
777f42
+	int rc;
777f42
+
777f42
+	rc = snprintf(buf, sizeof(buf),
777f42
+			"SELECT count(*) FROM  \"rec-%016" PRIx64 "\" WHERE id==?;",
777f42
+			recovery_epoch);
777f42
+	if (rc < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		return rc;
777f42
+	}
777f42
+	if ((size_t)rc >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &query, NULL);
777f42
+	if (rc != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: select statement prepare failed: %s",
777f42
+			__func__, sqlite3_errmsg(dbh));
777f42
+		return rc;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_bind_blob(query, 1, (const void *)clname, namelen,
777f42
+				SQLITE_STATIC);
777f42
+	if (rc != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: bind blob failed: %s",
777f42
+				__func__, sqlite3_errmsg(dbh));
777f42
+		goto fail;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_step(query);
777f42
+	if (rc != SQLITE_ROW) {
777f42
+		xlog(L_ERROR, "%s: unexpected return code from select: %d",
777f42
+				__func__, rc);
777f42
+		goto fail;
777f42
+	}
777f42
+
777f42
+	rc = sqlite3_column_int(query, 0);
777f42
+	xlog(D_GENERAL, "%s: select returned %d rows", __func__, rc);
777f42
+	if (rc == 1) {
777f42
+		/*
777f42
+		 * Release the select first: the insert below rebuilds its
777f42
+		 * own statement text in buf.
777f42
+		 */
777f42
+		sqlite3_finalize(query);
777f42
+
777f42
+		/* Now insert the client into the table for the current epoch */
777f42
+		return sqlite_insert_client(clname, namelen);
777f42
+	}
777f42
+	rc = -EACCES;
777f42
+
777f42
+fail:
777f42
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
777f42
+	sqlite3_finalize(query);
777f42
+	return rc;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Start a grace period.
777f42
+ *
777f42
+ * When no recovery epoch is set (recovery_epoch == 0) the current epoch
777f42
+ * becomes the recovery epoch, the current epoch is incremented, the grace
777f42
+ * table is updated to match and a record table is created for the new
777f42
+ * current epoch. Otherwise the epochs are left alone and the record table
777f42
+ * of the current epoch is emptied.
777f42
+ *
777f42
+ * Everything runs inside one exclusive transaction, and the cached
777f42
+ * current_epoch/recovery_epoch values are only updated once it commits.
777f42
+ *
777f42
+ * Returns SQLITE_OK (aka 0) on success, otherwise an sqlite error code,
777f42
+ * the negative snprintf() result, or -EINVAL when a statement does not
777f42
+ * fit in buf.
777f42
+ */
777f42
+int
777f42
+sqlite_grace_start(void)
777f42
+{
777f42
+	int ret, ret2;
777f42
+	/* sqlite3_exec() may return without writing these, so start at NULL */
777f42
+	char *err = NULL;
777f42
+	char *rb_err = NULL;
777f42
+	uint64_t tcur = current_epoch;
777f42
+	uint64_t trec = recovery_epoch;
777f42
+
777f42
+	/* begin transaction */
777f42
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
777f42
+				&err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
777f42
+		/* no transaction was opened, so there is nothing to roll back */
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	if (trec == 0) {
777f42
+		/*
777f42
+		 * A normal grace start - update the epoch values in the grace
777f42
+		 * table and create a new table for the current reboot epoch.
777f42
+		 */
777f42
+		trec = tcur;
777f42
+		tcur++;
777f42
+
777f42
+		ret = snprintf(buf, sizeof(buf), "UPDATE grace "
777f42
+				"SET current = %" PRId64 ", recovery = %" PRId64 ";",
777f42
+				(int64_t)tcur, (int64_t)trec);
777f42
+		if (ret < 0) {
777f42
+			xlog(L_ERROR, "sprintf failed!");
777f42
+			goto rollback;
777f42
+		} else if ((size_t)ret >= sizeof(buf)) {
777f42
+			xlog(L_ERROR, "sprintf output too long! (%d chars)",
777f42
+				ret);
777f42
+			ret = -EINVAL;
777f42
+			goto rollback;
777f42
+		}
777f42
+
777f42
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+		if (ret != SQLITE_OK) {
777f42
+			xlog(L_ERROR, "Unable to update epochs: %s", err);
777f42
+			goto rollback;
777f42
+		}
777f42
+
777f42
+		ret = snprintf(buf, sizeof(buf), "CREATE TABLE \"rec-%016" PRIx64 "\" "
777f42
+				"(id BLOB PRIMARY KEY, princhash blob);",
777f42
+				tcur);
777f42
+		if (ret < 0) {
777f42
+			xlog(L_ERROR, "sprintf failed!");
777f42
+			goto rollback;
777f42
+		} else if ((size_t)ret >= sizeof(buf)) {
777f42
+			xlog(L_ERROR, "sprintf output too long! (%d chars)",
777f42
+				ret);
777f42
+			ret = -EINVAL;
777f42
+			goto rollback;
777f42
+		}
777f42
+
777f42
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+		if (ret != SQLITE_OK) {
777f42
+			xlog(L_ERROR, "Unable to create table for current epoch: %s",
777f42
+				err);
777f42
+			goto rollback;
777f42
+		}
777f42
+	} else {
777f42
+		/* Server restarted while in grace - don't update the epoch
777f42
+		 * values in the grace table, just clear out the records for
777f42
+		 * the current reboot epoch.
777f42
+		 */
777f42
+		ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";",
777f42
+				tcur);
777f42
+		if (ret < 0) {
777f42
+			xlog(L_ERROR, "sprintf failed!");
777f42
+			goto rollback;
777f42
+		} else if ((size_t)ret >= sizeof(buf)) {
777f42
+			xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+			ret = -EINVAL;
777f42
+			goto rollback;
777f42
+		}
777f42
+
777f42
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+		if (ret != SQLITE_OK) {
777f42
+			xlog(L_ERROR, "Unable to clear table for current epoch: %s",
777f42
+				err);
777f42
+			goto rollback;
777f42
+		}
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	/* only publish the new epochs once the database agrees with them */
777f42
+	current_epoch = tcur;
777f42
+	recovery_epoch = trec;
777f42
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
777f42
+		__func__, current_epoch, recovery_epoch);
777f42
+
777f42
+out:
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+rollback:
777f42
+	/*
777f42
+	 * Use a separate message pointer here: err may still hold the message
777f42
+	 * of the statement that failed and is released at out.
777f42
+	 */
777f42
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &rb_err);
777f42
+	if (ret2 != SQLITE_OK)
777f42
+		xlog(L_ERROR, "Unable to rollback transaction: %s", rb_err);
777f42
+	sqlite3_free(rb_err);
777f42
+	goto out;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * End the grace period.
777f42
+ *
777f42
+ * Inside one exclusive transaction, sets the recovery value in the grace
777f42
+ * table to 0 and drops the record table of the recovery epoch. The cached
777f42
+ * recovery_epoch is only cleared once the transaction commits.
777f42
+ *
777f42
+ * Returns SQLITE_OK (aka 0) on success, otherwise an sqlite error code,
777f42
+ * the negative snprintf() result, or -EINVAL when the statement does not
777f42
+ * fit in buf.
777f42
+ */
777f42
+int
777f42
+sqlite_grace_done(void)
777f42
+{
777f42
+	int ret, ret2;
777f42
+	/* sqlite3_exec() may return without writing these, so start at NULL */
777f42
+	char *err = NULL;
777f42
+	char *rb_err = NULL;
777f42
+
777f42
+	/* begin transaction */
777f42
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
777f42
+				&err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
777f42
+		/* no transaction was opened, so there is nothing to roll back */
777f42
+		goto out;
777f42
+	}
777f42
+
777f42
+	/* single quotes: double-quoted text is an identifier in SQL */
777f42
+	ret = sqlite3_exec(dbh, "UPDATE grace SET recovery = '0';",
777f42
+			NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to clear recovery epoch: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	ret = snprintf(buf, sizeof(buf), "DROP TABLE \"rec-%016" PRIx64 "\";",
777f42
+		recovery_epoch);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		goto rollback;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		ret = -EINVAL;
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to drop table for recovery epoch: %s",
777f42
+			err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
777f42
+		goto rollback;
777f42
+	}
777f42
+
777f42
+	recovery_epoch = 0;
777f42
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
777f42
+		__func__, current_epoch, recovery_epoch);
777f42
+
777f42
+out:
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+rollback:
777f42
+	/*
777f42
+	 * Use a separate message pointer here: err may still hold the message
777f42
+	 * of the statement that failed and is released at out.
777f42
+	 */
777f42
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &rb_err);
777f42
+	if (ret2 != SQLITE_OK)
777f42
+		xlog(L_ERROR, "Unable to rollback transaction: %s", rb_err);
777f42
+	sqlite3_free(rb_err);
777f42
+	goto out;
777f42
+}
777f42
+
777f42
+
777f42
+/*
777f42
+ * Call cb(clnt) once for every record in the table for the recovery epoch.
777f42
+ *
777f42
+ * Before each call the message union inside clnt is zeroed and filled
777f42
+ * with the record's id and, when built with UPCALL_VERSION >= 2 and the
777f42
+ * record has one, its principal hash. Records whose stored id or hash is
777f42
+ * larger than the destination field are logged and skipped. The return
777f42
+ * value of cb is ignored.
777f42
+ *
777f42
+ * Returns 0 once all rows have been visited, -EINVAL when recovery_epoch
777f42
+ * is 0 or the statement does not fit in buf, the negative snprintf()
777f42
+ * result, or the sqlite error code of the failing prepare/step.
777f42
+ */
777f42
+int
777f42
+sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt)
777f42
+{
777f42
+	int ret;
777f42
+	sqlite3_stmt *stmt = NULL;
777f42
+#if UPCALL_VERSION >= 2
777f42
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
777f42
+#else
777f42
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
777f42
+#endif
777f42
+
777f42
+	if (recovery_epoch == 0) {
777f42
+		xlog(D_GENERAL, "%s: not in grace!", __func__);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+
777f42
+	ret = snprintf(buf, sizeof(buf), "SELECT * FROM \"rec-%016" PRIx64 "\";",
777f42
+		recovery_epoch);
777f42
+	if (ret < 0) {
777f42
+		xlog(L_ERROR, "sprintf failed!");
777f42
+		return ret;
777f42
+	} else if ((size_t)ret >= sizeof(buf)) {
777f42
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
777f42
+		return -EINVAL;
777f42
+	}
777f42
+
777f42
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "%s: select statement prepare failed: %s",
777f42
+			__func__, sqlite3_errmsg(dbh));
777f42
+		return ret;
777f42
+	}
777f42
+
777f42
+	while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
777f42
+		/* fetch each blob pointer before asking for its size */
777f42
+		const void *id = sqlite3_column_blob(stmt, 0);
777f42
+		int idlen = sqlite3_column_bytes(stmt, 0);
777f42
+#if UPCALL_VERSION >= 2
777f42
+		const void *hash = sqlite3_column_blob(stmt, 1);
777f42
+		int hashlen = sqlite3_column_bytes(stmt, 1);
777f42
+#endif
777f42
+
777f42
+		/*
777f42
+		 * Only the bytes sqlite actually holds may be copied, and they
777f42
+		 * must fit the destination; a record that does not fit is
777f42
+		 * skipped rather than handed to the callback truncated.
777f42
+		 */
777f42
+		if (idlen > NFS4_OPAQUE_LIMIT) {
777f42
+			xlog(L_ERROR, "%s: skipping record with oversized id (%d bytes)",
777f42
+				__func__, idlen);
777f42
+			continue;
777f42
+		}
777f42
+#if UPCALL_VERSION >= 2
777f42
+		if (hashlen > SHA256_DIGEST_SIZE) {
777f42
+			xlog(L_ERROR, "%s: skipping record with oversized princhash (%d bytes)",
777f42
+				__func__, hashlen);
777f42
+			continue;
777f42
+		}
777f42
+#endif
777f42
+
777f42
+		memset(&cmsg->cm_u, 0, sizeof(cmsg->cm_u));
777f42
+#if UPCALL_VERSION >= 2
777f42
+		/* sqlite returns a NULL pointer for a zero-length blob */
777f42
+		if (id != NULL && idlen > 0)
777f42
+			memcpy(&cmsg->cm_u.cm_clntinfo.cc_name.cn_id, id, idlen);
777f42
+		cmsg->cm_u.cm_clntinfo.cc_name.cn_len = idlen;
777f42
+		if (hash != NULL && hashlen > 0) {
777f42
+			memcpy(&cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
777f42
+				hash, hashlen);
777f42
+			cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = hashlen;
777f42
+		}
777f42
+#else
777f42
+		if (id != NULL && idlen > 0)
777f42
+			memcpy(&cmsg->cm_u.cm_name.cn_id, id, idlen);
777f42
+		cmsg->cm_u.cm_name.cn_len = idlen;
777f42
+#endif
777f42
+		cb(clnt);
777f42
+	}
777f42
+	if (ret == SQLITE_DONE)
777f42
+		ret = 0;
777f42
+	sqlite3_finalize(stmt);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Cleans out the old nfsdcltrack database.
777f42
+ *
777f42
+ * Uses the "storagedir" setting of the "nfsdcltrack" config section when
777f42
+ * one is present (it replaces cltrack_storagedir), attaches the database
777f42
+ * found there, deletes every row of its clients table and detaches it
777f42
+ * again.
777f42
+ *
777f42
+ * Called upon receipt of the first "GraceDone" upcall only.
777f42
+ *
777f42
+ * Returns 0 on success, otherwise the result of the failed attach or
777f42
+ * the sqlite error code of the failed delete.
777f42
+ */
777f42
+int
777f42
+sqlite_delete_cltrack_records(void)
777f42
+{
777f42
+	int ret;
777f42
+	char *s;
777f42
+	char *err = NULL;
777f42
+
777f42
+	s = conf_get_str("nfsdcltrack", "storagedir");
777f42
+	if (s)
777f42
+		cltrack_storagedir = s;
777f42
+	ret = sqlite_attach_db(cltrack_storagedir);
777f42
+	if (ret)
777f42
+		goto out;
777f42
+	ret = sqlite3_exec(dbh, "DELETE FROM attached.clients;",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK) {
777f42
+		xlog(L_ERROR, "Unable to clear records from cltrack db: %s",
777f42
+				err);
777f42
+	}
777f42
+	/* detach even when the delete failed; ret keeps the delete's result */
777f42
+	sqlite_detach_db();
777f42
+out:
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+}
777f42
+
777f42
+/*
777f42
+ * Sets first_time to 0 in the parameters table to ensure we only
777f42
+ * copy old client tracking records into the database one time.
777f42
+ *
777f42
+ * Called upon receipt of the first "GraceDone" upcall only.
777f42
+ *
777f42
+ * Returns SQLITE_OK (aka 0) on success, otherwise the sqlite error code.
777f42
+ */
777f42
+int
777f42
+sqlite_first_time_done(void)
777f42
+{
777f42
+	int ret;
777f42
+	char *err = NULL;
777f42
+
777f42
+	/* single quotes: double-quoted text is an identifier in SQL */
777f42
+	ret = sqlite3_exec(dbh, "UPDATE parameters SET value = '0' "
777f42
+				"WHERE key = 'first_time';",
777f42
+				NULL, NULL, &err);
777f42
+	if (ret != SQLITE_OK)
777f42
+		xlog(L_ERROR, "Unable to clear first_time: %s", err);
777f42
+
777f42
+	sqlite3_free(err);
777f42
+	return ret;
777f42
+}
777f42
diff --git a/utils/nfsdcld/sqlite.h b/utils/nfsdcld/sqlite.h
777f42
new file mode 100644
777f42
index 00000000..0a26ad67
777f42
--- /dev/null
777f42
+++ b/utils/nfsdcld/sqlite.h
777f42
@@ -0,0 +1,37 @@
777f42
+/*
777f42
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
777f42
+ *
777f42
+ * This program is free software; you can redistribute it and/or
777f42
+ * modify it under the terms of the GNU General Public License
777f42
+ * as published by the Free Software Foundation; either version 2
777f42
+ * of the License, or (at your option) any later version.
777f42
+ *
777f42
+ * This program is distributed in the hope that it will be useful,
777f42
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
777f42
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
777f42
+ * GNU General Public License for more details.
777f42
+ *
777f42
+ * You should have received a copy of the GNU General Public License
777f42
+ * along with this program; if not, write to the Free Software
777f42
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
777f42
+ * Boston, MA 02110-1301, USA.
777f42
+ */
777f42
+
777f42
+#ifndef _SQLITE_H_
777f42
+#define _SQLITE_H_
777f42
+
777f42
+struct cld_client;
777f42
+/*
777f42
+ * Entry points of the sqlite client-tracking backend.
777f42
+ *
777f42
+ * The insert, remove, check, grace, iterate and cleanup functions below
777f42
+ * return 0 on success and a non-zero value on failure (an sqlite error
777f42
+ * code or a negative errno-style value).
777f42
+ * NOTE(review): presumably sqlite_prepare_dbh() follows the same
777f42
+ * convention - confirm against its definition.
777f42
+ */
777f42
+int sqlite_prepare_dbh(const char *topdir);
777f42
+int sqlite_insert_client(const unsigned char *clname, const size_t namelen);
777f42
+int sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
777f42
+		const unsigned char *clprinchash, const size_t princhashlen);
777f42
+int sqlite_remove_client(const unsigned char *clname, const size_t namelen);
777f42
+int sqlite_check_client(const unsigned char *clname, const size_t namelen);
777f42
+int sqlite_grace_start(void);
777f42
+int sqlite_grace_done(void);
777f42
+int sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt);
777f42
+int sqlite_delete_cltrack_records(void);
777f42
+int sqlite_first_time_done(void);
777f42
+
777f42
+#endif /* _SQLITE_H_ */
777f42
diff --git a/utils/nfsidmap/nfsidmap.c b/utils/nfsidmap/nfsidmap.c
777f42
index d3967a3a..4d219ef5 100644
777f42
--- a/utils/nfsidmap/nfsidmap.c
777f42
+++ b/utils/nfsidmap/nfsidmap.c
777f42
@@ -18,7 +18,7 @@
777f42
 #include "xcommon.h"
777f42
 
777f42
 int verbose = 0;
777f42
-char *usage = "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]";
777f42
+#define USAGE "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]"
777f42
 
777f42
 #define MAX_ID_LEN   11
777f42
 #define IDMAP_NAMESZ 128
777f42
@@ -401,7 +401,7 @@ int main(int argc, char **argv)
777f42
 			break;
777f42
 		case 'h':
777f42
 		default:
777f42
-			xlog_warn(usage, progname);
777f42
+			xlog_warn(USAGE, progname);
777f42
 			exit(opt == 'h' ? 0 : 1);
777f42
 		}
777f42
 	}
777f42
@@ -433,7 +433,7 @@ int main(int argc, char **argv)
777f42
 	xlog_stderr(verbose);
777f42
 	if ((argc - optind) != 2) {
777f42
 		xlog_warn("Bad arg count. Check /etc/request-key.conf");
777f42
-		xlog_warn(usage, progname);
777f42
+		xlog_warn(USAGE, progname);
777f42
 		return EXIT_FAILURE;
777f42
 	}
777f42
 
777f42
@@ -451,7 +451,7 @@ int main(int argc, char **argv)
777f42
 		return EXIT_FAILURE;
777f42
 	}
777f42
 	if (verbose) {
777f42
-		xlog_warn("key: 0x%lx type: %s value: %s timeout %ld",
777f42
+		xlog_warn("key: 0x%x type: %s value: %s timeout %d",
777f42
 			key, type, value, timeout);
777f42
 	}
777f42
 
777f42
diff --git a/utils/statd/rmtcall.c b/utils/statd/rmtcall.c
777f42
index c4f6364f..5b261480 100644
777f42
--- a/utils/statd/rmtcall.c
777f42
+++ b/utils/statd/rmtcall.c
777f42
@@ -247,7 +247,7 @@ process_reply(FD_SET_TYPE *rfds)
777f42
 		xlog_warn("%s: service %d not registered on localhost",
777f42
 			__func__, NL_MY_PROG(lp));
777f42
 	} else {
777f42
-		xlog(D_GENERAL, "%s: Callback to %s (for %d) succeeded",
777f42
+		xlog(D_GENERAL, "%s: Callback to %s (for %s) succeeded",
777f42
 			__func__, NL_MY_NAME(lp), NL_MON_NAME(lp));
777f42
 	}
777f42
 	nlist_free(&notify, lp);
777f42
diff --git a/utils/statd/statd.c b/utils/statd/statd.c
777f42
index 14673800..8eef2ff2 100644
777f42
--- a/utils/statd/statd.c
777f42
+++ b/utils/statd/statd.c
777f42
@@ -136,7 +136,7 @@ static void log_modes(void)
777f42
 	strcat(buf, "TI-RPC ");
777f42
 #endif
777f42
 
777f42
-	xlog_warn(buf);
777f42
+	xlog_warn("%s", buf);
777f42
 }
777f42
 
777f42
 /*
777f42
diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c
777f42
index d1dbd74a..e343c768 100644
777f42
--- a/utils/statd/svc_run.c
777f42
+++ b/utils/statd/svc_run.c
777f42
@@ -53,6 +53,7 @@
777f42
 
777f42
 #include <errno.h>
777f42
 #include <time.h>
777f42
+#include <inttypes.h>
777f42
 #include "statd.h"
777f42
 #include "notlist.h"
777f42
 
777f42
@@ -104,8 +105,8 @@ my_svc_run(int sockfd)
777f42
 
777f42
 			tv.tv_sec  = NL_WHEN(notify) - now;
777f42
 			tv.tv_usec = 0;
777f42
-			xlog(D_GENERAL, "Waiting for reply... (timeo %d)",
777f42
-							tv.tv_sec);
777f42
+			xlog(D_GENERAL, "Waiting for reply... (timeo %jd)",
777f42
+							(intmax_t)tv.tv_sec);
777f42
 			selret = select(FD_SETSIZE, &readfds,
777f42
 				(void *) 0, (void *) 0, &tv;;
777f42
 		} else {