Blame SOURCES/nfs-utils-2.3.3-nfsdcld-upstream-update.patch

cd1e0c
diff --git a/.gitignore b/.gitignore
cd1e0c
index e91e7a25..e97b31f5 100644
cd1e0c
--- a/.gitignore
cd1e0c
+++ b/.gitignore
cd1e0c
@@ -54,6 +54,7 @@ utils/rquotad/rquotad
cd1e0c
 utils/rquotad/rquota.h
cd1e0c
 utils/rquotad/rquota_xdr.c
cd1e0c
 utils/showmount/showmount
cd1e0c
+utils/nfsdcld/nfsdcld
cd1e0c
 utils/nfsdcltrack/nfsdcltrack
cd1e0c
 utils/statd/statd
cd1e0c
 tools/locktest/testlk
cd1e0c
diff --git a/aclocal/ax_gcc_func_attribute.m4 b/aclocal/ax_gcc_func_attribute.m4
cd1e0c
new file mode 100644
cd1e0c
index 00000000..098c9aad
cd1e0c
--- /dev/null
cd1e0c
+++ b/aclocal/ax_gcc_func_attribute.m4
cd1e0c
@@ -0,0 +1,238 @@
cd1e0c
+# ===========================================================================
cd1e0c
+#  https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html
cd1e0c
+# ===========================================================================
cd1e0c
+#
cd1e0c
+# SYNOPSIS
cd1e0c
+#
cd1e0c
+#   AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
cd1e0c
+#
cd1e0c
+# DESCRIPTION
cd1e0c
+#
cd1e0c
+#   This macro checks if the compiler supports one of GCC's function
cd1e0c
+#   attributes; many other compilers also provide function attributes with
cd1e0c
+#   the same syntax. Compiler warnings are used to detect supported
cd1e0c
+#   attributes as unsupported ones are ignored by default so quieting
cd1e0c
+#   warnings when using this macro will yield false positives.
cd1e0c
+#
cd1e0c
+#   The ATTRIBUTE parameter holds the name of the attribute to be checked.
cd1e0c
+#
cd1e0c
+#   If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE>.
cd1e0c
+#
cd1e0c
+#   The macro caches its result in the ax_cv_have_func_attribute_<attribute>
cd1e0c
+#   variable.
cd1e0c
+#
cd1e0c
+#   The macro currently supports the following function attributes:
cd1e0c
+#
cd1e0c
+#    alias
cd1e0c
+#    aligned
cd1e0c
+#    alloc_size
cd1e0c
+#    always_inline
cd1e0c
+#    artificial
cd1e0c
+#    cold
cd1e0c
+#    const
cd1e0c
+#    constructor
cd1e0c
+#    constructor_priority for constructor attribute with priority
cd1e0c
+#    deprecated
cd1e0c
+#    destructor
cd1e0c
+#    dllexport
cd1e0c
+#    dllimport
cd1e0c
+#    error
cd1e0c
+#    externally_visible
cd1e0c
+#    fallthrough
cd1e0c
+#    flatten
cd1e0c
+#    format
cd1e0c
+#    format_arg
cd1e0c
+#    gnu_inline
cd1e0c
+#    hot
cd1e0c
+#    ifunc
cd1e0c
+#    leaf
cd1e0c
+#    malloc
cd1e0c
+#    noclone
cd1e0c
+#    noinline
cd1e0c
+#    nonnull
cd1e0c
+#    noreturn
cd1e0c
+#    nothrow
cd1e0c
+#    optimize
cd1e0c
+#    pure
cd1e0c
+#    sentinel
cd1e0c
+#    sentinel_position
cd1e0c
+#    unused
cd1e0c
+#    used
cd1e0c
+#    visibility
cd1e0c
+#    warning
cd1e0c
+#    warn_unused_result
cd1e0c
+#    weak
cd1e0c
+#    weakref
cd1e0c
+#
cd1e0c
+#   Unsupported function attributes will be tested with a prototype
cd1e0c
+#   returning an int and not accepting any arguments and the result of the
cd1e0c
+#   check might be wrong or meaningless so use with care.
cd1e0c
+#
cd1e0c
+# LICENSE
cd1e0c
+#
cd1e0c
+#   Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
cd1e0c
+#
cd1e0c
+#   Copying and distribution of this file, with or without modification, are
cd1e0c
+#   permitted in any medium without royalty provided the copyright notice
cd1e0c
+#   and this notice are preserved.  This file is offered as-is, without any
cd1e0c
+#   warranty.
cd1e0c
+
cd1e0c
+#serial 9
cd1e0c
+
cd1e0c
+AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [
cd1e0c
+    AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1])
cd1e0c
+
cd1e0c
+    AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [
cd1e0c
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([
cd1e0c
+            m4_case([$1],
cd1e0c
+                [alias], [
cd1e0c
+                    int foo( void ) { return 0; }
cd1e0c
+                    int bar( void ) __attribute__(($1("foo")));
cd1e0c
+                ],
cd1e0c
+                [aligned], [
cd1e0c
+                    int foo( void ) __attribute__(($1(32)));
cd1e0c
+                ],
cd1e0c
+                [alloc_size], [
cd1e0c
+                    void *foo(int a) __attribute__(($1(1)));
cd1e0c
+                ],
cd1e0c
+                [always_inline], [
cd1e0c
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
cd1e0c
+                ],
cd1e0c
+                [artificial], [
cd1e0c
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
cd1e0c
+                ],
cd1e0c
+                [cold], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [const], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [constructor_priority], [
cd1e0c
+                    int foo( void ) __attribute__((__constructor__(65535/2)));
cd1e0c
+                ],
cd1e0c
+                [constructor], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [deprecated], [
cd1e0c
+                    int foo( void ) __attribute__(($1("")));
cd1e0c
+                ],
cd1e0c
+                [destructor], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [dllexport], [
cd1e0c
+                    __attribute__(($1)) int foo( void ) { return 0; }
cd1e0c
+                ],
cd1e0c
+                [dllimport], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [error], [
cd1e0c
+                    int foo( void ) __attribute__(($1("")));
cd1e0c
+                ],
cd1e0c
+                [externally_visible], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [fallthrough], [
cd1e0c
+                    int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }};
cd1e0c
+                ],
cd1e0c
+                [flatten], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [format], [
cd1e0c
+                    int foo(const char *p, ...) __attribute__(($1(printf, 1, 2)));
cd1e0c
+                ],
cd1e0c
+                [format_arg], [
cd1e0c
+                    char *foo(const char *p) __attribute__(($1(1)));
cd1e0c
+                ],
cd1e0c
+                [gnu_inline], [
cd1e0c
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
cd1e0c
+                ],
cd1e0c
+                [hot], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [ifunc], [
cd1e0c
+                    int my_foo( void ) { return 0; }
cd1e0c
+                    static int (*resolve_foo(void))(void) { return my_foo; }
cd1e0c
+                    int foo( void ) __attribute__(($1("resolve_foo")));
cd1e0c
+                ],
cd1e0c
+                [leaf], [
cd1e0c
+                    __attribute__(($1)) int foo( void ) { return 0; }
cd1e0c
+                ],
cd1e0c
+                [malloc], [
cd1e0c
+                    void *foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [noclone], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [noinline], [
cd1e0c
+                    __attribute__(($1)) int foo( void ) { return 0; }
cd1e0c
+                ],
cd1e0c
+                [nonnull], [
cd1e0c
+                    int foo(char *p) __attribute__(($1(1)));
cd1e0c
+                ],
cd1e0c
+                [noreturn], [
cd1e0c
+                    void foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [nothrow], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [optimize], [
cd1e0c
+                    __attribute__(($1(3))) int foo( void ) { return 0; }
cd1e0c
+                ],
cd1e0c
+                [pure], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [sentinel], [
cd1e0c
+                    int foo(void *p, ...) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [sentinel_position], [
cd1e0c
+                    int foo(void *p, ...) __attribute__(($1(1)));
cd1e0c
+                ],
cd1e0c
+                [returns_nonnull], [
cd1e0c
+                    void *foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [unused], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [used], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [visibility], [
cd1e0c
+                    int foo_def( void ) __attribute__(($1("default")));
cd1e0c
+                    int foo_hid( void ) __attribute__(($1("hidden")));
cd1e0c
+                    int foo_int( void ) __attribute__(($1("internal")));
cd1e0c
+                    int foo_pro( void ) __attribute__(($1("protected")));
cd1e0c
+                ],
cd1e0c
+                [warning], [
cd1e0c
+                    int foo( void ) __attribute__(($1("")));
cd1e0c
+                ],
cd1e0c
+                [warn_unused_result], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [weak], [
cd1e0c
+                    int foo( void ) __attribute__(($1));
cd1e0c
+                ],
cd1e0c
+                [weakref], [
cd1e0c
+                    static int foo( void ) { return 0; }
cd1e0c
+                    static int bar( void ) __attribute__(($1("foo")));
cd1e0c
+                ],
cd1e0c
+                [
cd1e0c
+                 m4_warn([syntax], [Unsupported attribute $1, the test may fail])
cd1e0c
+                 int foo( void ) __attribute__(($1));
cd1e0c
+                ]
cd1e0c
+            )], [])
cd1e0c
+            ],
cd1e0c
+            dnl GCC doesn't exit with an error if an unknown attribute is
cd1e0c
+            dnl provided but only outputs a warning, so accept the attribute
cd1e0c
+            dnl only if no warnings were issued.
cd1e0c
+            [AS_IF([test -s conftest.err],
cd1e0c
+                [AS_VAR_SET([ac_var], [no])],
cd1e0c
+                [AS_VAR_SET([ac_var], [yes])])],
cd1e0c
+            [AS_VAR_SET([ac_var], [no])])
cd1e0c
+    ])
cd1e0c
+
cd1e0c
+    AS_IF([test yes = AS_VAR_GET([ac_var])],
cd1e0c
+        [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1,
cd1e0c
+            [Define to 1 if the system has the `$1' function attribute])], [])
cd1e0c
+
cd1e0c
+    AS_VAR_POPDEF([ac_var])
cd1e0c
+])
cd1e0c
diff --git a/configure.ac b/configure.ac
cd1e0c
index 48eb9eb6..13ea957f 100644
cd1e0c
--- a/configure.ac
cd1e0c
+++ b/configure.ac
cd1e0c
@@ -238,6 +238,12 @@ else
cd1e0c
 	AM_CONDITIONAL(MOUNT_CONFIG, [test "$enable_mount" = "yes"])
cd1e0c
 fi
cd1e0c
 
cd1e0c
+AC_ARG_ENABLE(nfsdcld,
cd1e0c
+	[AC_HELP_STRING([--disable-nfsdcld],
cd1e0c
+			[disable NFSv4 clientid tracking daemon @<:@default=no@:>@])],
cd1e0c
+	enable_nfsdcld=$enableval,
cd1e0c
+	enable_nfsdcld="yes")
cd1e0c
+
cd1e0c
 AC_ARG_ENABLE(nfsdcltrack,
cd1e0c
 	[AC_HELP_STRING([--disable-nfsdcltrack],
cd1e0c
 			[disable NFSv4 clientid tracking programs @<:@default=no@:>@])],
cd1e0c
@@ -317,6 +323,20 @@ if test "$enable_nfsv4" = yes; then
cd1e0c
   dnl Check for sqlite3
cd1e0c
   AC_SQLITE3_VERS
cd1e0c
 
cd1e0c
+  if test "$enable_nfsdcld" = "yes"; then
cd1e0c
+	AC_CHECK_HEADERS([libgen.h sys/inotify.h], ,
cd1e0c
+		AC_MSG_ERROR([Cannot find header needed for nfsdcld]))
cd1e0c
+
cd1e0c
+    case $libsqlite3_cv_is_recent in
cd1e0c
+    yes) ;;
cd1e0c
+    unknown)
cd1e0c
+      dnl do not fail when cross-compiling
cd1e0c
+      AC_MSG_WARN([assuming sqlite is at least v3.3]) ;;
cd1e0c
+    *)
cd1e0c
+      AC_MSG_ERROR([nfsdcld requires sqlite-devel]) ;;
cd1e0c
+    esac
cd1e0c
+  fi
cd1e0c
+
cd1e0c
   if test "$enable_nfsdcltrack" = "yes"; then
cd1e0c
 	AC_CHECK_HEADERS([libgen.h sys/inotify.h], ,
cd1e0c
 		AC_MSG_ERROR([Cannot find header needed for nfsdcltrack]))
cd1e0c
@@ -332,6 +352,7 @@ if test "$enable_nfsv4" = yes; then
cd1e0c
   fi
cd1e0c
 
cd1e0c
 else
cd1e0c
+  enable_nfsdcld="no"
cd1e0c
   enable_nfsdcltrack="no"
cd1e0c
 fi
cd1e0c
 
cd1e0c
@@ -342,6 +363,7 @@ if test "$enable_nfsv41" = yes; then
cd1e0c
 fi
cd1e0c
 
cd1e0c
 dnl enable nfsidmap when its support by libnfsidmap
cd1e0c
+AM_CONDITIONAL(CONFIG_NFSDCLD, [test "$enable_nfsdcld" = "yes" ])
cd1e0c
 AM_CONDITIONAL(CONFIG_NFSDCLTRACK, [test "$enable_nfsdcltrack" = "yes" ])
cd1e0c
 
cd1e0c
 
cd1e0c
@@ -581,6 +603,7 @@ CHECK_CCSUPPORT([-Werror=format-overflow=2], [flg1])
cd1e0c
 CHECK_CCSUPPORT([-Werror=int-conversion], [flg2])
cd1e0c
 CHECK_CCSUPPORT([-Werror=incompatible-pointer-types], [flg3])
cd1e0c
 CHECK_CCSUPPORT([-Werror=misleading-indentation], [flg4])
cd1e0c
+AX_GCC_FUNC_ATTRIBUTE([format])
cd1e0c
 
cd1e0c
 AC_SUBST([AM_CFLAGS], ["$my_am_cflags $flg1 $flg2 $flg3 $flg4"])
cd1e0c
 
cd1e0c
@@ -617,8 +640,10 @@ AC_CONFIG_FILES([
cd1e0c
 	tools/mountstats/Makefile
cd1e0c
 	tools/nfs-iostat/Makefile
cd1e0c
 	tools/nfsconf/Makefile
cd1e0c
+	tools/clddb-tool/Makefile
cd1e0c
 	utils/Makefile
cd1e0c
 	utils/blkmapd/Makefile
cd1e0c
+	utils/nfsdcld/Makefile
cd1e0c
 	utils/nfsdcltrack/Makefile
cd1e0c
 	utils/exportfs/Makefile
cd1e0c
 	utils/gssd/Makefile
cd1e0c
diff --git a/nfs.conf b/nfs.conf
cd1e0c
index d48a4e55..56172c49 100644
cd1e0c
--- a/nfs.conf
cd1e0c
+++ b/nfs.conf
cd1e0c
@@ -36,6 +36,10 @@ use-gss-proxy=1
cd1e0c
 # state-directory-path=/var/lib/nfs
cd1e0c
 # ha-callout=
cd1e0c
 #
cd1e0c
+[nfsdcld]
cd1e0c
+# debug=0
cd1e0c
+# storagedir=/var/lib/nfs/nfsdcld
cd1e0c
+#
cd1e0c
 [nfsdcltrack]
cd1e0c
 # debug=0
cd1e0c
 # storagedir=/var/lib/nfs/nfsdcltrack
cd1e0c
diff --git a/support/include/cld.h b/support/include/cld.h
cd1e0c
index f14a9ab0..88d3b63e 100644
cd1e0c
--- a/support/include/cld.h
cd1e0c
+++ b/support/include/cld.h
cd1e0c
@@ -23,16 +23,22 @@
cd1e0c
 #define _NFSD_CLD_H
cd1e0c
 
cd1e0c
 /* latest upcall version available */
cd1e0c
-#define CLD_UPCALL_VERSION 1
cd1e0c
+#define CLD_UPCALL_VERSION 2
cd1e0c
 
cd1e0c
 /* defined by RFC3530 */
cd1e0c
 #define NFS4_OPAQUE_LIMIT 1024
cd1e0c
 
cd1e0c
+#ifndef SHA256_DIGEST_SIZE
cd1e0c
+#define SHA256_DIGEST_SIZE      32
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 enum cld_command {
cd1e0c
 	Cld_Create,		/* create a record for this cm_id */
cd1e0c
 	Cld_Remove,		/* remove record of this cm_id */
cd1e0c
 	Cld_Check,		/* is this cm_id allowed? */
cd1e0c
 	Cld_GraceDone,		/* grace period is complete */
cd1e0c
+	Cld_GraceStart,		/* grace start (upload client records) */
cd1e0c
+	Cld_GetVersion,		/* query max supported upcall version */
cd1e0c
 };
cd1e0c
 
cd1e0c
 /* representation of long-form NFSv4 client ID */
cd1e0c
@@ -41,6 +47,17 @@ struct cld_name {
cd1e0c
 	unsigned char	cn_id[NFS4_OPAQUE_LIMIT];	/* client-provided */
cd1e0c
 } __attribute__((packed));
cd1e0c
 
cd1e0c
+/* sha256 hash of the kerberos principal */
cd1e0c
+struct cld_princhash {
cd1e0c
+	uint8_t		cp_len;				/* length of cp_data */
cd1e0c
+	unsigned char	cp_data[SHA256_DIGEST_SIZE];	/* hash of principal */
cd1e0c
+} __attribute__((packed));
cd1e0c
+
cd1e0c
+struct cld_clntinfo {
cd1e0c
+	struct cld_name		cc_name;
cd1e0c
+	struct cld_princhash	cc_princhash;
cd1e0c
+} __attribute__((packed));
cd1e0c
+
cd1e0c
 /* message struct for communication with userspace */
cd1e0c
 struct cld_msg {
cd1e0c
 	uint8_t		cm_vers;		/* upcall version */
cd1e0c
@@ -50,7 +67,28 @@ struct cld_msg {
cd1e0c
 	union {
cd1e0c
 		int64_t		cm_gracetime;	/* grace period start time */
cd1e0c
 		struct cld_name	cm_name;
cd1e0c
+		uint8_t		cm_version;	/* for getting max version */
cd1e0c
+	} __attribute__((packed)) cm_u;
cd1e0c
+} __attribute__((packed));
cd1e0c
+
cd1e0c
+/* version 2 message can include hash of kerberos principal */
cd1e0c
+struct cld_msg_v2 {
cd1e0c
+	uint8_t		cm_vers;		/* upcall version */
cd1e0c
+	uint8_t		cm_cmd;			/* upcall command */
cd1e0c
+	int16_t		cm_status;		/* return code */
cd1e0c
+	uint32_t	cm_xid;			/* transaction id */
cd1e0c
+	union {
cd1e0c
+		struct cld_name	cm_name;
cd1e0c
+		uint8_t		cm_version;	/* for getting max version */
cd1e0c
+		struct cld_clntinfo cm_clntinfo; /* name & princ hash */
cd1e0c
 	} __attribute__((packed)) cm_u;
cd1e0c
 } __attribute__((packed));
cd1e0c
 
cd1e0c
+struct cld_msg_hdr {
cd1e0c
+	uint8_t		cm_vers;		/* upcall version */
cd1e0c
+	uint8_t		cm_cmd;			/* upcall command */
cd1e0c
+	int16_t		cm_status;		/* return code */
cd1e0c
+	uint32_t	cm_xid;			/* transaction id */
cd1e0c
+} __attribute__((packed));
cd1e0c
+
cd1e0c
 #endif /* !_NFSD_CLD_H */
cd1e0c
diff --git a/support/include/xcommon.h b/support/include/xcommon.h
cd1e0c
index 23c9a135..30b0403b 100644
cd1e0c
--- a/support/include/xcommon.h
cd1e0c
+++ b/support/include/xcommon.h
cd1e0c
@@ -9,6 +9,10 @@
cd1e0c
 #ifndef _XMALLOC_H
cd1e0c
 #define _MALLOC_H
cd1e0c
 
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include <config.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 #include <sys/types.h>
cd1e0c
 #include <fcntl.h>
cd1e0c
 #include <limits.h>
cd1e0c
@@ -25,9 +29,15 @@
cd1e0c
 
cd1e0c
 #define streq(s, t)	(strcmp ((s), (t)) == 0)
cd1e0c
 
cd1e0c
-/* Functions in sundries.c that are used in mount.c and umount.c  */ 
cd1e0c
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
cd1e0c
+#define X_FORMAT(_x) __attribute__((__format__ _x))
cd1e0c
+#else
cd1e0c
+#define X_FORMAT(_x)
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+/* Functions in sundries.c that are used in mount.c and umount.c  */
cd1e0c
 char *canonicalize (const char *path);
cd1e0c
-void nfs_error (const char *fmt, ...);
cd1e0c
+void nfs_error (const char *fmt, ...) X_FORMAT((printf, 1, 2));
cd1e0c
 void *xmalloc (size_t size);
cd1e0c
 void *xrealloc(void *p, size_t size);
cd1e0c
 void xfree(void *);
cd1e0c
@@ -36,9 +46,9 @@ char *xstrndup (const char *s, int n);
cd1e0c
 char *xstrconcat2 (const char *, const char *);
cd1e0c
 char *xstrconcat3 (const char *, const char *, const char *);
cd1e0c
 char *xstrconcat4 (const char *, const char *, const char *, const char *);
cd1e0c
-void die (int errcode, const char *fmt, ...);
cd1e0c
+void die (int errcode, const char *fmt, ...) X_FORMAT((printf, 2, 3));
cd1e0c
 
cd1e0c
-extern void die(int err, const char *fmt, ...);
cd1e0c
+extern void die(int err, const char *fmt, ...) X_FORMAT((printf, 2, 3));
cd1e0c
 extern void (*at_die)(void);
cd1e0c
 
cd1e0c
 /* exit status - bits below are ORed */
cd1e0c
diff --git a/support/include/xlog.h b/support/include/xlog.h
cd1e0c
index a11463ed..32ff5a1b 100644
cd1e0c
--- a/support/include/xlog.h
cd1e0c
+++ b/support/include/xlog.h
cd1e0c
@@ -7,6 +7,10 @@
cd1e0c
 #ifndef XLOG_H
cd1e0c
 #define XLOG_H
cd1e0c
 
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include <config.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 #include <stdarg.h>
cd1e0c
 
cd1e0c
 /* These are logged always. L_FATAL also does exit(1) */
cd1e0c
@@ -35,6 +39,12 @@ struct xlog_debugfac {
cd1e0c
 	int		df_fac;
cd1e0c
 };
cd1e0c
 
cd1e0c
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
cd1e0c
+#define XLOG_FORMAT(_x) __attribute__((__format__ _x))
cd1e0c
+#else
cd1e0c
+#define XLOG_FORMAT(_x)
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 extern int export_errno;
cd1e0c
 void			xlog_open(char *progname);
cd1e0c
 void			xlog_stderr(int on);
cd1e0c
@@ -43,10 +53,10 @@ void			xlog_config(int fac, int on);
cd1e0c
 void			xlog_sconfig(char *, int on);
cd1e0c
 void			xlog_from_conffile(char *);
cd1e0c
 int			xlog_enabled(int fac);
cd1e0c
-void			xlog(int fac, const char *fmt, ...);
cd1e0c
-void			xlog_warn(const char *fmt, ...);
cd1e0c
-void			xlog_err(const char *fmt, ...);
cd1e0c
-void			xlog_errno(int err, const char *fmt, ...);
cd1e0c
-void			xlog_backend(int fac, const char *fmt, va_list args);
cd1e0c
+void			xlog(int fac, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3));
cd1e0c
+void			xlog_warn(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2));
cd1e0c
+void			xlog_err(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2));
cd1e0c
+void			xlog_errno(int err, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3));
cd1e0c
+void			xlog_backend(int fac, const char *fmt, va_list args) XLOG_FORMAT((printf, 2, 0));
cd1e0c
 
cd1e0c
 #endif /* XLOG_H */
cd1e0c
diff --git a/support/junction/junction.c b/support/junction/junction.c
cd1e0c
index ab6caa61..41cce261 100644
cd1e0c
--- a/support/junction/junction.c
cd1e0c
+++ b/support/junction/junction.c
cd1e0c
@@ -23,6 +23,10 @@
cd1e0c
  *	http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
cd1e0c
  */
cd1e0c
 
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include <config.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 #include <sys/types.h>
cd1e0c
 #include <sys/stat.h>
cd1e0c
 
cd1e0c
diff --git a/support/misc/file.c b/support/misc/file.c
cd1e0c
index 4065376e..74973169 100644
cd1e0c
--- a/support/misc/file.c
cd1e0c
+++ b/support/misc/file.c
cd1e0c
@@ -18,6 +18,10 @@
cd1e0c
  * along with nfs-utils.  If not, see <http://www.gnu.org/licenses/>.
cd1e0c
  */
cd1e0c
 
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include <config.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 #include <sys/stat.h>
cd1e0c
 
cd1e0c
 #include <string.h>
cd1e0c
diff --git a/support/misc/mountpoint.c b/support/misc/mountpoint.c
cd1e0c
index 9f9ce44e..4205b41c 100644
cd1e0c
--- a/support/misc/mountpoint.c
cd1e0c
+++ b/support/misc/mountpoint.c
cd1e0c
@@ -3,6 +3,10 @@
cd1e0c
  * check if a given path is a mountpoint 
cd1e0c
  */
cd1e0c
 
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include <config.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 #include <string.h>
cd1e0c
 #include "xcommon.h"
cd1e0c
 #include <sys/stat.h>
cd1e0c
diff --git a/support/nfs/cacheio.c b/support/nfs/cacheio.c
cd1e0c
index 9dc4cf1c..7c4cf373 100644
cd1e0c
--- a/support/nfs/cacheio.c
cd1e0c
+++ b/support/nfs/cacheio.c
cd1e0c
@@ -15,6 +15,10 @@
cd1e0c
  *
cd1e0c
  */
cd1e0c
 
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include <config.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 #include <nfslib.h>
cd1e0c
 #include <stdio.h>
cd1e0c
 #include <stdio_ext.h>
cd1e0c
diff --git a/support/nfs/svc_create.c b/support/nfs/svc_create.c
cd1e0c
index ef7ff05f..7b595f89 100644
cd1e0c
--- a/support/nfs/svc_create.c
cd1e0c
+++ b/support/nfs/svc_create.c
cd1e0c
@@ -184,7 +184,7 @@ svc_create_sock(const struct sockaddr *sap, socklen_t salen,
cd1e0c
 		type = SOCK_STREAM;
cd1e0c
 		break;
cd1e0c
 	default:
cd1e0c
-		xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %u",
cd1e0c
+		xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %lu",
cd1e0c
 			__func__, nconf->nc_semantics);
cd1e0c
 		return -1;
cd1e0c
 	}
cd1e0c
diff --git a/support/nsm/rpc.c b/support/nsm/rpc.c
cd1e0c
index ae49006c..08b4746f 100644
cd1e0c
--- a/support/nsm/rpc.c
cd1e0c
+++ b/support/nsm/rpc.c
cd1e0c
@@ -182,7 +182,7 @@ nsm_xmit_getport(const int sock, const struct sockaddr_in *sin,
cd1e0c
 	uint32_t xid;
cd1e0c
 	XDR xdr;
cd1e0c
 
cd1e0c
-	xlog(D_CALL, "Sending PMAP_GETPORT for %u, %u, udp", program, version);
cd1e0c
+	xlog(D_CALL, "Sending PMAP_GETPORT for %lu, %lu, udp", program, version);
cd1e0c
 
cd1e0c
 	nsm_init_xdrmem(msgbuf, NSM_MAXMSGSIZE, &xdr);
cd1e0c
 	xid = nsm_init_rpc_header(PMAPPROG, PMAPVERS,
cd1e0c
diff --git a/systemd/Makefile.am b/systemd/Makefile.am
cd1e0c
index d54518bc..53458c62 100644
cd1e0c
--- a/systemd/Makefile.am
cd1e0c
+++ b/systemd/Makefile.am
cd1e0c
@@ -36,6 +36,11 @@ unit_files += \
cd1e0c
 endif
cd1e0c
 endif
cd1e0c
 
cd1e0c
+if CONFIG_NFSDCLD
cd1e0c
+unit_files += \
cd1e0c
+    nfsdcld.service
cd1e0c
+endif
cd1e0c
+
cd1e0c
 man5_MANS	= nfs.conf.man
cd1e0c
 man7_MANS	= nfs.systemd.man
cd1e0c
 EXTRA_DIST = $(unit_files) $(man5_MANS) $(man7_MANS)
cd1e0c
diff --git a/systemd/nfs-server.service b/systemd/nfs-server.service
cd1e0c
index 136552b5..24118d69 100644
cd1e0c
--- a/systemd/nfs-server.service
cd1e0c
+++ b/systemd/nfs-server.service
cd1e0c
@@ -6,10 +6,12 @@ Requires= nfs-mountd.service
cd1e0c
 Wants=rpcbind.socket network-online.target
cd1e0c
 Wants=rpc-statd.service nfs-idmapd.service
cd1e0c
 Wants=rpc-statd-notify.service
cd1e0c
+Wants=nfsdcld.service
cd1e0c
 
cd1e0c
 After= network-online.target local-fs.target
cd1e0c
 After= proc-fs-nfsd.mount rpcbind.socket nfs-mountd.service
cd1e0c
 After= nfs-idmapd.service rpc-statd.service
cd1e0c
+After= nfsdcld.service
cd1e0c
 Before= rpc-statd-notify.service
cd1e0c
 
cd1e0c
 # GSS services dependencies and ordering
cd1e0c
diff --git a/systemd/nfsdcld.service b/systemd/nfsdcld.service
cd1e0c
new file mode 100644
cd1e0c
index 00000000..a32d2430
cd1e0c
--- /dev/null
cd1e0c
+++ b/systemd/nfsdcld.service
cd1e0c
@@ -0,0 +1,10 @@
cd1e0c
+[Unit]
cd1e0c
+Description=NFSv4 Client Tracking Daemon
cd1e0c
+DefaultDependencies=no
cd1e0c
+Conflicts=umount.target
cd1e0c
+Requires=rpc_pipefs.target proc-fs-nfsd.mount
cd1e0c
+After=rpc_pipefs.target proc-fs-nfsd.mount
cd1e0c
+
cd1e0c
+[Service]
cd1e0c
+Type=forking
cd1e0c
+ExecStart=/usr/sbin/nfsdcld
cd1e0c
diff --git a/tools/Makefile.am b/tools/Makefile.am
cd1e0c
index 4266da49..53e61170 100644
cd1e0c
--- a/tools/Makefile.am
cd1e0c
+++ b/tools/Makefile.am
cd1e0c
@@ -8,6 +8,10 @@ endif
cd1e0c
 
cd1e0c
 OPTDIRS += nfsconf
cd1e0c
 
cd1e0c
+if CONFIG_NFSDCLD
cd1e0c
+OPTDIRS += clddb-tool
cd1e0c
+endif
cd1e0c
+
cd1e0c
 SUBDIRS = locktest rpcdebug nlmtest mountstats nfs-iostat $(OPTDIRS)
cd1e0c
 
cd1e0c
 MAINTAINERCLEANFILES = Makefile.in
cd1e0c
diff --git a/tools/clddb-tool/Makefile.am b/tools/clddb-tool/Makefile.am
cd1e0c
new file mode 100644
cd1e0c
index 00000000..15a8fd47
cd1e0c
--- /dev/null
cd1e0c
+++ b/tools/clddb-tool/Makefile.am
cd1e0c
@@ -0,0 +1,13 @@
cd1e0c
+## Process this file with automake to produce Makefile.in
cd1e0c
+PYTHON_FILES =  clddb-tool.py
cd1e0c
+
cd1e0c
+man8_MANS	= clddb-tool.man
cd1e0c
+
cd1e0c
+EXTRA_DIST	= $(man8_MANS) $(PYTHON_FILES)
cd1e0c
+
cd1e0c
+all-local: $(PYTHON_FILES)
cd1e0c
+
cd1e0c
+install-data-hook:
cd1e0c
+	$(INSTALL) -m 755 clddb-tool.py $(DESTDIR)$(sbindir)/clddb-tool
cd1e0c
+
cd1e0c
+MAINTAINERCLEANFILES=Makefile.in
cd1e0c
diff --git a/tools/clddb-tool/clddb-tool.man b/tools/clddb-tool/clddb-tool.man
cd1e0c
new file mode 100644
cd1e0c
index 00000000..e80b2c05
cd1e0c
--- /dev/null
cd1e0c
+++ b/tools/clddb-tool/clddb-tool.man
cd1e0c
@@ -0,0 +1,83 @@
cd1e0c
+.\"
cd1e0c
+.\" clddb-tool(8)
cd1e0c
+.\"
cd1e0c
+.TH clddb-tool 8 "07 Aug 2019"
cd1e0c
+.SH NAME
cd1e0c
+clddb-tool \- Tool for manipulating the nfsdcld sqlite database
cd1e0c
+.SH SYNOPSIS
cd1e0c
+.B clddb-tool
cd1e0c
+.RB [ \-h | \-\-help ]
cd1e0c
+.P
cd1e0c
+.B clddb-tool
cd1e0c
+.RB [ \-p | \-\-path
cd1e0c
+.IR dbpath ]
cd1e0c
+.B fix-table-names
cd1e0c
+.RB [ \-h | \-\-help ]
cd1e0c
+.P
cd1e0c
+.B clddb-tool
cd1e0c
+.RB [ \-p | \-\-path
cd1e0c
+.IR dbpath ]
cd1e0c
+.B downgrade-schema
cd1e0c
+.RB [ \-h | \-\-help ]
cd1e0c
+.RB [ \-v | \-\-version
cd1e0c
+.IR to-version ]
cd1e0c
+.P
cd1e0c
+.B clddb-tool
cd1e0c
+.RB [ \-p | \-\-path
cd1e0c
+.IR dbpath ]
cd1e0c
+.B print
cd1e0c
+.RB [ \-h | \-\-help ]
cd1e0c
+.RB [ \-s | \-\-summary ]
cd1e0c
+.P
cd1e0c
+
cd1e0c
+.SH DESCRIPTION
cd1e0c
+.RB "The " clddb-tool " command is provided to perform some manipulation of the nfsdcld sqlite database schema and to print the contents of the database."
cd1e0c
+.SS Sub-commands
cd1e0c
+Valid
cd1e0c
+.B clddb-tool
cd1e0c
+subcommands are:
cd1e0c
+.IP "\fBfix-table-names\fP"
cd1e0c
+.RB "A previous version of " nfsdcld "(8) contained a bug that corrupted the reboot epoch table names.  This sub-command will fix those table names."
cd1e0c
+.IP "\fBdowngrade-schema\fP"
cd1e0c
+Downgrade the database schema.  Currently the schema can only be downgraded from version 4 to version 3.
cd1e0c
+.IP "\fBprint\fP"
cd1e0c
+Display the contents of the database.  Prints the schema version and the values of the current and recovery epochs.  If the
cd1e0c
+.BR \-s | \-\-summary
cd1e0c
+option is not given, also prints the clients in the reboot epoch tables.
cd1e0c
+.SH OPTIONS
cd1e0c
+.SS Options valid for all sub-commands
cd1e0c
+.TP
cd1e0c
+.B \-h, \-\-help
cd1e0c
+Show the help message and exit
cd1e0c
+.TP
cd1e0c
+\fB\-p \fIdbpath\fR, \fB\-\-path \fIdbpath\fR
cd1e0c
+Open the sqlite database located at
cd1e0c
+.I dbpath
cd1e0c
+instead of
cd1e0c
+.IR /var/lib/nfs/nfsdcld/main.sqlite ".  "
cd1e0c
+This is mainly for testing purposes.
cd1e0c
+.SS Options specific to the downgrade-schema sub-command
cd1e0c
+.TP
cd1e0c
+\fB\-v \fIto-version\fR, \fB\-\-version \fIto-version\fR
cd1e0c
+The schema version to downgrade to.  Currently the schema can only be downgraded to version 3.
cd1e0c
+.SS Options specific to the print sub-command
cd1e0c
+.TP
cd1e0c
+.B \-s, \-\-summary
cd1e0c
+Do not list the clients in the reboot epoch tables in the output.
cd1e0c
+.SH NOTES
cd1e0c
+The
cd1e0c
+.B clddb-tool
cd1e0c
+command will not allow the
cd1e0c
+.B fix-table-names
cd1e0c
+or
cd1e0c
+.B downgrade-schema
cd1e0c
+subcommands to be used if
cd1e0c
+.BR nfsdcld (8)
cd1e0c
+is running.
cd1e0c
+.SH FILES
cd1e0c
+.TP
cd1e0c
+.B /var/lib/nfs/nfsdcld/main.sqlite
cd1e0c
+.SH SEE ALSO
cd1e0c
+.BR nfsdcld (8)
cd1e0c
+.SH AUTHOR
cd1e0c
+Scott Mayhew <smayhew@redhat.com>
cd1e0c
diff --git a/tools/clddb-tool/clddb-tool.py b/tools/clddb-tool/clddb-tool.py
cd1e0c
new file mode 100644
cd1e0c
index 00000000..8a661318
cd1e0c
--- /dev/null
cd1e0c
+++ b/tools/clddb-tool/clddb-tool.py
cd1e0c
@@ -0,0 +1,266 @@
cd1e0c
+#!/usr/bin/python3
cd1e0c
+"""Tool for manipulating the nfsdcld sqlite database
cd1e0c
+"""
cd1e0c
+
cd1e0c
+__copyright__ = """
cd1e0c
+Copyright (C) 2019 Scott Mayhew <smayhew@redhat.com>
cd1e0c
+
cd1e0c
+This program is free software; you can redistribute it and/or
cd1e0c
+modify it under the terms of the GNU General Public License
cd1e0c
+as published by the Free Software Foundation; either version 2
cd1e0c
+of the License, or (at your option) any later version.
cd1e0c
+
cd1e0c
+This program is distributed in the hope that it will be useful,
cd1e0c
+but WITHOUT ANY WARRANTY; without even the implied warranty of
cd1e0c
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
cd1e0c
+GNU General Public License for more details.
cd1e0c
+
cd1e0c
+You should have received a copy of the GNU General Public License
cd1e0c
+along with this program; if not, write to the Free Software
cd1e0c
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
cd1e0c
+MA  02110-1301, USA.
cd1e0c
+"""
cd1e0c
+
cd1e0c
+import argparse
cd1e0c
+import os
cd1e0c
+import sqlite3
cd1e0c
+import sys
cd1e0c
+
cd1e0c
+
cd1e0c
+class CldDb():
cd1e0c
+    def __init__(self, path):
cd1e0c
+        self.con = sqlite3.connect(path)
cd1e0c
+        self.con.row_factory = sqlite3.Row
cd1e0c
+        for row in self.con.execute('select value from parameters '
cd1e0c
+                                    'where key = "version"'):
cd1e0c
+            self.version = int(row['value'])
cd1e0c
+        for row in self.con.execute('select * from grace'):
cd1e0c
+            self.current = int(row['current'])
cd1e0c
+            self.recovery = int(row['recovery'])
cd1e0c
+
cd1e0c
+    def __del__(self):
cd1e0c
+        self.con.close()
cd1e0c
+
cd1e0c
+    def __str__(self):
cd1e0c
+        return ('Schema version: {self.version} '
cd1e0c
+                'current epoch: {self.current} '
cd1e0c
+                'recovery epoch: {self.recovery}'.format(self=self))
cd1e0c
+
cd1e0c
+    def _print_clients(self, epoch):
cd1e0c
+        if epoch:
cd1e0c
+            for row in self.con.execute('select * from "rec-{:016x}"'
cd1e0c
+                                        .format(epoch)):
cd1e0c
+                if self.version >= 4:
cd1e0c
+                    if row['princhash'] is not None:
cd1e0c
+                        princhash = row['princhash'].hex()
cd1e0c
+                    else:
cd1e0c
+                        princhash = "(null)"
cd1e0c
+                    print('id = {}, princhash = {}'
cd1e0c
+                          .format(row['id'].decode(), princhash))
cd1e0c
+                else:
cd1e0c
+                    print('id = {}'.format(row['id'].decode()))
cd1e0c
+
cd1e0c
+    def print_current_clients(self):
cd1e0c
+        print('Clients in current epoch:')
cd1e0c
+        self._print_clients(self.current)
cd1e0c
+
cd1e0c
+    def print_recovery_clients(self):
cd1e0c
+        if self.recovery:
cd1e0c
+            print('Clients in recovery epoch:')
cd1e0c
+            self._print_clients(self.recovery)
cd1e0c
+
cd1e0c
+    def check_bad_table_names(self):
cd1e0c
+        bad_names = []
cd1e0c
+        for row in self.con.execute('select name from sqlite_master '
cd1e0c
+                                    'where type = "table" '
cd1e0c
+                                    'and name like "%rec-%" '
cd1e0c
+                                    'and length(name) < 20'):
cd1e0c
+            bad_names.append(row['name'])
cd1e0c
+        return bad_names
cd1e0c
+
cd1e0c
+    def fix_bad_table_names(self):
cd1e0c
+        try:
cd1e0c
+            self.con.execute('begin exclusive transaction')
cd1e0c
+            bad_names = self.check_bad_table_names()
cd1e0c
+            for bad_name in bad_names:
cd1e0c
+                epoch = int(bad_name.split('-')[1], base=16)
cd1e0c
+                if epoch == self.current or epoch == self.recovery:
cd1e0c
+                    if epoch == self.current:
cd1e0c
+                        which = 'current'
cd1e0c
+                    else:
cd1e0c
+                        which = 'recovery'
cd1e0c
+                    print('found invalid table name {} for {} epoch'
cd1e0c
+                          .format(bad_name, which))
cd1e0c
+                    self.con.execute('alter table "{}" '
cd1e0c
+                                     'rename to "rec-{:016x}"'
cd1e0c
+                                     .format(bad_name, epoch))
cd1e0c
+                    print('renamed to rec-{:016x}'.format(epoch))
cd1e0c
+                else:
cd1e0c
+                    print('found invalid table name {} for unknown epoch {}'
cd1e0c
+                          .format(bad_name, epoch))
cd1e0c
+                    self.con.execute('drop table "{}"'.format(bad_name))
cd1e0c
+                    print('dropped table {}'.format(bad_name))
cd1e0c
+        except sqlite3.Error:
cd1e0c
+            self.con.rollback()
cd1e0c
+        else:
cd1e0c
+            self.con.commit()
cd1e0c
+
cd1e0c
+    def has_princ_data(self):  # True if any client row has a princhash
cd1e0c
+        if self.version < 4:  # the code only reads princhash for v4+
cd1e0c
+            return False
cd1e0c
+        for row in self.con.execute('select count(*) '
cd1e0c
+                                    'from "rec-{:016x}" '
cd1e0c
+                                    'where princhash not null'
cd1e0c
+                                    .format(self.current)):
cd1e0c
+            count = row[0]
cd1e0c
+        if self.recovery:
cd1e0c
+            for row in self.con.execute('select count(*) '
cd1e0c
+                                        'from "rec-{:016x}" '
cd1e0c
+                                        'where princhash not null'
cd1e0c
+                                        .format(self.recovery)):  # not current
cd1e0c
+                count = count + row[0]
cd1e0c
+        if count:
cd1e0c
+            return True
cd1e0c
+        return False
cd1e0c
+
cd1e0c
+    def _downgrade_table_v4_to_v3(self, epoch):
cd1e0c
+        if not self.con.in_transaction:
cd1e0c
+            raise sqlite3.Error
cd1e0c
+        try:
cd1e0c
+            self.con.execute('create table "new_rec-{:016x}" '
cd1e0c
+                             '(id blob primary key)'.format(epoch))
cd1e0c
+            self.con.execute('insert into "new_rec-{:016x}" '
cd1e0c
+                             'select id from "rec-{:016x}"'
cd1e0c
+                             .format(epoch, epoch))
cd1e0c
+            self.con.execute('drop table "rec-{:016x}"'.format(epoch))
cd1e0c
+            self.con.execute('alter table "new_rec-{:016x}" '
cd1e0c
+                             'rename to "rec-{:016x}"'
cd1e0c
+                             .format(epoch, epoch))
cd1e0c
+        except sqlite3.Error:
cd1e0c
+            raise
cd1e0c
+
cd1e0c
+    def downgrade_schema_v4_to_v3(self):
cd1e0c
+        try:
cd1e0c
+            self.con.execute('begin exclusive transaction')
cd1e0c
+            for row in self.con.execute('select value from parameters '
cd1e0c
+                                        'where key = "version"'):
cd1e0c
+                version = int(row['value'])
cd1e0c
+            if version != self.version:
cd1e0c
+                raise sqlite3.Error
cd1e0c
+            for row in self.con.execute('select * from grace'):
cd1e0c
+                current = int(row['current'])
cd1e0c
+                recovery = int(row['recovery'])
cd1e0c
+            if current != self.current:
cd1e0c
+                raise sqlite3.Error
cd1e0c
+            if recovery != self.recovery:
cd1e0c
+                raise sqlite3.Error
cd1e0c
+            self._downgrade_table_v4_to_v3(current)
cd1e0c
+            if recovery:
cd1e0c
+                self._downgrade_table_v4_to_v3(recovery)
cd1e0c
+            self.con.execute('update parameters '
cd1e0c
+                             'set value = "3" '
cd1e0c
+                             'where key = "version"')
cd1e0c
+            self.version = 3
cd1e0c
+        except sqlite3.Error:
cd1e0c
+            self.con.rollback()
cd1e0c
+            print('Downgrade failed')
cd1e0c
+        else:
cd1e0c
+            self.con.commit()
cd1e0c
+            print('Downgrade successful')
cd1e0c
+
cd1e0c
+
cd1e0c
+def nfsdcld_active():
cd1e0c
+    rc = os.system('ps -C nfsdcld >/dev/null 2>/dev/null')
cd1e0c
+    if rc == 0:
cd1e0c
+        return True
cd1e0c
+    return False
cd1e0c
+
cd1e0c
+
cd1e0c
+def fix_table_names_command(db, args):
cd1e0c
+    if nfsdcld_active():
cd1e0c
+        print('Warning: nfsdcld is running!')
cd1e0c
+        ans = input('Continue? ')
cd1e0c
+        if ans.lower() not in ['y', 'yes']:
cd1e0c
+            print('Operation canceled.')
cd1e0c
+            return
cd1e0c
+    bad_names = db.check_bad_table_names()
cd1e0c
+    if not bad_names:
cd1e0c
+        print('No invalid table names found.')
cd1e0c
+        return
cd1e0c
+    db.fix_bad_table_names()
cd1e0c
+
cd1e0c
+
cd1e0c
+def downgrade_schema_command(db, args):
cd1e0c
+    if nfsdcld_active():
cd1e0c
+        print('Warning: nfsdcld is running!')
cd1e0c
+        ans = input('Continue? ')
cd1e0c
+        if ans.lower() not in ['y', 'yes']:
cd1e0c
+            print('Operation canceled')
cd1e0c
+            return
cd1e0c
+    if db.version != 4:
cd1e0c
+        print('Cannot downgrade database from schema version {}.'
cd1e0c
+              .format(db.version))
cd1e0c
+        return
cd1e0c
+    if args.version != 3:
cd1e0c
+        print('Cannot downgrade to version {}.'.format(args.version))
cd1e0c
+        return
cd1e0c
+    bad_names = db.check_bad_table_names()
cd1e0c
+    if bad_names:
cd1e0c
+        print('Invalid table names detected.')
cd1e0c
+        print('Please run "{} fix-table-names" before downgrading the schema.'
cd1e0c
+              .format(sys.argv[0]))
cd1e0c
+        return
cd1e0c
+    if db.has_princ_data():
cd1e0c
+        print('Warning: database has principal data, which will be erased.')
cd1e0c
+        ans = input('Continue? ')
cd1e0c
+        if ans.lower() not in ['y', 'yes']:
cd1e0c
+            print('Operation canceled')
cd1e0c
+            return
cd1e0c
+    db.downgrade_schema_v4_to_v3()
cd1e0c
+
cd1e0c
+
cd1e0c
+def print_command(db, args):
cd1e0c
+    print(str(db))
cd1e0c
+    if not args.summary:
cd1e0c
+        bad_names = db.check_bad_table_names()
cd1e0c
+        if bad_names:
cd1e0c
+            print('Invalid table names detected.')
cd1e0c
+            print('Please run "{} fix-table-names".'.format(sys.argv[0]))
cd1e0c
+            return
cd1e0c
+        db.print_current_clients()
cd1e0c
+        db.print_recovery_clients()
cd1e0c
+
cd1e0c
+
cd1e0c
+def main():
cd1e0c
+    parser = argparse.ArgumentParser()
cd1e0c
+    parser.add_argument('-p', '--path',
cd1e0c
+                        default='/var/lib/nfs/nfsdcld/main.sqlite',
cd1e0c
+                        help='path to the database '
cd1e0c
+                        '(default: /var/lib/nfs/nfsdcld/main.sqlite)')
cd1e0c
+    subparsers = parser.add_subparsers(help='sub-command help')
cd1e0c
+    fix_parser = subparsers.add_parser('fix-table-names',
cd1e0c
+                                       help='fix invalid table names')
cd1e0c
+    fix_parser.set_defaults(func=fix_table_names_command)
cd1e0c
+    downgrade_parser = subparsers.add_parser('downgrade-schema',
cd1e0c
+                                             help='downgrade database schema')
cd1e0c
+    downgrade_parser.add_argument('-v', '--version', type=int, choices=[3],
cd1e0c
+                                  default=3,
cd1e0c
+                                  help='version to downgrade to')
cd1e0c
+    downgrade_parser.set_defaults(func=downgrade_schema_command)
cd1e0c
+    print_parser = subparsers.add_parser('print',
cd1e0c
+                                         help='print database info')
cd1e0c
+    print_parser.add_argument('-s', '--summary', default=False,
cd1e0c
+                              action='store_true',
cd1e0c
+                              help='print summary only')
cd1e0c
+    print_parser.set_defaults(func=print_command)
cd1e0c
+    args = parser.parse_args()
cd1e0c
+    if not os.path.exists(args.path):
cd1e0c
+        return parser.print_usage()
cd1e0c
+    clddb = CldDb(args.path)
cd1e0c
+    return args.func(clddb, args)
cd1e0c
+
cd1e0c
+
cd1e0c
+if __name__ == '__main__':
cd1e0c
+    if len(sys.argv) == 1:
cd1e0c
+        sys.argv.extend(['print', '--summary'])
cd1e0c
+    main()
cd1e0c
diff --git a/utils/Makefile.am b/utils/Makefile.am
cd1e0c
index 0a5b062c..4c930a4b 100644
cd1e0c
--- a/utils/Makefile.am
cd1e0c
+++ b/utils/Makefile.am
cd1e0c
@@ -19,6 +19,10 @@ if CONFIG_MOUNT
cd1e0c
 OPTDIRS += mount
cd1e0c
 endif
cd1e0c
 
cd1e0c
+if CONFIG_NFSDCLD
cd1e0c
+OPTDIRS += nfsdcld
cd1e0c
+endif
cd1e0c
+
cd1e0c
 if CONFIG_NFSDCLTRACK
cd1e0c
 OPTDIRS += nfsdcltrack
cd1e0c
 endif
cd1e0c
diff --git a/utils/exportfs/exportfs.c b/utils/exportfs/exportfs.c
cd1e0c
index cd3c979d..4b9634b7 100644
cd1e0c
--- a/utils/exportfs/exportfs.c
cd1e0c
+++ b/utils/exportfs/exportfs.c
cd1e0c
@@ -644,6 +644,9 @@ out:
cd1e0c
 	return result;
cd1e0c
 }
cd1e0c
 
cd1e0c
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
cd1e0c
+__attribute__((format (printf, 2, 3)))
cd1e0c
+#endif
cd1e0c
 static char
cd1e0c
 dumpopt(char c, char *fmt, ...)
cd1e0c
 {
cd1e0c
diff --git a/utils/mount/fstab.c b/utils/mount/fstab.c
cd1e0c
index eedbddab..8b0aaf1a 100644
cd1e0c
--- a/utils/mount/fstab.c
cd1e0c
+++ b/utils/mount/fstab.c
cd1e0c
@@ -7,6 +7,10 @@
cd1e0c
  * - Moved code to nfs-utils/support/nfs from util-linux/mount.
cd1e0c
  */
cd1e0c
 
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include <config.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
 #include <errno.h>
cd1e0c
 #include <stdio.h>
cd1e0c
 #include <fcntl.h>
cd1e0c
diff --git a/utils/mountd/cache.c b/utils/mountd/cache.c
cd1e0c
index a054ce6f..c73e29be 100644
cd1e0c
--- a/utils/mountd/cache.c
cd1e0c
+++ b/utils/mountd/cache.c
cd1e0c
@@ -967,8 +967,7 @@ lookup_export(char *dom, char *path, struct addrinfo *ai)
cd1e0c
 			} else if (found_type == i && found->m_warned == 0) {
cd1e0c
 				xlog(L_WARNING, "%s exported to both %s and %s, "
cd1e0c
 				     "arbitrarily choosing options from first",
cd1e0c
-				     path, found->m_client->m_hostname, exp->m_client->m_hostname,
cd1e0c
-				     dom);
cd1e0c
+				     path, found->m_client->m_hostname, exp->m_client->m_hostname);
cd1e0c
 				found->m_warned = 1;
cd1e0c
 			}
cd1e0c
 		}
cd1e0c
diff --git a/utils/mountd/mountd.c b/utils/mountd/mountd.c
cd1e0c
index 086c39bf..0b891121 100644
cd1e0c
--- a/utils/mountd/mountd.c
cd1e0c
+++ b/utils/mountd/mountd.c
cd1e0c
@@ -209,10 +209,10 @@ killer (int sig)
cd1e0c
 }
cd1e0c
 
cd1e0c
 static void
cd1e0c
-sig_hup (int sig)
cd1e0c
+sig_hup (int UNUSED(sig))
cd1e0c
 {
cd1e0c
 	/* don't exit on SIGHUP */
cd1e0c
-	xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n", sig);
cd1e0c
+	xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n");
cd1e0c
 	return;
cd1e0c
 }
cd1e0c
 
cd1e0c
diff --git a/utils/nfsdcld/Makefile.am b/utils/nfsdcld/Makefile.am
cd1e0c
new file mode 100644
cd1e0c
index 00000000..273d64f1
cd1e0c
--- /dev/null
cd1e0c
+++ b/utils/nfsdcld/Makefile.am
cd1e0c
@@ -0,0 +1,15 @@
cd1e0c
+## Process this file with automake to produce Makefile.in
cd1e0c
+
cd1e0c
+man8_MANS	= nfsdcld.man
cd1e0c
+EXTRA_DIST	= $(man8_MANS)
cd1e0c
+
cd1e0c
+AM_CFLAGS	+= -D_LARGEFILE64_SOURCE
cd1e0c
+sbin_PROGRAMS	= nfsdcld
cd1e0c
+
cd1e0c
+nfsdcld_SOURCES = nfsdcld.c sqlite.c legacy.c
cd1e0c
+nfsdcld_LDADD = ../../support/nfs/libnfs.la $(LIBEVENT) $(LIBSQLITE) $(LIBCAP)
cd1e0c
+
cd1e0c
+noinst_HEADERS	= sqlite.h cld-internal.h legacy.h
cd1e0c
+
cd1e0c
+MAINTAINERCLEANFILES = Makefile.in
cd1e0c
+
cd1e0c
diff --git a/utils/nfsdcld/cld-internal.h b/utils/nfsdcld/cld-internal.h
cd1e0c
new file mode 100644
cd1e0c
index 00000000..05f01be2
cd1e0c
--- /dev/null
cd1e0c
+++ b/utils/nfsdcld/cld-internal.h
cd1e0c
@@ -0,0 +1,44 @@
cd1e0c
+/*
cd1e0c
+ * This program is free software; you can redistribute it and/or
cd1e0c
+ * modify it under the terms of the GNU General Public License
cd1e0c
+ * as published by the Free Software Foundation; either version 2
cd1e0c
+ * of the License, or (at your option) any later version.
cd1e0c
+ *
cd1e0c
+ * This program is distributed in the hope that it will be useful,
cd1e0c
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
cd1e0c
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
cd1e0c
+ * GNU General Public License for more details.
cd1e0c
+ *
cd1e0c
+ * You should have received a copy of the GNU General Public License
cd1e0c
+ * along with this program; if not, write to the Free Software
cd1e0c
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
cd1e0c
+ * Boston, MA 02110-1301, USA.
cd1e0c
+ */
cd1e0c
+
cd1e0c
+#ifndef _CLD_INTERNAL_H_
cd1e0c
+#define _CLD_INTERNAL_H_
cd1e0c
+
cd1e0c
+#if CLD_UPCALL_VERSION >= 2
cd1e0c
+#define UPCALL_VERSION		2
cd1e0c
+#else
cd1e0c
+#define UPCALL_VERSION		1
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+struct cld_client {
cd1e0c
+	int			cl_fd;
cd1e0c
+	struct event		cl_event;
cd1e0c
+	union {
cd1e0c
+		struct cld_msg		cl_msg;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+		struct cld_msg_v2	cl_msg_v2;
cd1e0c
+#endif
cd1e0c
+	} cl_u;
cd1e0c
+};
cd1e0c
+
cd1e0c
+uint64_t current_epoch;
cd1e0c
+uint64_t recovery_epoch;
cd1e0c
+int first_time;
cd1e0c
+int num_cltrack_records;
cd1e0c
+int num_legacy_records;
cd1e0c
+
cd1e0c
+#endif /* _CLD_INTERNAL_H_ */
cd1e0c
diff --git a/utils/nfsdcld/legacy.c b/utils/nfsdcld/legacy.c
cd1e0c
new file mode 100644
cd1e0c
index 00000000..3c6bea6c
cd1e0c
--- /dev/null
cd1e0c
+++ b/utils/nfsdcld/legacy.c
cd1e0c
@@ -0,0 +1,185 @@
cd1e0c
+/*
cd1e0c
+ * This program is free software; you can redistribute it and/or
cd1e0c
+ * modify it under the terms of the GNU General Public License
cd1e0c
+ * as published by the Free Software Foundation; either version 2
cd1e0c
+ * of the License, or (at your option) any later version.
cd1e0c
+ *
cd1e0c
+ * This program is distributed in the hope that it will be useful,
cd1e0c
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
cd1e0c
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
cd1e0c
+ * GNU General Public License for more details.
cd1e0c
+ *
cd1e0c
+ * You should have received a copy of the GNU General Public License
cd1e0c
+ * along with this program; if not, write to the Free Software
cd1e0c
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
cd1e0c
+ * Boston, MA 02110-1301, USA.
cd1e0c
+ */
cd1e0c
+
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include <config.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+#include <stdio.h>
cd1e0c
+#include <dirent.h>
cd1e0c
+#include <string.h>
cd1e0c
+#include <unistd.h>
cd1e0c
+#include <stdint.h>
cd1e0c
+#include <fcntl.h>
cd1e0c
+#include <errno.h>
cd1e0c
+#include <sys/types.h>
cd1e0c
+#include <sys/stat.h>
cd1e0c
+#include <limits.h>
cd1e0c
+#include "cld.h"
cd1e0c
+#include "sqlite.h"
cd1e0c
+#include "xlog.h"
cd1e0c
+#include "legacy.h"
cd1e0c
+
cd1e0c
+#define NFSD_RECDIR_FILE "/proc/fs/nfsd/nfsv4recoverydir"
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Loads client records from the v4recovery directory into the database.
cd1e0c
+ * Records are prefixed with the string "hash:" and include the '\0' byte.
cd1e0c
+ *
cd1e0c
+ * Called during database initialization as part of a one-time "upgrade".
cd1e0c
+ */
cd1e0c
+void
cd1e0c
+legacy_load_clients_from_recdir(int *num_records)
cd1e0c
+{
cd1e0c
+	int fd;
cd1e0c
+	ssize_t nread;
cd1e0c
+	DIR *v4recovery;
cd1e0c
+	struct dirent *entry;
cd1e0c
+	char recdirname[PATH_MAX];
cd1e0c
+	char buf[NFS4_OPAQUE_LIMIT];
cd1e0c
+	struct stat st;
cd1e0c
+	char *nl;
cd1e0c
+
cd1e0c
+	fd = open(NFSD_RECDIR_FILE, O_RDONLY);
cd1e0c
+	if (fd < 0) {
cd1e0c
+		xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE);
cd1e0c
+		return;
cd1e0c
+	}
cd1e0c
+	nread = read(fd, recdirname, PATH_MAX);
cd1e0c
+	if (nread < 0)
cd1e0c
+		xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE);
cd1e0c
+	close(fd);	/* always release the fd, even when read() failed */
cd1e0c
+	/* the proc output isn't null-terminated: search only the bytes read */
cd1e0c
+	nl = nread > 0 ? memchr(recdirname, '\n', (size_t)nread) : NULL;
cd1e0c
+	if (!nl)
cd1e0c
+		return;
cd1e0c
+	*nl = '\0';
cd1e0c
+	if (stat(recdirname, &st) < 0) {
cd1e0c
+		xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno);
cd1e0c
+		return;
cd1e0c
+	}
cd1e0c
+	if (!S_ISDIR(st.st_mode)) {
cd1e0c
+		xlog(D_GENERAL, "%s is not a directory: mode=0%o", recdirname
cd1e0c
+				, st.st_mode);
cd1e0c
+		return;
cd1e0c
+	}
cd1e0c
+	v4recovery = opendir(recdirname);
cd1e0c
+	if (!v4recovery)
cd1e0c
+		return;
cd1e0c
+	while ((entry = readdir(v4recovery))) {
cd1e0c
+		int ret;
cd1e0c
+
cd1e0c
+		/* skip "." and ".." */
cd1e0c
+		if (entry->d_name[0] == '.') {
cd1e0c
+			switch (entry->d_name[1]) {
cd1e0c
+			case '\0':
cd1e0c
+				continue;
cd1e0c
+			case '.':
cd1e0c
+				if (entry->d_name[2] == '\0')
cd1e0c
+					continue;
cd1e0c
+			}
cd1e0c
+		}
cd1e0c
+		/* prefix legacy records with the string "hash:" */
cd1e0c
+		ret = snprintf(buf, sizeof(buf), "hash:%s", entry->d_name);
cd1e0c
+		/* if there's a problem, then skip this entry */
cd1e0c
+		if (ret < 0 || (size_t)ret >= sizeof(buf)) {
cd1e0c
+			xlog(L_WARNING, "%s: unable to build client string for %s!",
cd1e0c
+				__func__, entry->d_name);
cd1e0c
+			continue;
cd1e0c
+		}
cd1e0c
+		/* legacy client records need to include the null terminator */
cd1e0c
+		ret = sqlite_insert_client((unsigned char *)buf, strlen(buf) + 1);
cd1e0c
+		if (ret)
cd1e0c
+			xlog(L_WARNING, "%s: unable to insert %s: %d", __func__,
cd1e0c
+				entry->d_name, ret);
cd1e0c
+		else
cd1e0c
+			(*num_records)++;
cd1e0c
+	}
cd1e0c
+	closedir(v4recovery);
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Cleans out the v4recovery directory.
cd1e0c
+ *
cd1e0c
+ * Called upon receipt of the first "GraceDone" upcall only.
cd1e0c
+ */
cd1e0c
+void
cd1e0c
+legacy_clear_recdir(void)
cd1e0c
+{
cd1e0c
+	int fd;
cd1e0c
+	ssize_t nread;
cd1e0c
+	DIR *v4recovery;
cd1e0c
+	struct dirent *entry;
cd1e0c
+	char recdirname[PATH_MAX];
cd1e0c
+	char dirname[PATH_MAX];
cd1e0c
+	struct stat st;
cd1e0c
+	char *nl;
cd1e0c
+
cd1e0c
+	fd = open(NFSD_RECDIR_FILE, O_RDONLY);
cd1e0c
+	if (fd < 0) {
cd1e0c
+		xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE);
cd1e0c
+		return;
cd1e0c
+	}
cd1e0c
+	nread = read(fd, recdirname, PATH_MAX);
cd1e0c
+	if (nread < 0)
cd1e0c
+		xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE);
cd1e0c
+	close(fd);	/* always release the fd, even when read() failed */
cd1e0c
+	/* the proc output isn't null-terminated: search only the bytes read */
cd1e0c
+	nl = nread > 0 ? memchr(recdirname, '\n', (size_t)nread) : NULL;
cd1e0c
+	if (!nl)
cd1e0c
+		return;
cd1e0c
+	*nl = '\0';
cd1e0c
+	if (stat(recdirname, &st) < 0) {
cd1e0c
+		xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno);
cd1e0c
+		return;
cd1e0c
+	}
cd1e0c
+	if (!S_ISDIR(st.st_mode)) {
cd1e0c
+		xlog(D_GENERAL, "%s is not a directory: mode=0%o", recdirname
cd1e0c
+				, st.st_mode);
cd1e0c
+		return;
cd1e0c
+	}
cd1e0c
+	v4recovery = opendir(recdirname);
cd1e0c
+	if (!v4recovery)
cd1e0c
+		return;
cd1e0c
+	while ((entry = readdir(v4recovery))) {
cd1e0c
+		int len;
cd1e0c
+
cd1e0c
+		/* skip "." and ".." */
cd1e0c
+		if (entry->d_name[0] == '.') {
cd1e0c
+			switch (entry->d_name[1]) {
cd1e0c
+			case '\0':
cd1e0c
+				continue;
cd1e0c
+			case '.':
cd1e0c
+				if (entry->d_name[2] == '\0')
cd1e0c
+					continue;
cd1e0c
+			}
cd1e0c
+		}
cd1e0c
+		len = snprintf(dirname, sizeof(dirname), "%s/%s", recdirname,
cd1e0c
+				entry->d_name);
cd1e0c
+		/* if there's a problem, then skip this entry */
cd1e0c
+		if (len < 0 || (size_t)len >= sizeof(dirname)) {
cd1e0c
+			xlog(L_WARNING, "%s: unable to build filename for %s!",
cd1e0c
+				__func__, entry->d_name);
cd1e0c
+			continue;
cd1e0c
+		}
cd1e0c
+		len = rmdir(dirname);
cd1e0c
+		if (len)
cd1e0c
+			xlog(L_WARNING, "%s: unable to rmdir %s: %d", __func__,
cd1e0c
+				dirname, len);
cd1e0c
+	}
cd1e0c
+	closedir(v4recovery);
cd1e0c
+}
cd1e0c
diff --git a/utils/nfsdcld/legacy.h b/utils/nfsdcld/legacy.h
cd1e0c
new file mode 100644
cd1e0c
index 00000000..8988f6e8
cd1e0c
--- /dev/null
cd1e0c
+++ b/utils/nfsdcld/legacy.h
cd1e0c
@@ -0,0 +1,24 @@
cd1e0c
+/*
cd1e0c
+ * This program is free software; you can redistribute it and/or
cd1e0c
+ * modify it under the terms of the GNU General Public License
cd1e0c
+ * as published by the Free Software Foundation; either version 2
cd1e0c
+ * of the License, or (at your option) any later version.
cd1e0c
+ *
cd1e0c
+ * This program is distributed in the hope that it will be useful,
cd1e0c
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
cd1e0c
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
cd1e0c
+ * GNU General Public License for more details.
cd1e0c
+ *
cd1e0c
+ * You should have received a copy of the GNU General Public License
cd1e0c
+ * along with this program; if not, write to the Free Software
cd1e0c
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
cd1e0c
+ * Boston, MA 02110-1301, USA.
cd1e0c
+ */
cd1e0c
+
cd1e0c
+#ifndef _LEGACY_H_
cd1e0c
+#define _LEGACY_H_
cd1e0c
+
cd1e0c
+void legacy_load_clients_from_recdir(int *);
cd1e0c
+void legacy_clear_recdir(void);
cd1e0c
+
cd1e0c
+#endif /* _LEGACY_H_ */
cd1e0c
diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c
cd1e0c
new file mode 100644
cd1e0c
index 00000000..2ad10019
cd1e0c
--- /dev/null
cd1e0c
+++ b/utils/nfsdcld/nfsdcld.c
cd1e0c
@@ -0,0 +1,866 @@
cd1e0c
+/*
cd1e0c
+ * nfsdcld.c -- NFSv4 client name tracking daemon
cd1e0c
+ *
cd1e0c
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
cd1e0c
+ *
cd1e0c
+ * This program is free software; you can redistribute it and/or
cd1e0c
+ * modify it under the terms of the GNU General Public License
cd1e0c
+ * as published by the Free Software Foundation; either version 2
cd1e0c
+ * of the License, or (at your option) any later version.
cd1e0c
+ *
cd1e0c
+ * This program is distributed in the hope that it will be useful,
cd1e0c
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
cd1e0c
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
cd1e0c
+ * GNU General Public License for more details.
cd1e0c
+ *
cd1e0c
+ * You should have received a copy of the GNU General Public License
cd1e0c
+ * along with this program; if not, write to the Free Software
cd1e0c
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
cd1e0c
+ * Boston, MA 02110-1301, USA.
cd1e0c
+ */
cd1e0c
+
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include "config.h"
cd1e0c
+#endif /* HAVE_CONFIG_H */
cd1e0c
+
cd1e0c
+#include <errno.h>
cd1e0c
+#include <event.h>
cd1e0c
+#include <stdbool.h>
cd1e0c
+#include <getopt.h>
cd1e0c
+#include <string.h>
cd1e0c
+#include <sys/stat.h>
cd1e0c
+#include <sys/types.h>
cd1e0c
+#include <fcntl.h>
cd1e0c
+#include <unistd.h>
cd1e0c
+#include <libgen.h>
cd1e0c
+#include <sys/inotify.h>
cd1e0c
+#ifdef HAVE_SYS_CAPABILITY_H
cd1e0c
+#include <sys/prctl.h>
cd1e0c
+#include <sys/capability.h>
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+#include "xlog.h"
cd1e0c
+#include "nfslib.h"
cd1e0c
+#include "cld.h"
cd1e0c
+#include "cld-internal.h"
cd1e0c
+#include "sqlite.h"
cd1e0c
+#include "../mount/version.h"
cd1e0c
+#include "conffile.h"
cd1e0c
+#include "legacy.h"
cd1e0c
+
cd1e0c
+#ifndef DEFAULT_PIPEFS_DIR
cd1e0c
+#define DEFAULT_PIPEFS_DIR NFS_STATEDIR "/rpc_pipefs"
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+#define DEFAULT_CLD_PATH	"/nfsd/cld"
cd1e0c
+
cd1e0c
+#ifndef CLD_DEFAULT_STORAGEDIR
cd1e0c
+#define CLD_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcld"
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+#define NFSD_END_GRACE_FILE "/proc/fs/nfsd/v4_end_grace"
cd1e0c
+
cd1e0c
+/* private data structures */
cd1e0c
+
cd1e0c
+/* global variables */
cd1e0c
+static char pipefs_dir[PATH_MAX] = DEFAULT_PIPEFS_DIR;
cd1e0c
+static char pipepath[PATH_MAX];
cd1e0c
+static int 		inotify_fd = -1;
cd1e0c
+static struct event	pipedir_event;
cd1e0c
+static bool old_kernel = false;
cd1e0c
+
cd1e0c
+static struct option longopts[] =
cd1e0c
+{
cd1e0c
+	{ "help", 0, NULL, 'h' },
cd1e0c
+	{ "foreground", 0, NULL, 'F' },
cd1e0c
+	{ "debug", 0, NULL, 'd' },
cd1e0c
+	{ "pipefsdir", 1, NULL, 'p' },
cd1e0c
+	{ "storagedir", 1, NULL, 's' },
cd1e0c
+	{ NULL, 0, 0, 0 },
cd1e0c
+};
cd1e0c
+
cd1e0c
+/* forward declarations */
cd1e0c
+static void cldcb(int UNUSED(fd), short which, void *data);
cd1e0c
+
cd1e0c
+static void
cd1e0c
+usage(char *progname)
cd1e0c
+{
cd1e0c
+	printf("%s [ -hFd ] [ -p pipefsdir ] [ -s storagedir ]\n", progname);
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+cld_set_caps(void)
cd1e0c
+{
cd1e0c
+	int ret = 0;
cd1e0c
+#ifdef HAVE_SYS_CAPABILITY_H
cd1e0c
+	unsigned long i;
cd1e0c
+	cap_t caps;
cd1e0c
+
cd1e0c
+	if (getuid() != 0) {
cd1e0c
+		xlog(L_ERROR, "Not running as root. Daemon won't be able to "
cd1e0c
+			      "open the pipe after dropping capabilities!");
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* prune the bounding set to nothing */
cd1e0c
+	for (i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0 ; ++i) {
cd1e0c
+		ret = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
cd1e0c
+		if (ret) {
cd1e0c
+			xlog(L_ERROR, "Unable to prune capability %lu from "
cd1e0c
+				      "bounding set: %m", i);
cd1e0c
+			return -errno;
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* get a blank capset */
cd1e0c
+	caps = cap_init();
cd1e0c
+	if (caps == NULL) {
cd1e0c
+		xlog(L_ERROR, "Unable to get blank capability set: %m");
cd1e0c
+		return -errno;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* reset the process capabilities */
cd1e0c
+	if (cap_set_proc(caps) != 0) {
cd1e0c
+		xlog(L_ERROR, "Unable to set process capabilities: %m");
cd1e0c
+		ret = -errno;
cd1e0c
+	}
cd1e0c
+	cap_free(caps);
cd1e0c
+#endif
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX)
cd1e0c
+
cd1e0c
+static int
cd1e0c
+cld_pipe_open(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int fd;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: opening upcall pipe %s", __func__, pipepath);
cd1e0c
+	fd = open(pipepath, O_RDWR, 0);
cd1e0c
+	if (fd < 0) {
cd1e0c
+		xlog(D_GENERAL, "%s: open of %s failed: %m", __func__, pipepath);
cd1e0c
+		return -errno;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	if (event_initialized(&clnt->cl_event))
cd1e0c
+		event_del(&clnt->cl_event);
cd1e0c
+	if (clnt->cl_fd >= 0)
cd1e0c
+		close(clnt->cl_fd);
cd1e0c
+
cd1e0c
+	clnt->cl_fd = fd;
cd1e0c
+	event_set(&clnt->cl_event, clnt->cl_fd, EV_READ, cldcb, clnt);
cd1e0c
+	/* event_add is done by the caller */
cd1e0c
+	return 0;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cld_inotify_cb(int UNUSED(fd), short which, void *data)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	size_t elen;
cd1e0c
+	ssize_t rret;
cd1e0c
+	char evbuf[INOTIFY_EVENT_MAX];
cd1e0c
+	char *dirc = NULL, *pname;
cd1e0c
+	struct inotify_event *event = (struct inotify_event *)evbuf;
cd1e0c
+	struct cld_client *clnt = data;
cd1e0c
+
cd1e0c
+	if (which != EV_READ)
cd1e0c
+		return;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: called for EV_READ", __func__);
cd1e0c
+
cd1e0c
+	dirc = strndup(pipepath, PATH_MAX);
cd1e0c
+	if (!dirc) {
cd1e0c
+		xlog(L_ERROR, "%s: unable to allocate memory", __func__);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	rret = read(inotify_fd, evbuf, INOTIFY_EVENT_MAX);
cd1e0c
+	if (rret < 0) {
cd1e0c
+		xlog(L_ERROR, "%s: read from inotify fd failed: %m", __func__);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* check to see if we have a filename in the evbuf */
cd1e0c
+	if (!event->len) {
cd1e0c
+		xlog(D_GENERAL, "%s: no filename in inotify event", __func__);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	pname = basename(dirc);
cd1e0c
+	elen = strnlen(event->name, event->len);
cd1e0c
+
cd1e0c
+	/* does the filename match our pipe? */
cd1e0c
+	if (strlen(pname) != elen || memcmp(pname, event->name, elen)) {
cd1e0c
+		xlog(D_GENERAL, "%s: wrong filename (%s)", __func__,
cd1e0c
+				event->name);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = cld_pipe_open(clnt);
cd1e0c
+	switch (ret) {
cd1e0c
+	case 0:
cd1e0c
+		/* readd the event for the cl_event pipe */
cd1e0c
+		event_add(&clnt->cl_event, NULL);
cd1e0c
+		break;
cd1e0c
+	case -ENOENT:
cd1e0c
+		/* pipe must have disappeared, wait for it to come back */
cd1e0c
+		goto out;
cd1e0c
+	default:
cd1e0c
+		/* anything else is fatal */
cd1e0c
+		xlog(L_FATAL, "%s: unable to open new pipe (%d). Aborting.",
cd1e0c
+			__func__, ret);
cd1e0c
+		exit(ret);
cd1e0c
+	}
cd1e0c
+
cd1e0c
+out:
cd1e0c
+	event_add(&pipedir_event, NULL);
cd1e0c
+	free(dirc);
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+cld_inotify_setup(void)	/* watch pipepath's directory; 0 on success, negative errno on failure */
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char *dirc, *dname;
cd1e0c
+
cd1e0c
+	dirc = strndup(pipepath, PATH_MAX);	/* dirname() may modify its argument, so work on a copy */
cd1e0c
+	if (!dirc) {
cd1e0c
+		xlog_err("%s: unable to allocate memory", __func__);
cd1e0c
+		ret = -ENOMEM;
cd1e0c
+		goto out_free;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	dname = dirname(dirc);
cd1e0c
+
cd1e0c
+	inotify_fd = inotify_init();
cd1e0c
+	if (inotify_fd < 0) {
cd1e0c
+		ret = -errno;	/* capture before logging can clobber errno */
cd1e0c
+		xlog_err("%s: inotify_init failed: %m", __func__);
cd1e0c
+		goto out_free;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = inotify_add_watch(inotify_fd, dname, IN_CREATE);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		ret = -errno;	/* capture before logging can clobber errno */
cd1e0c
+		xlog_err("%s: inotify_add_watch failed: %m", __func__);
cd1e0c
+		goto out_err;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+out_free:
cd1e0c
+	free(dirc);
cd1e0c
+	return ret < 0 ? ret : 0;	/* ret >= 0 here is the watch descriptor, i.e. success */
cd1e0c
+out_err:
cd1e0c
+	close(inotify_fd);
cd1e0c
+	goto out_free;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Set an inotify watch on the directory that should contain the pipe, and then
cd1e0c
+ * try to open it. If it fails with anything but -ENOENT, return the error
cd1e0c
+ * immediately.
cd1e0c
+ *
cd1e0c
+ * If it succeeds, then set up the pipe event handler. At that point, set up
cd1e0c
+ * the inotify event handler and go ahead and return success.
cd1e0c
+ */
cd1e0c
+static int
cd1e0c
+cld_pipe_init(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: init pipe handlers", __func__);
cd1e0c
+
cd1e0c
+	ret = cld_inotify_setup();
cd1e0c
+	if (ret != 0)
cd1e0c
+		goto out;
cd1e0c
+
cd1e0c
+	clnt->cl_fd = -1;
cd1e0c
+	ret = cld_pipe_open(clnt);
cd1e0c
+	switch (ret) {
cd1e0c
+	case 0:
cd1e0c
+		/* add the event and we're good to go */
cd1e0c
+		event_add(&clnt->cl_event, NULL);
cd1e0c
+		break;
cd1e0c
+	case -ENOENT:
cd1e0c
+		/* ignore this error -- cld_inotify_cb will handle it */
cd1e0c
+		ret = 0;
cd1e0c
+		break;
cd1e0c
+	default:
cd1e0c
+		/* anything else is fatal */
cd1e0c
+		close(inotify_fd);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* set event for inotify read */
cd1e0c
+	event_set(&pipedir_event, inotify_fd, EV_READ, cld_inotify_cb, clnt);
cd1e0c
+	event_add(&pipedir_event, NULL);
cd1e0c
+out:
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Older kernels will not tell nfsdcld when a grace period has started.
cd1e0c
+ * Therefore we have to peek at the /proc/fs/nfsd/v4_end_grace file to
cd1e0c
+ * see if nfsd is in grace.  We have to do this for create and remove
cd1e0c
+ * upcalls to ensure that the correct table is being updated - otherwise
cd1e0c
+ * we could lose client records when the grace period is lifted.
cd1e0c
+ */
cd1e0c
+static int
cd1e0c
+cld_check_grace_period(void)	/* 0 on success or nothing to do, non-zero on failure */
cd1e0c
+{
cd1e0c
+	int fd, ret = 0;
cd1e0c
+	char c = '\0';	/* stays '\0' if read() returns 0 (EOF), so the 'N' test below is well defined */
cd1e0c
+
cd1e0c
+	if (!old_kernel)
cd1e0c
+		return 0;
cd1e0c
+	if (recovery_epoch != 0)
cd1e0c
+		return 0;
cd1e0c
+	fd = open(NFSD_END_GRACE_FILE, O_RDONLY);
cd1e0c
+	if (fd < 0) {
cd1e0c
+		xlog(L_WARNING, "Unable to open %s: %m",
cd1e0c
+			NFSD_END_GRACE_FILE);
cd1e0c
+		return 1;
cd1e0c
+	}
cd1e0c
+	if (read(fd, &c, 1) < 0) {
cd1e0c
+		xlog(L_WARNING, "Unable to read from %s: %m", NFSD_END_GRACE_FILE);
cd1e0c
+		close(fd);	/* do not leak the descriptor on the error path */
cd1e0c
+		return 1;
cd1e0c
+	}
cd1e0c
+	close(fd);
cd1e0c
+	if (c == 'N') {
cd1e0c
+		xlog(L_WARNING, "nfsd is in grace but didn't send a gracestart upcall, "
cd1e0c
+			"please update the kernel");
cd1e0c
+		ret = sqlite_grace_start();
cd1e0c
+	}
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+static ssize_t cld_message_size(void *msg)
cd1e0c
+{
cd1e0c
+	struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)msg;
cd1e0c
+
cd1e0c
+	switch (hdr->cm_vers) {
cd1e0c
+	case 1:
cd1e0c
+		return sizeof(struct cld_msg);
cd1e0c
+	case 2:
cd1e0c
+		return sizeof(struct cld_msg_v2);
cd1e0c
+	default:
cd1e0c
+		xlog(L_FATAL, "%s invalid upcall version %d", __func__,
cd1e0c
+		     hdr->cm_vers);
cd1e0c
+		exit(-EINVAL);
cd1e0c
+	}
cd1e0c
+}
cd1e0c
+#else
cd1e0c
+static ssize_t cld_message_size(void *UNUSED(msg))
cd1e0c
+{
cd1e0c
+	return sizeof(struct cld_msg);
cd1e0c
+}
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cld_not_implemented(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	ssize_t bsize, wsize;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: downcalling with not implemented error", __func__);
cd1e0c
+
cd1e0c
+	/* set up reply */
cd1e0c
+	cmsg->cm_status = -EOPNOTSUPP;
cd1e0c
+
cd1e0c
+	bsize = cld_message_size(cmsg);
cd1e0c
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
cd1e0c
+	if (wsize != bsize)
cd1e0c
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
cd1e0c
+			 __func__, wsize);
cd1e0c
+
cd1e0c
+	/* reopen pipe, just to be sure */
cd1e0c
+	ret = cld_pipe_open(clnt);
cd1e0c
+	if (ret) {
cd1e0c
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d", __func__, ret);
cd1e0c
+		exit(ret);
cd1e0c
+	}
cd1e0c
+}
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cld_get_version(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	ssize_t bsize, wsize;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: version = %u.", __func__, UPCALL_VERSION);
cd1e0c
+
cd1e0c
+	cmsg->cm_u.cm_version = UPCALL_VERSION;
cd1e0c
+	cmsg->cm_status = 0;
cd1e0c
+
cd1e0c
+	bsize = cld_message_size(cmsg);
cd1e0c
+	xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
cd1e0c
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
cd1e0c
+	if (wsize != bsize) {
cd1e0c
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
cd1e0c
+			 __func__, wsize);
cd1e0c
+		ret = cld_pipe_open(clnt);
cd1e0c
+		if (ret) {
cd1e0c
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
cd1e0c
+					__func__, ret);
cd1e0c
+			exit(ret);
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+}
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cld_create(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	ssize_t bsize, wsize;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	ret = cld_check_grace_period();
cd1e0c
+	if (ret)
cd1e0c
+		goto reply;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: create client record.", __func__);
cd1e0c
+
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	if (cmsg->cm_vers >= 2)
cd1e0c
+		ret = sqlite_insert_client_and_princhash(
cd1e0c
+					cmsg->cm_u.cm_clntinfo.cc_name.cn_id,
cd1e0c
+					cmsg->cm_u.cm_clntinfo.cc_name.cn_len,
cd1e0c
+					cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
cd1e0c
+					cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len);
cd1e0c
+	else
cd1e0c
+		ret = sqlite_insert_client(cmsg->cm_u.cm_name.cn_id,
cd1e0c
+					   cmsg->cm_u.cm_name.cn_len);
cd1e0c
+#else
cd1e0c
+	ret = sqlite_insert_client(cmsg->cm_u.cm_name.cn_id,
cd1e0c
+				   cmsg->cm_u.cm_name.cn_len);
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+reply:
cd1e0c
+	cmsg->cm_status = ret ? -EREMOTEIO : ret;
cd1e0c
+
cd1e0c
+	bsize = cld_message_size(cmsg);
cd1e0c
+	xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
cd1e0c
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
cd1e0c
+	if (wsize != bsize) {
cd1e0c
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
cd1e0c
+			 __func__, wsize);
cd1e0c
+		ret = cld_pipe_open(clnt);
cd1e0c
+		if (ret) {
cd1e0c
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
cd1e0c
+					__func__, ret);
cd1e0c
+			exit(ret);
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+}
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cld_remove(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	ssize_t bsize, wsize;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	ret = cld_check_grace_period();
cd1e0c
+	if (ret)
cd1e0c
+		goto reply;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: remove client record.", __func__);
cd1e0c
+
cd1e0c
+	ret = sqlite_remove_client(cmsg->cm_u.cm_name.cn_id,
cd1e0c
+				   cmsg->cm_u.cm_name.cn_len);
cd1e0c
+
cd1e0c
+reply:
cd1e0c
+	cmsg->cm_status = ret ? -EREMOTEIO : ret;
cd1e0c
+
cd1e0c
+	bsize = cld_message_size(cmsg);
cd1e0c
+	xlog(D_GENERAL, "%s: downcall with status %d", __func__,
cd1e0c
+			cmsg->cm_status);
cd1e0c
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
cd1e0c
+	if (wsize != bsize) {
cd1e0c
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
cd1e0c
+			 __func__, wsize);
cd1e0c
+		ret = cld_pipe_open(clnt);
cd1e0c
+		if (ret) {
cd1e0c
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
cd1e0c
+					__func__, ret);
cd1e0c
+			exit(ret);
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+}
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cld_check(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	ssize_t bsize, wsize;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	/*
cd1e0c
+	 * If we get a check upcall at all, it means we're talking to an old
cd1e0c
+	 * kernel.  Furthermore, if we're not in grace it means this is the
cd1e0c
+	 * first client to do a reclaim.  Log a message and use
cd1e0c
+	 * sqlite_grace_start() to advance the epoch numbers.
cd1e0c
+	 */
cd1e0c
+	if (recovery_epoch == 0) {
cd1e0c
+		xlog(D_GENERAL, "%s: received a check upcall, please update the kernel",
cd1e0c
+			__func__);
cd1e0c
+		ret = sqlite_grace_start();
cd1e0c
+		if (ret)
cd1e0c
+			goto reply;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: check client record", __func__);
cd1e0c
+
cd1e0c
+	ret = sqlite_check_client(cmsg->cm_u.cm_name.cn_id,
cd1e0c
+				  cmsg->cm_u.cm_name.cn_len);
cd1e0c
+
cd1e0c
+reply:
cd1e0c
+	/* set up reply */
cd1e0c
+	cmsg->cm_status = ret ? -EACCES : ret;
cd1e0c
+
cd1e0c
+	bsize = cld_message_size(cmsg);
cd1e0c
+	xlog(D_GENERAL, "%s: downcall with status %d", __func__,
cd1e0c
+			cmsg->cm_status);
cd1e0c
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
cd1e0c
+	if (wsize != bsize) {
cd1e0c
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
cd1e0c
+			 __func__, wsize);
cd1e0c
+		ret = cld_pipe_open(clnt);
cd1e0c
+		if (ret) {
cd1e0c
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
cd1e0c
+					__func__, ret);
cd1e0c
+			exit(ret);
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+}
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cld_gracedone(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	ssize_t bsize, wsize;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	/*
cd1e0c
+	 * If we got a "gracedone" upcall while we're not in grace, then
cd1e0c
+	 * 1) we must be talking to an old kernel
cd1e0c
+	 * 2) no clients attempted to reclaim
cd1e0c
+	 * In that case, log a message and use sqlite_grace_start() to
cd1e0c
+	 * advance the epoch numbers, and then proceed as normal.
cd1e0c
+	 */
cd1e0c
+	if (recovery_epoch == 0) {
cd1e0c
+		xlog(D_GENERAL, "%s: received gracedone upcall "
cd1e0c
+			"while not in grace, please update the kernel",
cd1e0c
+			__func__);
cd1e0c
+		ret = sqlite_grace_start();
cd1e0c
+		if (ret)
cd1e0c
+			goto reply;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: grace done.", __func__);
cd1e0c
+
cd1e0c
+	ret = sqlite_grace_done();
cd1e0c
+
cd1e0c
+	if (first_time) {	/* NOTE(review): runs even when sqlite_grace_done() failed -- confirm intended */
cd1e0c
+		if (num_cltrack_records > 0)
cd1e0c
+			sqlite_delete_cltrack_records();
cd1e0c
+		if (num_legacy_records > 0)
cd1e0c
+			legacy_clear_recdir();
cd1e0c
+		sqlite_first_time_done();
cd1e0c
+		first_time = 0;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+reply:
cd1e0c
+	/* set up reply: status is 0 on success, -EREMOTEIO on any failure */
cd1e0c
+	cmsg->cm_status = ret ? -EREMOTEIO : ret;
cd1e0c
+
cd1e0c
+	bsize = cld_message_size(cmsg);
cd1e0c
+	xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
cd1e0c
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
cd1e0c
+	if (wsize != bsize) {
cd1e0c
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
cd1e0c
+			 __func__, wsize);
cd1e0c
+		ret = cld_pipe_open(clnt);
cd1e0c
+		if (ret) {
cd1e0c
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
cd1e0c
+					__func__, ret);
cd1e0c
+			exit(ret);
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+gracestart_callback(struct cld_client *clnt) {
cd1e0c
+	ssize_t bsize, wsize;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	cmsg->cm_status = -EINPROGRESS;
cd1e0c
+
cd1e0c
+	bsize = cld_message_size(cmsg);
cd1e0c
+	xlog(D_GENERAL, "Sending client %.*s",
cd1e0c
+			cmsg->cm_u.cm_name.cn_len, cmsg->cm_u.cm_name.cn_id);
cd1e0c
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
cd1e0c
+	if (wsize != bsize)
cd1e0c
+		return -EIO;
cd1e0c
+	return 0;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cld_gracestart(struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	ssize_t bsize, wsize;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: updating grace epochs", __func__);
cd1e0c
+
cd1e0c
+	ret = sqlite_grace_start();
cd1e0c
+	if (ret)
cd1e0c
+		goto reply;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: sending client records to the kernel", __func__);
cd1e0c
+
cd1e0c
+	ret = sqlite_iterate_recovery(&gracestart_callback, clnt);	/* callback downcalls once per record */
cd1e0c
+
cd1e0c
+reply:
cd1e0c
+	/* set up final reply: status is 0 on success, -EREMOTEIO on any failure */
cd1e0c
+	cmsg->cm_status = ret ? -EREMOTEIO : ret;
cd1e0c
+
cd1e0c
+	bsize = cld_message_size(cmsg);
cd1e0c
+	xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status);
cd1e0c
+	wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize);
cd1e0c
+	if (wsize != bsize) {
cd1e0c
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
cd1e0c
+			 __func__, wsize);
cd1e0c
+		ret = cld_pipe_open(clnt);
cd1e0c
+		if (ret) {
cd1e0c
+			xlog(L_FATAL, "%s: unable to reopen pipe: %d",
cd1e0c
+					__func__, ret);
cd1e0c
+			exit(ret);
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+}
cd1e0c
+
cd1e0c
+static void
cd1e0c
+cldcb(int UNUSED(fd), short which, void *data)
cd1e0c
+{
cd1e0c
+	ssize_t len;
cd1e0c
+	struct cld_client *clnt = data;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	if (which != EV_READ)
cd1e0c
+		goto out;
cd1e0c
+
cd1e0c
+	len = atomicio(read, clnt->cl_fd, cmsg, sizeof(*cmsg));
cd1e0c
+	if (len <= 0) {
cd1e0c
+		xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
cd1e0c
+		cld_pipe_open(clnt);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	if (cmsg->cm_vers > UPCALL_VERSION) {
cd1e0c
+		xlog(L_ERROR, "%s: unsupported upcall version: %hu",
cd1e0c
+				__func__, cmsg->cm_vers);
cd1e0c
+		cld_pipe_open(clnt);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	switch(cmsg->cm_cmd) {
cd1e0c
+	case Cld_Create:
cd1e0c
+		cld_create(clnt);
cd1e0c
+		break;
cd1e0c
+	case Cld_Remove:
cd1e0c
+		cld_remove(clnt);
cd1e0c
+		break;
cd1e0c
+	case Cld_Check:
cd1e0c
+		cld_check(clnt);
cd1e0c
+		break;
cd1e0c
+	case Cld_GraceDone:
cd1e0c
+		cld_gracedone(clnt);
cd1e0c
+		break;
cd1e0c
+	case Cld_GraceStart:
cd1e0c
+		cld_gracestart(clnt);
cd1e0c
+		break;
cd1e0c
+	case Cld_GetVersion:
cd1e0c
+		cld_get_version(clnt);
cd1e0c
+		break;
cd1e0c
+	default:
cd1e0c
+		xlog(L_WARNING, "%s: command %u is not yet implemented",
cd1e0c
+				__func__, cmsg->cm_cmd);
cd1e0c
+		cld_not_implemented(clnt);
cd1e0c
+	}
cd1e0c
+out:
cd1e0c
+	event_add(&clnt->cl_event, NULL);
cd1e0c
+}
cd1e0c
+
cd1e0c
+int
cd1e0c
+main(int argc, char **argv)
cd1e0c
+{
cd1e0c
+	int arg;
cd1e0c
+	int rc = 0;
cd1e0c
+	bool foreground = false;
cd1e0c
+	char *progname;
cd1e0c
+	char *storagedir = CLD_DEFAULT_STORAGEDIR;
cd1e0c
+	struct cld_client clnt;
cd1e0c
+	char *s;
cd1e0c
+	first_time = 0;
cd1e0c
+	num_cltrack_records = 0;
cd1e0c
+	num_legacy_records = 0;
cd1e0c
+
cd1e0c
+	memset(&clnt, 0, sizeof(clnt));
cd1e0c
+
cd1e0c
+	progname = strdup(basename(argv[0]));
cd1e0c
+	if (!progname) {
cd1e0c
+		fprintf(stderr, "%s: unable to allocate memory.\n", argv[0]);
cd1e0c
+		return 1;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	event_init();
cd1e0c
+	xlog_syslog(0);
cd1e0c
+	xlog_stderr(1);
cd1e0c
+
cd1e0c
+	conf_init_file(NFS_CONFFILE);
cd1e0c
+	s = conf_get_str("general", "pipefs-directory");
cd1e0c
+	if (s)
cd1e0c
+		strlcpy(pipefs_dir, s, sizeof(pipefs_dir));
cd1e0c
+	s = conf_get_str("nfsdcld", "storagedir");
cd1e0c
+	if (s)
cd1e0c
+		storagedir = s;
cd1e0c
+	rc = conf_get_num("nfsdcld", "debug", 0);
cd1e0c
+	if (rc > 0)
cd1e0c
+		xlog_config(D_ALL, 1);
cd1e0c
+
cd1e0c
+	/* process command-line options */
cd1e0c
+	while ((arg = getopt_long(argc, argv, "hdFp:s:", longopts,
cd1e0c
+				  NULL)) != EOF) {
cd1e0c
+		switch (arg) {
cd1e0c
+		case 'd':
cd1e0c
+			xlog_config(D_ALL, 1);
cd1e0c
+			break;
cd1e0c
+		case 'F':
cd1e0c
+			foreground = true;
cd1e0c
+			break;
cd1e0c
+		case 'p':
cd1e0c
+			strlcpy(pipefs_dir, optarg, sizeof(pipefs_dir));
cd1e0c
+			break;
cd1e0c
+		case 's':
cd1e0c
+			storagedir = optarg;
cd1e0c
+			break;
cd1e0c
+		default:
cd1e0c
+			usage(progname);
cd1e0c
+			return 0;
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	strlcpy(pipepath, pipefs_dir, sizeof(pipepath));
cd1e0c
+	strlcat(pipepath, DEFAULT_CLD_PATH, sizeof(pipepath));
cd1e0c
+
cd1e0c
+	xlog_open(progname);
cd1e0c
+	if (!foreground) {
cd1e0c
+		xlog_syslog(1);
cd1e0c
+		xlog_stderr(0);
cd1e0c
+		rc = daemon(0, 0);
cd1e0c
+		if (rc) {
cd1e0c
+			xlog(L_ERROR, "Unable to daemonize: %m");
cd1e0c
+			goto out;
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* drop all capabilities */
cd1e0c
+	rc = cld_set_caps();
cd1e0c
+	if (rc)
cd1e0c
+		goto out;
cd1e0c
+
cd1e0c
+	/*
cd1e0c
+	 * now see if the storagedir is writable by root w/o CAP_DAC_OVERRIDE.
cd1e0c
+	 * If it isn't then give the user a warning but proceed as if
cd1e0c
+	 * everything is OK. If the DB has already been created, then
cd1e0c
+	 * everything might still work. If it doesn't exist at all, then
cd1e0c
+	 * assume that the maindb init will be able to create it. Fail on
cd1e0c
+	 * anything else.
cd1e0c
+	 */
cd1e0c
+	if (access(storagedir, W_OK) == -1) {
cd1e0c
+		switch (errno) {
cd1e0c
+		case EACCES:
cd1e0c
+			xlog(L_WARNING, "Storage directory %s is not writable. "
cd1e0c
+					"Should be owned by root and writable "
cd1e0c
+					"by owner!", storagedir);
cd1e0c
+			break;
cd1e0c
+		case ENOENT:
cd1e0c
+			/* ignore and assume that we can create dir as root */
cd1e0c
+			break;
cd1e0c
+		default:
cd1e0c
+			xlog(L_ERROR, "Unexpected error when checking access "
cd1e0c
+				      "on %s: %m", storagedir);
cd1e0c
+			rc = -errno;
cd1e0c
+			goto out;
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	if (linux_version_code() < MAKE_VERSION(4, 20, 0))
cd1e0c
+		old_kernel = true;
cd1e0c
+
cd1e0c
+	/* set up storage db */
cd1e0c
+	rc = sqlite_prepare_dbh(storagedir);
cd1e0c
+	if (rc) {
cd1e0c
+		xlog(L_ERROR, "Failed to open main database: %d", rc);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* set up event handler */
cd1e0c
+	rc = cld_pipe_init(&clnt);
cd1e0c
+	if (rc)
cd1e0c
+		goto out;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "%s: Starting event dispatch handler.", __func__);
cd1e0c
+	rc = event_dispatch();
cd1e0c
+	if (rc < 0)
cd1e0c
+		xlog(L_ERROR, "%s: event_dispatch failed: %m", __func__);
cd1e0c
+
cd1e0c
+	close(clnt.cl_fd);
cd1e0c
+	close(inotify_fd);
cd1e0c
+out:
cd1e0c
+	free(progname);
cd1e0c
+	return rc;
cd1e0c
+}
cd1e0c
diff --git a/utils/nfsdcld/nfsdcld.man b/utils/nfsdcld/nfsdcld.man
cd1e0c
new file mode 100644
cd1e0c
index 00000000..4c2b1e80
cd1e0c
--- /dev/null
cd1e0c
+++ b/utils/nfsdcld/nfsdcld.man
cd1e0c
@@ -0,0 +1,221 @@
cd1e0c
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.13)
cd1e0c
+.\"
cd1e0c
+.\" Standard preamble:
cd1e0c
+.\" ========================================================================
cd1e0c
+.de Sp \" Vertical space (when we can't use .PP)
cd1e0c
+.if t .sp .5v
cd1e0c
+.if n .sp
cd1e0c
+..
cd1e0c
+.de Vb \" Begin verbatim text
cd1e0c
+.ft CW
cd1e0c
+.nf
cd1e0c
+.ne \\$1
cd1e0c
+..
cd1e0c
+.de Ve \" End verbatim text
cd1e0c
+.ft R
cd1e0c
+.fi
cd1e0c
+..
cd1e0c
+.\" Set up some character translations and predefined strings.  \*(-- will
cd1e0c
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
cd1e0c
+.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
cd1e0c
+.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
cd1e0c
+.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
cd1e0c
+.\" nothing in troff, for use with C<>.
cd1e0c
+.tr \(*W-
cd1e0c
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
cd1e0c
+.ie n \{\
cd1e0c
+.    ds -- \(*W-
cd1e0c
+.    ds PI pi
cd1e0c
+.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
cd1e0c
+.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
cd1e0c
+.    ds L" ""
cd1e0c
+.    ds R" ""
cd1e0c
+.    ds C` ""
cd1e0c
+.    ds C' ""
cd1e0c
+'br\}
cd1e0c
+.el\{\
cd1e0c
+.    ds -- \|\(em\|
cd1e0c
+.    ds PI \(*p
cd1e0c
+.    ds L" ``
cd1e0c
+.    ds R" ''
cd1e0c
+'br\}
cd1e0c
+.\"
cd1e0c
+.\" Escape single quotes in literal strings from groff's Unicode transform.
cd1e0c
+.ie \n(.g .ds Aq \(aq
cd1e0c
+.el       .ds Aq '
cd1e0c
+.\"
cd1e0c
+.\" If the F register is turned on, we'll generate index entries on stderr for
cd1e0c
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
cd1e0c
+.\" entries marked with X<> in POD.  Of course, you'll have to process the
cd1e0c
+.\" output yourself in some meaningful fashion.
cd1e0c
+.ie \nF \{\
cd1e0c
+.    de IX
cd1e0c
+.    tm Index:\\$1\t\\n%\t"\\$2"
cd1e0c
+..
cd1e0c
+.    nr % 0
cd1e0c
+.    rr F
cd1e0c
+.\}
cd1e0c
+.el \{\
cd1e0c
+.    de IX
cd1e0c
+..
cd1e0c
+.\}
cd1e0c
+.\"
cd1e0c
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
cd1e0c
+.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
cd1e0c
+.    \" fudge factors for nroff and troff
cd1e0c
+.if n \{\
cd1e0c
+.    ds #H 0
cd1e0c
+.    ds #V .8m
cd1e0c
+.    ds #F .3m
cd1e0c
+.    ds #[ \f1
cd1e0c
+.    ds #] \fP
cd1e0c
+.\}
cd1e0c
+.if t \{\
cd1e0c
+.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
cd1e0c
+.    ds #V .6m
cd1e0c
+.    ds #F 0
cd1e0c
+.    ds #[ \&
cd1e0c
+.    ds #] \&
cd1e0c
+.\}
cd1e0c
+.    \" simple accents for nroff and troff
cd1e0c
+.if n \{\
cd1e0c
+.    ds ' \&
cd1e0c
+.    ds ` \&
cd1e0c
+.    ds ^ \&
cd1e0c
+.    ds , \&
cd1e0c
+.    ds ~ ~
cd1e0c
+.    ds /
cd1e0c
+.\}
cd1e0c
+.if t \{\
cd1e0c
+.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
cd1e0c
+.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
cd1e0c
+.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
cd1e0c
+.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
cd1e0c
+.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
cd1e0c
+.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
cd1e0c
+.\}
cd1e0c
+.    \" troff and (daisy-wheel) nroff accents
cd1e0c
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
cd1e0c
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
cd1e0c
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
cd1e0c
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
cd1e0c
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
cd1e0c
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
cd1e0c
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
cd1e0c
+.ds ae a\h'-(\w'a'u*4/10)'e
cd1e0c
+.ds Ae A\h'-(\w'A'u*4/10)'E
cd1e0c
+.    \" corrections for vroff
cd1e0c
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
cd1e0c
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
cd1e0c
+.    \" for low resolution devices (crt and lpr)
cd1e0c
+.if \n(.H>23 .if \n(.V>19 \
cd1e0c
+\{\
cd1e0c
+.    ds : e
cd1e0c
+.    ds 8 ss
cd1e0c
+.    ds o a
cd1e0c
+.    ds d- d\h'-1'\(ga
cd1e0c
+.    ds D- D\h'-1'\(hy
cd1e0c
+.    ds th \o'bp'
cd1e0c
+.    ds Th \o'LP'
cd1e0c
+.    ds ae ae
cd1e0c
+.    ds Ae AE
cd1e0c
+.\}
cd1e0c
+.rm #[ #] #H #V #F C
cd1e0c
+.\" ========================================================================
cd1e0c
+.\"
cd1e0c
+.IX Title "NFSDCLD 8"
cd1e0c
+.TH NFSDCLD 8 "2011-12-21" "" ""
cd1e0c
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
cd1e0c
+.\" way too many mistakes in technical documents.
cd1e0c
+.if n .ad l
cd1e0c
+.nh
cd1e0c
+.SH "NAME"
cd1e0c
+nfsdcld \- NFSv4 Client Tracking Daemon
cd1e0c
+.SH "SYNOPSIS"
cd1e0c
+.IX Header "SYNOPSIS"
cd1e0c
+nfsdcld [\-d] [\-F] [\-p path] [\-s stable storage dir]
cd1e0c
+.SH "DESCRIPTION"
cd1e0c
+.IX Header "DESCRIPTION"
cd1e0c
+nfsdcld is the NFSv4 client tracking daemon. It is not necessary to run
cd1e0c
+this daemon on machines that are not acting as NFSv4 servers.
cd1e0c
+.PP
cd1e0c
+When a network partition is combined with a server reboot, there are
cd1e0c
+edge conditions that can cause the server to grant lock reclaims when
cd1e0c
+other clients have taken conflicting locks in the interim. A more detailed
cd1e0c
+explanation of this issue is described in \s-1RFC\s0 3530, section 8.6.3.
cd1e0c
+.PP
cd1e0c
+In order to prevent these problems, the server must track a small amount
cd1e0c
+of per-client information on stable storage. This daemon provides the
cd1e0c
+userspace piece of that functionality.
cd1e0c
+.SH "OPTIONS"
cd1e0c
+.IX Header "OPTIONS"
cd1e0c
+.IP "\fB\-d\fR, \fB\-\-debug\fR" 4
cd1e0c
+.IX Item "-d, --debug"
cd1e0c
+Enable debug level logging.
cd1e0c
+.IP "\fB\-F\fR, \fB\-\-foreground\fR" 4
cd1e0c
+.IX Item "-F, --foreground"
cd1e0c
+Runs the daemon in the foreground and prints all output to stderr.
cd1e0c
+.IP "\fB\-p\fR \fIpath\fR, \fB\-\-pipefsdir\fR=\fIpath\fR" 4
cd1e0c
+.IX Item "-p path, --pipefsdir=path"
cd1e0c
+Location of the rpc_pipefs filesystem. The default value is
cd1e0c
+\&\fI/var/lib/nfs/rpc_pipefs\fR.
cd1e0c
+.IP "\fB\-s\fR \fIstorage_dir\fR, \fB\-\-storagedir\fR=\fIstorage_dir\fR" 4
cd1e0c
+.IX Item "-s storage_dir, --storagedir=storage_dir"
cd1e0c
+Directory where stable storage information should be kept. The default
cd1e0c
+value is \fI/var/lib/nfs/nfsdcld\fR.
cd1e0c
+.SH "CONFIGURATION FILE"
cd1e0c
+.IX Header "CONFIGURATION FILE"
cd1e0c
+The following values are recognized in the \fB[nfsdcld]\fR section
cd1e0c
+of the \fI/etc/nfs.conf\fR configuration file:
cd1e0c
+.IP "\fBstoragedir\fR" 4
cd1e0c
+.IX Item "storagedir"
cd1e0c
+Equivalent to \fB\-s\fR/\fB\-\-storagedir\fR.
cd1e0c
+.IP "\fBdebug\fR" 4
cd1e0c
+.IX Item "debug"
cd1e0c
+Setting "debug = 1" is equivalent to \fB\-d\fR/\fB\-\-debug\fR.
cd1e0c
+.LP
cd1e0c
+In addition, the following value is recognized from the \fB[general]\fR section:
cd1e0c
+.IP "\fBpipefs\-directory\fR" 4
cd1e0c
+.IX Item "pipefs-directory"
cd1e0c
+Equivalent to \fB\-p\fR/\fB\-\-pipefsdir\fR.
cd1e0c
+.SH "NOTES"
cd1e0c
+.IX Header "NOTES"
cd1e0c
+The Linux kernel NFSv4 server has historically tracked this information
cd1e0c
+on stable storage by manipulating information on the filesystem
cd1e0c
+directly, in the directory to which \fI/proc/fs/nfsd/nfsv4recoverydir\fR
cd1e0c
+points.
cd1e0c
+.PP
cd1e0c
+This changed with the original introduction of \fBnfsdcld\fR upcall in kernel version 3.4,
cd1e0c
+which was later deprecated in favor of the \fBnfsdcltrack\fR(8) usermodehelper
cd1e0c
+program, support for which was added in kernel version 3.8.  However, since the
cd1e0c
+usermodehelper upcall does not work in containers, support for a new version of
cd1e0c
+the \fBnfsdcld\fR upcall was added in kernel version 5.2.
cd1e0c
+.PP
cd1e0c
+This daemon requires a kernel that supports the \fBnfsdcld\fR upcall. On older kernels, if
cd1e0c
+the legacy client name tracking code was in use, then the kernel would not create the
cd1e0c
+pipe that \fBnfsdcld\fR uses to talk to the kernel.  On newer kernels, nfsd attempts to
cd1e0c
+initialize client tracking in the following order:  First, the \fBnfsdcld\fR upcall.  Second,
cd1e0c
+the \fBnfsdcltrack\fR usermodehelper upcall.  Finally, the legacy client tracking.
cd1e0c
+.PP
cd1e0c
+This daemon should be run as root, as the pipe that it uses to communicate
cd1e0c
+with the kernel is only accessible by root. The daemon however does drop all
cd1e0c
+superuser capabilities after starting. Because of this, the \fIstoragedir\fR
cd1e0c
+should be owned by root, and be readable and writable by owner.
cd1e0c
+.PP
cd1e0c
+The daemon now supports different upcall versions to allow the kernel to pass additional
cd1e0c
+data to be stored in the on-disk database.  The kernel will query the supported upcall
cd1e0c
+version from \fBnfsdcld\fR during client tracking initialization.  A restart of \fBnfsd\fR is
cd1e0c
+not necessary after upgrading \fBnfsdcld\fR, however \fBnfsd\fR will not use a later upcall
cd1e0c
+version until restart.  A restart of \fBnfsd\fR is necessary after downgrading \fBnfsdcld\fR,
cd1e0c
+to ensure that \fBnfsd\fR does not use an upcall version that \fBnfsdcld\fR does not support.
cd1e0c
+Additionally, a downgrade of \fBnfsdcld\fR requires the schema of the on-disk database to
cd1e0c
+be downgraded as well.  That can be accomplished using the \fBclddb-tool\fR(8) utility.
cd1e0c
+.SH FILES
cd1e0c
+.TP
cd1e0c
+.B /var/lib/nfs/nfsdcld/main.sqlite
cd1e0c
+.SH SEE ALSO
cd1e0c
+.BR nfsdcltrack "(8), " clddb-tool (8)
cd1e0c
+.SH "AUTHORS"
cd1e0c
+.IX Header "AUTHORS"
cd1e0c
+The nfsdcld daemon was developed by Jeff Layton <jlayton@redhat.com>
cd1e0c
+with modifications from Scott Mayhew <smayhew@redhat.com>.
cd1e0c
diff --git a/utils/nfsdcld/sqlite.c b/utils/nfsdcld/sqlite.c
cd1e0c
new file mode 100644
cd1e0c
index 00000000..6666c867
cd1e0c
--- /dev/null
cd1e0c
+++ b/utils/nfsdcld/sqlite.c
cd1e0c
@@ -0,0 +1,1406 @@
cd1e0c
+/*
cd1e0c
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
cd1e0c
+ *
cd1e0c
+ * This program is free software; you can redistribute it and/or
cd1e0c
+ * modify it under the terms of the GNU General Public License
cd1e0c
+ * as published by the Free Software Foundation; either version 2
cd1e0c
+ * of the License, or (at your option) any later version.
cd1e0c
+ *
cd1e0c
+ * This program is distributed in the hope that it will be useful,
cd1e0c
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
cd1e0c
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
cd1e0c
+ * GNU General Public License for more details.
cd1e0c
+ *
cd1e0c
+ * You should have received a copy of the GNU General Public License
cd1e0c
+ * along with this program; if not, write to the Free Software
cd1e0c
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
cd1e0c
+ * Boston, MA 02110-1301, USA.
cd1e0c
+ */
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Explanation:
cd1e0c
+ *
cd1e0c
+ * This file contains the code to manage the sqlite backend database for the
cd1e0c
+ * nfsdcld client tracking daemon.
cd1e0c
+ *
cd1e0c
+ * The main database is called main.sqlite and contains the following tables:
cd1e0c
+ *
cd1e0c
+ * parameters: simple key/value pairs for storing database info
cd1e0c
+ *
cd1e0c
+ * grace: a "current" column containing an INTEGER representing the current
cd1e0c
+ *        epoch (where should new values be stored) and a "recovery" column
cd1e0c
+ *        containing an INTEGER representing the recovery epoch (from what
cd1e0c
+ *        epoch are we allowed to recover).  A recovery epoch of 0 means
cd1e0c
+ *        normal operation (grace period not in force).  Note: sqlite stores
cd1e0c
+ *        integers as signed values, so these must be cast to a uint64_t when
cd1e0c
+ *        retrieving them from the database and back to an int64_t when storing
cd1e0c
+ *        them in the database.
cd1e0c
+ *
cd1e0c
+ * rec-CCCCCCCCCCCCCCCC (where C is the hex representation of the epoch value):
cd1e0c
+ *        an "id" column containing a BLOB with the long-form clientid
cd1e0c
+ *        as sent by the client, and a "princhash" column containing a BLOB
cd1e0c
+ *        with the sha256 hash of the kerberos principal (if available).
cd1e0c
+ */
cd1e0c
+
cd1e0c
+#ifdef HAVE_CONFIG_H
cd1e0c
+#include "config.h"
cd1e0c
+#endif /* HAVE_CONFIG_H */
cd1e0c
+
cd1e0c
+#include <dirent.h>
cd1e0c
+#include <errno.h>
cd1e0c
+#include <event.h>
cd1e0c
+#include <stdbool.h>
cd1e0c
+#include <string.h>
cd1e0c
+#include <sys/stat.h>
cd1e0c
+#include <sys/types.h>
cd1e0c
+#include <fcntl.h>
cd1e0c
+#include <unistd.h>
cd1e0c
+#include <stdlib.h>
cd1e0c
+#include <stdint.h>
cd1e0c
+#include <limits.h>
cd1e0c
+#include <sqlite3.h>
cd1e0c
+#include <linux/limits.h>
cd1e0c
+#include <inttypes.h>
cd1e0c
+
cd1e0c
+#include "xlog.h"
cd1e0c
+#include "sqlite.h"
cd1e0c
+#include "cld.h"
cd1e0c
+#include "cld-internal.h"
cd1e0c
+#include "conffile.h"
cd1e0c
+#include "legacy.h"
cd1e0c
+#include "nfslib.h"
cd1e0c
+
cd1e0c
+#define CLD_SQLITE_LATEST_SCHEMA_VERSION 4
cd1e0c
+#define CLTRACK_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcltrack"
cd1e0c
+
cd1e0c
+/* in milliseconds */
cd1e0c
+#define CLD_SQLITE_BUSY_TIMEOUT 10000
cd1e0c
+
cd1e0c
+/* private data structures */
cd1e0c
+
cd1e0c
+/* global variables */
cd1e0c
+static char *cltrack_storagedir = CLTRACK_DEFAULT_STORAGEDIR;
cd1e0c
+
cd1e0c
+/* reusable pathname and sql command buffer */
cd1e0c
+static char buf[PATH_MAX];
cd1e0c
+
cd1e0c
+/* global database handle */
cd1e0c
+static sqlite3 *dbh;
cd1e0c
+
cd1e0c
+/* forward declarations */
cd1e0c
+
cd1e0c
+/* make a directory, ignoring EEXIST errors unless it's not a directory */
cd1e0c
+static int
cd1e0c
+mkdir_if_not_exist(const char *dirname)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	struct stat statbuf;
cd1e0c
+
cd1e0c
+	ret = mkdir(dirname, S_IRWXU);
cd1e0c
+	if (ret && errno != EEXIST)
cd1e0c
+		return -errno;
cd1e0c
+
cd1e0c
+	ret = stat(dirname, &statbuf);
cd1e0c
+	if (ret)
cd1e0c
+		return -errno;
cd1e0c
+
cd1e0c
+	if (!S_ISDIR(statbuf.st_mode))
cd1e0c
+		ret = -ENOTDIR;
cd1e0c
+
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_query_schema_version(void)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	/* prepare select query */
cd1e0c
+	ret = sqlite3_prepare_v2(dbh,
cd1e0c
+		"SELECT value FROM parameters WHERE key == \"version\";",
cd1e0c
+		 -1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
cd1e0c
+			sqlite3_errmsg(dbh));
cd1e0c
+		ret = 0;
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* query schema version */
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret != SQLITE_ROW) {
cd1e0c
+		xlog(D_GENERAL, "Select statement execution failed: %s",
cd1e0c
+				sqlite3_errmsg(dbh));
cd1e0c
+		ret = 0;
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_column_int(stmt, 0);
cd1e0c
+out:
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_query_first_time(int *first_time)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	/* prepare select query */
cd1e0c
+	ret = sqlite3_prepare_v2(dbh,
cd1e0c
+		"SELECT value FROM parameters WHERE key == \"first_time\";",
cd1e0c
+		 -1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
cd1e0c
+			sqlite3_errmsg(dbh));
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* query first_time */
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret != SQLITE_ROW) {
cd1e0c
+		xlog(D_GENERAL, "Select statement execution failed: %s",
cd1e0c
+				sqlite3_errmsg(dbh));
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	*first_time = sqlite3_column_int(stmt, 0);
cd1e0c
+	ret = 0;
cd1e0c
+out:
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_add_princ_col_cb(void *UNUSED(arg), int ncols, char **cols,
cd1e0c
+			    char **UNUSED(colnames))
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char *err;
cd1e0c
+
cd1e0c
+	if (ncols > 1)
cd1e0c
+		return -EINVAL;
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
cd1e0c
+			"ADD COLUMN princhash BLOB;", cols[0]);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		return -EINVAL;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to add princhash column to table %s: %s",
cd1e0c
+		     cols[0], err);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+	xlog(D_GENERAL, "Added princhash column to table %s", cols[0]);
cd1e0c
+out:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_maindb_update_v3_to_v4(void)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char *err;
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
cd1e0c
+			   "WHERE type=\"table\" AND name LIKE \"%rec-%\";",
cd1e0c
+			   sqlite_add_princ_col_cb, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: Failed to update tables!: %s", __func__, err);
cd1e0c
+	}
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_maindb_update_v1v2_to_v4(void)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char *err;
cd1e0c
+
cd1e0c
+	/* create grace table */
cd1e0c
+	ret = sqlite3_exec(dbh, "CREATE TABLE grace "
cd1e0c
+				"(current INTEGER , recovery INTEGER);",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to create grace table: %s", err);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* insert initial epochs into grace table */
cd1e0c
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
cd1e0c
+				"values (1, 0);",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to set initial epochs: %s", err);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* create recovery table for current epoch */
cd1e0c
+	ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
cd1e0c
+				"(id BLOB PRIMARY KEY, princhash BLOB);",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to create recovery table "
cd1e0c
+				"for current epoch: %s", err);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* copy records from old clients table */
cd1e0c
+	ret = sqlite3_exec(dbh, "INSERT INTO \"rec-0000000000000001\" (id) "
cd1e0c
+				"SELECT id FROM clients;",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to copy client records: %s", err);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* drop the old clients table */
cd1e0c
+	ret = sqlite3_exec(dbh, "DROP TABLE clients;",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to drop old clients table: %s", err);
cd1e0c
+	}
cd1e0c
+out:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_maindb_update_schema(int oldversion)
cd1e0c
+{
cd1e0c
+	int ret, ret2;
cd1e0c
+	char *err;
cd1e0c
+
cd1e0c
+	/* begin transaction */
cd1e0c
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
cd1e0c
+				&err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/*
cd1e0c
+	 * Check schema version again. This time, under an exclusive
cd1e0c
+	 * transaction to guard against racing DB setup attempts
cd1e0c
+	 */
cd1e0c
+	ret = sqlite_query_schema_version();
cd1e0c
+	if (ret != oldversion) {
cd1e0c
+		if (ret == CLD_SQLITE_LATEST_SCHEMA_VERSION)
cd1e0c
+			/* Someone else raced in and set it up */
cd1e0c
+			ret = 0;
cd1e0c
+		else
cd1e0c
+			/* Something went wrong -- fail! */
cd1e0c
+			ret = -EINVAL;
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* Still at old version -- do conversion */
cd1e0c
+
cd1e0c
+	switch (oldversion) {
cd1e0c
+	case 3:
cd1e0c
+	case 2:
cd1e0c
+		ret = sqlite_maindb_update_v3_to_v4();
cd1e0c
+		break;
cd1e0c
+	case 1:
cd1e0c
+		ret = sqlite_maindb_update_v1v2_to_v4();
cd1e0c
+		break;
cd1e0c
+	default:
cd1e0c
+		ret = -EINVAL;
cd1e0c
+	}
cd1e0c
+	if (ret != SQLITE_OK)
cd1e0c
+		goto rollback;
cd1e0c
+
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "UPDATE parameters SET value = %d "
cd1e0c
+			"WHERE key = \"version\";",
cd1e0c
+			CLD_SQLITE_LATEST_SCHEMA_VERSION);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		goto rollback;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		ret = -EINVAL;
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to update schema version: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite_query_first_time(&first_time);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		/* insert first_time into parameters table */
cd1e0c
+		ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters "
cd1e0c
+					"values (\"first_time\", \"1\");",
cd1e0c
+					NULL, NULL, &err);
cd1e0c
+		if (ret != SQLITE_OK) {
cd1e0c
+			xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
cd1e0c
+			goto rollback;
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+out:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+rollback:
cd1e0c
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret2 != SQLITE_OK)
cd1e0c
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
cd1e0c
+	goto out;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Start an exclusive transaction and recheck the DB schema version. If it's
cd1e0c
+ * still zero (indicating a new database) then set it up. If that all works,
cd1e0c
+ * then insert schema version into the parameters table and commit the
cd1e0c
+ * transaction. On any error, rollback the transaction.
cd1e0c
+ */
cd1e0c
+static int
cd1e0c
+sqlite_maindb_init_v4(void)
cd1e0c
+{
cd1e0c
+	int ret, ret2;
cd1e0c
+	char *err = NULL;
cd1e0c
+
cd1e0c
+	/* Start a transaction */
cd1e0c
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
cd1e0c
+				&err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
cd1e0c
+		return ret;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/*
cd1e0c
+	 * Check schema version again. This time, under an exclusive
cd1e0c
+	 * transaction to guard against racing DB setup attempts
cd1e0c
+	 */
cd1e0c
+	ret = sqlite_query_schema_version();
cd1e0c
+	switch (ret) {
cd1e0c
+	case 0:
cd1e0c
+		/* Query failed again -- set up DB */
cd1e0c
+		break;
cd1e0c
+	case CLD_SQLITE_LATEST_SCHEMA_VERSION:
cd1e0c
+		/* Someone else raced in and set it up */
cd1e0c
+		ret = 0;
cd1e0c
+		goto rollback;
cd1e0c
+	default:
cd1e0c
+		/* Something went wrong -- fail! */
cd1e0c
+		ret = -EINVAL;
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "CREATE TABLE parameters "
cd1e0c
+				"(key TEXT PRIMARY KEY, value TEXT);",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to create parameter table: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* create grace table */
cd1e0c
+	ret = sqlite3_exec(dbh, "CREATE TABLE grace "
cd1e0c
+				"(current INTEGER , recovery INTEGER);",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to create grace table: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* insert initial epochs into grace table */
cd1e0c
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
cd1e0c
+				"values (1, 0);",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to set initial epochs: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* create recovery table for current epoch */
cd1e0c
+	ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
cd1e0c
+				"(id BLOB PRIMARY KEY, princhash BLOB);",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to create recovery table "
cd1e0c
+				"for current epoch: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* insert version into parameters table */
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "INSERT OR FAIL INTO parameters "
cd1e0c
+			"values (\"version\", \"%d\");",
cd1e0c
+			CLD_SQLITE_LATEST_SCHEMA_VERSION);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		goto rollback;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		ret = -EINVAL;
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* insert first_time into parameters table */
cd1e0c
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters "
cd1e0c
+				"values (\"first_time\", \"1\");",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+out:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+
cd1e0c
+rollback:
cd1e0c
+	/* Attempt to rollback the transaction */
cd1e0c
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret2 != SQLITE_OK)
cd1e0c
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
cd1e0c
+	goto out;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_startup_query_grace(void)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	uint64_t tcur;
cd1e0c
+	uint64_t trec;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	/* prepare select query */
cd1e0c
+	ret = sqlite3_prepare_v2(dbh, "SELECT * FROM grace;", -1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(D_GENERAL, "Unable to prepare select statement: %s",
cd1e0c
+			sqlite3_errmsg(dbh));
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret != SQLITE_ROW) {
cd1e0c
+		xlog(D_GENERAL, "Select statement execution failed: %s",
cd1e0c
+				sqlite3_errmsg(dbh));
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	tcur = (uint64_t)sqlite3_column_int64(stmt, 0);
cd1e0c
+	trec = (uint64_t)sqlite3_column_int64(stmt, 1);
cd1e0c
+
cd1e0c
+	current_epoch = tcur;
cd1e0c
+	recovery_epoch = trec;
cd1e0c
+	ret = 0;
cd1e0c
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
cd1e0c
+		__func__, current_epoch, recovery_epoch);
cd1e0c
+out:
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Helper for renaming a recovery table to fix the padding.
cd1e0c
+ */
cd1e0c
+static int
cd1e0c
+sqlite_fix_table_name(const char *name)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	uint64_t val;
cd1e0c
+	char *err;
cd1e0c
+
cd1e0c
+	if (sscanf(name, "rec-%" PRIx64, &val) != 1)
cd1e0c
+		return -EINVAL;
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
cd1e0c
+			"RENAME TO \"rec-%016" PRIx64 "\";",
cd1e0c
+			name, val);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		return -EINVAL;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to fix table for epoch %"PRIu64": %s",
cd1e0c
+		     val, err);
cd1e0c
+		goto out;
cd1e0c
+	}
cd1e0c
+	xlog(D_GENERAL, "Renamed table %s to rec-%016" PRIx64, name, val);
cd1e0c
+out:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Callback for the sqlite3_exec call in sqlite_check_table_names.
cd1e0c
+ * If the epoch encoded in the table name matches either the current
cd1e0c
+ * epoch or the recovery epoch, then try to fix the padding.  Otherwise,
cd1e0c
+ * we bail.
cd1e0c
+ */
cd1e0c
+static int
cd1e0c
+sqlite_check_table_names_cb(void *UNUSED(arg), int ncols, char **cols,
cd1e0c
+			    char **UNUSED(colnames))
cd1e0c
+{
cd1e0c
+	int ret = SQLITE_OK;
cd1e0c
+	uint64_t val;
cd1e0c
+
cd1e0c
+	if (ncols > 1)
cd1e0c
+		return -EINVAL;
cd1e0c
+	if (sscanf(cols[0], "rec-%" PRIx64, &val) != 1)
cd1e0c
+		return -EINVAL;
cd1e0c
+	if (val == current_epoch || val == recovery_epoch) {
cd1e0c
+		xlog(D_GENERAL, "found invalid table name %s for %s epoch",
cd1e0c
+		     cols[0], val == current_epoch ? "current" : "recovery");
cd1e0c
+		ret = sqlite_fix_table_name(cols[0]);
cd1e0c
+	} else {
cd1e0c
+		xlog(L_ERROR, "found invalid table name %s for unknown epoch %"
cd1e0c
+		     PRId64, cols[0], val);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Look for recovery table names where the epoch isn't zero-padded
cd1e0c
+ */
cd1e0c
+static int
cd1e0c
+sqlite_check_table_names(void)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char *err;
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
cd1e0c
+			   "WHERE type=\"table\" AND name LIKE \"%rec-%\" "
cd1e0c
+			   "AND length(name) < 20;",
cd1e0c
+			   sqlite_check_table_names_cb, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Table names check failed: %s", err);
cd1e0c
+	}
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Simple db health check.  For now we're just making sure that the recovery
cd1e0c
+ * table names are of the format "rec-CCCCCCCCCCCCCCCC" (where C is the hex
cd1e0c
+ * representation of the epoch value) and that epoch value matches either
cd1e0c
+ * the current epoch or the recovery epoch.
cd1e0c
+ */
cd1e0c
+static int
cd1e0c
+sqlite_check_db_health(void)
cd1e0c
+{
cd1e0c
+	int ret, ret2;
cd1e0c
+	char *err;
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
cd1e0c
+				&err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite_check_table_names();
cd1e0c
+	if (ret != SQLITE_OK)
cd1e0c
+		goto rollback;
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+cleanup:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
cd1e0c
+	return ret;
cd1e0c
+rollback:
cd1e0c
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret2 != SQLITE_OK)
cd1e0c
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
cd1e0c
+	goto cleanup;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_attach_db(const char *path)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char dbpath[PATH_MAX];
cd1e0c
+	struct stat stb;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	ret = snprintf(dbpath, PATH_MAX - 1, "%s/main.sqlite", path);
cd1e0c
+	if (ret < 0)
cd1e0c
+		return ret;
cd1e0c
+
cd1e0c
+	dbpath[PATH_MAX - 1] = '\0';
cd1e0c
+	ret = stat(dbpath, &stb);
cd1e0c
+	if (ret < 0)
cd1e0c
+		return ret;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "attaching %s", dbpath);
cd1e0c
+	ret = sqlite3_prepare_v2(dbh, "ATTACH DATABASE ? AS attached;",
cd1e0c
+			-1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: unable to prepare attach statement: %s",
cd1e0c
+				__func__, sqlite3_errmsg(dbh));
cd1e0c
+		return ret;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_bind_text(stmt, 1, dbpath, strlen(dbpath), SQLITE_STATIC);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: bind text failed: %s",
cd1e0c
+				__func__, sqlite3_errmsg(dbh));
cd1e0c
+		return ret;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret == SQLITE_DONE)
cd1e0c
+		ret = SQLITE_OK;
cd1e0c
+	else
cd1e0c
+		xlog(L_ERROR, "%s: unexpected return code from attach: %s",
cd1e0c
+				__func__, sqlite3_errmsg(dbh));
cd1e0c
+
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	stmt = NULL;
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+static int
cd1e0c
+sqlite_detach_db(void)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char *err = NULL;
cd1e0c
+
cd1e0c
+	xlog(D_GENERAL, "detaching database");
cd1e0c
+	ret = sqlite3_exec(dbh, "DETACH DATABASE attached;", NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to detach attached db: %s", err);
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Copies client records from the nfsdcltrack database as part of a one-time
cd1e0c
+ * "upgrade".
cd1e0c
+ *
cd1e0c
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0).
cd1e0c
+ * Returns the number of records copied via "num_rec".
cd1e0c
+ */
cd1e0c
+static int
cd1e0c
+sqlite_copy_cltrack_records(int *num_rec)
cd1e0c
+{
cd1e0c
+	int ret, ret2;
cd1e0c
+	char *s;
cd1e0c
+	char *err = NULL;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	s = conf_get_str("nfsdcltrack", "storagedir");
cd1e0c
+	if (s)
cd1e0c
+		cltrack_storagedir = s;
cd1e0c
+	ret = sqlite_attach_db(cltrack_storagedir);
cd1e0c
+	if (ret)
cd1e0c
+		goto out;
cd1e0c
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
cd1e0c
+				&err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";",
cd1e0c
+			current_epoch);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		goto rollback;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		ret = -EINVAL;
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to clear records from current epoch: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "INSERT INTO \"rec-%016" PRIx64 "\" (id) "
cd1e0c
+				"SELECT id FROM attached.clients;",
cd1e0c
+				current_epoch);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		goto rollback;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		ret = -EINVAL;
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
cd1e0c
+			__func__, sqlite3_errmsg(dbh));
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret != SQLITE_DONE) {
cd1e0c
+		xlog(L_ERROR, "%s: unexpected return code from insert: %s",
cd1e0c
+				__func__, sqlite3_errmsg(dbh));
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+	*num_rec = sqlite3_changes(dbh);
cd1e0c
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+cleanup:
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	sqlite_detach_db();
cd1e0c
+out:
cd1e0c
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
cd1e0c
+	return ret;
cd1e0c
+rollback:
cd1e0c
+	*num_rec = 0;
cd1e0c
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret2 != SQLITE_OK)
cd1e0c
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
cd1e0c
+	goto cleanup;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/* Open the database and set up the database handle for it */
cd1e0c
+int
cd1e0c
+sqlite_prepare_dbh(const char *topdir)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+
cd1e0c
+	/* Do nothing if the database handle is already set up */
cd1e0c
+	if (dbh)
cd1e0c
+		return 0;
cd1e0c
+
cd1e0c
+	ret = snprintf(buf, PATH_MAX - 1, "%s/main.sqlite", topdir);
cd1e0c
+	if (ret < 0)
cd1e0c
+		return ret;
cd1e0c
+
cd1e0c
+	buf[PATH_MAX - 1] = '\0';
cd1e0c
+
cd1e0c
+	/* open a new DB handle */
cd1e0c
+	ret = sqlite3_open(buf, &dbh);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		/* try to create the dir */
cd1e0c
+		ret = mkdir_if_not_exist(topdir);
cd1e0c
+		if (ret)
cd1e0c
+			goto out_close;
cd1e0c
+
cd1e0c
+		/* retry open */
cd1e0c
+		ret = sqlite3_open(buf, &dbh);
cd1e0c
+		if (ret != SQLITE_OK)
cd1e0c
+			goto out_close;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* set busy timeout */
cd1e0c
+	ret = sqlite3_busy_timeout(dbh, CLD_SQLITE_BUSY_TIMEOUT);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to set sqlite busy timeout: %s",
cd1e0c
+				sqlite3_errmsg(dbh));
cd1e0c
+		goto out_close;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite_query_schema_version();
cd1e0c
+	switch (ret) {
cd1e0c
+	case CLD_SQLITE_LATEST_SCHEMA_VERSION:
cd1e0c
+		/* DB is already set up. Do nothing */
cd1e0c
+		ret = 0;
cd1e0c
+		break;
cd1e0c
+	case 3:
cd1e0c
+		/* Old DB -- update to new schema */
cd1e0c
+		ret = sqlite_maindb_update_schema(3);
cd1e0c
+		if (ret)
cd1e0c
+			goto out_close;
cd1e0c
+		break;
cd1e0c
+	case 2:
cd1e0c
+		/* Old DB -- update to new schema */
cd1e0c
+		ret = sqlite_maindb_update_schema(2);
cd1e0c
+		if (ret)
cd1e0c
+			goto out_close;
cd1e0c
+		break;
cd1e0c
+
cd1e0c
+	case 1:
cd1e0c
+		/* Old DB -- update to new schema */
cd1e0c
+		ret = sqlite_maindb_update_schema(1);
cd1e0c
+		if (ret)
cd1e0c
+			goto out_close;
cd1e0c
+		break;
cd1e0c
+	case 0:
cd1e0c
+		/* Query failed -- try to set up new DB */
cd1e0c
+		ret = sqlite_maindb_init_v4();
cd1e0c
+		if (ret)
cd1e0c
+			goto out_close;
cd1e0c
+		break;
cd1e0c
+	default:
cd1e0c
+		/* Unknown DB version -- downgrade? Fail */
cd1e0c
+		xlog(L_ERROR, "Unsupported database schema version! "
cd1e0c
+			"Expected %d, got %d.",
cd1e0c
+			CLD_SQLITE_LATEST_SCHEMA_VERSION, ret);
cd1e0c
+		ret = -EINVAL;
cd1e0c
+		goto out_close;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite_startup_query_grace();
cd1e0c
+
cd1e0c
+	ret = sqlite_query_first_time(&first_time);
cd1e0c
+	if (ret)
cd1e0c
+		goto out_close;
cd1e0c
+
cd1e0c
+	ret = sqlite_check_db_health();
cd1e0c
+	if (ret) {
cd1e0c
+		xlog(L_ERROR, "Database health check failed! "
cd1e0c
+			"Database must be fixed manually.");
cd1e0c
+		goto out_close;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	/* one-time "upgrade" from older client tracking methods */
cd1e0c
+	if (first_time) {
cd1e0c
+		sqlite_copy_cltrack_records(&num_cltrack_records);
cd1e0c
+		xlog(D_GENERAL, "%s: num_cltrack_records = %d\n",
cd1e0c
+			__func__, num_cltrack_records);
cd1e0c
+		legacy_load_clients_from_recdir(&num_legacy_records);
cd1e0c
+		xlog(D_GENERAL, "%s: num_legacy_records = %d\n",
cd1e0c
+			__func__, num_legacy_records);
cd1e0c
+		if (num_cltrack_records > 0 && num_legacy_records > 0)
cd1e0c
+			xlog(L_WARNING, "%s: first-time upgrade detected "
cd1e0c
+				"both cltrack and legacy records!\n", __func__);
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	return ret;
cd1e0c
+out_close:
cd1e0c
+	sqlite3_close(dbh);
cd1e0c
+	dbh = NULL;
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Create a client record
cd1e0c
+ *
cd1e0c
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0)
cd1e0c
+ */
cd1e0c
+int
cd1e0c
+sqlite_insert_client(const unsigned char *clname, const size_t namelen)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" (id) "
cd1e0c
+				"VALUES (?);", current_epoch);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		return ret;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
cd1e0c
+			__func__, sqlite3_errmsg(dbh));
cd1e0c
+		return ret;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
cd1e0c
+				SQLITE_STATIC);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
cd1e0c
+				sqlite3_errmsg(dbh));
cd1e0c
+		goto out_err;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret == SQLITE_DONE)
cd1e0c
+		ret = SQLITE_OK;
cd1e0c
+	else
cd1e0c
+		xlog(L_ERROR, "%s: unexpected return code from insert: %s",
cd1e0c
+				__func__, sqlite3_errmsg(dbh));
cd1e0c
+
cd1e0c
+out_err:
cd1e0c
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+/*
cd1e0c
+ * Create a client record including the hash of the kerberos principal
cd1e0c
+ *
cd1e0c
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0)
cd1e0c
+ */
cd1e0c
+int
cd1e0c
+sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
cd1e0c
+		const unsigned char *clprinchash, const size_t princhashlen)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	if (princhashlen > 0)
cd1e0c
+		ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" "
cd1e0c
+				"VALUES (?, ?);", current_epoch);
cd1e0c
+	else
cd1e0c
+		ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" (id) "
cd1e0c
+				"VALUES (?);", current_epoch);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		return ret;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: insert statement prepare failed: %s",
cd1e0c
+			__func__, sqlite3_errmsg(dbh));
cd1e0c
+		return ret;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
cd1e0c
+				SQLITE_STATIC);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
cd1e0c
+				sqlite3_errmsg(dbh));
cd1e0c
+		goto out_err;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	if (princhashlen > 0) {
cd1e0c
+		ret = sqlite3_bind_blob(stmt, 2, (const void *)clprinchash, princhashlen,
cd1e0c
+					SQLITE_STATIC);
cd1e0c
+		if (ret != SQLITE_OK) {
cd1e0c
+			xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
cd1e0c
+					sqlite3_errmsg(dbh));
cd1e0c
+			goto out_err;
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret == SQLITE_DONE)
cd1e0c
+		ret = SQLITE_OK;
cd1e0c
+	else
cd1e0c
+		xlog(L_ERROR, "%s: unexpected return code from insert: %s",
cd1e0c
+				__func__, sqlite3_errmsg(dbh));
cd1e0c
+
cd1e0c
+out_err:
cd1e0c
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+#else
cd1e0c
+int
cd1e0c
+sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
cd1e0c
+		const unsigned char *clprinchash, const size_t princhashlen)
cd1e0c
+{
cd1e0c
+	return -EINVAL;
cd1e0c
+}
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+/* Remove a client record */
cd1e0c
+int
cd1e0c
+sqlite_remove_client(const unsigned char *clname, const size_t namelen)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\" "
cd1e0c
+				"WHERE id==?;", current_epoch);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		return ret;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
cd1e0c
+
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: statement prepare failed: %s",
cd1e0c
+				__func__, sqlite3_errmsg(dbh));
cd1e0c
+		goto out_err;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
cd1e0c
+				SQLITE_STATIC);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
cd1e0c
+				sqlite3_errmsg(dbh));
cd1e0c
+		goto out_err;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret == SQLITE_DONE)
cd1e0c
+		ret = SQLITE_OK;
cd1e0c
+	else
cd1e0c
+		xlog(L_ERROR, "%s: unexpected return code from delete: %d",
cd1e0c
+				__func__, ret);
cd1e0c
+
cd1e0c
+out_err:
cd1e0c
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Is the given clname in the table for the recovery epoch? If so, then insert
cd1e0c
+ * it into the table for the current epoch and return the result of that insert.
cd1e0c
+ * If the record isn't present, or the lookup fails, then return an error.
cd1e0c
+ */
cd1e0c
+int
cd1e0c
+sqlite_check_client(const unsigned char *clname, const size_t namelen)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "SELECT count(*) FROM  \"rec-%016" PRIx64 "\" "
cd1e0c
+				"WHERE id==?;", recovery_epoch);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		return ret;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: select statement prepare failed: %s",
cd1e0c
+			__func__, sqlite3_errmsg(dbh));
cd1e0c
+		return ret;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
cd1e0c
+				SQLITE_STATIC);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: bind blob failed: %s",
cd1e0c
+				__func__, sqlite3_errmsg(dbh));
cd1e0c
+		goto out_err;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_step(stmt);
cd1e0c
+	if (ret != SQLITE_ROW) {
cd1e0c
+		xlog(L_ERROR, "%s: unexpected return code from select: %d",
cd1e0c
+				__func__, ret);
cd1e0c
+		goto out_err;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_column_int(stmt, 0);
cd1e0c
+	xlog(D_GENERAL, "%s: select returned %d rows", __func__, ret);
cd1e0c
+	if (ret != 1) {
cd1e0c
+		ret = -EACCES;
cd1e0c
+		goto out_err;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+
cd1e0c
+	/* Now insert the client into the table for the current epoch */
cd1e0c
+	return sqlite_insert_client(clname, namelen);
cd1e0c
+
cd1e0c
+out_err:
cd1e0c
+	xlog(D_GENERAL, "%s: returning %d", __func__, ret);
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+int
cd1e0c
+sqlite_grace_start(void)
cd1e0c
+{
cd1e0c
+	int ret, ret2;
cd1e0c
+	char *err;
cd1e0c
+	uint64_t tcur = current_epoch;
cd1e0c
+	uint64_t trec = recovery_epoch;
cd1e0c
+
cd1e0c
+	/* begin transaction */
cd1e0c
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
cd1e0c
+				&err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	if (trec == 0) {
cd1e0c
+		/*
cd1e0c
+		 * A normal grace start - update the epoch values in the grace
cd1e0c
+		 * table and create a new table for the current reboot epoch.
cd1e0c
+		 */
cd1e0c
+		trec = tcur;
cd1e0c
+		tcur++;
cd1e0c
+
cd1e0c
+		ret = snprintf(buf, sizeof(buf), "UPDATE grace "
cd1e0c
+				"SET current = %" PRId64 ", recovery = %" PRId64 ";",
cd1e0c
+				(int64_t)tcur, (int64_t)trec);
cd1e0c
+		if (ret < 0) {
cd1e0c
+			xlog(L_ERROR, "sprintf failed!");
cd1e0c
+			goto rollback;
cd1e0c
+		} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+			xlog(L_ERROR, "sprintf output too long! (%d chars)",
cd1e0c
+				ret);
cd1e0c
+			ret = -EINVAL;
cd1e0c
+			goto rollback;
cd1e0c
+		}
cd1e0c
+
cd1e0c
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+		if (ret != SQLITE_OK) {
cd1e0c
+			xlog(L_ERROR, "Unable to update epochs: %s", err);
cd1e0c
+			goto rollback;
cd1e0c
+		}
cd1e0c
+
cd1e0c
+		ret = snprintf(buf, sizeof(buf), "CREATE TABLE \"rec-%016" PRIx64 "\" "
cd1e0c
+				"(id BLOB PRIMARY KEY, princhash blob);",
cd1e0c
+				tcur);
cd1e0c
+		if (ret < 0) {
cd1e0c
+			xlog(L_ERROR, "sprintf failed!");
cd1e0c
+			goto rollback;
cd1e0c
+		} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+			xlog(L_ERROR, "sprintf output too long! (%d chars)",
cd1e0c
+				ret);
cd1e0c
+			ret = -EINVAL;
cd1e0c
+			goto rollback;
cd1e0c
+		}
cd1e0c
+
cd1e0c
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+		if (ret != SQLITE_OK) {
cd1e0c
+			xlog(L_ERROR, "Unable to create table for current epoch: %s",
cd1e0c
+				err);
cd1e0c
+			goto rollback;
cd1e0c
+		}
cd1e0c
+	} else {
cd1e0c
+		/* Server restarted while in grace - don't update the epoch
cd1e0c
+		 * values in the grace table, just clear out the records for
cd1e0c
+		 * the current reboot epoch.
cd1e0c
+		 */
cd1e0c
+		ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";",
cd1e0c
+				tcur);
cd1e0c
+		if (ret < 0) {
cd1e0c
+			xlog(L_ERROR, "sprintf failed!");
cd1e0c
+			goto rollback;
cd1e0c
+		} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+			xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+			ret = -EINVAL;
cd1e0c
+			goto rollback;
cd1e0c
+		}
cd1e0c
+
cd1e0c
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+		if (ret != SQLITE_OK) {
cd1e0c
+			xlog(L_ERROR, "Unable to clear table for current epoch: %s",
cd1e0c
+				err);
cd1e0c
+			goto rollback;
cd1e0c
+		}
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	current_epoch = tcur;
cd1e0c
+	recovery_epoch = trec;
cd1e0c
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
cd1e0c
+		__func__, current_epoch, recovery_epoch);
cd1e0c
+
cd1e0c
+out:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+rollback:
cd1e0c
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret2 != SQLITE_OK)
cd1e0c
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
cd1e0c
+	goto out;
cd1e0c
+}
cd1e0c
+
cd1e0c
+int
cd1e0c
+sqlite_grace_done(void)
cd1e0c
+{
cd1e0c
+	int ret, ret2;
cd1e0c
+	char *err;
cd1e0c
+
cd1e0c
+	/* begin transaction */
cd1e0c
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
cd1e0c
+				&err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "UPDATE grace SET recovery = \"0\";",
cd1e0c
+			NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to clear recovery epoch: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "DROP TABLE \"rec-%016" PRIx64 "\";",
cd1e0c
+		recovery_epoch);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		goto rollback;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		ret = -EINVAL;
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to drop table for recovery epoch: %s",
cd1e0c
+			err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
cd1e0c
+		goto rollback;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	recovery_epoch = 0;
cd1e0c
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
cd1e0c
+		__func__, current_epoch, recovery_epoch);
cd1e0c
+
cd1e0c
+out:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+rollback:
cd1e0c
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
cd1e0c
+	if (ret2 != SQLITE_OK)
cd1e0c
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
cd1e0c
+	goto out;
cd1e0c
+}
cd1e0c
+
cd1e0c
+
cd1e0c
+int
cd1e0c
+sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	sqlite3_stmt *stmt = NULL;
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
cd1e0c
+#else
cd1e0c
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
cd1e0c
+#endif
cd1e0c
+
cd1e0c
+	if (recovery_epoch == 0) {
cd1e0c
+		xlog(D_GENERAL, "%s: not in grace!", __func__);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = snprintf(buf, sizeof(buf), "SELECT * FROM \"rec-%016" PRIx64 "\";",
cd1e0c
+		recovery_epoch);
cd1e0c
+	if (ret < 0) {
cd1e0c
+		xlog(L_ERROR, "sprintf failed!");
cd1e0c
+		return ret;
cd1e0c
+	} else if ((size_t)ret >= sizeof(buf)) {
cd1e0c
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
cd1e0c
+		return -EINVAL;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "%s: select statement prepare failed: %s",
cd1e0c
+			__func__, sqlite3_errmsg(dbh));
cd1e0c
+		return ret;
cd1e0c
+	}
cd1e0c
+
cd1e0c
+	while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
cd1e0c
+		memset(&cmsg->cm_u, 0, sizeof(cmsg->cm_u));
cd1e0c
+#if UPCALL_VERSION >= 2
cd1e0c
+		memcpy(&cmsg->cm_u.cm_clntinfo.cc_name.cn_id,
cd1e0c
+			sqlite3_column_blob(stmt, 0), NFS4_OPAQUE_LIMIT);
cd1e0c
+		cmsg->cm_u.cm_clntinfo.cc_name.cn_len = sqlite3_column_bytes(stmt, 0);
cd1e0c
+		if (sqlite3_column_bytes(stmt, 1) > 0) {
cd1e0c
+			memcpy(&cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
cd1e0c
+				sqlite3_column_blob(stmt, 1), SHA256_DIGEST_SIZE);
cd1e0c
+			cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = sqlite3_column_bytes(stmt, 1);
cd1e0c
+		}
cd1e0c
+#else
cd1e0c
+		memcpy(&cmsg->cm_u.cm_name.cn_id, sqlite3_column_blob(stmt, 0),
cd1e0c
+			NFS4_OPAQUE_LIMIT);
cd1e0c
+		cmsg->cm_u.cm_name.cn_len = sqlite3_column_bytes(stmt, 0);
cd1e0c
+#endif
cd1e0c
+		cb(clnt);
cd1e0c
+	}
cd1e0c
+	if (ret == SQLITE_DONE)
cd1e0c
+		ret = 0;
cd1e0c
+	sqlite3_finalize(stmt);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Cleans out the old nfsdcltrack database.
cd1e0c
+ *
cd1e0c
+ * Called upon receipt of the first "GraceDone" upcall only.
cd1e0c
+ */
cd1e0c
+int
cd1e0c
+sqlite_delete_cltrack_records(void)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char *s;
cd1e0c
+	char *err = NULL;
cd1e0c
+
cd1e0c
+	s = conf_get_str("nfsdcltrack", "storagedir");
cd1e0c
+	if (s)
cd1e0c
+		cltrack_storagedir = s;
cd1e0c
+	ret = sqlite_attach_db(cltrack_storagedir);
cd1e0c
+	if (ret)
cd1e0c
+		goto out;
cd1e0c
+	ret = sqlite3_exec(dbh, "DELETE FROM attached.clients;",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK) {
cd1e0c
+		xlog(L_ERROR, "Unable to clear records from cltrack db: %s",
cd1e0c
+				err);
cd1e0c
+	}
cd1e0c
+	sqlite_detach_db();
cd1e0c
+out:
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
+
cd1e0c
+/*
cd1e0c
+ * Sets first_time to 0 in the parameters table to ensure we only
cd1e0c
+ * copy old client tracking records into the database one time.
cd1e0c
+ *
cd1e0c
+ * Called upon receipt of the first "GraceDone" upcall only.
cd1e0c
+ */
cd1e0c
+int
cd1e0c
+sqlite_first_time_done(void)
cd1e0c
+{
cd1e0c
+	int ret;
cd1e0c
+	char *err = NULL;
cd1e0c
+
cd1e0c
+	ret = sqlite3_exec(dbh, "UPDATE parameters SET value = \"0\" "
cd1e0c
+				"WHERE key = \"first_time\";",
cd1e0c
+				NULL, NULL, &err);
cd1e0c
+	if (ret != SQLITE_OK)
cd1e0c
+		xlog(L_ERROR, "Unable to clear first_time: %s", err);
cd1e0c
+
cd1e0c
+	sqlite3_free(err);
cd1e0c
+	return ret;
cd1e0c
+}
cd1e0c
diff --git a/utils/nfsdcld/sqlite.h b/utils/nfsdcld/sqlite.h
cd1e0c
new file mode 100644
cd1e0c
index 00000000..0a26ad67
cd1e0c
--- /dev/null
cd1e0c
+++ b/utils/nfsdcld/sqlite.h
cd1e0c
@@ -0,0 +1,37 @@
cd1e0c
+/*
cd1e0c
+ * Copyright (C) 2011  Red Hat, Jeff Layton <jlayton@redhat.com>
cd1e0c
+ *
cd1e0c
+ * This program is free software; you can redistribute it and/or
cd1e0c
+ * modify it under the terms of the GNU General Public License
cd1e0c
+ * as published by the Free Software Foundation; either version 2
cd1e0c
+ * of the License, or (at your option) any later version.
cd1e0c
+ *
cd1e0c
+ * This program is distributed in the hope that it will be useful,
cd1e0c
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
cd1e0c
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
cd1e0c
+ * GNU General Public License for more details.
cd1e0c
+ *
cd1e0c
+ * You should have received a copy of the GNU General Public License
cd1e0c
+ * along with this program; if not, write to the Free Software
cd1e0c
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
cd1e0c
+ * Boston, MA 02110-1301, USA.
cd1e0c
+ */
cd1e0c
+
cd1e0c
+#ifndef _SQLITE_H_
cd1e0c
+#define _SQLITE_H_
cd1e0c
+
cd1e0c
+struct cld_client;
cd1e0c
+
cd1e0c
+int sqlite_prepare_dbh(const char *topdir);
cd1e0c
+int sqlite_insert_client(const unsigned char *clname, const size_t namelen);
cd1e0c
+int sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
cd1e0c
+		const unsigned char *clprinchash, const size_t princhashlen);
cd1e0c
+int sqlite_remove_client(const unsigned char *clname, const size_t namelen);
cd1e0c
+int sqlite_check_client(const unsigned char *clname, const size_t namelen);
cd1e0c
+int sqlite_grace_start(void);
cd1e0c
+int sqlite_grace_done(void);
cd1e0c
+int sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt);
cd1e0c
+int sqlite_delete_cltrack_records(void);
cd1e0c
+int sqlite_first_time_done(void);
cd1e0c
+
cd1e0c
+#endif /* _SQLITE_H_ */
cd1e0c
diff --git a/utils/nfsidmap/nfsidmap.c b/utils/nfsidmap/nfsidmap.c
cd1e0c
index d3967a3a..4d219ef5 100644
cd1e0c
--- a/utils/nfsidmap/nfsidmap.c
cd1e0c
+++ b/utils/nfsidmap/nfsidmap.c
cd1e0c
@@ -18,7 +18,7 @@
cd1e0c
 #include "xcommon.h"
cd1e0c
 
cd1e0c
 int verbose = 0;
cd1e0c
-char *usage = "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]";
cd1e0c
+#define USAGE "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]"
cd1e0c
 
cd1e0c
 #define MAX_ID_LEN   11
cd1e0c
 #define IDMAP_NAMESZ 128
cd1e0c
@@ -401,7 +401,7 @@ int main(int argc, char **argv)
cd1e0c
 			break;
cd1e0c
 		case 'h':
cd1e0c
 		default:
cd1e0c
-			xlog_warn(usage, progname);
cd1e0c
+			xlog_warn(USAGE, progname);
cd1e0c
 			exit(opt == 'h' ? 0 : 1);
cd1e0c
 		}
cd1e0c
 	}
cd1e0c
@@ -433,7 +433,7 @@ int main(int argc, char **argv)
cd1e0c
 	xlog_stderr(verbose);
cd1e0c
 	if ((argc - optind) != 2) {
cd1e0c
 		xlog_warn("Bad arg count. Check /etc/request-key.conf");
cd1e0c
-		xlog_warn(usage, progname);
cd1e0c
+		xlog_warn(USAGE, progname);
cd1e0c
 		return EXIT_FAILURE;
cd1e0c
 	}
cd1e0c
 
cd1e0c
@@ -451,7 +451,7 @@ int main(int argc, char **argv)
cd1e0c
 		return EXIT_FAILURE;
cd1e0c
 	}
cd1e0c
 	if (verbose) {
cd1e0c
-		xlog_warn("key: 0x%lx type: %s value: %s timeout %ld",
cd1e0c
+		xlog_warn("key: 0x%x type: %s value: %s timeout %d",
cd1e0c
 			key, type, value, timeout);
cd1e0c
 	}
cd1e0c
 
cd1e0c
diff --git a/utils/statd/rmtcall.c b/utils/statd/rmtcall.c
cd1e0c
index c4f6364f..5b261480 100644
cd1e0c
--- a/utils/statd/rmtcall.c
cd1e0c
+++ b/utils/statd/rmtcall.c
cd1e0c
@@ -247,7 +247,7 @@ process_reply(FD_SET_TYPE *rfds)
cd1e0c
 		xlog_warn("%s: service %d not registered on localhost",
cd1e0c
 			__func__, NL_MY_PROG(lp));
cd1e0c
 	} else {
cd1e0c
-		xlog(D_GENERAL, "%s: Callback to %s (for %d) succeeded",
cd1e0c
+		xlog(D_GENERAL, "%s: Callback to %s (for %s) succeeded",
cd1e0c
 			__func__, NL_MY_NAME(lp), NL_MON_NAME(lp));
cd1e0c
 	}
cd1e0c
 	nlist_free(&notify, lp);
cd1e0c
diff --git a/utils/statd/statd.c b/utils/statd/statd.c
cd1e0c
index 14673800..8eef2ff2 100644
cd1e0c
--- a/utils/statd/statd.c
cd1e0c
+++ b/utils/statd/statd.c
cd1e0c
@@ -136,7 +136,7 @@ static void log_modes(void)
cd1e0c
 	strcat(buf, "TI-RPC ");
cd1e0c
 #endif
cd1e0c
 
cd1e0c
-	xlog_warn(buf);
cd1e0c
+	xlog_warn("%s", buf);
cd1e0c
 }
cd1e0c
 
cd1e0c
 /*
cd1e0c
diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c
cd1e0c
index d1dbd74a..e343c768 100644
cd1e0c
--- a/utils/statd/svc_run.c
cd1e0c
+++ b/utils/statd/svc_run.c
cd1e0c
@@ -53,6 +53,7 @@
cd1e0c
 
cd1e0c
 #include <errno.h>
cd1e0c
 #include <time.h>
cd1e0c
+#include <inttypes.h>
cd1e0c
 #include "statd.h"
cd1e0c
 #include "notlist.h"
cd1e0c
 
cd1e0c
@@ -104,8 +105,8 @@ my_svc_run(int sockfd)
cd1e0c
 
cd1e0c
 			tv.tv_sec  = NL_WHEN(notify) - now;
cd1e0c
 			tv.tv_usec = 0;
cd1e0c
-			xlog(D_GENERAL, "Waiting for reply... (timeo %d)",
cd1e0c
-							tv.tv_sec);
cd1e0c
+			xlog(D_GENERAL, "Waiting for reply... (timeo %jd)",
cd1e0c
+							(intmax_t)tv.tv_sec);
cd1e0c
 			selret = select(FD_SETSIZE, &readfds,
cd1e0c
 				(void *) 0, (void *) 0, &tv);
cd1e0c
 		} else {