diff --git a/.gitignore b/.gitignore index e91e7a25..e97b31f5 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,7 @@ utils/rquotad/rquotad utils/rquotad/rquota.h utils/rquotad/rquota_xdr.c utils/showmount/showmount +utils/nfsdcld/nfsdcld utils/nfsdcltrack/nfsdcltrack utils/statd/statd tools/locktest/testlk diff --git a/aclocal/ax_gcc_func_attribute.m4 b/aclocal/ax_gcc_func_attribute.m4 new file mode 100644 index 00000000..098c9aad --- /dev/null +++ b/aclocal/ax_gcc_func_attribute.m4 @@ -0,0 +1,238 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE) +# +# DESCRIPTION +# +# This macro checks if the compiler supports one of GCC's function +# attributes; many other compilers also provide function attributes with +# the same syntax. Compiler warnings are used to detect supported +# attributes as unsupported ones are ignored by default so quieting +# warnings when using this macro will yield false positives. +# +# The ATTRIBUTE parameter holds the name of the attribute to be checked. +# +# If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_. +# +# The macro caches its result in the ax_cv_have_func_attribute_ +# variable. +# +# The macro currently supports the following function attributes: +# +# alias +# aligned +# alloc_size +# always_inline +# artificial +# cold +# const +# constructor +# constructor_priority for constructor attribute with priority +# deprecated +# destructor +# dllexport +# dllimport +# error +# externally_visible +# fallthrough +# flatten +# format +# format_arg +# gnu_inline +# hot +# ifunc +# leaf +# malloc +# noclone +# noinline +# nonnull +# noreturn +# nothrow +# optimize +# pure +# sentinel +# sentinel_position +# unused +# used +# visibility +# warning +# warn_unused_result +# weak +# weakref +# +# Unsupported function attributes will be tested with a prototype +# returning an int and not accepting any arguments and the result of the +# check might be wrong or meaningless so use with care. +# +# LICENSE +# +# Copyright (c) 2013 Gabriele Svelto +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 9 + +AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [ + AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1]) + + AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [ + AC_LINK_IFELSE([AC_LANG_PROGRAM([ + m4_case([$1], + [alias], [ + int foo( void ) { return 0; } + int bar( void ) __attribute__(($1("foo"))); + ], + [aligned], [ + int foo( void ) __attribute__(($1(32))); + ], + [alloc_size], [ + void *foo(int a) __attribute__(($1(1))); + ], + [always_inline], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [artificial], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [cold], [ + int foo( void ) __attribute__(($1)); + ], + [const], [ + int foo( void ) __attribute__(($1)); + ], + [constructor_priority], [ + int foo( void ) __attribute__((__constructor__(65535/2))); + ], + [constructor], [ + int foo( void ) __attribute__(($1)); + ], + [deprecated], [ + int foo( void ) __attribute__(($1(""))); + ], + [destructor], [ + int foo( void ) __attribute__(($1)); + ], + [dllexport], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [dllimport], [ + int foo( void ) __attribute__(($1)); + ], + [error], [ + int foo( void ) __attribute__(($1(""))); + ], + [externally_visible], [ + int foo( void ) __attribute__(($1)); + ], + [fallthrough], [ + int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }}; + ], + [flatten], [ + int foo( void ) __attribute__(($1)); + ], + [format], [ + int foo(const char *p, ...) __attribute__(($1(printf, 1, 2))); + ], + [format_arg], [ + char *foo(const char *p) __attribute__(($1(1))); + ], + [gnu_inline], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [hot], [ + int foo( void ) __attribute__(($1)); + ], + [ifunc], [ + int my_foo( void ) { return 0; } + static int (*resolve_foo(void))(void) { return my_foo; } + int foo( void ) __attribute__(($1("resolve_foo"))); + ], + [leaf], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [malloc], [ + void *foo( void ) __attribute__(($1)); + ], + [noclone], [ + int foo( void ) __attribute__(($1)); + ], + [noinline], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [nonnull], [ + int foo(char *p) __attribute__(($1(1))); + ], + [noreturn], [ + void foo( void ) __attribute__(($1)); + ], + [nothrow], [ + int foo( void ) __attribute__(($1)); + ], + [optimize], [ + __attribute__(($1(3))) int foo( void ) { return 0; } + ], + [pure], [ + int foo( void ) __attribute__(($1)); + ], + [sentinel], [ + int foo(void *p, ...) __attribute__(($1)); + ], + [sentinel_position], [ + int foo(void *p, ...) __attribute__(($1(1))); + ], + [returns_nonnull], [ + void *foo( void ) __attribute__(($1)); + ], + [unused], [ + int foo( void ) __attribute__(($1)); + ], + [used], [ + int foo( void ) __attribute__(($1)); + ], + [visibility], [ + int foo_def( void ) __attribute__(($1("default"))); + int foo_hid( void ) __attribute__(($1("hidden"))); + int foo_int( void ) __attribute__(($1("internal"))); + int foo_pro( void ) __attribute__(($1("protected"))); + ], + [warning], [ + int foo( void ) __attribute__(($1(""))); + ], + [warn_unused_result], [ + int foo( void ) __attribute__(($1)); + ], + [weak], [ + int foo( void ) __attribute__(($1)); + ], + [weakref], [ + static int foo( void ) { return 0; } + static int bar( void ) __attribute__(($1("foo"))); + ], + [ + m4_warn([syntax], [Unsupported attribute $1, the test may fail]) + int foo( void ) __attribute__(($1)); + ] + )], []) + ], + dnl GCC doesn't exit with an error if an unknown attribute is + dnl provided but only outputs a warning, so accept the attribute + dnl only if no warning were issued. + [AS_IF([test -s conftest.err], + [AS_VAR_SET([ac_var], [no])], + [AS_VAR_SET([ac_var], [yes])])], + [AS_VAR_SET([ac_var], [no])]) + ]) + + AS_IF([test yes = AS_VAR_GET([ac_var])], + [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1, + [Define to 1 if the system has the `$1' function attribute])], []) + + AS_VAR_POPDEF([ac_var]) +]) diff --git a/configure.ac b/configure.ac index 48eb9eb6..13ea957f 100644 --- a/configure.ac +++ b/configure.ac @@ -238,6 +238,12 @@ else AM_CONDITIONAL(MOUNT_CONFIG, [test "$enable_mount" = "yes"]) fi +AC_ARG_ENABLE(nfsdcld, + [AC_HELP_STRING([--disable-nfsdcld], + [disable NFSv4 clientid tracking daemon @<:@default=no@:>@])], + enable_nfsdcld=$enableval, + enable_nfsdcld="yes") + AC_ARG_ENABLE(nfsdcltrack, [AC_HELP_STRING([--disable-nfsdcltrack], [disable NFSv4 clientid tracking programs @<:@default=no@:>@])], @@ -317,6 +323,20 @@ if test "$enable_nfsv4" = yes; then dnl Check for sqlite3 AC_SQLITE3_VERS + if test "$enable_nfsdcld" = "yes"; then + AC_CHECK_HEADERS([libgen.h sys/inotify.h], , + AC_MSG_ERROR([Cannot find header needed for nfsdcld])) + + case $libsqlite3_cv_is_recent in + yes) ;; + unknown) + dnl do not fail when cross-compiling + AC_MSG_WARN([assuming sqlite is at least v3.3]) ;; + *) + AC_MSG_ERROR([nfsdcld requires sqlite-devel]) ;; + esac + fi + if test "$enable_nfsdcltrack" = "yes"; then AC_CHECK_HEADERS([libgen.h sys/inotify.h], , AC_MSG_ERROR([Cannot find header needed for nfsdcltrack])) @@ -332,6 +352,7 @@ if test "$enable_nfsv4" = yes; then fi else + enable_nfsdcld="no" enable_nfsdcltrack="no" fi @@ -342,6 +363,7 @@ if test "$enable_nfsv41" = yes; then fi dnl enable nfsidmap when its support by libnfsidmap +AM_CONDITIONAL(CONFIG_NFSDCLD, [test "$enable_nfsdcld" = "yes" ]) AM_CONDITIONAL(CONFIG_NFSDCLTRACK, [test "$enable_nfsdcltrack" = "yes" ]) @@ -581,6 +603,7 @@ CHECK_CCSUPPORT([-Werror=format-overflow=2], [flg1]) CHECK_CCSUPPORT([-Werror=int-conversion], [flg2]) CHECK_CCSUPPORT([-Werror=incompatible-pointer-types], [flg3]) CHECK_CCSUPPORT([-Werror=misleading-indentation], [flg4]) +AX_GCC_FUNC_ATTRIBUTE([format]) AC_SUBST([AM_CFLAGS], ["$my_am_cflags $flg1 $flg2 $flg3 $flg4"]) @@ -617,8 +640,10 @@ AC_CONFIG_FILES([ tools/mountstats/Makefile tools/nfs-iostat/Makefile tools/nfsconf/Makefile + tools/clddb-tool/Makefile utils/Makefile utils/blkmapd/Makefile + utils/nfsdcld/Makefile utils/nfsdcltrack/Makefile utils/exportfs/Makefile utils/gssd/Makefile diff --git a/nfs.conf b/nfs.conf index d48a4e55..56172c49 100644 --- a/nfs.conf +++ b/nfs.conf @@ -36,6 +36,10 @@ use-gss-proxy=1 # state-directory-path=/var/lib/nfs # ha-callout= # +[nfsdcld] +# debug=0 +# storagedir=/var/lib/nfs/nfsdcld +# [nfsdcltrack] # debug=0 # storagedir=/var/lib/nfs/nfsdcltrack diff --git a/support/include/cld.h b/support/include/cld.h index f14a9ab0..88d3b63e 100644 --- a/support/include/cld.h +++ b/support/include/cld.h @@ -23,16 +23,22 @@ #define _NFSD_CLD_H /* latest upcall version available */ -#define CLD_UPCALL_VERSION 1 +#define CLD_UPCALL_VERSION 2 /* defined by RFC3530 */ #define NFS4_OPAQUE_LIMIT 1024 +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + enum cld_command { Cld_Create, /* create a record for this cm_id */ Cld_Remove, /* remove record of this cm_id */ Cld_Check, /* is this cm_id allowed? */ Cld_GraceDone, /* grace period is complete */ + Cld_GraceStart, /* grace start (upload client records) */ + Cld_GetVersion, /* query max supported upcall version */ }; /* representation of long-form NFSv4 client ID */ @@ -41,6 +47,17 @@ struct cld_name { unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */ } __attribute__((packed)); +/* sha256 hash of the kerberos principal */ +struct cld_princhash { + uint8_t cp_len; /* length of cp_data */ + unsigned char cp_data[SHA256_DIGEST_SIZE]; /* hash of principal */ +} __attribute__((packed)); + +struct cld_clntinfo { + struct cld_name cc_name; + struct cld_princhash cc_princhash; +} __attribute__((packed)); + /* message struct for communication with userspace */ struct cld_msg { uint8_t cm_vers; /* upcall version */ @@ -50,7 +67,28 @@ struct cld_msg { union { int64_t cm_gracetime; /* grace period start time */ struct cld_name cm_name; + uint8_t cm_version; /* for getting max version */ + } __attribute__((packed)) cm_u; +} __attribute__((packed)); + +/* version 2 message can include hash of kerberos principal */ +struct cld_msg_v2 { + uint8_t cm_vers; /* upcall version */ + uint8_t cm_cmd; /* upcall command */ + int16_t cm_status; /* return code */ + uint32_t cm_xid; /* transaction id */ + union { + struct cld_name cm_name; + uint8_t cm_version; /* for getting max version */ + struct cld_clntinfo cm_clntinfo; /* name & princ hash */ } __attribute__((packed)) cm_u; } __attribute__((packed)); +struct cld_msg_hdr { + uint8_t cm_vers; /* upcall version */ + uint8_t cm_cmd; /* upcall command */ + int16_t cm_status; /* return code */ + uint32_t cm_xid; /* transaction id */ +} __attribute__((packed)); + #endif /* !_NFSD_CLD_H */ diff --git a/support/include/xcommon.h b/support/include/xcommon.h index 23c9a135..30b0403b 100644 --- a/support/include/xcommon.h +++ b/support/include/xcommon.h @@ -9,6 +9,10 @@ #ifndef _XMALLOC_H #define _MALLOC_H +#ifdef HAVE_CONFIG_H +#include +#endif + #include #include #include @@ -25,9 +29,15 @@ #define streq(s, t) (strcmp ((s), (t)) == 0) -/* Functions in sundries.c that are used in mount.c and umount.c */ +#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT +#define X_FORMAT(_x) __attribute__((__format__ _x)) +#else +#define X_FORMAT(_x) +#endif + +/* Functions in sundries.c that are used in mount.c and umount.c */ char *canonicalize (const char *path); -void nfs_error (const char *fmt, ...); +void nfs_error (const char *fmt, ...) X_FORMAT((printf, 1, 2)); void *xmalloc (size_t size); void *xrealloc(void *p, size_t size); void xfree(void *); @@ -36,9 +46,9 @@ char *xstrndup (const char *s, int n); char *xstrconcat2 (const char *, const char *); char *xstrconcat3 (const char *, const char *, const char *); char *xstrconcat4 (const char *, const char *, const char *, const char *); -void die (int errcode, const char *fmt, ...); +void die (int errcode, const char *fmt, ...) X_FORMAT((printf, 2, 3)); -extern void die(int err, const char *fmt, ...); +extern void die(int err, const char *fmt, ...) X_FORMAT((printf, 2, 3)); extern void (*at_die)(void); /* exit status - bits below are ORed */ diff --git a/support/include/xlog.h b/support/include/xlog.h index a11463ed..32ff5a1b 100644 --- a/support/include/xlog.h +++ b/support/include/xlog.h @@ -7,6 +7,10 @@ #ifndef XLOG_H #define XLOG_H +#ifdef HAVE_CONFIG_H +#include +#endif + #include /* These are logged always. L_FATAL also does exit(1) */ @@ -35,6 +39,12 @@ struct xlog_debugfac { int df_fac; }; +#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT +#define XLOG_FORMAT(_x) __attribute__((__format__ _x)) +#else +#define XLOG_FORMAT(_x) +#endif + extern int export_errno; void xlog_open(char *progname); void xlog_stderr(int on); @@ -43,10 +53,10 @@ void xlog_config(int fac, int on); void xlog_sconfig(char *, int on); void xlog_from_conffile(char *); int xlog_enabled(int fac); -void xlog(int fac, const char *fmt, ...); -void xlog_warn(const char *fmt, ...); -void xlog_err(const char *fmt, ...); -void xlog_errno(int err, const char *fmt, ...); -void xlog_backend(int fac, const char *fmt, va_list args); +void xlog(int fac, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3)); +void xlog_warn(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2)); +void xlog_err(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2)); +void xlog_errno(int err, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3)); +void xlog_backend(int fac, const char *fmt, va_list args) XLOG_FORMAT((printf, 2, 0)); #endif /* XLOG_H */ diff --git a/support/junction/junction.c b/support/junction/junction.c index ab6caa61..41cce261 100644 --- a/support/junction/junction.c +++ b/support/junction/junction.c @@ -23,6 +23,10 @@ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt */ +#ifdef HAVE_CONFIG_H +#include +#endif + #include #include diff --git a/support/misc/file.c b/support/misc/file.c index 4065376e..74973169 100644 --- a/support/misc/file.c +++ b/support/misc/file.c @@ -18,6 +18,10 @@ * along with nfs-utils. If not, see . */ +#ifdef HAVE_CONFIG_H +#include +#endif + #include #include diff --git a/support/misc/mountpoint.c b/support/misc/mountpoint.c index 9f9ce44e..4205b41c 100644 --- a/support/misc/mountpoint.c +++ b/support/misc/mountpoint.c @@ -3,6 +3,10 @@ * check if a given path is a mountpoint */ +#ifdef HAVE_CONFIG_H +#include +#endif + #include #include "xcommon.h" #include diff --git a/support/nfs/cacheio.c b/support/nfs/cacheio.c index 9dc4cf1c..7c4cf373 100644 --- a/support/nfs/cacheio.c +++ b/support/nfs/cacheio.c @@ -15,6 +15,10 @@ * */ +#ifdef HAVE_CONFIG_H +#include +#endif + #include #include #include diff --git a/support/nfs/svc_create.c b/support/nfs/svc_create.c index ef7ff05f..7b595f89 100644 --- a/support/nfs/svc_create.c +++ b/support/nfs/svc_create.c @@ -184,7 +184,7 @@ svc_create_sock(const struct sockaddr *sap, socklen_t salen, type = SOCK_STREAM; break; default: - xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %u", + xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %lu", __func__, nconf->nc_semantics); return -1; } diff --git a/support/nsm/rpc.c b/support/nsm/rpc.c index ae49006c..08b4746f 100644 --- a/support/nsm/rpc.c +++ b/support/nsm/rpc.c @@ -182,7 +182,7 @@ nsm_xmit_getport(const int sock, const struct sockaddr_in *sin, uint32_t xid; XDR xdr; - xlog(D_CALL, "Sending PMAP_GETPORT for %u, %u, udp", program, version); + xlog(D_CALL, "Sending PMAP_GETPORT for %lu, %lu, udp", program, version); nsm_init_xdrmem(msgbuf, NSM_MAXMSGSIZE, &xdr); xid = nsm_init_rpc_header(PMAPPROG, PMAPVERS, diff --git a/systemd/Makefile.am b/systemd/Makefile.am index d54518bc..53458c62 100644 --- a/systemd/Makefile.am +++ b/systemd/Makefile.am @@ -36,6 +36,11 @@ unit_files += \ endif endif +if CONFIG_NFSDCLD +unit_files += \ + nfsdcld.service +endif + man5_MANS = nfs.conf.man man7_MANS = nfs.systemd.man EXTRA_DIST = $(unit_files) $(man5_MANS) $(man7_MANS) diff --git a/systemd/nfs-server.service b/systemd/nfs-server.service index 136552b5..24118d69 100644 --- a/systemd/nfs-server.service +++ b/systemd/nfs-server.service @@ -6,10 +6,12 @@ Requires= nfs-mountd.service Wants=rpcbind.socket network-online.target Wants=rpc-statd.service nfs-idmapd.service Wants=rpc-statd-notify.service +Wants=nfsdcld.service After= network-online.target local-fs.target After= proc-fs-nfsd.mount rpcbind.socket nfs-mountd.service After= nfs-idmapd.service rpc-statd.service +After= nfsdcld.service Before= rpc-statd-notify.service # GSS services dependencies and ordering diff --git a/systemd/nfsdcld.service b/systemd/nfsdcld.service new file mode 100644 index 00000000..a32d2430 --- /dev/null +++ b/systemd/nfsdcld.service @@ -0,0 +1,10 @@ +[Unit] +Description=NFSv4 Client Tracking Daemon +DefaultDependencies=no +Conflicts=umount.target +Requires=rpc_pipefs.target proc-fs-nfsd.mount +After=rpc_pipefs.target proc-fs-nfsd.mount + +[Service] +Type=forking +ExecStart=/usr/sbin/nfsdcld diff --git a/tools/Makefile.am b/tools/Makefile.am index 4266da49..53e61170 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -8,6 +8,10 @@ endif OPTDIRS += nfsconf +if CONFIG_NFSDCLD +OPTDIRS += clddb-tool +endif + SUBDIRS = locktest rpcdebug nlmtest mountstats nfs-iostat $(OPTDIRS) MAINTAINERCLEANFILES = Makefile.in diff --git a/tools/clddb-tool/Makefile.am b/tools/clddb-tool/Makefile.am new file mode 100644 index 00000000..15a8fd47 --- /dev/null +++ b/tools/clddb-tool/Makefile.am @@ -0,0 +1,13 @@ +## Process this file with automake to produce Makefile.in +PYTHON_FILES = clddb-tool.py + +man8_MANS = clddb-tool.man + +EXTRA_DIST = $(man8_MANS) $(PYTHON_FILES) + +all-local: $(PYTHON_FILES) + +install-data-hook: + $(INSTALL) -m 755 clddb-tool.py $(DESTDIR)$(sbindir)/clddb-tool + +MAINTAINERCLEANFILES=Makefile.in diff --git a/tools/clddb-tool/clddb-tool.man b/tools/clddb-tool/clddb-tool.man new file mode 100644 index 00000000..e80b2c05 --- /dev/null +++ b/tools/clddb-tool/clddb-tool.man @@ -0,0 +1,83 @@ +.\" +.\" clddb-tool(8) +.\" +.TH clddb-tool 8 "07 Aug 2019" +.SH NAME +clddb-tool \- Tool for manipulating the nfsdcld sqlite database +.SH SYNOPSIS +.B clddb-tool +.RB [ \-h | \-\-help ] +.P +.B clddb-tool +.RB [ \-p | \-\-path +.IR dbpath ] +.B fix-table-names +.RB [ \-h | \-\-help ] +.P +.B clddb-tool +.RB [ \-p | \-\-path +.IR dbpath ] +.B downgrade-schema +.RB [ \-h | \-\-help ] +.RB [ \-v | \-\-version +.IR to-version ] +.P +.B clddb-tool +.RB [ \-p | \-\-path +.IR dbpath ] +.B print +.RB [ \-h | \-\-help ] +.RB [ \-s | \-\-summary ] +.P + +.SH DESCRIPTION +.RB "The " clddb-tool " command is provided to perform some manipulation of the nfsdcld sqlite database schema and to print the contents of the database." +.SS Sub-commands +Valid +.B clddb-tool +subcommands are: +.IP "\fBfix-table-names\fP" +.RB "A previous version of " nfsdcld "(8) contained a bug that corrupted the reboot epoch table names. This sub-command will fix those table names." +.IP "\fBdowngrade-schema\fP" +Downgrade the database schema. Currently the schema can only to downgraded from version 4 to version 3. +.IP "\fBprint\fP" +Display the contents of the database. Prints the schema version and the values of the current and recovery epochs. If the +.BR \-s | \-\-summary +option is not given, also prints the clients in the reboot epoch tables. +.SH OPTIONS +.SS Options valid for all sub-commands +.TP +.B \-h, \-\-help +Show the help message and exit +.TP +\fB\-p \fIdbpath\fR, \fB\-\-path \fIdbpath\fR +Open the sqlite database located at +.I dbpath +instead of +.IR /var/lib/nfs/nfsdcld/main.sqlite ". " +This is mainly for testing purposes. +.SS Options specific to the downgrade-schema sub-command +.TP +\fB\-v \fIto-version\fR, \fB\-\-version \fIto-version\fR +The schema version to downgrade to. Currently the schema can only be downgraded to version 3. +.SS Options specific to the print sub-command +.TP +.B \-s, \-\-summary +Do not list the clients in the reboot epoch tables in the output. +.SH NOTES +The +.B clddb-tool +command will not allow the +.B fix-table-names +or +.B downgrade-schema +subcommands to be used if +.BR nfsdcld (8) +is running. +.SH FILES +.TP +.B /var/lib/nfs/nfsdcld/main.sqlite +.SH SEE ALSO +.BR nfsdcld (8) +.SH AUTHOR +Scott Mayhew diff --git a/tools/clddb-tool/clddb-tool.py b/tools/clddb-tool/clddb-tool.py new file mode 100644 index 00000000..8a661318 --- /dev/null +++ b/tools/clddb-tool/clddb-tool.py @@ -0,0 +1,266 @@ +#!/usr/bin/python3 +"""Tool for manipulating the nfsdcld sqlite database +""" + +__copyright__ = """ +Copyright (C) 2019 Scott Mayhew + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +MA 02110-1301, USA. +""" + +import argparse +import os +import sqlite3 +import sys + + +class CldDb(): + def __init__(self, path): + self.con = sqlite3.connect(path) + self.con.row_factory = sqlite3.Row + for row in self.con.execute('select value from parameters ' + 'where key = "version"'): + self.version = int(row['value']) + for row in self.con.execute('select * from grace'): + self.current = int(row['current']) + self.recovery = int(row['recovery']) + + def __del__(self): + self.con.close() + + def __str__(self): + return ('Schema version: {self.version} ' + 'current epoch: {self.current} ' + 'recovery epoch: {self.recovery}'.format(self=self)) + + def _print_clients(self, epoch): + if epoch: + for row in self.con.execute('select * from "rec-{:016x}"' + .format(epoch)): + if self.version >= 4: + if row['princhash'] is not None: + princhash = row['princhash'].hex() + else: + princhash = "(null)" + print('id = {}, princhash = {}' + .format(row['id'].decode(), princhash)) + else: + print('id = {}'.format(row['id'].decode())) + + def print_current_clients(self): + print('Clients in current epoch:') + self._print_clients(self.current) + + def print_recovery_clients(self): + if self.recovery: + print('Clients in recovery epoch:') + self._print_clients(self.recovery) + + def check_bad_table_names(self): + bad_names = [] + for row in self.con.execute('select name from sqlite_master ' + 'where type = "table" ' + 'and name like "%rec-%" ' + 'and length(name) < 20'): + bad_names.append(row['name']) + return bad_names + + def fix_bad_table_names(self): + try: + self.con.execute('begin exclusive transaction') + bad_names = self.check_bad_table_names() + for bad_name in bad_names: + epoch = int(bad_name.split('-')[1], base=16) + if epoch == self.current or epoch == self.recovery: + if epoch == self.current: + which = 'current' + else: + which = 'recovery' + print('found invalid table name {} for {} epoch' + .format(bad_name, which)) + self.con.execute('alter table "{}" ' + 'rename to "rec-{:016x}"' + .format(bad_name, epoch)) + print('renamed to rec-{:016x}'.format(epoch)) + else: + print('found invalid table name {} for unknown epoch {}' + .format(bad_name, epoch)) + self.con.execute('drop table "{}"'.format(bad_name)) + print('dropped table {}'.format(bad_name)) + except sqlite3.Error: + self.con.rollback() + else: + self.con.commit() + + def has_princ_data(self): + if self.version < 4: + return False + for row in self.con.execute('select count(*) ' + 'from "rec-{:016x}" ' + 'where princhash not null' + .format(self.current)): + count = row[0] + if self.recovery: + for row in self.con.execute('select count(*) ' + 'from "rec-{:016x}" ' + 'where princhash not null' + .format(self.current)): + count = count + row[0] + if count: + return True + return False + + def _downgrade_table_v4_to_v3(self, epoch): + if not self.con.in_transaction: + raise sqlite3.Error + try: + self.con.execute('create table "new_rec-{:016x}" ' + '(id blob primary key)'.format(epoch)) + self.con.execute('insert into "new_rec-{:016x}" ' + 'select id from "rec-{:016x}"' + .format(epoch, epoch)) + self.con.execute('drop table "rec-{:016x}"'.format(epoch)) + self.con.execute('alter table "new_rec-{:016x}" ' + 'rename to "rec-{:016x}"' + .format(epoch, epoch)) + except sqlite3.Error: + raise + + def downgrade_schema_v4_to_v3(self): + try: + self.con.execute('begin exclusive transaction') + for row in self.con.execute('select value from parameters ' + 'where key = "version"'): + version = int(row['value']) + if version != self.version: + raise sqlite3.Error + for row in self.con.execute('select * from grace'): + current = int(row['current']) + recovery = int(row['recovery']) + if current != self.current: + raise sqlite3.Error + if recovery != self.recovery: + raise sqlite3.Error + self._downgrade_table_v4_to_v3(current) + if recovery: + self._downgrade_table_v4_to_v3(recovery) + self.con.execute('update parameters ' + 'set value = "3" ' + 'where key = "version"') + self.version = 3 + except sqlite3.Error: + self.con.rollback() + print('Downgrade failed') + else: + self.con.commit() + print('Downgrade successful') + + +def nfsdcld_active(): + rc = os.system('ps -C nfsdcld >/dev/null 2>/dev/null') + if rc == 0: + return True + return False + + +def fix_table_names_command(db, args): + if nfsdcld_active(): + print('Warning: nfsdcld is running!') + ans = input('Continue? ') + if ans.lower() not in ['y', 'yes']: + print('Operation canceled.') + return + bad_names = db.check_bad_table_names() + if not bad_names: + print('No invalid table names found.') + return + db.fix_bad_table_names() + + +def downgrade_schema_command(db, args): + if nfsdcld_active(): + print('Warning: nfsdcld is running!') + ans = input('Continue? ') + if ans.lower() not in ['y', 'yes']: + print('Operation canceled') + return + if db.version != 4: + print('Cannot downgrade database from schema version {}.' + .format(db.version)) + return + if args.version != 3: + print('Cannot downgrade to version {}.'.format(args.version)) + return + bad_names = db.check_bad_table_names() + if bad_names: + print('Invalid table names detected.') + print('Please run "{} fix-table-names" before downgrading the schema.' + .format(sys.argv[0])) + return + if db.has_princ_data(): + print('Warning: database has principal data, which will be erased.') + ans = input('Continue? ') + if ans.lower() not in ['y', 'yes']: + print('Operation canceled') + return + db.downgrade_schema_v4_to_v3() + + +def print_command(db, args): + print(str(db)) + if not args.summary: + bad_names = db.check_bad_table_names() + if bad_names: + print('Invalid table names detected.') + print('Please run "{} fix-table-names".'.format(sys.argv[0])) + return + db.print_current_clients() + db.print_recovery_clients() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--path', + default='/var/lib/nfs/nfsdcld/main.sqlite', + help='path to the database ' + '(default: /var/lib/nfs/nfsdcld/main.sqlite)') + subparsers = parser.add_subparsers(help='sub-command help') + fix_parser = subparsers.add_parser('fix-table-names', + help='fix invalid table names') + fix_parser.set_defaults(func=fix_table_names_command) + downgrade_parser = subparsers.add_parser('downgrade-schema', + help='downgrade database schema') + downgrade_parser.add_argument('-v', '--version', type=int, choices=[3], + default=3, + help='version to downgrade to') + downgrade_parser.set_defaults(func=downgrade_schema_command) + print_parser = subparsers.add_parser('print', + help='print database info') + print_parser.add_argument('-s', '--summary', default=False, + action='store_true', + help='print summary only') + print_parser.set_defaults(func=print_command) + args = parser.parse_args() + if not os.path.exists(args.path): + return parser.print_usage() + clddb = CldDb(args.path) + return args.func(clddb, args) + + +if __name__ == '__main__': + if len(sys.argv) == 1: + sys.argv.extend(['print', '--summary']) + main() diff --git a/utils/Makefile.am b/utils/Makefile.am index 0a5b062c..4c930a4b 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -19,6 +19,10 @@ if CONFIG_MOUNT OPTDIRS += mount endif +if CONFIG_NFSDCLD +OPTDIRS += nfsdcld +endif + if CONFIG_NFSDCLTRACK OPTDIRS += nfsdcltrack endif diff --git a/utils/exportfs/exportfs.c b/utils/exportfs/exportfs.c index cd3c979d..4b9634b7 100644 --- a/utils/exportfs/exportfs.c +++ b/utils/exportfs/exportfs.c @@ -644,6 +644,9 @@ out: return result; } +#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT +__attribute__((format (printf, 2, 3))) +#endif static char dumpopt(char c, char *fmt, ...) { diff --git a/utils/mount/fstab.c b/utils/mount/fstab.c index eedbddab..8b0aaf1a 100644 --- a/utils/mount/fstab.c +++ b/utils/mount/fstab.c @@ -7,6 +7,10 @@ * - Moved code to nfs-utils/support/nfs from util-linux/mount. */ +#ifdef HAVE_CONFIG_H +#include +#endif + #include #include #include diff --git a/utils/mountd/cache.c b/utils/mountd/cache.c index a054ce6f..c73e29be 100644 --- a/utils/mountd/cache.c +++ b/utils/mountd/cache.c @@ -967,8 +967,7 @@ lookup_export(char *dom, char *path, struct addrinfo *ai) } else if (found_type == i && found->m_warned == 0) { xlog(L_WARNING, "%s exported to both %s and %s, " "arbitrarily choosing options from first", - path, found->m_client->m_hostname, exp->m_client->m_hostname, - dom); + path, found->m_client->m_hostname, exp->m_client->m_hostname); found->m_warned = 1; } } diff --git a/utils/mountd/mountd.c b/utils/mountd/mountd.c index 086c39bf..0b891121 100644 --- a/utils/mountd/mountd.c +++ b/utils/mountd/mountd.c @@ -209,10 +209,10 @@ killer (int sig) } static void -sig_hup (int sig) +sig_hup (int UNUSED(sig)) { /* don't exit on SIGHUP */ - xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n", sig); + xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n"); return; } diff --git a/utils/nfsdcld/Makefile.am b/utils/nfsdcld/Makefile.am new file mode 100644 index 00000000..273d64f1 --- /dev/null +++ b/utils/nfsdcld/Makefile.am @@ -0,0 +1,15 @@ +## Process this file with automake to produce Makefile.in + +man8_MANS = nfsdcld.man +EXTRA_DIST = $(man8_MANS) + +AM_CFLAGS += -D_LARGEFILE64_SOURCE +sbin_PROGRAMS = nfsdcld + +nfsdcld_SOURCES = nfsdcld.c sqlite.c legacy.c +nfsdcld_LDADD = ../../support/nfs/libnfs.la $(LIBEVENT) $(LIBSQLITE) $(LIBCAP) + +noinst_HEADERS = sqlite.h cld-internal.h legacy.h + +MAINTAINERCLEANFILES = Makefile.in + diff --git a/utils/nfsdcld/cld-internal.h b/utils/nfsdcld/cld-internal.h new file mode 100644 index 00000000..05f01be2 --- /dev/null +++ b/utils/nfsdcld/cld-internal.h @@ -0,0 +1,44 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef _CLD_INTERNAL_H_ +#define _CLD_INTERNAL_H_ + +#if CLD_UPCALL_VERSION >= 2 +#define UPCALL_VERSION 2 +#else +#define UPCALL_VERSION 1 +#endif + +struct cld_client { + int cl_fd; + struct event cl_event; + union { + struct cld_msg cl_msg; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 cl_msg_v2; +#endif + } cl_u; +}; + +uint64_t current_epoch; +uint64_t recovery_epoch; +int first_time; +int num_cltrack_records; +int num_legacy_records; + +#endif /* _CLD_INTERNAL_H_ */ diff --git a/utils/nfsdcld/legacy.c b/utils/nfsdcld/legacy.c new file mode 100644 index 00000000..3c6bea6c --- /dev/null +++ b/utils/nfsdcld/legacy.c @@ -0,0 +1,185 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cld.h" +#include "sqlite.h" +#include "xlog.h" +#include "legacy.h" + +#define NFSD_RECDIR_FILE "/proc/fs/nfsd/nfsv4recoverydir" + +/* + * Loads client records from the v4recovery directory into the database. + * Records are prefixed with the string "hash:" and include the '\0' byte. + * + * Called during database initialization as part of a one-time "upgrade". + */ +void +legacy_load_clients_from_recdir(int *num_records) +{ + int fd; + DIR *v4recovery; + struct dirent *entry; + char recdirname[PATH_MAX]; + char buf[NFS4_OPAQUE_LIMIT]; + struct stat st; + char *nl; + + fd = open(NFSD_RECDIR_FILE, O_RDONLY); + if (fd < 0) { + xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE); + return; + } + if (read(fd, recdirname, PATH_MAX) < 0) { + xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE); + return; + } + close(fd); + /* the output from the proc file isn't null-terminated */ + nl = strchr(recdirname, '\n'); + if (!nl) + return; + *nl = '\0'; + if (stat(recdirname, &st) < 0) { + xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno); + return; + } + if (!S_ISDIR(st.st_mode)) { + xlog(D_GENERAL, "%s is not a directory: mode=0%o", recdirname + , st.st_mode); + return; + } + v4recovery = opendir(recdirname); + if (!v4recovery) + return; + while ((entry = readdir(v4recovery))) { + int ret; + + /* skip "." and ".." */ + if (entry->d_name[0] == '.') { + switch (entry->d_name[1]) { + case '\0': + continue; + case '.': + if (entry->d_name[2] == '\0') + continue; + } + } + /* prefix legacy records with the string "hash:" */ + ret = snprintf(buf, sizeof(buf), "hash:%s", entry->d_name); + /* if there's a problem, then skip this entry */ + if (ret < 0 || (size_t)ret >= sizeof(buf)) { + xlog(L_WARNING, "%s: unable to build client string for %s!", + __func__, entry->d_name); + continue; + } + /* legacy client records need to include the null terminator */ + ret = sqlite_insert_client((unsigned char *)buf, strlen(buf) + 1); + if (ret) + xlog(L_WARNING, "%s: unable to insert %s: %d", __func__, + entry->d_name, ret); + else + (*num_records)++; + } + closedir(v4recovery); +} + +/* + * Cleans out the v4recovery directory. + * + * Called upon receipt of the first "GraceDone" upcall only. + */ +void +legacy_clear_recdir(void) +{ + int fd; + DIR *v4recovery; + struct dirent *entry; + char recdirname[PATH_MAX]; + char dirname[PATH_MAX]; + struct stat st; + char *nl; + + fd = open(NFSD_RECDIR_FILE, O_RDONLY); + if (fd < 0) { + xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE); + return; + } + if (read(fd, recdirname, PATH_MAX) < 0) { + xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE); + return; + } + close(fd); + /* the output from the proc file isn't null-terminated */ + nl = strchr(recdirname, '\n'); + if (!nl) + return; + *nl = '\0'; + if (stat(recdirname, &st) < 0) { + xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno); + return; + } + if (!S_ISDIR(st.st_mode)) { + xlog(D_GENERAL, "%s is not a directory: mode=0%o", recdirname + , st.st_mode); + return; + } + v4recovery = opendir(recdirname); + if (!v4recovery) + return; + while ((entry = readdir(v4recovery))) { + int len; + + /* skip "." and ".." */ + if (entry->d_name[0] == '.') { + switch (entry->d_name[1]) { + case '\0': + continue; + case '.': + if (entry->d_name[2] == '\0') + continue; + } + } + len = snprintf(dirname, sizeof(dirname), "%s/%s", recdirname, + entry->d_name); + /* if there's a problem, then skip this entry */ + if (len < 0 || (size_t)len >= sizeof(dirname)) { + xlog(L_WARNING, "%s: unable to build filename for %s!", + __func__, entry->d_name); + continue; + } + len = rmdir(dirname); + if (len) + xlog(L_WARNING, "%s: unable to rmdir %s: %d", __func__, + dirname, len); + } + closedir(v4recovery); +} diff --git a/utils/nfsdcld/legacy.h b/utils/nfsdcld/legacy.h new file mode 100644 index 00000000..8988f6e8 --- /dev/null +++ b/utils/nfsdcld/legacy.h @@ -0,0 +1,24 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef _LEGACY_H_ +#define _LEGACY_H_ + +void legacy_load_clients_from_recdir(int *); +void legacy_clear_recdir(void); + +#endif /* _LEGACY_H_ */ diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c new file mode 100644 index 00000000..2ad10019 --- /dev/null +++ b/utils/nfsdcld/nfsdcld.c @@ -0,0 +1,866 @@ +/* + * nfsdcld.c -- NFSv4 client name tracking daemon + * + * Copyright (C) 2011 Red Hat, Jeff Layton + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_CAPABILITY_H +#include +#include +#endif + +#include "xlog.h" +#include "nfslib.h" +#include "cld.h" +#include "cld-internal.h" +#include "sqlite.h" +#include "../mount/version.h" +#include "conffile.h" +#include "legacy.h" + +#ifndef DEFAULT_PIPEFS_DIR +#define DEFAULT_PIPEFS_DIR NFS_STATEDIR "/rpc_pipefs" +#endif + +#define DEFAULT_CLD_PATH "/nfsd/cld" + +#ifndef CLD_DEFAULT_STORAGEDIR +#define CLD_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcld" +#endif + +#define NFSD_END_GRACE_FILE "/proc/fs/nfsd/v4_end_grace" + +/* private data structures */ + +/* global variables */ +static char pipefs_dir[PATH_MAX] = DEFAULT_PIPEFS_DIR; +static char pipepath[PATH_MAX]; +static int inotify_fd = -1; +static struct event pipedir_event; +static bool old_kernel = false; + +static struct option longopts[] = +{ + { "help", 0, NULL, 'h' }, + { "foreground", 0, NULL, 'F' }, + { "debug", 0, NULL, 'd' }, + { "pipefsdir", 1, NULL, 'p' }, + { "storagedir", 1, NULL, 's' }, + { NULL, 0, 0, 0 }, +}; + +/* forward declarations */ +static void cldcb(int UNUSED(fd), short which, void *data); + +static void +usage(char *progname) +{ + printf("%s [ -hFd ] [ -p pipefsdir ] [ -s storagedir ]\n", progname); +} + +static int +cld_set_caps(void) +{ + int ret = 0; +#ifdef HAVE_SYS_CAPABILITY_H + unsigned long i; + cap_t caps; + + if (getuid() != 0) { + xlog(L_ERROR, "Not running as root. Daemon won't be able to " + "open the pipe after dropping capabilities!"); + return -EINVAL; + } + + /* prune the bounding set to nothing */ + for (i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0 ; ++i) { + ret = prctl(PR_CAPBSET_DROP, i, 0, 0, 0); + if (ret) { + xlog(L_ERROR, "Unable to prune capability %lu from " + "bounding set: %m", i); + return -errno; + } + } + + /* get a blank capset */ + caps = cap_init(); + if (caps == NULL) { + xlog(L_ERROR, "Unable to get blank capability set: %m"); + return -errno; + } + + /* reset the process capabilities */ + if (cap_set_proc(caps) != 0) { + xlog(L_ERROR, "Unable to set process capabilities: %m"); + ret = -errno; + } + cap_free(caps); +#endif + return ret; +} + +#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX) + +static int +cld_pipe_open(struct cld_client *clnt) +{ + int fd; + + xlog(D_GENERAL, "%s: opening upcall pipe %s", __func__, pipepath); + fd = open(pipepath, O_RDWR, 0); + if (fd < 0) { + xlog(D_GENERAL, "%s: open of %s failed: %m", __func__, pipepath); + return -errno; + } + + if (event_initialized(&clnt->cl_event)) + event_del(&clnt->cl_event); + if (clnt->cl_fd >= 0) + close(clnt->cl_fd); + + clnt->cl_fd = fd; + event_set(&clnt->cl_event, clnt->cl_fd, EV_READ, cldcb, clnt); + /* event_add is done by the caller */ + return 0; +} + +static void +cld_inotify_cb(int UNUSED(fd), short which, void *data) +{ + int ret; + size_t elen; + ssize_t rret; + char evbuf[INOTIFY_EVENT_MAX]; + char *dirc = NULL, *pname; + struct inotify_event *event = (struct inotify_event *)evbuf; + struct cld_client *clnt = data; + + if (which != EV_READ) + return; + + xlog(D_GENERAL, "%s: called for EV_READ", __func__); + + dirc = strndup(pipepath, PATH_MAX); + if (!dirc) { + xlog(L_ERROR, "%s: unable to allocate memory", __func__); + goto out; + } + + rret = read(inotify_fd, evbuf, INOTIFY_EVENT_MAX); + if (rret < 0) { + xlog(L_ERROR, "%s: read from inotify fd failed: %m", __func__); + goto out; + } + + /* check to see if we have a filename in the evbuf */ + if (!event->len) { + xlog(D_GENERAL, "%s: no filename in inotify event", __func__); + goto out; + } + + pname = basename(dirc); + elen = strnlen(event->name, event->len); + + /* does the filename match our pipe? */ + if (strlen(pname) != elen || memcmp(pname, event->name, elen)) { + xlog(D_GENERAL, "%s: wrong filename (%s)", __func__, + event->name); + goto out; + } + + ret = cld_pipe_open(clnt); + switch (ret) { + case 0: + /* readd the event for the cl_event pipe */ + event_add(&clnt->cl_event, NULL); + break; + case -ENOENT: + /* pipe must have disappeared, wait for it to come back */ + goto out; + default: + /* anything else is fatal */ + xlog(L_FATAL, "%s: unable to open new pipe (%d). Aborting.", + __func__, ret); + exit(ret); + } + +out: + event_add(&pipedir_event, NULL); + free(dirc); +} + +static int +cld_inotify_setup(void) +{ + int ret; + char *dirc, *dname; + + dirc = strndup(pipepath, PATH_MAX); + if (!dirc) { + xlog_err("%s: unable to allocate memory", __func__); + ret = -ENOMEM; + goto out_free; + } + + dname = dirname(dirc); + + inotify_fd = inotify_init(); + if (inotify_fd < 0) { + xlog_err("%s: inotify_init failed: %m", __func__); + ret = -errno; + goto out_free; + } + + ret = inotify_add_watch(inotify_fd, dname, IN_CREATE); + if (ret < 0) { + xlog_err("%s: inotify_add_watch failed: %m", __func__); + ret = -errno; + goto out_err; + } + +out_free: + free(dirc); + return 0; +out_err: + close(inotify_fd); + goto out_free; +} + +/* + * Set an inotify watch on the directory that should contain the pipe, and then + * try to open it. If it fails with anything but -ENOENT, return the error + * immediately. + * + * If it succeeds, then set up the pipe event handler. At that point, set up + * the inotify event handler and go ahead and return success. + */ +static int +cld_pipe_init(struct cld_client *clnt) +{ + int ret; + + xlog(D_GENERAL, "%s: init pipe handlers", __func__); + + ret = cld_inotify_setup(); + if (ret != 0) + goto out; + + clnt->cl_fd = -1; + ret = cld_pipe_open(clnt); + switch (ret) { + case 0: + /* add the event and we're good to go */ + event_add(&clnt->cl_event, NULL); + break; + case -ENOENT: + /* ignore this error -- cld_inotify_cb will handle it */ + ret = 0; + break; + default: + /* anything else is fatal */ + close(inotify_fd); + goto out; + } + + /* set event for inotify read */ + event_set(&pipedir_event, inotify_fd, EV_READ, cld_inotify_cb, clnt); + event_add(&pipedir_event, NULL); +out: + return ret; +} + +/* + * Older kernels will not tell nfsdcld when a grace period has started. + * Therefore we have to peek at the /proc/fs/nfsd/v4_end_grace file to + * see if nfsd is in grace. We have to do this for create and remove + * upcalls to ensure that the correct table is being updated - otherwise + * we could lose client records when the grace period is lifted. + */ +static int +cld_check_grace_period(void) +{ + int fd, ret = 0; + char c; + + if (!old_kernel) + return 0; + if (recovery_epoch != 0) + return 0; + fd = open(NFSD_END_GRACE_FILE, O_RDONLY); + if (fd < 0) { + xlog(L_WARNING, "Unable to open %s: %m", + NFSD_END_GRACE_FILE); + return 1; + } + if (read(fd, &c, 1) < 0) { + xlog(L_WARNING, "Unable to read from %s: %m", + NFSD_END_GRACE_FILE); + return 1; + } + close(fd); + if (c == 'N') { + xlog(L_WARNING, "nfsd is in grace but didn't send a gracestart upcall, " + "please update the kernel"); + ret = sqlite_grace_start(); + } + return ret; +} + +#if UPCALL_VERSION >= 2 +static ssize_t cld_message_size(void *msg) +{ + struct cld_msg_hdr *hdr = (struct cld_msg_hdr *)msg; + + switch (hdr->cm_vers) { + case 1: + return sizeof(struct cld_msg); + case 2: + return sizeof(struct cld_msg_v2); + default: + xlog(L_FATAL, "%s invalid upcall version %d", __func__, + hdr->cm_vers); + exit(-EINVAL); + } +} +#else +static ssize_t cld_message_size(void *UNUSED(msg)) +{ + return sizeof(struct cld_msg); +} +#endif + +static void +cld_not_implemented(struct cld_client *clnt) +{ + int ret; + ssize_t bsize, wsize; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + xlog(D_GENERAL, "%s: downcalling with not implemented error", __func__); + + /* set up reply */ + cmsg->cm_status = -EOPNOTSUPP; + + bsize = cld_message_size(cmsg); + wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize); + if (wsize != bsize) + xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m", + __func__, wsize); + + /* reopen pipe, just to be sure */ + ret = cld_pipe_open(clnt); + if (ret) { + xlog(L_FATAL, "%s: unable to reopen pipe: %d", __func__, ret); + exit(ret); + } +} + +static void +cld_get_version(struct cld_client *clnt) +{ + int ret; + ssize_t bsize, wsize; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + xlog(D_GENERAL, "%s: version = %u.", __func__, UPCALL_VERSION); + + cmsg->cm_u.cm_version = UPCALL_VERSION; + cmsg->cm_status = 0; + + bsize = cld_message_size(cmsg); + xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status); + wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize); + if (wsize != bsize) { + xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m", + __func__, wsize); + ret = cld_pipe_open(clnt); + if (ret) { + xlog(L_FATAL, "%s: unable to reopen pipe: %d", + __func__, ret); + exit(ret); + } + } +} + +static void +cld_create(struct cld_client *clnt) +{ + int ret; + ssize_t bsize, wsize; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + ret = cld_check_grace_period(); + if (ret) + goto reply; + + xlog(D_GENERAL, "%s: create client record.", __func__); + +#if UPCALL_VERSION >= 2 + if (cmsg->cm_vers >= 2) + ret = sqlite_insert_client_and_princhash( + cmsg->cm_u.cm_clntinfo.cc_name.cn_id, + cmsg->cm_u.cm_clntinfo.cc_name.cn_len, + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data, + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len); + else + ret = sqlite_insert_client(cmsg->cm_u.cm_name.cn_id, + cmsg->cm_u.cm_name.cn_len); +#else + ret = sqlite_insert_client(cmsg->cm_u.cm_name.cn_id, + cmsg->cm_u.cm_name.cn_len); +#endif + +reply: + cmsg->cm_status = ret ? -EREMOTEIO : ret; + + bsize = cld_message_size(cmsg); + xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status); + wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize); + if (wsize != bsize) { + xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m", + __func__, wsize); + ret = cld_pipe_open(clnt); + if (ret) { + xlog(L_FATAL, "%s: unable to reopen pipe: %d", + __func__, ret); + exit(ret); + } + } +} + +static void +cld_remove(struct cld_client *clnt) +{ + int ret; + ssize_t bsize, wsize; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + ret = cld_check_grace_period(); + if (ret) + goto reply; + + xlog(D_GENERAL, "%s: remove client record.", __func__); + + ret = sqlite_remove_client(cmsg->cm_u.cm_name.cn_id, + cmsg->cm_u.cm_name.cn_len); + +reply: + cmsg->cm_status = ret ? -EREMOTEIO : ret; + + bsize = cld_message_size(cmsg); + xlog(D_GENERAL, "%s: downcall with status %d", __func__, + cmsg->cm_status); + wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize); + if (wsize != bsize) { + xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m", + __func__, wsize); + ret = cld_pipe_open(clnt); + if (ret) { + xlog(L_FATAL, "%s: unable to reopen pipe: %d", + __func__, ret); + exit(ret); + } + } +} + +static void +cld_check(struct cld_client *clnt) +{ + int ret; + ssize_t bsize, wsize; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + /* + * If we get a check upcall at all, it means we're talking to an old + * kernel. Furthermore, if we're not in grace it means this is the + * first client to do a reclaim. Log a message and use + * sqlite_grace_start() to advance the epoch numbers. + */ + if (recovery_epoch == 0) { + xlog(D_GENERAL, "%s: received a check upcall, please update the kernel", + __func__); + ret = sqlite_grace_start(); + if (ret) + goto reply; + } + + xlog(D_GENERAL, "%s: check client record", __func__); + + ret = sqlite_check_client(cmsg->cm_u.cm_name.cn_id, + cmsg->cm_u.cm_name.cn_len); + +reply: + /* set up reply */ + cmsg->cm_status = ret ? -EACCES : ret; + + bsize = cld_message_size(cmsg); + xlog(D_GENERAL, "%s: downcall with status %d", __func__, + cmsg->cm_status); + wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize); + if (wsize != bsize) { + xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m", + __func__, wsize); + ret = cld_pipe_open(clnt); + if (ret) { + xlog(L_FATAL, "%s: unable to reopen pipe: %d", + __func__, ret); + exit(ret); + } + } +} + +static void +cld_gracedone(struct cld_client *clnt) +{ + int ret; + ssize_t bsize, wsize; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + /* + * If we got a "gracedone" upcall while we're not in grace, then + * 1) we must be talking to an old kernel + * 2) no clients attempted to reclaim + * In that case, log a message and use sqlite_grace_start() to + * advance the epoch numbers, and then proceed as normal. + */ + if (recovery_epoch == 0) { + xlog(D_GENERAL, "%s: received gracedone upcall " + "while not in grace, please update the kernel", + __func__); + ret = sqlite_grace_start(); + if (ret) + goto reply; + } + + xlog(D_GENERAL, "%s: grace done.", __func__); + + ret = sqlite_grace_done(); + + if (first_time) { + if (num_cltrack_records > 0) + sqlite_delete_cltrack_records(); + if (num_legacy_records > 0) + legacy_clear_recdir(); + sqlite_first_time_done(); + first_time = 0; + } + +reply: + /* set up reply: downcall with 0 status */ + cmsg->cm_status = ret ? -EREMOTEIO : ret; + + bsize = cld_message_size(cmsg); + xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status); + wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize); + if (wsize != bsize) { + xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m", + __func__, wsize); + ret = cld_pipe_open(clnt); + if (ret) { + xlog(L_FATAL, "%s: unable to reopen pipe: %d", + __func__, ret); + exit(ret); + } + } +} + +static int +gracestart_callback(struct cld_client *clnt) { + ssize_t bsize, wsize; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + cmsg->cm_status = -EINPROGRESS; + + bsize = cld_message_size(cmsg); + xlog(D_GENERAL, "Sending client %.*s", + cmsg->cm_u.cm_name.cn_len, cmsg->cm_u.cm_name.cn_id); + wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize); + if (wsize != bsize) + return -EIO; + return 0; +} + +static void +cld_gracestart(struct cld_client *clnt) +{ + int ret; + ssize_t bsize, wsize; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + xlog(D_GENERAL, "%s: updating grace epochs", __func__); + + ret = sqlite_grace_start(); + if (ret) + goto reply; + + xlog(D_GENERAL, "%s: sending client records to the kernel", __func__); + + ret = sqlite_iterate_recovery(&gracestart_callback, clnt); + +reply: + /* set up reply: downcall with 0 status */ + cmsg->cm_status = ret ? -EREMOTEIO : ret; + + bsize = cld_message_size(cmsg); + xlog(D_GENERAL, "Doing downcall with status %d", cmsg->cm_status); + wsize = atomicio((void *)write, clnt->cl_fd, cmsg, bsize); + if (wsize != bsize) { + xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m", + __func__, wsize); + ret = cld_pipe_open(clnt); + if (ret) { + xlog(L_FATAL, "%s: unable to reopen pipe: %d", + __func__, ret); + exit(ret); + } + } +} + +static void +cldcb(int UNUSED(fd), short which, void *data) +{ + ssize_t len; + struct cld_client *clnt = data; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + if (which != EV_READ) + goto out; + + len = atomicio(read, clnt->cl_fd, cmsg, sizeof(*cmsg)); + if (len <= 0) { + xlog(L_ERROR, "%s: pipe read failed: %m", __func__); + cld_pipe_open(clnt); + goto out; + } + + if (cmsg->cm_vers > UPCALL_VERSION) { + xlog(L_ERROR, "%s: unsupported upcall version: %hu", + __func__, cmsg->cm_vers); + cld_pipe_open(clnt); + goto out; + } + + switch(cmsg->cm_cmd) { + case Cld_Create: + cld_create(clnt); + break; + case Cld_Remove: + cld_remove(clnt); + break; + case Cld_Check: + cld_check(clnt); + break; + case Cld_GraceDone: + cld_gracedone(clnt); + break; + case Cld_GraceStart: + cld_gracestart(clnt); + break; + case Cld_GetVersion: + cld_get_version(clnt); + break; + default: + xlog(L_WARNING, "%s: command %u is not yet implemented", + __func__, cmsg->cm_cmd); + cld_not_implemented(clnt); + } +out: + event_add(&clnt->cl_event, NULL); +} + +int +main(int argc, char **argv) +{ + int arg; + int rc = 0; + bool foreground = false; + char *progname; + char *storagedir = CLD_DEFAULT_STORAGEDIR; + struct cld_client clnt; + char *s; + first_time = 0; + num_cltrack_records = 0; + num_legacy_records = 0; + + memset(&clnt, 0, sizeof(clnt)); + + progname = strdup(basename(argv[0])); + if (!progname) { + fprintf(stderr, "%s: unable to allocate memory.\n", argv[0]); + return 1; + } + + event_init(); + xlog_syslog(0); + xlog_stderr(1); + + conf_init_file(NFS_CONFFILE); + s = conf_get_str("general", "pipefs-directory"); + if (s) + strlcpy(pipefs_dir, s, sizeof(pipefs_dir)); + s = conf_get_str("nfsdcld", "storagedir"); + if (s) + storagedir = s; + rc = conf_get_num("nfsdcld", "debug", 0); + if (rc > 0) + xlog_config(D_ALL, 1); + + /* process command-line options */ + while ((arg = getopt_long(argc, argv, "hdFp:s:", longopts, + NULL)) != EOF) { + switch (arg) { + case 'd': + xlog_config(D_ALL, 1); + break; + case 'F': + foreground = true; + break; + case 'p': + strlcpy(pipefs_dir, optarg, sizeof(pipefs_dir)); + break; + case 's': + storagedir = optarg; + break; + default: + usage(progname); + return 0; + } + } + + strlcpy(pipepath, pipefs_dir, sizeof(pipepath)); + strlcat(pipepath, DEFAULT_CLD_PATH, sizeof(pipepath)); + + xlog_open(progname); + if (!foreground) { + xlog_syslog(1); + xlog_stderr(0); + rc = daemon(0, 0); + if (rc) { + xlog(L_ERROR, "Unable to daemonize: %m"); + goto out; + } + } + + /* drop all capabilities */ + rc = cld_set_caps(); + if (rc) + goto out; + + /* + * now see if the storagedir is writable by root w/o CAP_DAC_OVERRIDE. + * If it isn't then give the user a warning but proceed as if + * everything is OK. If the DB has already been created, then + * everything might still work. If it doesn't exist at all, then + * assume that the maindb init will be able to create it. Fail on + * anything else. + */ + if (access(storagedir, W_OK) == -1) { + switch (errno) { + case EACCES: + xlog(L_WARNING, "Storage directory %s is not writable. " + "Should be owned by root and writable " + "by owner!", storagedir); + break; + case ENOENT: + /* ignore and assume that we can create dir as root */ + break; + default: + xlog(L_ERROR, "Unexpected error when checking access " + "on %s: %m", storagedir); + rc = -errno; + goto out; + } + } + + if (linux_version_code() < MAKE_VERSION(4, 20, 0)) + old_kernel = true; + + /* set up storage db */ + rc = sqlite_prepare_dbh(storagedir); + if (rc) { + xlog(L_ERROR, "Failed to open main database: %d", rc); + goto out; + } + + /* set up event handler */ + rc = cld_pipe_init(&clnt); + if (rc) + goto out; + + xlog(D_GENERAL, "%s: Starting event dispatch handler.", __func__); + rc = event_dispatch(); + if (rc < 0) + xlog(L_ERROR, "%s: event_dispatch failed: %m", __func__); + + close(clnt.cl_fd); + close(inotify_fd); +out: + free(progname); + return rc; +} diff --git a/utils/nfsdcld/nfsdcld.man b/utils/nfsdcld/nfsdcld.man new file mode 100644 index 00000000..4c2b1e80 --- /dev/null +++ b/utils/nfsdcld/nfsdcld.man @@ -0,0 +1,221 @@ +.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.13) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "NFSDCLD 8" +.TH NFSDCLD 8 "2011-12-21" "" "" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "NAME" +nfsdcld \- NFSv4 Client Tracking Daemon +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +nfsdcld [\-d] [\-F] [\-p path] [\-s stable storage dir] +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +nfsdcld is the NFSv4 client tracking daemon. It is not necessary to run +this daemon on machines that are not acting as NFSv4 servers. +.PP +When a network partition is combined with a server reboot, there are +edge conditions that can cause the server to grant lock reclaims when +other clients have taken conflicting locks in the interim. A more detailed +explanation of this issue is described in \s-1RFC\s0 3530, section 8.6.3. +.PP +In order to prevent these problems, the server must track a small amount +of per-client information on stable storage. This daemon provides the +userspace piece of that functionality. +.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\fB\-d\fR, \fB\-\-debug\fR" 4 +.IX Item "-d, --debug" +Enable debug level logging. +.IP "\fB\-F\fR, \fB\-\-foreground\fR" 4 +.IX Item "-F, --foreground" +Runs the daemon in the foreground and prints all output to stderr +.IP "\fB\-p\fR \fIpath\fR, \fB\-\-pipefsdir\fR=\fIpath\fR" 4 +.IX Item "-p path, --pipefsdir=path" +Location of the rpc_pipefs filesystem. The default value is +\&\fI/var/lib/nfs/rpc_pipefs\fR. +.IP "\fB\-s\fR \fIstorage_dir\fR, \fB\-\-storagedir\fR=\fIstorage_dir\fR" 4 +.IX Item "-s storagedir, --storagedir=storage_dir" +Directory where stable storage information should be kept. The default +value is \fI/var/lib/nfs/nfsdcld\fR. +.SH "CONFIGURATION FILE" +.IX Header "CONFIGURATION FILE" +The following values are recognized in the \fB[nfsdcld]\fR section +of the \fI/etc/nfs.conf\fR configuration file: +.IP "\fBstoragedir\fR" 4 +.IX Item "storagedir" +Equivalent to \fB\-s\fR/\fB\-\-storagedir\fR. +.IP "\fBdebug\fR" 4 +.IX Item "debug" +Setting "debug = 1" is equivalent to \fB\-d\fR/\fB\-\-debug\fR. +.LP +In addition, the following value is recognized from the \fB[general]\fR section: +.IP "\fBpipefs\-directory\fR" 4 +.IX Item "pipefs-directory" +Equivalent to \fB\-p\fR/\fB\-\-pipefsdir\fR. +.SH "NOTES" +.IX Header "NOTES" +The Linux kernel NFSv4 server has historically tracked this information +on stable storage by manipulating information on the filesystem +directly, in the directory to which \fI/proc/fs/nfsd/nfsv4recoverydir\fR +points. +.PP +This changed with the original introduction of \fBnfsdcld\fR upcall in kernel version 3.4, +which was later deprecated in favor of the \fBnfsdcltrack\fR(8) usermodehelper +program, support for which was added in kernel version 3.8. However, since the +usermodehelper upcall does not work in containers, support for a new version of +the \fBnfsdcld\fR upcall was added in kernel version 5.2. +.PP +This daemon requires a kernel that supports the \fBnfsdcld\fR upcall. On older kernels, if +the legacy client name tracking code was in use, then the kernel would not create the +pipe that \fBnfsdcld\fR uses to talk to the kernel. On newer kernels, nfsd attempts to +initialize client tracking in the following order: First, the \fBnfsdcld\fR upcall. Second, +the \fBnfsdcltrack\fR usermodehelper upcall. Finally, the legacy client tracking. +.PP +This daemon should be run as root, as the pipe that it uses to communicate +with the kernel is only accessable by root. The daemon however does drop all +superuser capabilities after starting. Because of this, the \fIstoragedir\fR +should be owned by root, and be readable and writable by owner. +.PP +The daemon now supports different upcall versions to allow the kernel to pass additional +data to be stored in the on-disk database. The kernel will query the supported upcall +version from \fBnfsdcld\fR during client tracking initialization. A restart of \fBnfsd\fR is +not necessary after upgrading \fBnfsdcld\fR, however \fBnfsd\fR will not use a later upcall +version until restart. A restart of \fBnfsd is necessary\fR after downgrading \fBnfsdcld\fR, +to ensure that \fBnfsd\fR does not use an upcall version that \fBnfsdcld\fR does not support. +Additionally, a downgrade of \fBnfsdcld\fR requires the schema of the on-disk database to +be downgraded as well. That can be accomplished using the \fBclddb-tool\fR(8) utility. +.SH FILES +.TP +.B /var/lib/nfs/nfsdcld/main.sqlite +.SH SEE ALSO +.BR nfsdcltrack "(8), " clddb-tool (8) +.SH "AUTHORS" +.IX Header "AUTHORS" +The nfsdcld daemon was developed by Jeff Layton +with modifications from Scott Mayhew . diff --git a/utils/nfsdcld/sqlite.c b/utils/nfsdcld/sqlite.c new file mode 100644 index 00000000..6666c867 --- /dev/null +++ b/utils/nfsdcld/sqlite.c @@ -0,0 +1,1406 @@ +/* + * Copyright (C) 2011 Red Hat, Jeff Layton + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +/* + * Explanation: + * + * This file contains the code to manage the sqlite backend database for the + * nfsdcld client tracking daemon. + * + * The main database is called main.sqlite and contains the following tables: + * + * parameters: simple key/value pairs for storing database info + * + * grace: a "current" column containing an INTEGER representing the current + * epoch (where should new values be stored) and a "recovery" column + * containing an INTEGER representing the recovery epoch (from what + * epoch are we allowed to recover). A recovery epoch of 0 means + * normal operation (grace period not in force). Note: sqlite stores + * integers as signed values, so these must be cast to a uint64_t when + * retrieving them from the database and back to an int64_t when storing + * them in the database. + * + * rec-CCCCCCCCCCCCCCCC (where C is the hex representation of the epoch value): + * an "id" column containing a BLOB with the long-form clientid + * as sent by the client, and a "princhash" column containing a BLOB + * with the sha256 hash of the kerberos principal (if available). + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xlog.h" +#include "sqlite.h" +#include "cld.h" +#include "cld-internal.h" +#include "conffile.h" +#include "legacy.h" +#include "nfslib.h" + +#define CLD_SQLITE_LATEST_SCHEMA_VERSION 4 +#define CLTRACK_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcltrack" + +/* in milliseconds */ +#define CLD_SQLITE_BUSY_TIMEOUT 10000 + +/* private data structures */ + +/* global variables */ +static char *cltrack_storagedir = CLTRACK_DEFAULT_STORAGEDIR; + +/* reusable pathname and sql command buffer */ +static char buf[PATH_MAX]; + +/* global database handle */ +static sqlite3 *dbh; + +/* forward declarations */ + +/* make a directory, ignoring EEXIST errors unless it's not a directory */ +static int +mkdir_if_not_exist(const char *dirname) +{ + int ret; + struct stat statbuf; + + ret = mkdir(dirname, S_IRWXU); + if (ret && errno != EEXIST) + return -errno; + + ret = stat(dirname, &statbuf); + if (ret) + return -errno; + + if (!S_ISDIR(statbuf.st_mode)) + ret = -ENOTDIR; + + return ret; +} + +static int +sqlite_query_schema_version(void) +{ + int ret; + sqlite3_stmt *stmt = NULL; + + /* prepare select query */ + ret = sqlite3_prepare_v2(dbh, + "SELECT value FROM parameters WHERE key == \"version\";", + -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(D_GENERAL, "Unable to prepare select statement: %s", + sqlite3_errmsg(dbh)); + ret = 0; + goto out; + } + + /* query schema version */ + ret = sqlite3_step(stmt); + if (ret != SQLITE_ROW) { + xlog(D_GENERAL, "Select statement execution failed: %s", + sqlite3_errmsg(dbh)); + ret = 0; + goto out; + } + + ret = sqlite3_column_int(stmt, 0); +out: + sqlite3_finalize(stmt); + return ret; +} + +static int +sqlite_query_first_time(int *first_time) +{ + int ret; + sqlite3_stmt *stmt = NULL; + + /* prepare select query */ + ret = sqlite3_prepare_v2(dbh, + "SELECT value FROM parameters WHERE key == \"first_time\";", + -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(D_GENERAL, "Unable to prepare select statement: %s", + sqlite3_errmsg(dbh)); + goto out; + } + + /* query first_time */ + ret = sqlite3_step(stmt); + if (ret != SQLITE_ROW) { + xlog(D_GENERAL, "Select statement execution failed: %s", + sqlite3_errmsg(dbh)); + goto out; + } + + *first_time = sqlite3_column_int(stmt, 0); + ret = 0; +out: + sqlite3_finalize(stmt); + return ret; +} + +static int +sqlite_add_princ_col_cb(void *UNUSED(arg), int ncols, char **cols, + char **UNUSED(colnames)) +{ + int ret; + char *err; + + if (ncols > 1) + return -EINVAL; + ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" " + "ADD COLUMN princhash BLOB;", cols[0]); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + return -EINVAL; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + return -EINVAL; + } + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to add princhash column to table %s: %s", + cols[0], err); + goto out; + } + xlog(D_GENERAL, "Added princhash column to table %s", cols[0]); +out: + sqlite3_free(err); + return ret; +} + +static int +sqlite_maindb_update_v3_to_v4(void) +{ + int ret; + char *err; + + ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master " + "WHERE type=\"table\" AND name LIKE \"%rec-%\";", + sqlite_add_princ_col_cb, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: Failed to update tables!: %s", __func__, err); + } + sqlite3_free(err); + return ret; +} + +static int +sqlite_maindb_update_v1v2_to_v4(void) +{ + int ret; + char *err; + + /* create grace table */ + ret = sqlite3_exec(dbh, "CREATE TABLE grace " + "(current INTEGER , recovery INTEGER);", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to create grace table: %s", err); + goto out; + } + + /* insert initial epochs into grace table */ + ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace " + "values (1, 0);", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to set initial epochs: %s", err); + goto out; + } + + /* create recovery table for current epoch */ + ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" " + "(id BLOB PRIMARY KEY, princhash BLOB);", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to create recovery table " + "for current epoch: %s", err); + goto out; + } + + /* copy records from old clients table */ + ret = sqlite3_exec(dbh, "INSERT INTO \"rec-0000000000000001\" (id) " + "SELECT id FROM clients;", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to copy client records: %s", err); + goto out; + } + + /* drop the old clients table */ + ret = sqlite3_exec(dbh, "DROP TABLE clients;", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to drop old clients table: %s", err); + } +out: + sqlite3_free(err); + return ret; +} + +static int +sqlite_maindb_update_schema(int oldversion) +{ + int ret, ret2; + char *err; + + /* begin transaction */ + ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL, + &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to begin transaction: %s", err); + goto rollback; + } + + /* + * Check schema version again. This time, under an exclusive + * transaction to guard against racing DB setup attempts + */ + ret = sqlite_query_schema_version(); + if (ret != oldversion) { + if (ret == CLD_SQLITE_LATEST_SCHEMA_VERSION) + /* Someone else raced in and set it up */ + ret = 0; + else + /* Something went wrong -- fail! */ + ret = -EINVAL; + goto rollback; + } + + /* Still at old version -- do conversion */ + + switch (oldversion) { + case 3: + case 2: + ret = sqlite_maindb_update_v3_to_v4(); + break; + case 1: + ret = sqlite_maindb_update_v1v2_to_v4(); + break; + default: + ret = -EINVAL; + } + if (ret != SQLITE_OK) + goto rollback; + + ret = snprintf(buf, sizeof(buf), "UPDATE parameters SET value = %d " + "WHERE key = \"version\";", + CLD_SQLITE_LATEST_SCHEMA_VERSION); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + goto rollback; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + ret = -EINVAL; + goto rollback; + } + + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to update schema version: %s", err); + goto rollback; + } + + ret = sqlite_query_first_time(&first_time); + if (ret != SQLITE_OK) { + /* insert first_time into parameters table */ + ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters " + "values (\"first_time\", \"1\");", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to insert into parameter table: %s", err); + goto rollback; + } + } + + ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to commit transaction: %s", err); + goto rollback; + } +out: + sqlite3_free(err); + return ret; +rollback: + ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err); + if (ret2 != SQLITE_OK) + xlog(L_ERROR, "Unable to rollback transaction: %s", err); + goto out; +} + +/* + * Start an exclusive transaction and recheck the DB schema version. If it's + * still zero (indicating a new database) then set it up. If that all works, + * then insert schema version into the parameters table and commit the + * transaction. On any error, rollback the transaction. + */ +static int +sqlite_maindb_init_v4(void) +{ + int ret, ret2; + char *err = NULL; + + /* Start a transaction */ + ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL, + &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to begin transaction: %s", err); + return ret; + } + + /* + * Check schema version again. This time, under an exclusive + * transaction to guard against racing DB setup attempts + */ + ret = sqlite_query_schema_version(); + switch (ret) { + case 0: + /* Query failed again -- set up DB */ + break; + case CLD_SQLITE_LATEST_SCHEMA_VERSION: + /* Someone else raced in and set it up */ + ret = 0; + goto rollback; + default: + /* Something went wrong -- fail! */ + ret = -EINVAL; + goto rollback; + } + + ret = sqlite3_exec(dbh, "CREATE TABLE parameters " + "(key TEXT PRIMARY KEY, value TEXT);", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to create parameter table: %s", err); + goto rollback; + } + + /* create grace table */ + ret = sqlite3_exec(dbh, "CREATE TABLE grace " + "(current INTEGER , recovery INTEGER);", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to create grace table: %s", err); + goto rollback; + } + + /* insert initial epochs into grace table */ + ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace " + "values (1, 0);", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to set initial epochs: %s", err); + goto rollback; + } + + /* create recovery table for current epoch */ + ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" " + "(id BLOB PRIMARY KEY, princhash BLOB);", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to create recovery table " + "for current epoch: %s", err); + goto rollback; + } + + /* insert version into parameters table */ + ret = snprintf(buf, sizeof(buf), "INSERT OR FAIL INTO parameters " + "values (\"version\", \"%d\");", + CLD_SQLITE_LATEST_SCHEMA_VERSION); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + goto rollback; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + ret = -EINVAL; + goto rollback; + } + + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to insert into parameter table: %s", err); + goto rollback; + } + + /* insert first_time into parameters table */ + ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters " + "values (\"first_time\", \"1\");", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to insert into parameter table: %s", err); + goto rollback; + } + + ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to commit transaction: %s", err); + goto rollback; + } +out: + sqlite3_free(err); + return ret; + +rollback: + /* Attempt to rollback the transaction */ + ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err); + if (ret2 != SQLITE_OK) + xlog(L_ERROR, "Unable to rollback transaction: %s", err); + goto out; +} + +static int +sqlite_startup_query_grace(void) +{ + int ret; + uint64_t tcur; + uint64_t trec; + sqlite3_stmt *stmt = NULL; + + /* prepare select query */ + ret = sqlite3_prepare_v2(dbh, "SELECT * FROM grace;", -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(D_GENERAL, "Unable to prepare select statement: %s", + sqlite3_errmsg(dbh)); + goto out; + } + + ret = sqlite3_step(stmt); + if (ret != SQLITE_ROW) { + xlog(D_GENERAL, "Select statement execution failed: %s", + sqlite3_errmsg(dbh)); + goto out; + } + + tcur = (uint64_t)sqlite3_column_int64(stmt, 0); + trec = (uint64_t)sqlite3_column_int64(stmt, 1); + + current_epoch = tcur; + recovery_epoch = trec; + ret = 0; + xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64, + __func__, current_epoch, recovery_epoch); +out: + sqlite3_finalize(stmt); + return ret; +} + +/* + * Helper for renaming a recovery table to fix the padding. + */ +static int +sqlite_fix_table_name(const char *name) +{ + int ret; + uint64_t val; + char *err; + + if (sscanf(name, "rec-%" PRIx64, &val) != 1) + return -EINVAL; + ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" " + "RENAME TO \"rec-%016" PRIx64 "\";", + name, val); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + return -EINVAL; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + return -EINVAL; + } + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to fix table for epoch %"PRIu64": %s", + val, err); + goto out; + } + xlog(D_GENERAL, "Renamed table %s to rec-%016" PRIx64, name, val); +out: + sqlite3_free(err); + return ret; +} + +/* + * Callback for the sqlite_exec statement in sqlite_check_table_names. + * If the epoch encoded in the table name matches either the current + * epoch or the recovery epoch, then try to fix the padding. Otherwise, + * we bail. + */ +static int +sqlite_check_table_names_cb(void *UNUSED(arg), int ncols, char **cols, + char **UNUSED(colnames)) +{ + int ret = SQLITE_OK; + uint64_t val; + + if (ncols > 1) + return -EINVAL; + if (sscanf(cols[0], "rec-%" PRIx64, &val) != 1) + return -EINVAL; + if (val == current_epoch || val == recovery_epoch) { + xlog(D_GENERAL, "found invalid table name %s for %s epoch", + cols[0], val == current_epoch ? "current" : "recovery"); + ret = sqlite_fix_table_name(cols[0]); + } else { + xlog(L_ERROR, "found invalid table name %s for unknown epoch %" + PRId64, cols[0], val); + return -EINVAL; + } + return ret; +} + +/* + * Look for recovery table names where the epoch isn't zero-padded + */ +static int +sqlite_check_table_names(void) +{ + int ret; + char *err; + + ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master " + "WHERE type=\"table\" AND name LIKE \"%rec-%\" " + "AND length(name) < 20;", + sqlite_check_table_names_cb, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Table names check failed: %s", err); + } + sqlite3_free(err); + return ret; +} + +/* + * Simple db health check. For now we're just making sure that the recovery + * table names are of the format "rec-CCCCCCCCCCCCCCCC" (where C is the hex + * representation of the epoch value) and that epoch value matches either + * the current epoch or the recovery epoch. + */ +static int +sqlite_check_db_health(void) +{ + int ret, ret2; + char *err; + + ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL, + &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to begin transaction: %s", err); + goto rollback; + } + + ret = sqlite_check_table_names(); + if (ret != SQLITE_OK) + goto rollback; + + ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to commit transaction: %s", err); + goto rollback; + } + +cleanup: + sqlite3_free(err); + xlog(D_GENERAL, "%s: returning %d", __func__, ret); + return ret; +rollback: + ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err); + if (ret2 != SQLITE_OK) + xlog(L_ERROR, "Unable to rollback transaction: %s", err); + goto cleanup; +} + +static int +sqlite_attach_db(const char *path) +{ + int ret; + char dbpath[PATH_MAX]; + struct stat stb; + sqlite3_stmt *stmt = NULL; + + ret = snprintf(dbpath, PATH_MAX - 1, "%s/main.sqlite", path); + if (ret < 0) + return ret; + + dbpath[PATH_MAX - 1] = '\0'; + ret = stat(dbpath, &stb); + if (ret < 0) + return ret; + + xlog(D_GENERAL, "attaching %s", dbpath); + ret = sqlite3_prepare_v2(dbh, "ATTACH DATABASE ? AS attached;", + -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: unable to prepare attach statement: %s", + __func__, sqlite3_errmsg(dbh)); + return ret; + } + + ret = sqlite3_bind_text(stmt, 1, dbpath, strlen(dbpath), SQLITE_STATIC); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: bind text failed: %s", + __func__, sqlite3_errmsg(dbh)); + return ret; + } + + ret = sqlite3_step(stmt); + if (ret == SQLITE_DONE) + ret = SQLITE_OK; + else + xlog(L_ERROR, "%s: unexpected return code from attach: %s", + __func__, sqlite3_errmsg(dbh)); + + sqlite3_finalize(stmt); + stmt = NULL; + return ret; +} + +static int +sqlite_detach_db(void) +{ + int ret; + char *err = NULL; + + xlog(D_GENERAL, "detaching database"); + ret = sqlite3_exec(dbh, "DETACH DATABASE attached;", NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to detach attached db: %s", err); + } + + sqlite3_free(err); + return ret; +} + +/* + * Copies client records from the nfsdcltrack database as part of a one-time + * "upgrade". + * + * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0). + * Returns the number of records copied via "num_rec". + */ +static int +sqlite_copy_cltrack_records(int *num_rec) +{ + int ret, ret2; + char *s; + char *err = NULL; + sqlite3_stmt *stmt = NULL; + + s = conf_get_str("nfsdcltrack", "storagedir"); + if (s) + cltrack_storagedir = s; + ret = sqlite_attach_db(cltrack_storagedir); + if (ret) + goto out; + ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL, + &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to begin transaction: %s", err); + goto rollback; + } + ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";", + current_epoch); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + goto rollback; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + ret = -EINVAL; + goto rollback; + } + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to clear records from current epoch: %s", err); + goto rollback; + } + ret = snprintf(buf, sizeof(buf), "INSERT INTO \"rec-%016" PRIx64 "\" (id) " + "SELECT id FROM attached.clients;", + current_epoch); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + goto rollback; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + ret = -EINVAL; + goto rollback; + } + ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: insert statement prepare failed: %s", + __func__, sqlite3_errmsg(dbh)); + goto rollback; + } + ret = sqlite3_step(stmt); + if (ret != SQLITE_DONE) { + xlog(L_ERROR, "%s: unexpected return code from insert: %s", + __func__, sqlite3_errmsg(dbh)); + goto rollback; + } + *num_rec = sqlite3_changes(dbh); + ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to commit transaction: %s", err); + goto rollback; + } +cleanup: + sqlite3_finalize(stmt); + sqlite3_free(err); + sqlite_detach_db(); +out: + xlog(D_GENERAL, "%s: returning %d", __func__, ret); + return ret; +rollback: + *num_rec = 0; + ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err); + if (ret2 != SQLITE_OK) + xlog(L_ERROR, "Unable to rollback transaction: %s", err); + goto cleanup; +} + +/* Open the database and set up the database handle for it */ +int +sqlite_prepare_dbh(const char *topdir) +{ + int ret; + + /* Do nothing if the database handle is already set up */ + if (dbh) + return 0; + + ret = snprintf(buf, PATH_MAX - 1, "%s/main.sqlite", topdir); + if (ret < 0) + return ret; + + buf[PATH_MAX - 1] = '\0'; + + /* open a new DB handle */ + ret = sqlite3_open(buf, &dbh); + if (ret != SQLITE_OK) { + /* try to create the dir */ + ret = mkdir_if_not_exist(topdir); + if (ret) + goto out_close; + + /* retry open */ + ret = sqlite3_open(buf, &dbh); + if (ret != SQLITE_OK) + goto out_close; + } + + /* set busy timeout */ + ret = sqlite3_busy_timeout(dbh, CLD_SQLITE_BUSY_TIMEOUT); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to set sqlite busy timeout: %s", + sqlite3_errmsg(dbh)); + goto out_close; + } + + ret = sqlite_query_schema_version(); + switch (ret) { + case CLD_SQLITE_LATEST_SCHEMA_VERSION: + /* DB is already set up. Do nothing */ + ret = 0; + break; + case 3: + /* Old DB -- update to new schema */ + ret = sqlite_maindb_update_schema(3); + if (ret) + goto out_close; + break; + case 2: + /* Old DB -- update to new schema */ + ret = sqlite_maindb_update_schema(2); + if (ret) + goto out_close; + break; + + case 1: + /* Old DB -- update to new schema */ + ret = sqlite_maindb_update_schema(1); + if (ret) + goto out_close; + break; + case 0: + /* Query failed -- try to set up new DB */ + ret = sqlite_maindb_init_v4(); + if (ret) + goto out_close; + break; + default: + /* Unknown DB version -- downgrade? Fail */ + xlog(L_ERROR, "Unsupported database schema version! " + "Expected %d, got %d.", + CLD_SQLITE_LATEST_SCHEMA_VERSION, ret); + ret = -EINVAL; + goto out_close; + } + + ret = sqlite_startup_query_grace(); + + ret = sqlite_query_first_time(&first_time); + if (ret) + goto out_close; + + ret = sqlite_check_db_health(); + if (ret) { + xlog(L_ERROR, "Database health check failed! " + "Database must be fixed manually."); + goto out_close; + } + + /* one-time "upgrade" from older client tracking methods */ + if (first_time) { + sqlite_copy_cltrack_records(&num_cltrack_records); + xlog(D_GENERAL, "%s: num_cltrack_records = %d\n", + __func__, num_cltrack_records); + legacy_load_clients_from_recdir(&num_legacy_records); + xlog(D_GENERAL, "%s: num_legacy_records = %d\n", + __func__, num_legacy_records); + if (num_cltrack_records > 0 && num_legacy_records > 0) + xlog(L_WARNING, "%s: first-time upgrade detected " + "both cltrack and legacy records!\n", __func__); + } + + return ret; +out_close: + sqlite3_close(dbh); + dbh = NULL; + return ret; +} + +/* + * Create a client record + * + * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0) + */ +int +sqlite_insert_client(const unsigned char *clname, const size_t namelen) +{ + int ret; + sqlite3_stmt *stmt = NULL; + + ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" (id) " + "VALUES (?);", current_epoch); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + return ret; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + return -EINVAL; + } + + ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: insert statement prepare failed: %s", + __func__, sqlite3_errmsg(dbh)); + return ret; + } + + ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen, + SQLITE_STATIC); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: bind blob failed: %s", __func__, + sqlite3_errmsg(dbh)); + goto out_err; + } + + ret = sqlite3_step(stmt); + if (ret == SQLITE_DONE) + ret = SQLITE_OK; + else + xlog(L_ERROR, "%s: unexpected return code from insert: %s", + __func__, sqlite3_errmsg(dbh)); + +out_err: + xlog(D_GENERAL, "%s: returning %d", __func__, ret); + sqlite3_finalize(stmt); + return ret; +} + +#if UPCALL_VERSION >= 2 +/* + * Create a client record including hash the kerberos principal + * + * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0) + */ +int +sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen, + const unsigned char *clprinchash, const size_t princhashlen) +{ + int ret; + sqlite3_stmt *stmt = NULL; + + if (princhashlen > 0) + ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" " + "VALUES (?, ?);", current_epoch); + else + ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" (id) " + "VALUES (?);", current_epoch); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + return ret; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + return -EINVAL; + } + + ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: insert statement prepare failed: %s", + __func__, sqlite3_errmsg(dbh)); + return ret; + } + + ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen, + SQLITE_STATIC); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: bind blob failed: %s", __func__, + sqlite3_errmsg(dbh)); + goto out_err; + } + + if (princhashlen > 0) { + ret = sqlite3_bind_blob(stmt, 2, (const void *)clprinchash, princhashlen, + SQLITE_STATIC); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: bind blob failed: %s", __func__, + sqlite3_errmsg(dbh)); + goto out_err; + } + } + + ret = sqlite3_step(stmt); + if (ret == SQLITE_DONE) + ret = SQLITE_OK; + else + xlog(L_ERROR, "%s: unexpected return code from insert: %s", + __func__, sqlite3_errmsg(dbh)); + +out_err: + xlog(D_GENERAL, "%s: returning %d", __func__, ret); + sqlite3_finalize(stmt); + return ret; +} +#else +int +sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen, + const unsigned char *clprinchash, const size_t princhashlen) +{ + return -EINVAL; +} +#endif + +/* Remove a client record */ +int +sqlite_remove_client(const unsigned char *clname, const size_t namelen) +{ + int ret; + sqlite3_stmt *stmt = NULL; + + ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\" " + "WHERE id==?;", current_epoch); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + return ret; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + return -EINVAL; + } + + ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL); + + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: statement prepare failed: %s", + __func__, sqlite3_errmsg(dbh)); + goto out_err; + } + + ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen, + SQLITE_STATIC); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: bind blob failed: %s", __func__, + sqlite3_errmsg(dbh)); + goto out_err; + } + + ret = sqlite3_step(stmt); + if (ret == SQLITE_DONE) + ret = SQLITE_OK; + else + xlog(L_ERROR, "%s: unexpected return code from delete: %d", + __func__, ret); + +out_err: + xlog(D_GENERAL, "%s: returning %d", __func__, ret); + sqlite3_finalize(stmt); + return ret; +} + +/* + * Is the given clname in the clients table? If so, then update its timestamp + * and return success. If the record isn't present, or the update fails, then + * return an error. + */ +int +sqlite_check_client(const unsigned char *clname, const size_t namelen) +{ + int ret; + sqlite3_stmt *stmt = NULL; + + ret = snprintf(buf, sizeof(buf), "SELECT count(*) FROM \"rec-%016" PRIx64 "\" " + "WHERE id==?;", recovery_epoch); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + return ret; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + return -EINVAL; + } + + ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: select statement prepare failed: %s", + __func__, sqlite3_errmsg(dbh)); + return ret; + } + + ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen, + SQLITE_STATIC); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: bind blob failed: %s", + __func__, sqlite3_errmsg(dbh)); + goto out_err; + } + + ret = sqlite3_step(stmt); + if (ret != SQLITE_ROW) { + xlog(L_ERROR, "%s: unexpected return code from select: %d", + __func__, ret); + goto out_err; + } + + ret = sqlite3_column_int(stmt, 0); + xlog(D_GENERAL, "%s: select returned %d rows", __func__, ret); + if (ret != 1) { + ret = -EACCES; + goto out_err; + } + + sqlite3_finalize(stmt); + + /* Now insert the client into the table for the current epoch */ + return sqlite_insert_client(clname, namelen); + +out_err: + xlog(D_GENERAL, "%s: returning %d", __func__, ret); + sqlite3_finalize(stmt); + return ret; +} + +int +sqlite_grace_start(void) +{ + int ret, ret2; + char *err; + uint64_t tcur = current_epoch; + uint64_t trec = recovery_epoch; + + /* begin transaction */ + ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL, + &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to begin transaction: %s", err); + goto rollback; + } + + if (trec == 0) { + /* + * A normal grace start - update the epoch values in the grace + * table and create a new table for the current reboot epoch. + */ + trec = tcur; + tcur++; + + ret = snprintf(buf, sizeof(buf), "UPDATE grace " + "SET current = %" PRId64 ", recovery = %" PRId64 ";", + (int64_t)tcur, (int64_t)trec); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + goto rollback; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", + ret); + ret = -EINVAL; + goto rollback; + } + + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to update epochs: %s", err); + goto rollback; + } + + ret = snprintf(buf, sizeof(buf), "CREATE TABLE \"rec-%016" PRIx64 "\" " + "(id BLOB PRIMARY KEY, princhash blob);", + tcur); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + goto rollback; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", + ret); + ret = -EINVAL; + goto rollback; + } + + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to create table for current epoch: %s", + err); + goto rollback; + } + } else { + /* Server restarted while in grace - don't update the epoch + * values in the grace table, just clear out the records for + * the current reboot epoch. + */ + ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";", + tcur); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + goto rollback; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + ret = -EINVAL; + goto rollback; + } + + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to clear table for current epoch: %s", + err); + goto rollback; + } + } + + ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to commit transaction: %s", err); + goto rollback; + } + + current_epoch = tcur; + recovery_epoch = trec; + xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64, + __func__, current_epoch, recovery_epoch); + +out: + sqlite3_free(err); + return ret; +rollback: + ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err); + if (ret2 != SQLITE_OK) + xlog(L_ERROR, "Unable to rollback transaction: %s", err); + goto out; +} + +int +sqlite_grace_done(void) +{ + int ret, ret2; + char *err; + + /* begin transaction */ + ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL, + &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to begin transaction: %s", err); + goto rollback; + } + + ret = sqlite3_exec(dbh, "UPDATE grace SET recovery = \"0\";", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to clear recovery epoch: %s", err); + goto rollback; + } + + ret = snprintf(buf, sizeof(buf), "DROP TABLE \"rec-%016" PRIx64 "\";", + recovery_epoch); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + goto rollback; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + ret = -EINVAL; + goto rollback; + } + + ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to drop table for recovery epoch: %s", + err); + goto rollback; + } + + ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to commit transaction: %s", err); + goto rollback; + } + + recovery_epoch = 0; + xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64, + __func__, current_epoch, recovery_epoch); + +out: + sqlite3_free(err); + return ret; +rollback: + ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err); + if (ret2 != SQLITE_OK) + xlog(L_ERROR, "Unable to rollback transaction: %s", err); + goto out; +} + + +int +sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt) +{ + int ret; + sqlite3_stmt *stmt = NULL; +#if UPCALL_VERSION >= 2 + struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2; +#else + struct cld_msg *cmsg = &clnt->cl_u.cl_msg; +#endif + + if (recovery_epoch == 0) { + xlog(D_GENERAL, "%s: not in grace!", __func__); + return -EINVAL; + } + + ret = snprintf(buf, sizeof(buf), "SELECT * FROM \"rec-%016" PRIx64 "\";", + recovery_epoch); + if (ret < 0) { + xlog(L_ERROR, "sprintf failed!"); + return ret; + } else if ((size_t)ret >= sizeof(buf)) { + xlog(L_ERROR, "sprintf output too long! (%d chars)", ret); + return -EINVAL; + } + + ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "%s: select statement prepare failed: %s", + __func__, sqlite3_errmsg(dbh)); + return ret; + } + + while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) { + memset(&cmsg->cm_u, 0, sizeof(cmsg->cm_u)); +#if UPCALL_VERSION >= 2 + memcpy(&cmsg->cm_u.cm_clntinfo.cc_name.cn_id, + sqlite3_column_blob(stmt, 0), NFS4_OPAQUE_LIMIT); + cmsg->cm_u.cm_clntinfo.cc_name.cn_len = sqlite3_column_bytes(stmt, 0); + if (sqlite3_column_bytes(stmt, 1) > 0) { + memcpy(&cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data, + sqlite3_column_blob(stmt, 1), SHA256_DIGEST_SIZE); + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = sqlite3_column_bytes(stmt, 1); + } +#else + memcpy(&cmsg->cm_u.cm_name.cn_id, sqlite3_column_blob(stmt, 0), + NFS4_OPAQUE_LIMIT); + cmsg->cm_u.cm_name.cn_len = sqlite3_column_bytes(stmt, 0); +#endif + cb(clnt); + } + if (ret == SQLITE_DONE) + ret = 0; + sqlite3_finalize(stmt); + return ret; +} + +/* + * Cleans out the old nfsdcltrack database. + * + * Called upon receipt of the first "GraceDone" upcall only. + */ +int +sqlite_delete_cltrack_records(void) +{ + int ret; + char *s; + char *err = NULL; + + s = conf_get_str("nfsdcltrack", "storagedir"); + if (s) + cltrack_storagedir = s; + ret = sqlite_attach_db(cltrack_storagedir); + if (ret) + goto out; + ret = sqlite3_exec(dbh, "DELETE FROM attached.clients;", + NULL, NULL, &err); + if (ret != SQLITE_OK) { + xlog(L_ERROR, "Unable to clear records from cltrack db: %s", + err); + } + sqlite_detach_db(); +out: + sqlite3_free(err); + return ret; +} + +/* + * Sets first_time to 0 in the parameters table to ensure we only + * copy old client tracking records into the database one time. + * + * Called upon receipt of the first "GraceDone" upcall only. + */ +int +sqlite_first_time_done(void) +{ + int ret; + char *err = NULL; + + ret = sqlite3_exec(dbh, "UPDATE parameters SET value = \"0\" " + "WHERE key = \"first_time\";", + NULL, NULL, &err); + if (ret != SQLITE_OK) + xlog(L_ERROR, "Unable to clear first_time: %s", err); + + sqlite3_free(err); + return ret; +} diff --git a/utils/nfsdcld/sqlite.h b/utils/nfsdcld/sqlite.h new file mode 100644 index 00000000..0a26ad67 --- /dev/null +++ b/utils/nfsdcld/sqlite.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2011 Red Hat, Jeff Layton + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef _SQLITE_H_ +#define _SQLITE_H_ + +struct cld_client; + +int sqlite_prepare_dbh(const char *topdir); +int sqlite_insert_client(const unsigned char *clname, const size_t namelen); +int sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen, + const unsigned char *clprinchash, const size_t princhashlen); +int sqlite_remove_client(const unsigned char *clname, const size_t namelen); +int sqlite_check_client(const unsigned char *clname, const size_t namelen); +int sqlite_grace_start(void); +int sqlite_grace_done(void); +int sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt); +int sqlite_delete_cltrack_records(void); +int sqlite_first_time_done(void); + +#endif /* _SQLITE_H */ diff --git a/utils/nfsidmap/nfsidmap.c b/utils/nfsidmap/nfsidmap.c index d3967a3a..4d219ef5 100644 --- a/utils/nfsidmap/nfsidmap.c +++ b/utils/nfsidmap/nfsidmap.c @@ -18,7 +18,7 @@ #include "xcommon.h" int verbose = 0; -char *usage = "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]"; +#define USAGE "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]" #define MAX_ID_LEN 11 #define IDMAP_NAMESZ 128 @@ -401,7 +401,7 @@ int main(int argc, char **argv) break; case 'h': default: - xlog_warn(usage, progname); + xlog_warn(USAGE, progname); exit(opt == 'h' ? 0 : 1); } } @@ -433,7 +433,7 @@ int main(int argc, char **argv) xlog_stderr(verbose); if ((argc - optind) != 2) { xlog_warn("Bad arg count. Check /etc/request-key.conf"); - xlog_warn(usage, progname); + xlog_warn(USAGE, progname); return EXIT_FAILURE; } @@ -451,7 +451,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } if (verbose) { - xlog_warn("key: 0x%lx type: %s value: %s timeout %ld", + xlog_warn("key: 0x%x type: %s value: %s timeout %d", key, type, value, timeout); } diff --git a/utils/statd/rmtcall.c b/utils/statd/rmtcall.c index c4f6364f..5b261480 100644 --- a/utils/statd/rmtcall.c +++ b/utils/statd/rmtcall.c @@ -247,7 +247,7 @@ process_reply(FD_SET_TYPE *rfds) xlog_warn("%s: service %d not registered on localhost", __func__, NL_MY_PROG(lp)); } else { - xlog(D_GENERAL, "%s: Callback to %s (for %d) succeeded", + xlog(D_GENERAL, "%s: Callback to %s (for %s) succeeded", __func__, NL_MY_NAME(lp), NL_MON_NAME(lp)); } nlist_free(¬ify, lp); diff --git a/utils/statd/statd.c b/utils/statd/statd.c index 14673800..8eef2ff2 100644 --- a/utils/statd/statd.c +++ b/utils/statd/statd.c @@ -136,7 +136,7 @@ static void log_modes(void) strcat(buf, "TI-RPC "); #endif - xlog_warn(buf); + xlog_warn("%s", buf); } /* diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c index d1dbd74a..e343c768 100644 --- a/utils/statd/svc_run.c +++ b/utils/statd/svc_run.c @@ -53,6 +53,7 @@ #include #include +#include #include "statd.h" #include "notlist.h" @@ -104,8 +105,8 @@ my_svc_run(int sockfd) tv.tv_sec = NL_WHEN(notify) - now; tv.tv_usec = 0; - xlog(D_GENERAL, "Waiting for reply... (timeo %d)", - tv.tv_sec); + xlog(D_GENERAL, "Waiting for reply... (timeo %jd)", + (intmax_t)tv.tv_sec); selret = select(FD_SETSIZE, &readfds, (void *) 0, (void *) 0, &tv); } else {