From fc4bebd605b6a579a4d19c6640aca38057397c77 Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Tue, 21 Aug 2018 12:44:54 +0200 Subject: [PATCH 527/529] fuse: diagnostic FLUSH interrupt We add dummy interrupt handling for the FLUSH fuse message. It can be enabled by the "--fuse-flush-handle-interrupt" hidden command line option, or "-ofuse-flush-handle-interrupt=yes" mount option. It serves no other than diagnostic & demonstational purposes -- to exercise the interrupt handling framework a bit and to give an usage example. Documentation is also provided that showcases interrupt handling via FLUSH. Upstream: https://review.gluster.org/20876 > Change-Id: I522f1e798501d06b74ac3592a5f73c1ab0590c60 > updates: #465 > Signed-off-by: Csaba Henk Change-Id: I510aff8895a3fe5858ab313c47514de7087d08c1 BUG: 1595246 Signed-off-by: Csaba Henk Reviewed-on: https://code.engineering.redhat.com/gerrit/162550 Tested-by: RHGS Build Bot Reviewed-by: Amar Tumballi Suryanarayan Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- doc/developer-guide/Developers-Index.md | 5 ++ doc/developer-guide/fuse-interrupt.md | 130 ++++++++++++++++++++++++++++ glusterfsd/src/glusterfsd.c | 53 +++++++++++- glusterfsd/src/glusterfsd.h | 1 + libglusterfs/src/glusterfs.h | 2 + tests/features/interrupt.t | 67 ++++++++++++++ tests/features/open_and_sleep.c | 27 ++++++ xlators/mount/fuse/src/fuse-bridge.c | 59 +++++++++++++ xlators/mount/fuse/src/fuse-bridge.h | 4 +- xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++ 10 files changed, 353 insertions(+), 2 deletions(-) create mode 100644 doc/developer-guide/fuse-interrupt.md create mode 100644 tests/features/interrupt.t create mode 100644 tests/features/open_and_sleep.c diff --git a/doc/developer-guide/Developers-Index.md b/doc/developer-guide/Developers-Index.md index 4c6346e..6c00a4a 100644 --- a/doc/developer-guide/Developers-Index.md +++ b/doc/developer-guide/Developers-Index.md @@ -59,6 +59,11 @@ Translators - [Storage/posix Translator](./posix.md) - [Compression translator](./network_compression.md) +Fuse +---- + +- [Interrupt handling](./fuse-interrupt.md) + Testing/Debugging ----------------- diff --git a/doc/developer-guide/fuse-interrupt.md b/doc/developer-guide/fuse-interrupt.md new file mode 100644 index 0000000..f92b553 --- /dev/null +++ b/doc/developer-guide/fuse-interrupt.md @@ -0,0 +1,130 @@ +# Fuse interrupt handling + +## Conventions followed + +- *FUSE* refers to the "wire protocol" between kernel and userspace and + related specifications. +- *fuse* refers to the kernel subsystem and also to the GlusterFs translator. + +## FUSE interrupt handling spec + +The [Linux kernel FUSE documentation](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148) +desrcibes how interrupt handling happens in fuse. + +## Interrupt handling in the fuse translator + +### Declarations + +This document describes the internal API in the fuse translator with which +interrupt can be handled. + +The API being internal (to be used only in fuse-bridge.c; the functions are +not exported to a header file). + +``` +enum fuse_interrupt_state { + INTERRUPT_NONE, + INTERRUPT_SQUELCHED, + INTERRUPT_HANDLED, +}; +typedef enum fuse_interrupt_state fuse_interrupt_state_t; +struct fuse_interrupt_record; +typedef struct fuse_interrupt_record fuse_interrupt_record_t; +typedef void (*fuse_interrupt_handler_t)(xlator_t *this, + fuse_interrupt_record_t *); +struct fuse_interrupt_record { + fuse_in_header_t fuse_in_header; + void *data; + /* + ... + */ +}; + +fuse_interrupt_record_t * +fuse_interrupt_record_new(fuse_in_header_t *finh, + fuse_interrupt_handler_t handler); + +void +fuse_interrupt_record_insert(xlator_t *this, fuse_interrupt_record_t *fir); + +gf_boolean_t +fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this, + gf_boolean_t sync, void **datap); + +void +fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir, + fuse_interrupt_state_t intstat, + gf_boolean_t sync, void **datap); +``` + +The code demonstrates the usage of the API through `fuse_flush()`. (It's a +dummy implementation only for demonstration purposes.) Flush is chosen +because a `FLUSH` interrupt is easy to trigger (see +*tests/features/interrupt.t*). Interrupt handling for flush is switched on +by `--fuse-flush-handle-interrupt` (a hidden glusterfs command line flag). +The flush interrupt handling code is guarded by the +`flush_handle_interrupt` Boolean member of `fuse_private_t`. + +### Usage + +A given FUSE fop can be enabled to handle interrupts via the following +steps: + +- Define a handler function (of type `fuse_interrupt_handler_t`). + It should implement the interrupt handling logic and in the end + call (directly or as async callback) `fuse_interrupt_finish_interrupt()`. + The `intstat` argument to `fuse_interrupt_finish_interrupt` should be + either `INTERRUPT_SQUELCHED` or `INTERRUPT_HANDLED`. + - `INTERRUPT_SQUELCHED` means that we choose not to handle the interrupt + and the fop is going on uninterrupted. + - `INTERRUPT_HANDLED` means that the interrupt was actually handled. In + this case the fop will be answered from interrupt context with errno + `EINTR` (that is, the fop should not send a response to the kernel). + + We return to the `sync` and `datap` arguments later. +- In the `fuse_` function create an interrupt record using + `fuse_interrupt_record_new()`, passing the incoming `fuse_in_header` and + the above handler function to it. + - Arbitrary further data can be referred to via the `data` member of the + interrupt record that is to be passed on from fop context to + interrupt context. +- When it's set up, pass the interrupt record to + `fuse_interrupt_record_insert()`. +- In `fuse__cbk` call `fuse_interrupt_finish_fop()`. + - `fuse_interrupt_finish_fop()` returns a Boolean according to whether the + interrupt was handled. If it was, then the fuse request is already + answered and the stack gets destroyed in `fuse_interrupt_finish_fop` so + `fuse__cbk` can just return (zero). Otherwise follow the standard + cbk logic (answer the fuse request and destroy the stack -- these are + typically accomplished by `fuse_err_cbk()`). +- The last two argument of `fuse_interrupt_finish_fop()` and + `fuse_interrupt_finish_interrupt()` are `gf_boolean_t sync` and + `void **datap`. + - `sync` represents the strategy for freeing the interrupt record. The + interrupt handler and the fop handler are in race to get at the interrupt + record first (interrupt handler for purposes of doing the interrupt + handling, fop handler for purposes of deactivating the interrupt record + upon completion of the fop handling). + - If `sync` is true, then the fop handler will wait for the interrupt + handler to finish and it takes care of freeing. + - If `sync` is false, the loser of the above race will perform freeing. + + Freeing is done within the respective interrupt finish routines, except + for the `data` field of the interrupt record; with respect to that, see + the discussion of the `datap` parameter below. The strategy has to be + consensual, that is, `fuse_interrupt_finish_fop()` and + `fuse_interrupt_finish_interrupt()` must pass the same value for `sync`. + If dismantling the resources associated with the interrupt record is + simple, `sync = _gf_false` is the suggested choice; `sync = _gf_true` can + be useful in the opposite case, when dismantling those resources would + be inconvenient to implement in two places or to enact in non-fop context. + - If `datap` is `NULL`, the `data` member of the interrupt record will be + freed within the interrupt finish routine. If it points to a valid + `void *` pointer, and if caller is doing the cleanup (see `sync` above), + then that pointer will be directed to the `data` member of the interrupt + record and it's up to the caller what it's doing with it. + - If `sync` is true, interrupt handler can use `datap = NULL`, and + fop handler will have `datap` set. + - If `sync` is false, and handlers pass a pointer to a pointer for + `datap`, they should check if the pointed pointer is NULL before + attempting to deal with the data. diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 2e2cd77..9c536cd 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -243,6 +243,9 @@ static struct argp_option gf_options[] = { OPTION_ARG_OPTIONAL, "disable/enable fuse event-history"}, {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER", OPTION_ARG_OPTIONAL, "set fuse reader thread count"}, + {"fuse-flush-handle-interrupt", ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY, + "BOOL", OPTION_ARG_OPTIONAL | OPTION_HIDDEN, + "handle interrupt in fuse FLUSH handler"}, {0, 0, 0, 0, "Miscellaneous Options:"}, {0, } }; @@ -581,6 +584,38 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) goto err; } } + switch (cmd_args->fuse_flush_handle_interrupt) { + case GF_OPTION_ENABLE: + ret = dict_set_static_ptr (options, + "flush-handle-interrupt", + "on"); + if (ret < 0) { + gf_msg ("glusterfsd", GF_LOG_ERROR, 0, + glusterfsd_msg_4, + "failed to set dict value for key " + "flush-handle-interrupt"); + goto err; + } + break; + case GF_OPTION_DISABLE: + ret = dict_set_static_ptr (options, + "flush-handle-interrupt", + "off"); + if (ret < 0) { + gf_msg ("glusterfsd", GF_LOG_ERROR, 0, + glusterfsd_msg_4, + "failed to set dict value for key " + "flush-handle-interrupt"); + goto err; + } + break; + case GF_OPTION_DEFERRED: /* default */ + default: + gf_msg_debug ("glusterfsd", 0, + "fuse-flush-handle-interrupt mode %d", + cmd_args->fuse_flush_handle_interrupt); + break; + } ret = 0; err: @@ -1352,7 +1387,22 @@ no_oom_api: } break; - } + case ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY: + if (!arg) + arg = "yes"; + + if (gf_string2boolean(arg, &b) == 0) { + cmd_args->fuse_flush_handle_interrupt = b; + + break; + } + + argp_failure(state, -1, 0, + "unknown fuse flush handle interrupt " + "setting \"%s\"", + arg); + break; + } return 0; } @@ -1648,6 +1698,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) cmd_args->fuse_attribute_timeout = -1; cmd_args->fuse_entry_timeout = -1; cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED; + cmd_args->fuse_flush_handle_interrupt = GF_OPTION_DEFERRED; if (ctx->mem_acct_enable) cmd_args->mem_acct = 1; diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index 1550a30..28b514a 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -101,6 +101,7 @@ enum argp_option_keys { ARGP_FUSE_EVENT_HISTORY_KEY = 179, ARGP_READER_THREAD_COUNT_KEY = 180, ARGP_FUSE_LRU_LIMIT_KEY = 190, + ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 191, }; struct _gfd_vol_top_priv { diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 2690306..9fa066e 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -448,6 +448,8 @@ struct _cmd_args { char *event_history; uint32_t reader_thread_count; + + int fuse_flush_handle_interrupt; }; typedef struct _cmd_args cmd_args_t; diff --git a/tests/features/interrupt.t b/tests/features/interrupt.t new file mode 100644 index 0000000..476d875 --- /dev/null +++ b/tests/features/interrupt.t @@ -0,0 +1,67 @@ +#!/bin/bash + +##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/) + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +TESTS_EXPECTED_IN_LOOP=4 + +cleanup; +logdir=`gluster --print-logdir` + +TEST build_tester $(dirname $0)/open_and_sleep.c + +## Start and create a volume +TEST glusterd; +TEST pidof glusterd; + +TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8}; + +## Verify volume is is created +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; + +## Start volume and verify +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +function log-file-name() +{ + logfilename=$M0".log" + echo ${logfilename:1} | tr / - +} + +log_file=$logdir"/"`log-file-name` + +function test_interrupt { + local handlebool="$1" + local logpattern="$2" + + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --fuse-flush-handle-interrupt=$handlebool --log-level=DEBUG $M0 + + # If the test helper fails (which is considered a setup error, not failure of the test + # case itself), kill will be invoked without argument, and that will be the actual + # error which is caught. + TEST "./$(dirname $0)/open_and_sleep $M0/testfile | { sleep 0.1; xargs -n1 kill -INT; }" + + TEST "grep -E '$logpattern' $log_file" + # Basic sanity check, making sure filesystem has not crashed. + TEST test -f $M0/testfile +} + +# Theoretically FLUSH might finish before INTERRUPT is handled, +# in which case we'd get the "no handler found" message (but it's unlikely). +test_interrupt yes 'FLUSH.*interrupt handler triggered|INTERRUPT.*no handler found' +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +test_interrupt no 'INTERRUPT.*no handler found' + +## Finish up +TEST $CLI volume stop $V0; +EXPECT 'Stopped' volinfo_field $V0 'Status'; + +TEST $CLI volume delete $V0; +TEST ! $CLI volume info $V0; + +cleanup_tester $(dirname $0)/open_and_sleep; +cleanup; diff --git a/tests/features/open_and_sleep.c b/tests/features/open_and_sleep.c new file mode 100644 index 0000000..da089e9 --- /dev/null +++ b/tests/features/open_and_sleep.c @@ -0,0 +1,27 @@ +#include +#include +#include + +int +main (int argc, char **argv) +{ + pid_t pid; + int fd; + + if (argc >= 2) { + fd = open (argv[1], O_RDWR | O_CREAT, 0644); + if (fd == -1) { + fprintf (stderr, "cannot open/create %s\n", argv[1]); + return 1; + } + } + + pid = getpid (); + printf ("%d\n", pid); + fflush (stdout); + + for (;;) + sleep (1); + + return 0; +} diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 0d4b9db..44c39e4 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -1779,6 +1779,21 @@ fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } static int +fuse_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + fuse_private_t *priv = this->private; + + if (priv->flush_handle_interrupt) { + if (fuse_interrupt_finish_fop (frame, this, _gf_false, NULL)) { + return 0; + } + } + + return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata); +} + +static int fuse_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) @@ -2961,6 +2976,19 @@ fuse_flush_resume (fuse_state_t *state) { FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, flush, state->fd, state->xdata); + FUSE_FOP (state, fuse_flush_cbk, GF_FOP_FLUSH, + flush, state->fd, state->xdata); +} + +static void +fuse_flush_interrupt_handler (xlator_t *this, fuse_interrupt_record_t *fir) +{ + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "FLUSH unique %" PRIu64 ": interrupt handler triggered", + fir->fuse_in_header.unique); + + fuse_interrupt_finish_interrupt (this, fir, INTERRUPT_HANDLED, + _gf_false, NULL); } static void @@ -2968,6 +2996,7 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg, struct iobuf *iobuf) { struct fuse_flush_in *ffi = msg; + fuse_private_t *priv = NULL; fuse_state_t *state = NULL; fd_t *fd = NULL; @@ -2976,6 +3005,27 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg, fd = FH_TO_FD (ffi->fh); state->fd = fd; + priv = this->private; + if (priv->flush_handle_interrupt) { + fuse_interrupt_record_t *fir = NULL; + + fir = fuse_interrupt_record_new (finh, + fuse_flush_interrupt_handler); + if (!fir) { + send_fuse_err (this, finh, ENOMEM); + + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "FLUSH unique %" PRIu64 + ":" + " interrupt record allocation failed", + finh->unique); + free_fuse_state (state); + + return; + } + fuse_interrupt_record_insert (this, fir); + } + fuse_resolve_fd_init (state, &state->resolve, fd); state->lk_owner = ffi->lock_owner; @@ -6226,6 +6276,9 @@ init (xlator_t *this_xl) GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit); + GF_OPTION_INIT ("flush-handle-interrupt", priv->flush_handle_interrupt, bool, + cleanup_exit); + /* user has set only background-qlen, not congestion-threshold, use the fuse kernel driver formula to set congestion. ie, 75% */ if (dict_get (this_xl->options, "background-qlen") && @@ -6552,5 +6605,11 @@ struct volume_options options[] = { .description = "makes glusterfs invalidate kernel inodes after " "reaching this limit (0 means 'unlimited')", }, + { .key = {"flush-handle-interrupt"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Handle iterrupts in FLUSH handler (for testing " + "purposes).", + }, { .key = {NULL} }, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index ba3e000..e18469d 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -157,6 +157,8 @@ struct fuse_private { /* Interrupt subscription */ struct list_head interrupt_list; pthread_mutex_t interrupt_mutex; + + gf_boolean_t flush_handle_interrupt; }; typedef struct fuse_private fuse_private_t; @@ -191,7 +193,7 @@ typedef struct fuse_interrupt_record fuse_interrupt_record_t; typedef void (*fuse_interrupt_handler_t) (xlator_t *this, fuse_interrupt_record_t *); struct fuse_interrupt_record { - struct fuse_in_header fuse_in_header; + fuse_in_header_t fuse_in_header; void *data; gf_boolean_t hit; fuse_interrupt_state_t interrupt_state; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 9a0404f..a3a9fbd 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -273,6 +273,10 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --dump-fuse=$dump_fuse"); fi + if [ -n "$fuse_flush_handle_interrupt" ]; then + cmd_line=$(echo "$cmd_line --fuse-flush-handle-interrupt=$fuse_flush_handle_interrupt"); + fi + # if trasnport type is specified, we have to append it to # volume name, so that it fetches the right client vol file @@ -524,6 +528,9 @@ with_options() [ $value = "false" ] ; then no_root_squash=1; fi ;; + "fuse-flush-handle-interrupt") + fuse_flush_handle_interrupt=$value + ;; "context"|"fscontext"|"defcontext"|"rootcontext") # standard SElinux mount options to pass to the kernel [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," -- 1.8.3.1