Blob Blame History Raw
From fc4bebd605b6a579a4d19c6640aca38057397c77 Mon Sep 17 00:00:00 2001
From: Csaba Henk <csaba@redhat.com>
Date: Tue, 21 Aug 2018 12:44:54 +0200
Subject: [PATCH 527/529] fuse: diagnostic FLUSH interrupt

We add dummy interrupt handling for the FLUSH
fuse message. It can be enabled by the
"--fuse-flush-handle-interrupt" hidden command line
option, or "-ofuse-flush-handle-interrupt=yes"
mount option.

It serves no other than diagnostic & demonstational
purposes -- to exercise the interrupt handling framework
a bit and to give an usage example.

Documentation is also provided that showcases interrupt
handling via FLUSH.

Upstream: https://review.gluster.org/20876
> Change-Id: I522f1e798501d06b74ac3592a5f73c1ab0590c60
> updates: #465
> Signed-off-by: Csaba Henk <csaba@redhat.com>

Change-Id: I510aff8895a3fe5858ab313c47514de7087d08c1
BUG: 1595246
Signed-off-by: Csaba Henk <csaba@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/162550
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 doc/developer-guide/Developers-Index.md     |   5 ++
 doc/developer-guide/fuse-interrupt.md       | 130 ++++++++++++++++++++++++++++
 glusterfsd/src/glusterfsd.c                 |  53 +++++++++++-
 glusterfsd/src/glusterfsd.h                 |   1 +
 libglusterfs/src/glusterfs.h                |   2 +
 tests/features/interrupt.t                  |  67 ++++++++++++++
 tests/features/open_and_sleep.c             |  27 ++++++
 xlators/mount/fuse/src/fuse-bridge.c        |  59 +++++++++++++
 xlators/mount/fuse/src/fuse-bridge.h        |   4 +-
 xlators/mount/fuse/utils/mount.glusterfs.in |   7 ++
 10 files changed, 353 insertions(+), 2 deletions(-)
 create mode 100644 doc/developer-guide/fuse-interrupt.md
 create mode 100644 tests/features/interrupt.t
 create mode 100644 tests/features/open_and_sleep.c

diff --git a/doc/developer-guide/Developers-Index.md b/doc/developer-guide/Developers-Index.md
index 4c6346e..6c00a4a 100644
--- a/doc/developer-guide/Developers-Index.md
+++ b/doc/developer-guide/Developers-Index.md
@@ -59,6 +59,11 @@ Translators
 -   [Storage/posix Translator](./posix.md)
 -   [Compression translator](./network_compression.md)
 
+Fuse
+----
+
+-   [Interrupt handling](./fuse-interrupt.md)
+
 Testing/Debugging
 -----------------
 
diff --git a/doc/developer-guide/fuse-interrupt.md b/doc/developer-guide/fuse-interrupt.md
new file mode 100644
index 0000000..f92b553
--- /dev/null
+++ b/doc/developer-guide/fuse-interrupt.md
@@ -0,0 +1,130 @@
+# Fuse interrupt handling
+
+## Conventions followed
+
+- *FUSE* refers to the "wire protocol" between kernel and userspace and
+  related specifications.
+- *fuse* refers to the kernel subsystem and also to the GlusterFs translator.
+
+## FUSE interrupt handling spec
+
+The [Linux kernel FUSE documentation](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148)
+desrcibes how interrupt handling happens in fuse.
+
+## Interrupt handling in the fuse translator
+
+### Declarations
+
+This document describes the internal API in the fuse translator with which
+interrupt can be handled.
+
+The API being internal (to be used only in fuse-bridge.c; the functions are
+not exported to a header file).
+
+```
+enum fuse_interrupt_state {
+    INTERRUPT_NONE,
+    INTERRUPT_SQUELCHED,
+    INTERRUPT_HANDLED,
+};
+typedef enum fuse_interrupt_state fuse_interrupt_state_t;
+struct fuse_interrupt_record;
+typedef struct fuse_interrupt_record fuse_interrupt_record_t;
+typedef void (*fuse_interrupt_handler_t)(xlator_t *this,
+                                         fuse_interrupt_record_t *);
+struct fuse_interrupt_record {
+    fuse_in_header_t fuse_in_header;
+    void *data;
+    /*
+       ...
+     */
+};
+
+fuse_interrupt_record_t *
+fuse_interrupt_record_new(fuse_in_header_t *finh,
+                          fuse_interrupt_handler_t handler);
+
+void
+fuse_interrupt_record_insert(xlator_t *this, fuse_interrupt_record_t *fir);
+
+gf_boolean_t
+fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this,
+                          gf_boolean_t sync, void **datap);
+
+void
+fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir,
+                                fuse_interrupt_state_t intstat,
+                                gf_boolean_t sync, void **datap);
+```
+
+The code demonstrates the usage of the API through `fuse_flush()`. (It's a
+dummy implementation only for demonstration purposes.) Flush is chosen
+because a `FLUSH` interrupt is easy to trigger (see
+*tests/features/interrupt.t*). Interrupt handling for flush is switched on
+by `--fuse-flush-handle-interrupt` (a hidden glusterfs command line flag).
+The flush interrupt handling code is guarded by the
+`flush_handle_interrupt` Boolean member of `fuse_private_t`.
+
+### Usage
+
+A given FUSE fop can be enabled to handle interrupts via the following
+steps:
+
+- Define a handler function (of type `fuse_interrupt_handler_t`).
+  It should implement the interrupt handling logic and in the end
+  call (directly or as async callback) `fuse_interrupt_finish_interrupt()`.
+  The `intstat` argument to `fuse_interrupt_finish_interrupt` should be
+  either `INTERRUPT_SQUELCHED` or `INTERRUPT_HANDLED`.
+    - `INTERRUPT_SQUELCHED` means that we choose not to handle the interrupt
+      and the fop is going on uninterrupted.
+    - `INTERRUPT_HANDLED` means that the interrupt was actually handled. In
+      this case the fop will be answered from interrupt context with errno
+      `EINTR` (that is, the fop should not send a response to the kernel).
+
+  We return to the `sync` and `datap` arguments later.
+- In the `fuse_<FOP>` function create an interrupt record using
+  `fuse_interrupt_record_new()`, passing the incoming `fuse_in_header` and
+  the above handler function to it.
+    - Arbitrary further data can be referred to via the `data` member of the
+      interrupt record that is to be passed on from fop context to
+      interrupt context.
+- When it's set up, pass the interrupt record to
+  `fuse_interrupt_record_insert()`.
+- In `fuse_<FOP>_cbk` call `fuse_interrupt_finish_fop()`.
+    - `fuse_interrupt_finish_fop()` returns a Boolean according to whether the
+      interrupt was handled. If it was, then the fuse request is already
+      answered and the stack gets destroyed in `fuse_interrupt_finish_fop` so
+      `fuse_<FOP>_cbk` can just return (zero). Otherwise follow the standard
+      cbk logic (answer the fuse request and destroy the stack -- these are
+      typically accomplished by `fuse_err_cbk()`).
+- The last two argument of `fuse_interrupt_finish_fop()` and
+  `fuse_interrupt_finish_interrupt()` are `gf_boolean_t sync` and
+  `void **datap`.
+    - `sync` represents the strategy for freeing the interrupt record. The
+      interrupt handler and the fop handler are in race to get at the interrupt
+      record first (interrupt handler for purposes of doing the interrupt
+      handling, fop handler for purposes of deactivating the interrupt record
+      upon completion of the fop handling).
+        - If `sync` is true, then the fop handler will wait for the interrupt
+          handler to finish and it takes care of freeing.
+        - If `sync` is false, the loser of the above race will perform freeing.
+
+      Freeing is done within the respective interrupt finish routines, except
+      for the `data` field of the interrupt record; with respect to that, see
+      the discussion of the `datap` parameter below. The strategy has to be
+      consensual, that is, `fuse_interrupt_finish_fop()` and
+      `fuse_interrupt_finish_interrupt()` must pass the same value for `sync`.
+      If dismantling the resources associated with the interrupt record is
+      simple, `sync = _gf_false` is the suggested choice; `sync = _gf_true` can
+      be useful in the opposite case, when dismantling those resources would
+      be inconvenient to implement in two places or to enact in non-fop context.
+    - If `datap` is `NULL`, the `data` member of the interrupt record will be
+      freed within the interrupt finish routine.  If it points to a valid
+      `void *` pointer, and if caller is doing the cleanup (see `sync` above),
+      then that pointer will be directed to the `data` member of the interrupt
+      record and it's up to the caller what it's doing with it.
+        - If `sync` is true, interrupt handler can use `datap = NULL`, and
+          fop handler will have `datap` set.
+        - If `sync` is false, and handlers pass a pointer to a pointer for
+          `datap`, they should check if the pointed pointer is NULL before
+          attempting to deal with the data.
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 2e2cd77..9c536cd 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -243,6 +243,9 @@ static struct argp_option gf_options[] = {
          OPTION_ARG_OPTIONAL, "disable/enable fuse event-history"},
         {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER",
          OPTION_ARG_OPTIONAL, "set fuse reader thread count"},
+        {"fuse-flush-handle-interrupt", ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY,
+         "BOOL", OPTION_ARG_OPTIONAL | OPTION_HIDDEN,
+         "handle interrupt in fuse FLUSH handler"},
         {0, 0, 0, 0, "Miscellaneous Options:"},
         {0, }
 };
@@ -581,6 +584,38 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options)
                         goto err;
                 }
         }
+        switch (cmd_args->fuse_flush_handle_interrupt) {
+                case GF_OPTION_ENABLE:
+                        ret = dict_set_static_ptr (options,
+                                                   "flush-handle-interrupt",
+                                                   "on");
+                        if (ret < 0) {
+                                gf_msg ("glusterfsd", GF_LOG_ERROR, 0,
+                                        glusterfsd_msg_4,
+                                        "failed to set dict value for key "
+                                        "flush-handle-interrupt");
+                                goto err;
+                        }
+                        break;
+                case GF_OPTION_DISABLE:
+                        ret = dict_set_static_ptr (options,
+                                                   "flush-handle-interrupt",
+                                                   "off");
+                        if (ret < 0) {
+                                gf_msg ("glusterfsd", GF_LOG_ERROR, 0,
+                                        glusterfsd_msg_4,
+                                        "failed to set dict value for key "
+                                         "flush-handle-interrupt");
+                                goto err;
+                        }
+                        break;
+                case GF_OPTION_DEFERRED: /* default */
+                default:
+                        gf_msg_debug ("glusterfsd", 0,
+                                      "fuse-flush-handle-interrupt mode %d",
+                                      cmd_args->fuse_flush_handle_interrupt);
+                        break;
+        }
 
         ret = 0;
 err:
@@ -1352,7 +1387,22 @@ no_oom_api:
                 }
 
                 break;
-	}
+        case ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY:
+                if (!arg)
+                        arg = "yes";
+
+                if (gf_string2boolean(arg, &b) == 0) {
+                        cmd_args->fuse_flush_handle_interrupt = b;
+
+                        break;
+                }
+
+                argp_failure(state, -1, 0,
+                             "unknown fuse flush handle interrupt "
+                             "setting \"%s\"",
+                             arg);
+                break;
+        }
 
         return 0;
 }
@@ -1648,6 +1698,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
         cmd_args->fuse_attribute_timeout = -1;
         cmd_args->fuse_entry_timeout = -1;
 	cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED;
+        cmd_args->fuse_flush_handle_interrupt = GF_OPTION_DEFERRED;
 
         if (ctx->mem_acct_enable)
                 cmd_args->mem_acct = 1;
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 1550a30..28b514a 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -101,6 +101,7 @@ enum argp_option_keys {
         ARGP_FUSE_EVENT_HISTORY_KEY       = 179,
         ARGP_READER_THREAD_COUNT_KEY      = 180,
         ARGP_FUSE_LRU_LIMIT_KEY           = 190,
+        ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 191,
 };
 
 struct _gfd_vol_top_priv {
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 2690306..9fa066e 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -448,6 +448,8 @@ struct _cmd_args {
 
         char              *event_history;
         uint32_t           reader_thread_count;
+
+        int fuse_flush_handle_interrupt;
 };
 typedef struct _cmd_args cmd_args_t;
 
diff --git a/tests/features/interrupt.t b/tests/features/interrupt.t
new file mode 100644
index 0000000..476d875
--- /dev/null
+++ b/tests/features/interrupt.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=4
+
+cleanup;
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/open_and_sleep.c
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function log-file-name()
+{
+    logfilename=$M0".log"
+    echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`log-file-name`
+
+function test_interrupt {
+        local handlebool="$1"
+        local logpattern="$2"
+
+        TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --fuse-flush-handle-interrupt=$handlebool --log-level=DEBUG $M0
+
+        # If the test helper fails (which is considered a setup error, not failure of the test
+        # case itself), kill will be invoked without argument, and that will be the actual
+        # error which is caught.
+        TEST "./$(dirname $0)/open_and_sleep $M0/testfile | { sleep 0.1; xargs -n1 kill -INT; }"
+
+        TEST "grep -E '$logpattern' $log_file"
+        # Basic sanity check, making sure filesystem has not crashed.
+        TEST test -f $M0/testfile
+}
+
+# Theoretically FLUSH might finish before INTERRUPT is handled,
+# in which case we'd get the "no handler found" message (but it's unlikely).
+test_interrupt yes 'FLUSH.*interrupt handler triggered|INTERRUPT.*no handler found'
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+test_interrupt no 'INTERRUPT.*no handler found'
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup_tester $(dirname $0)/open_and_sleep;
+cleanup;
diff --git a/tests/features/open_and_sleep.c b/tests/features/open_and_sleep.c
new file mode 100644
index 0000000..da089e9
--- /dev/null
+++ b/tests/features/open_and_sleep.c
@@ -0,0 +1,27 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+int
+main (int argc, char **argv)
+{
+        pid_t pid;
+        int fd;
+
+        if (argc >= 2) {
+                fd = open (argv[1], O_RDWR | O_CREAT, 0644);
+                if (fd == -1) {
+                        fprintf (stderr, "cannot open/create %s\n", argv[1]);
+                        return 1;
+                }
+        }
+
+        pid = getpid ();
+        printf ("%d\n", pid);
+        fflush (stdout);
+
+        for (;;)
+                sleep (1);
+
+        return 0;
+}
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 0d4b9db..44c39e4 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -1779,6 +1779,21 @@ fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 }
 
 static int
+fuse_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        fuse_private_t *priv = this->private;
+
+        if (priv->flush_handle_interrupt) {
+                if (fuse_interrupt_finish_fop (frame, this, _gf_false, NULL)) {
+                        return 0;
+                }
+        }
+
+        return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+}
+
+static int
 fuse_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
                 struct iatt *postbuf, dict_t *xdata)
@@ -2961,6 +2976,19 @@ fuse_flush_resume (fuse_state_t *state)
 {
         FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH,
                   flush, state->fd, state->xdata);
+        FUSE_FOP (state, fuse_flush_cbk, GF_FOP_FLUSH,
+                  flush, state->fd, state->xdata);
+}
+
+static void
+fuse_flush_interrupt_handler (xlator_t *this, fuse_interrupt_record_t *fir)
+{
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "FLUSH unique %" PRIu64 ": interrupt handler triggered",
+                fir->fuse_in_header.unique);
+
+        fuse_interrupt_finish_interrupt (this, fir, INTERRUPT_HANDLED,
+                                         _gf_false, NULL);
 }
 
 static void
@@ -2968,6 +2996,7 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg,
             struct iobuf *iobuf)
 {
         struct fuse_flush_in *ffi = msg;
+        fuse_private_t *priv = NULL;
 
         fuse_state_t *state = NULL;
         fd_t         *fd = NULL;
@@ -2976,6 +3005,27 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg,
         fd = FH_TO_FD (ffi->fh);
         state->fd = fd;
 
+        priv = this->private;
+        if (priv->flush_handle_interrupt) {
+                fuse_interrupt_record_t *fir = NULL;
+
+                fir = fuse_interrupt_record_new (finh,
+                                                 fuse_flush_interrupt_handler);
+                if (!fir) {
+                        send_fuse_err (this, finh, ENOMEM);
+
+                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                "FLUSH unique %" PRIu64
+                                ":"
+                                " interrupt record allocation failed",
+                                finh->unique);
+                        free_fuse_state (state);
+
+                        return;
+                }
+                fuse_interrupt_record_insert (this, fir);
+        }
+
         fuse_resolve_fd_init (state, &state->resolve, fd);
 
         state->lk_owner = ffi->lock_owner;
@@ -6226,6 +6276,9 @@ init (xlator_t *this_xl)
         GF_OPTION_INIT("event-history", priv->event_history, bool,
                        cleanup_exit);
 
+        GF_OPTION_INIT ("flush-handle-interrupt", priv->flush_handle_interrupt, bool,
+                                   cleanup_exit);
+
         /* user has set only background-qlen, not congestion-threshold,
            use the fuse kernel driver formula to set congestion. ie, 75% */
         if (dict_get (this_xl->options, "background-qlen") &&
@@ -6552,5 +6605,11 @@ struct volume_options options[] = {
          .description = "makes glusterfs invalidate kernel inodes after "
          "reaching this limit (0 means 'unlimited')",
         },
+        { .key = {"flush-handle-interrupt"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "false",
+          .description = "Handle iterrupts in FLUSH handler (for testing "
+          "purposes).",
+        },
         { .key = {NULL} },
 };
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index ba3e000..e18469d 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -157,6 +157,8 @@ struct fuse_private {
         /* Interrupt subscription */
         struct list_head interrupt_list;
         pthread_mutex_t interrupt_mutex;
+
+        gf_boolean_t flush_handle_interrupt;
 };
 typedef struct fuse_private fuse_private_t;
 
@@ -191,7 +193,7 @@ typedef struct fuse_interrupt_record fuse_interrupt_record_t;
 typedef void (*fuse_interrupt_handler_t) (xlator_t *this,
                                           fuse_interrupt_record_t *);
 struct fuse_interrupt_record {
-        struct fuse_in_header fuse_in_header;
+        fuse_in_header_t fuse_in_header;
         void *data;
         gf_boolean_t hit;
         fuse_interrupt_state_t interrupt_state;
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 9a0404f..a3a9fbd 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -273,6 +273,10 @@ start_glusterfs ()
         cmd_line=$(echo "$cmd_line --dump-fuse=$dump_fuse");
     fi
 
+    if [ -n "$fuse_flush_handle_interrupt" ]; then
+        cmd_line=$(echo "$cmd_line --fuse-flush-handle-interrupt=$fuse_flush_handle_interrupt");
+    fi
+
     # if trasnport type is specified, we have to append it to
     # volume name, so that it fetches the right client vol file
 
@@ -524,6 +528,9 @@ with_options()
                 [ $value = "false" ] ; then
                 no_root_squash=1;
             fi ;;
+        "fuse-flush-handle-interrupt")
+            fuse_flush_handle_interrupt=$value
+            ;;
         "context"|"fscontext"|"defcontext"|"rootcontext")
             # standard SElinux mount options to pass to the kernel
             [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts,"
-- 
1.8.3.1