cb8e9e
From 4b38c3ac3178769ec593cbe0906ffa48d67587cb Mon Sep 17 00:00:00 2001
cb8e9e
From: Venky Shankar <vshankar@redhat.com>
cb8e9e
Date: Tue, 9 Jun 2015 10:02:11 +0530
cb8e9e
Subject: [PATCH 160/190] features/bitrot: handle scrub states via state machine
cb8e9e
cb8e9e
    Backport of http://review.gluster.org/11149
cb8e9e
cb8e9e
A number of scrubber command line options motivated the use of a
cb8e9e
state machine to track the scrubber's current state under the various
cb8e9e
circumstances in which those options could be in effect.
cb8e9e
cb8e9e
Change-Id: Id614bb2e6af30a90d2391ea31ae0a3edeb4e0d69
cb8e9e
BUG: 1232309
cb8e9e
Signed-off-by: Venky Shankar <vshankar@redhat.com>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/51745
cb8e9e
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
cb8e9e
Tested-by: Raghavendra Bhat <raghavendra@redhat.com>
cb8e9e
---
cb8e9e
 xlators/features/bit-rot/src/bitd/Makefile.am     |    4 +-
cb8e9e
 xlators/features/bit-rot/src/bitd/bit-rot-scrub.c |  181 +++++++++++++++++----
cb8e9e
 xlators/features/bit-rot/src/bitd/bit-rot-scrub.h |    9 +-
cb8e9e
 xlators/features/bit-rot/src/bitd/bit-rot-ssm.c   |   91 +++++++++++
cb8e9e
 xlators/features/bit-rot/src/bitd/bit-rot-ssm.h   |   36 ++++
cb8e9e
 xlators/features/bit-rot/src/bitd/bit-rot.c       |   40 +++--
cb8e9e
 xlators/features/bit-rot/src/bitd/bit-rot.h       |   22 +++
cb8e9e
 7 files changed, 331 insertions(+), 52 deletions(-)
cb8e9e
 create mode 100644 xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
cb8e9e
 create mode 100644 xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
cb8e9e
cb8e9e
diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am
cb8e9e
index fd42ebe..154cdfb 100644
cb8e9e
--- a/xlators/features/bit-rot/src/bitd/Makefile.am
cb8e9e
+++ b/xlators/features/bit-rot/src/bitd/Makefile.am
cb8e9e
@@ -9,11 +9,11 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
cb8e9e
                  -I$(CONTRIBDIR)/timer-wheel \
cb8e9e
                  -I$(top_srcdir)/xlators/features/bit-rot/src/stub
cb8e9e
 
cb8e9e
-bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c
cb8e9e
+bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c
cb8e9e
 bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
cb8e9e
                     $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la
cb8e9e
 
cb8e9e
-noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h
cb8e9e
+noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h
cb8e9e
 
cb8e9e
 AM_CFLAGS = -Wall $(GF_CFLAGS)
cb8e9e
 
cb8e9e
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
cb8e9e
index d6ee413..af31a3c 100644
cb8e9e
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
cb8e9e
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
cb8e9e
@@ -528,6 +528,33 @@ br_fsscanner_handle_entry (xlator_t *subvol,
cb8e9e
         return -1;
cb8e9e
 }
cb8e9e
 
cb8e9e
+int32_t
cb8e9e
+br_fsscan_deactivate (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        int ret = 0;
cb8e9e
+        br_private_t *priv = NULL;
cb8e9e
+        br_scrub_state_t nstate = 0;
cb8e9e
+        struct br_scanfs *fsscan = NULL;
cb8e9e
+
cb8e9e
+        priv = this->private;
cb8e9e
+        fsscan = &child->fsscan;
cb8e9e
+
cb8e9e
+        ret = gf_tw_del_timer (priv->timer_wheel, fsscan->timer);
cb8e9e
+        if (ret == 0) {
cb8e9e
+                nstate = BR_SCRUB_STATE_STALLED;
cb8e9e
+                gf_log (this->name, GF_LOG_INFO, "Brick [%s] is under active "
cb8e9e
+                        "scrubbing. Pausing scrub..", child->brick_path);
cb8e9e
+        } else {
cb8e9e
+                nstate = BR_SCRUB_STATE_PAUSED;
cb8e9e
+                gf_log (this->name, GF_LOG_INFO,
cb8e9e
+                        "Scrubber paused [Brick: %s]", child->brick_path);
cb8e9e
+        }
cb8e9e
+
cb8e9e
+        _br_child_set_scrub_state (child, nstate);
cb8e9e
+
cb8e9e
+        return 0;
cb8e9e
+}
cb8e9e
+
cb8e9e
 static inline void
cb8e9e
 br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx)
cb8e9e
 {
cb8e9e
@@ -563,22 +590,51 @@ br_fsscanner_wait_until_kicked (struct br_scanfs *fsscan)
cb8e9e
         pthread_cleanup_pop (0);
cb8e9e
 }
cb8e9e
 
cb8e9e
+static inline void
cb8e9e
+br_fsscanner_entry_control (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        struct br_scanfs *fsscan = &child->fsscan;
cb8e9e
+
cb8e9e
+        LOCK (&child->lock);
cb8e9e
+        {
cb8e9e
+                if (fsscan->state == BR_SCRUB_STATE_PENDING)
cb8e9e
+                        fsscan->state = BR_SCRUB_STATE_ACTIVE;
cb8e9e
+                br_fsscanner_log_time (this, child, "started");
cb8e9e
+        }
cb8e9e
+        UNLOCK (&child->lock);
cb8e9e
+}
cb8e9e
+
cb8e9e
+static inline void
cb8e9e
+br_fsscanner_exit_control (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        struct br_scanfs *fsscan = &child->fsscan;
cb8e9e
+
cb8e9e
+        LOCK (&child->lock);
cb8e9e
+        {
cb8e9e
+                fsscan->over = _gf_true;
cb8e9e
+                br_fsscanner_log_time (this, child, "finished");
cb8e9e
+
cb8e9e
+                if (fsscan->state == BR_SCRUB_STATE_ACTIVE) {
cb8e9e
+                        (void) br_fsscan_activate (this, child);
cb8e9e
+                } else {
cb8e9e
+                        gf_log (this->name, GF_LOG_INFO, "Brick [%s] waiting "
cb8e9e
+                                "to get rescheduled..", child->brick_path);
cb8e9e
+                }
cb8e9e
+        }
cb8e9e
+        UNLOCK (&child->lock);
cb8e9e
+}
cb8e9e
+
cb8e9e
 void *
cb8e9e
 br_fsscanner (void *arg)
cb8e9e
 {
cb8e9e
         loc_t               loc     = {0,};
cb8e9e
         br_child_t         *child   = NULL;
cb8e9e
         xlator_t           *this    = NULL;
cb8e9e
-        br_private_t       *priv    = NULL;
cb8e9e
         struct br_scanfs   *fsscan  = NULL;
cb8e9e
-        struct br_scrubber *fsscrub = NULL;
cb8e9e
 
cb8e9e
         child = arg;
cb8e9e
         this = child->this;
cb8e9e
-        priv = this->private;
cb8e9e
-
cb8e9e
         fsscan = &child->fsscan;
cb8e9e
-        fsscrub = &priv->fsscrub;
cb8e9e
 
cb8e9e
         THIS = this;
cb8e9e
         loc.inode = child->table->root;
cb8e9e
@@ -586,8 +642,8 @@ br_fsscanner (void *arg)
cb8e9e
         while (1) {
cb8e9e
                 br_fsscanner_wait_until_kicked (fsscan);
cb8e9e
                 {
cb8e9e
-                        /* log start time */
cb8e9e
-                        br_fsscanner_log_time (this, child, "started");
cb8e9e
+                        /* precursor for scrub */
cb8e9e
+                        br_fsscanner_entry_control (this, child);
cb8e9e
 
cb8e9e
                         /* scrub */
cb8e9e
                         (void) syncop_ftw (child->xl,
cb8e9e
@@ -596,15 +652,21 @@ br_fsscanner (void *arg)
cb8e9e
                         if (!list_empty (&fsscan->queued))
cb8e9e
                                 wait_for_scrubbing (this, fsscan);
cb8e9e
 
cb8e9e
-                        /* log finish time */
cb8e9e
-                        br_fsscanner_log_time (this, child, "finished");
cb8e9e
+                        /* scrub exit criteria */
cb8e9e
+                        br_fsscanner_exit_control (this, child);
cb8e9e
                 }
cb8e9e
-                br_fsscan_reschedule (this, child, fsscan, fsscrub, _gf_false);
cb8e9e
         }
cb8e9e
 
cb8e9e
         return NULL;
cb8e9e
 }
cb8e9e
 
cb8e9e
+/**
cb8e9e
+ * Keep this routine extremely simple and do not ever try to acquire
cb8e9e
+ * child->lock here: it may lead to deadlock. Scrubber state is
cb8e9e
+ * modified in br_fsscanner(). An intermediate state change to pause
cb8e9e
+ * changes the scrub state to the _correct_ state by identifying a
cb8e9e
+ * non-pending timer.
cb8e9e
+ */
cb8e9e
 void
cb8e9e
 br_kickstart_scanner (struct gf_tw_timer_list *timer,
cb8e9e
                       void *data, unsigned long calltime)
cb8e9e
@@ -666,28 +728,38 @@ br_fsscan_calculate_timeout (uint32_t boot, uint32_t now, scrub_freq_t freq)
cb8e9e
                 break;
cb8e9e
         case BR_FSSCRUB_FREQ_MONTHLY:
cb8e9e
                 timo = br_fsscan_calculate_delta (boot, now, BR_SCRUB_MONTHLY);
cb8e9e
+                break;
cb8e9e
+        default:
cb8e9e
+                timo = 0;
cb8e9e
         }
cb8e9e
 
cb8e9e
         return timo;
cb8e9e
 }
cb8e9e
 
cb8e9e
 int32_t
cb8e9e
-br_fsscan_schedule (xlator_t *this, br_child_t *child,
cb8e9e
-                    struct br_scanfs *fsscan, struct br_scrubber *fsscrub)
cb8e9e
+br_fsscan_schedule (xlator_t *this, br_child_t *child)
cb8e9e
 {
cb8e9e
         uint32_t timo = 0;
cb8e9e
         br_private_t *priv = NULL;
cb8e9e
         struct timeval tv = {0,};
cb8e9e
         char timestr[1024] = {0,};
cb8e9e
+        struct br_scanfs *fsscan = NULL;
cb8e9e
+        struct br_scrubber *fsscrub = NULL;
cb8e9e
         struct gf_tw_timer_list *timer = NULL;
cb8e9e
 
cb8e9e
         priv = this->private;
cb8e9e
+        fsscan = &child->fsscan;
cb8e9e
+        fsscrub = &priv->fsscrub;
cb8e9e
 
cb8e9e
         (void) gettimeofday (&tv, NULL);
cb8e9e
         fsscan->boot = tv.tv_sec;
cb8e9e
 
cb8e9e
         timo = br_fsscan_calculate_timeout (fsscan->boot,
cb8e9e
                                             fsscan->boot, fsscrub->frequency);
cb8e9e
+        if (timo == 0) {
cb8e9e
+                gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
cb8e9e
+                goto error_return;
cb8e9e
+        }
cb8e9e
 
cb8e9e
         fsscan->timer = GF_CALLOC (1, sizeof (*fsscan->timer),
cb8e9e
                                    gf_br_stub_mt_br_scanner_freq_t);
cb8e9e
@@ -700,7 +772,9 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child,
cb8e9e
         timer->data = child;
cb8e9e
         timer->expires = timo;
cb8e9e
         timer->function = br_kickstart_scanner;
cb8e9e
+
cb8e9e
         gf_tw_add_timer (priv->timer_wheel, timer);
cb8e9e
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
cb8e9e
 
cb8e9e
         gf_time_fmt (timestr, sizeof (timestr),
cb8e9e
                      (fsscan->boot + timo), gf_timefmt_FT);
cb8e9e
@@ -714,39 +788,76 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child,
cb8e9e
 }
cb8e9e
 
cb8e9e
 int32_t
cb8e9e
-br_fsscan_reschedule (xlator_t *this,
cb8e9e
-                      br_child_t *child, struct br_scanfs *fsscan,
cb8e9e
-                      struct br_scrubber *fsscrub, gf_boolean_t pendingcheck)
cb8e9e
+br_fsscan_activate (xlator_t *this, br_child_t *child)
cb8e9e
 {
cb8e9e
-        int32_t ret = 0;
cb8e9e
-        uint32_t timo = 0;
cb8e9e
-        char timestr[1024] = {0,};
cb8e9e
-        struct timeval now = {0,};
cb8e9e
-        br_private_t *priv = NULL;
cb8e9e
+        uint32_t            timo    = 0;
cb8e9e
+        char timestr[1024]          = {0,};
cb8e9e
+        struct timeval      now     = {0,};
cb8e9e
+        br_private_t       *priv    = NULL;
cb8e9e
+        struct br_scanfs   *fsscan  = NULL;
cb8e9e
+        struct br_scrubber *fsscrub = NULL;
cb8e9e
 
cb8e9e
         priv = this->private;
cb8e9e
+        fsscan = &child->fsscan;
cb8e9e
+        fsscrub = &priv->fsscrub;
cb8e9e
 
cb8e9e
         (void) gettimeofday (&now, NULL);
cb8e9e
         timo = br_fsscan_calculate_timeout (fsscan->boot,
cb8e9e
                                             now.tv_sec, fsscrub->frequency);
cb8e9e
+        if (timo == 0) {
cb8e9e
+                gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
cb8e9e
+                return -1;
cb8e9e
+        }
cb8e9e
 
cb8e9e
+        fsscan->over = _gf_false;
cb8e9e
         gf_time_fmt (timestr, sizeof (timestr),
cb8e9e
                      (now.tv_sec + timo), gf_timefmt_FT);
cb8e9e
+        (void) gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo);
cb8e9e
 
cb8e9e
-        if (pendingcheck)
cb8e9e
-                ret = gf_tw_mod_timer_pending (priv->timer_wheel,
cb8e9e
-                                               fsscan->timer, timo);
cb8e9e
-        else
cb8e9e
-                ret = gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo);
cb8e9e
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
cb8e9e
+        gf_log (this->name, GF_LOG_INFO, "Scrubbing for %s rescheduled to run "
cb8e9e
+                "at %s", child->brick_path, timestr);
cb8e9e
+
cb8e9e
+        return 0;
cb8e9e
+}
cb8e9e
+
cb8e9e
+int32_t
cb8e9e
+br_fsscan_reschedule (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        int32_t             ret     = 0;
cb8e9e
+        uint32_t            timo    = 0;
cb8e9e
+        char timestr[1024]          = {0,};
cb8e9e
+        struct timeval      now     = {0,};
cb8e9e
+        br_private_t       *priv    = NULL;
cb8e9e
+        struct br_scanfs   *fsscan  = NULL;
cb8e9e
+        struct br_scrubber *fsscrub = NULL;
cb8e9e
+
cb8e9e
+        priv = this->private;
cb8e9e
+        fsscan = &child->fsscan;
cb8e9e
+        fsscrub = &priv->fsscrub;
cb8e9e
+
cb8e9e
+        (void) gettimeofday (&now, NULL);
cb8e9e
+        timo = br_fsscan_calculate_timeout (fsscan->boot,
cb8e9e
+                                            now.tv_sec, fsscrub->frequency);
cb8e9e
+        if (timo == 0) {
cb8e9e
+                gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
cb8e9e
+                return -1;
cb8e9e
+        }
cb8e9e
+
cb8e9e
+        gf_time_fmt (timestr, sizeof (timestr),
cb8e9e
+                     (now.tv_sec + timo), gf_timefmt_FT);
cb8e9e
 
cb8e9e
-        if (!ret && pendingcheck)
cb8e9e
-                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_RUNNING,
cb8e9e
+        fsscan->over = _gf_false;
cb8e9e
+        ret = gf_tw_mod_timer_pending (priv->timer_wheel, fsscan->timer, timo);
cb8e9e
+        if (ret == 0)
cb8e9e
+                gf_log (this->name, GF_LOG_INFO,
cb8e9e
                         "Scrubber for %s is currently running and would be "
cb8e9e
                         "rescheduled after completion", child->brick_path);
cb8e9e
-        else
cb8e9e
-                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_RESCHEDULED,
cb8e9e
-                        "Scrubbing for %s rescheduled "
cb8e9e
+        else {
cb8e9e
+                _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
cb8e9e
+                gf_log (this->name, GF_LOG_INFO, "Scrubbing for %s rescheduled "
cb8e9e
                         "to run at %s", child->brick_path, timestr);
cb8e9e
+        }
cb8e9e
 
cb8e9e
         return 0;
cb8e9e
 }
cb8e9e
@@ -1131,7 +1242,8 @@ br_scrubber_handle_stall (xlator_t *this, br_private_t *priv,
cb8e9e
 }
cb8e9e
 
cb8e9e
 static int32_t
cb8e9e
-br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
cb8e9e
+br_scrubber_handle_freq (xlator_t *this, br_private_t *priv,
cb8e9e
+                         dict_t *options, gf_boolean_t scrubstall)
cb8e9e
 {
cb8e9e
         int32_t ret  = -1;
cb8e9e
         char *tmp = NULL;
cb8e9e
@@ -1144,6 +1256,9 @@ br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
cb8e9e
         if (ret)
cb8e9e
                 goto error_return;
cb8e9e
 
cb8e9e
+        if (scrubstall)
cb8e9e
+                tmp = BR_SCRUB_STALLED;
cb8e9e
+
cb8e9e
         if (strcasecmp (tmp, "hourly") == 0) {
cb8e9e
                 frequency = BR_FSSCRUB_FREQ_HOURLY;
cb8e9e
         } else if (strcasecmp (tmp, "daily") == 0) {
cb8e9e
@@ -1154,6 +1269,8 @@ br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
cb8e9e
                 frequency = BR_FSSCRUB_FREQ_BIWEEKLY;
cb8e9e
         } else if (strcasecmp (tmp, "monthly") == 0) {
cb8e9e
                 frequency = BR_FSSCRUB_FREQ_MONTHLY;
cb8e9e
+        } else if (strcasecmp (tmp, BR_SCRUB_STALLED) == 0) {
cb8e9e
+                frequency = BR_FSSCRUB_FREQ_STALLED;
cb8e9e
         } else
cb8e9e
                 goto error_return;
cb8e9e
 
cb8e9e
@@ -1205,7 +1322,7 @@ br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
cb8e9e
         if (ret)
cb8e9e
                 goto error_return;
cb8e9e
 
cb8e9e
-        ret = br_scrubber_handle_freq (this, priv, options);
cb8e9e
+        ret = br_scrubber_handle_freq (this, priv, options, scrubstall);
cb8e9e
         if (ret)
cb8e9e
                 goto error_return;
cb8e9e
 
cb8e9e
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
cb8e9e
index 6c4254a..427153c 100644
cb8e9e
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
cb8e9e
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
cb8e9e
@@ -16,11 +16,10 @@
cb8e9e
 
cb8e9e
 void *br_fsscanner (void *);
cb8e9e
 
cb8e9e
-int32_t br_fsscan_schedule (xlator_t *, br_child_t *,
cb8e9e
-                            struct br_scanfs *, struct br_scrubber *);
cb8e9e
-int32_t br_fsscan_reschedule (xlator_t *this,
cb8e9e
-                              br_child_t *child, struct br_scanfs *,
cb8e9e
-                              struct br_scrubber *, gf_boolean_t);
cb8e9e
+int32_t br_fsscan_schedule (xlator_t *, br_child_t *);
cb8e9e
+int32_t br_fsscan_reschedule (xlator_t *, br_child_t *);
cb8e9e
+int32_t br_fsscan_activate (xlator_t *, br_child_t *);
cb8e9e
+int32_t br_fsscan_deactivate (xlator_t *, br_child_t *);
cb8e9e
 
cb8e9e
 int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *);
cb8e9e
 
cb8e9e
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
cb8e9e
new file mode 100644
cb8e9e
index 0000000..c95e555
cb8e9e
--- /dev/null
cb8e9e
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
cb8e9e
@@ -0,0 +1,91 @@
cb8e9e
+/*
cb8e9e
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
cb8e9e
+   This file is part of GlusterFS.
cb8e9e
+
cb8e9e
+   This file is licensed to you under your choice of the GNU Lesser
cb8e9e
+   General Public License, version 3 or any later version (LGPLv3 or
cb8e9e
+   later), or the GNU General Public License, version 2 (GPLv2), in all
cb8e9e
+   cases as published by the Free Software Foundation.
cb8e9e
+*/
cb8e9e
+
cb8e9e
+#include "bit-rot-ssm.h"
cb8e9e
+#include "bit-rot-scrub.h"
cb8e9e
+
cb8e9e
+int br_scrub_ssm_noop (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        return 0;
cb8e9e
+}
cb8e9e
+
cb8e9e
+int
cb8e9e
+br_scrub_ssm_state_pause (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        gf_log (this->name, GF_LOG_INFO,
cb8e9e
+                "Scrubber paused [Brick: %s]", child->brick_path);
cb8e9e
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PAUSED);
cb8e9e
+        return 0;
cb8e9e
+}
cb8e9e
+
cb8e9e
+int
cb8e9e
+br_scrub_ssm_state_ipause (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        gf_log (this->name, GF_LOG_INFO,
cb8e9e
+                "Scrubber paused [Brick: %s]", child->brick_path);
cb8e9e
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_IPAUSED);
cb8e9e
+        return 0;
cb8e9e
+}
cb8e9e
+
cb8e9e
+int
cb8e9e
+br_scrub_ssm_state_active (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        struct br_scanfs *fsscan = &child->fsscan;
cb8e9e
+
cb8e9e
+        if (fsscan->over) {
cb8e9e
+                (void) br_fsscan_activate (this, child);
cb8e9e
+        } else {
cb8e9e
+                gf_log (this->name, GF_LOG_INFO,
cb8e9e
+                        "Scrubbing resumed [Brick %s]", child->brick_path);
cb8e9e
+                _br_child_set_scrub_state (child, BR_SCRUB_STATE_ACTIVE);
cb8e9e
+        }
cb8e9e
+
cb8e9e
+        return 0;
cb8e9e
+}
cb8e9e
+
cb8e9e
+int
cb8e9e
+br_scrub_ssm_state_stall (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        gf_log (this->name, GF_LOG_INFO, "Brick [%s] is under active "
cb8e9e
+                "scrubbing. Pausing scrub..", child->brick_path);
cb8e9e
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_STALLED);
cb8e9e
+        return 0;
cb8e9e
+}
cb8e9e
+
cb8e9e
+static br_scrub_ssm_call *
cb8e9e
+br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = {
cb8e9e
+        {br_fsscan_schedule, br_scrub_ssm_state_ipause},    /* INACTIVE */
cb8e9e
+        {br_fsscan_reschedule, br_fsscan_deactivate},       /* PENDING  */
cb8e9e
+        {br_scrub_ssm_noop, br_scrub_ssm_state_stall},      /* ACTIVE   */
cb8e9e
+        {br_fsscan_activate, br_scrub_ssm_noop},            /* PAUSED   */
cb8e9e
+        {br_fsscan_schedule, br_scrub_ssm_noop},            /* IPAUSED  */
cb8e9e
+        {br_scrub_ssm_state_active, br_scrub_ssm_noop},     /* STALLED  */
cb8e9e
+};
cb8e9e
+
cb8e9e
+int32_t
cb8e9e
+br_scrub_state_machine (xlator_t *this, br_child_t *child)
cb8e9e
+{
cb8e9e
+        br_private_t       *priv      = NULL;
cb8e9e
+        br_scrub_ssm_call  *call      = NULL;
cb8e9e
+        struct br_scanfs   *fsscan    = NULL;
cb8e9e
+        struct br_scrubber *fsscrub   = NULL;
cb8e9e
+        br_scrub_state_t    currstate = 0;
cb8e9e
+        br_scrub_event_t    event     = 0;
cb8e9e
+
cb8e9e
+        priv = this->private;
cb8e9e
+        fsscan = &child->fsscan;
cb8e9e
+        fsscrub = &priv->fsscrub;
cb8e9e
+
cb8e9e
+        currstate = fsscan->state;
cb8e9e
+        event = _br_child_get_scrub_event (fsscrub);
cb8e9e
+
cb8e9e
+        call = br_scrub_ssm[currstate][event];
cb8e9e
+        return call (this, child);
cb8e9e
+}
cb8e9e
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
cb8e9e
new file mode 100644
cb8e9e
index 0000000..72fd62b
cb8e9e
--- /dev/null
cb8e9e
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
cb8e9e
@@ -0,0 +1,36 @@
cb8e9e
+/*
cb8e9e
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
cb8e9e
+   This file is part of GlusterFS.
cb8e9e
+
cb8e9e
+   This file is licensed to you under your choice of the GNU Lesser
cb8e9e
+   General Public License, version 3 or any later version (LGPLv3 or
cb8e9e
+   later), or the GNU General Public License, version 2 (GPLv2), in all
cb8e9e
+   cases as published by the Free Software Foundation.
cb8e9e
+*/
cb8e9e
+
cb8e9e
+#ifndef __BIT_ROT_SSM_H__
cb8e9e
+#define __BIT_ROT_SSM_H__
cb8e9e
+
cb8e9e
+#include "xlator.h"
cb8e9e
+
cb8e9e
+typedef enum br_scrub_state {
cb8e9e
+        BR_SCRUB_STATE_INACTIVE = 0,
cb8e9e
+        BR_SCRUB_STATE_PENDING,
cb8e9e
+        BR_SCRUB_STATE_ACTIVE,
cb8e9e
+        BR_SCRUB_STATE_PAUSED,
cb8e9e
+        BR_SCRUB_STATE_IPAUSED,
cb8e9e
+        BR_SCRUB_STATE_STALLED,
cb8e9e
+        BR_SCRUB_MAXSTATES,
cb8e9e
+} br_scrub_state_t;
cb8e9e
+
cb8e9e
+typedef enum br_scrub_event {
cb8e9e
+        BR_SCRUB_EVENT_SCHEDULE = 0,
cb8e9e
+        BR_SCRUB_EVENT_PAUSE,
cb8e9e
+        BR_SCRUB_MAXEVENTS,
cb8e9e
+} br_scrub_event_t;
cb8e9e
+
cb8e9e
+struct br_child;
cb8e9e
+
cb8e9e
+int32_t br_scrub_state_machine (xlator_t *, struct br_child *);
cb8e9e
+
cb8e9e
+#endif /* __BIT_ROT_SSM_H__ */
cb8e9e
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
cb8e9e
index f2cd1d8..3952f41 100644
cb8e9e
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
cb8e9e
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
cb8e9e
@@ -1092,6 +1092,16 @@ br_set_child_state (br_child_t *child, br_child_state_t state)
cb8e9e
         UNLOCK (&child->lock);
cb8e9e
 }
cb8e9e
 
cb8e9e
+static void
cb8e9e
+br_set_scrub_state (br_child_t *child, br_scrub_state_t state)
cb8e9e
+{
cb8e9e
+        LOCK (&child->lock);
cb8e9e
+        {
cb8e9e
+                _br_child_set_scrub_state (child, state);
cb8e9e
+        }
cb8e9e
+        UNLOCK (&child->lock);
cb8e9e
+}
cb8e9e
+
cb8e9e
 /**
cb8e9e
  * At this point a thread is spawned to crawl the filesystem (in
cb8e9e
  * tortoise pace) to sign objects that were not signed in previous run(s).
cb8e9e
@@ -1155,6 +1165,7 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,
cb8e9e
         priv = this->private;
cb8e9e
 
cb8e9e
         fsscan->kick = _gf_false;
cb8e9e
+        fsscan->over = _gf_false;
cb8e9e
         ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);
cb8e9e
         if (ret != 0) {
cb8e9e
                 gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
cb8e9e
@@ -1166,7 +1177,7 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,
cb8e9e
         /* this needs to be serialized with reconfigure() */
cb8e9e
         pthread_mutex_lock (&priv->lock);
cb8e9e
         {
cb8e9e
-                ret = br_fsscan_schedule (this, child, fsscan, fsscrub);
cb8e9e
+                ret = br_scrub_state_machine (this, child);
cb8e9e
         }
cb8e9e
         pthread_mutex_unlock (&priv->lock);
cb8e9e
         if (ret)
cb8e9e
@@ -1391,6 +1402,11 @@ br_cleanup_scrubber (xlator_t *this, br_child_t *child)
cb8e9e
                 fsscan->timer = NULL;
cb8e9e
         }
cb8e9e
 
cb8e9e
+        /**
cb8e9e
+         * 0x3: reset scrubber state
cb8e9e
+         */
cb8e9e
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE);
cb8e9e
+
cb8e9e
         gf_log (this->name, GF_LOG_INFO,
cb8e9e
                 "Cleaned up scrubber for brick [%s]", child->brick_path);
cb8e9e
 
cb8e9e
@@ -1779,6 +1795,8 @@ br_init_children (xlator_t *this, br_private_t *priv)
cb8e9e
 
cb8e9e
                 LOCK_INIT (&child->lock);
cb8e9e
                 child->witnessed = 0;
cb8e9e
+
cb8e9e
+                br_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE);
cb8e9e
                 br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);
cb8e9e
 
cb8e9e
                 child->this = this;
cb8e9e
@@ -1906,13 +1924,11 @@ fini (xlator_t *this)
cb8e9e
 }
cb8e9e
 
cb8e9e
 static void
cb8e9e
-br_reconfigure_child (xlator_t *this,
cb8e9e
-                      br_child_t *child, struct br_scrubber *fsscrub)
cb8e9e
+br_reconfigure_child (xlator_t *this, br_child_t *child)
cb8e9e
 {
cb8e9e
         int32_t ret = 0;
cb8e9e
-        struct br_scanfs *fsscan = &child->fsscan;
cb8e9e
 
cb8e9e
-        ret = br_fsscan_reschedule (this, child, fsscan, fsscrub, _gf_true);
cb8e9e
+        ret = br_scrub_state_machine (this, child);
cb8e9e
         if (ret) {
cb8e9e
                 gf_log (this->name, GF_LOG_ERROR,
cb8e9e
                         "Could not reschedule scrubber for brick: %s. "
cb8e9e
@@ -1924,14 +1940,12 @@ br_reconfigure_child (xlator_t *this,
cb8e9e
 static int
cb8e9e
 br_reconfigure_scrubber (xlator_t *this, dict_t *options)
cb8e9e
 {
cb8e9e
-        int                 i       = 0;
cb8e9e
-        int32_t             ret     = -1;
cb8e9e
-        br_child_t         *child   = NULL;
cb8e9e
-        br_private_t       *priv    = NULL;
cb8e9e
-        struct br_scrubber *fsscrub = NULL;
cb8e9e
+        int           i     = 0;
cb8e9e
+        int32_t       ret   = -1;
cb8e9e
+        br_child_t   *child = NULL;
cb8e9e
+        br_private_t *priv  = NULL;
cb8e9e
 
cb8e9e
         priv = this->private;
cb8e9e
-        fsscrub = &priv->fsscrub;
cb8e9e
 
cb8e9e
         pthread_mutex_lock (&priv->lock);
cb8e9e
         {
cb8e9e
@@ -1942,7 +1956,7 @@ br_reconfigure_scrubber (xlator_t *this, dict_t *options)
cb8e9e
         if (ret)
cb8e9e
                 goto err;
cb8e9e
 
cb8e9e
-        /* reschedule all _up_ subvolume(s) */
cb8e9e
+        /* change state for all _up_ subvolume(s) */
cb8e9e
         for (; i < priv->child_count; i++) {
cb8e9e
                 child = &priv->children[i];
cb8e9e
 
cb8e9e
@@ -1957,7 +1971,7 @@ br_reconfigure_scrubber (xlator_t *this, dict_t *options)
cb8e9e
                         }
cb8e9e
 
cb8e9e
                         if (_br_is_child_connected (child))
cb8e9e
-                                br_reconfigure_child (this, child, fsscrub);
cb8e9e
+                                br_reconfigure_child (this, child);
cb8e9e
 
cb8e9e
                         /**
cb8e9e
                          * for the rest.. either the child is in initialization
cb8e9e
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
cb8e9e
index 9a55773..6cafd8b 100644
cb8e9e
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
cb8e9e
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
cb8e9e
@@ -7,6 +7,7 @@
cb8e9e
    later), or the GNU General Public License, version 2 (GPLv2), in all
cb8e9e
    cases as published by the Free Software Foundation.
cb8e9e
 */
cb8e9e
+
cb8e9e
 #ifndef __BIT_ROT_H__
cb8e9e
 #define __BIT_ROT_H__
cb8e9e
 
cb8e9e
@@ -26,6 +27,7 @@
cb8e9e
 #include "timer-wheel.h"
cb8e9e
 
cb8e9e
 #include "bit-rot-tbf.h"
cb8e9e
+#include "bit-rot-ssm.h"
cb8e9e
 
cb8e9e
 #include "bit-rot-common.h"
cb8e9e
 #include "bit-rot-stub-mem-types.h"
cb8e9e
@@ -52,6 +54,7 @@ typedef enum scrub_freq {
cb8e9e
         BR_FSSCRUB_FREQ_WEEKLY,
cb8e9e
         BR_FSSCRUB_FREQ_BIWEEKLY,
cb8e9e
         BR_FSSCRUB_FREQ_MONTHLY,
cb8e9e
+        BR_FSSCRUB_FREQ_STALLED,
cb8e9e
 } scrub_freq_t;
cb8e9e
 
cb8e9e
 #define signature_size(hl) (sizeof (br_isignature_t) + hl + 1)
cb8e9e
@@ -69,6 +72,9 @@ struct br_scanfs {
cb8e9e
         /* scheduler */
cb8e9e
         uint32_t boot;
cb8e9e
         gf_boolean_t kick;
cb8e9e
+        gf_boolean_t over;
cb8e9e
+
cb8e9e
+        br_scrub_state_t state;   /* current scrub state */
cb8e9e
 
cb8e9e
         pthread_mutex_t wakelock;
cb8e9e
         pthread_cond_t  wakecond;
cb8e9e
@@ -203,6 +209,7 @@ struct br_object {
cb8e9e
 };
cb8e9e
 
cb8e9e
 typedef struct br_object br_object_t;
cb8e9e
+typedef int32_t (br_scrub_ssm_call) (xlator_t *, br_child_t *);
cb8e9e
 
cb8e9e
 void
cb8e9e
 br_log_object (xlator_t *, char *, uuid_t, int32_t);
cb8e9e
@@ -244,4 +251,19 @@ _br_child_witnessed_connection (br_child_t *child)
cb8e9e
         return (child->witnessed == 1);
cb8e9e
 }
cb8e9e
 
cb8e9e
+/* scrub state */
cb8e9e
+static inline void
cb8e9e
+_br_child_set_scrub_state (br_child_t *child, br_scrub_state_t state)
cb8e9e
+{
cb8e9e
+        struct br_scanfs *fsscan = &child->fsscan;
cb8e9e
+        fsscan->state = state;
cb8e9e
+}
cb8e9e
+
cb8e9e
+static inline br_scrub_event_t
cb8e9e
+_br_child_get_scrub_event (struct br_scrubber *fsscrub)
cb8e9e
+{
cb8e9e
+        return (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED)
cb8e9e
+                ? BR_SCRUB_EVENT_PAUSE : BR_SCRUB_EVENT_SCHEDULE;
cb8e9e
+}
cb8e9e
+
cb8e9e
 #endif /* __BIT_ROT_H__ */
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e