Blob Blame History Raw
From 4b38c3ac3178769ec593cbe0906ffa48d67587cb Mon Sep 17 00:00:00 2001
From: Venky Shankar <vshankar@redhat.com>
Date: Tue, 9 Jun 2015 10:02:11 +0530
Subject: [PATCH 160/190] features/bitrot: handle scrub states via state machine

    Backport of http://review.gluster.org/11149

The growing number of command line options for the scrubber prompted
the use of a state machine to track the scrubber's current state under
the various circumstances in which those options can take effect.

Change-Id: Id614bb2e6af30a90d2391ea31ae0a3edeb4e0d69
BUG: 1232309
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/51745
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
Tested-by: Raghavendra Bhat <raghavendra@redhat.com>
---
 xlators/features/bit-rot/src/bitd/Makefile.am     |    4 +-
 xlators/features/bit-rot/src/bitd/bit-rot-scrub.c |  181 +++++++++++++++++----
 xlators/features/bit-rot/src/bitd/bit-rot-scrub.h |    9 +-
 xlators/features/bit-rot/src/bitd/bit-rot-ssm.c   |   91 +++++++++++
 xlators/features/bit-rot/src/bitd/bit-rot-ssm.h   |   36 ++++
 xlators/features/bit-rot/src/bitd/bit-rot.c       |   40 +++--
 xlators/features/bit-rot/src/bitd/bit-rot.h       |   22 +++
 7 files changed, 331 insertions(+), 52 deletions(-)
 create mode 100644 xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
 create mode 100644 xlators/features/bit-rot/src/bitd/bit-rot-ssm.h

diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am
index fd42ebe..154cdfb 100644
--- a/xlators/features/bit-rot/src/bitd/Makefile.am
+++ b/xlators/features/bit-rot/src/bitd/Makefile.am
@@ -9,11 +9,11 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
                  -I$(CONTRIBDIR)/timer-wheel \
                  -I$(top_srcdir)/xlators/features/bit-rot/src/stub
 
-bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c
+bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c
 bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
                     $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la
 
-noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h
+noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h
 
 AM_CFLAGS = -Wall $(GF_CFLAGS)
 
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index d6ee413..af31a3c 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -528,6 +528,33 @@ br_fsscanner_handle_entry (xlator_t *subvol,
         return -1;
 }
 
+int32_t
+br_fsscan_deactivate (xlator_t *this, br_child_t *child)
+{
+        int ret = 0;
+        br_private_t *priv = NULL;
+        br_scrub_state_t nstate = 0;
+        struct br_scanfs *fsscan = NULL;
+
+        priv = this->private;
+        fsscan = &child->fsscan;
+
+        ret = gf_tw_del_timer (priv->timer_wheel, fsscan->timer);
+        if (ret == 0) {
+                nstate = BR_SCRUB_STATE_STALLED;
+                gf_log (this->name, GF_LOG_INFO, "Brick [%s] is under active "
+                        "scrubbing. Pausing scrub..", child->brick_path);
+        } else {
+                nstate = BR_SCRUB_STATE_PAUSED;
+                gf_log (this->name, GF_LOG_INFO,
+                        "Scrubber paused [Brick: %s]", child->brick_path);
+        }
+
+        _br_child_set_scrub_state (child, nstate);
+
+        return 0;
+}
+
 static inline void
 br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx)
 {
@@ -563,22 +590,51 @@ br_fsscanner_wait_until_kicked (struct br_scanfs *fsscan)
         pthread_cleanup_pop (0);
 }
 
+static inline void
+br_fsscanner_entry_control (xlator_t *this, br_child_t *child)
+{
+        struct br_scanfs *fsscan = &child->fsscan;
+
+        LOCK (&child->lock);
+        {
+                if (fsscan->state == BR_SCRUB_STATE_PENDING)
+                        fsscan->state = BR_SCRUB_STATE_ACTIVE;
+                br_fsscanner_log_time (this, child, "started");
+        }
+        UNLOCK (&child->lock);
+}
+
+static inline void
+br_fsscanner_exit_control (xlator_t *this, br_child_t *child)
+{
+        struct br_scanfs *fsscan = &child->fsscan;
+
+        LOCK (&child->lock);
+        {
+                fsscan->over = _gf_true;
+                br_fsscanner_log_time (this, child, "finished");
+
+                if (fsscan->state == BR_SCRUB_STATE_ACTIVE) {
+                        (void) br_fsscan_activate (this, child);
+                } else {
+                        gf_log (this->name, GF_LOG_INFO, "Brick [%s] waiting "
+                                "to get rescheduled..", child->brick_path);
+                }
+        }
+        UNLOCK (&child->lock);
+}
+
 void *
 br_fsscanner (void *arg)
 {
         loc_t               loc     = {0,};
         br_child_t         *child   = NULL;
         xlator_t           *this    = NULL;
-        br_private_t       *priv    = NULL;
         struct br_scanfs   *fsscan  = NULL;
-        struct br_scrubber *fsscrub = NULL;
 
         child = arg;
         this = child->this;
-        priv = this->private;
-
         fsscan = &child->fsscan;
-        fsscrub = &priv->fsscrub;
 
         THIS = this;
         loc.inode = child->table->root;
@@ -586,8 +642,8 @@ br_fsscanner (void *arg)
         while (1) {
                 br_fsscanner_wait_until_kicked (fsscan);
                 {
-                        /* log start time */
-                        br_fsscanner_log_time (this, child, "started");
+                        /* precursor for scrub */
+                        br_fsscanner_entry_control (this, child);
 
                         /* scrub */
                         (void) syncop_ftw (child->xl,
@@ -596,15 +652,21 @@ br_fsscanner (void *arg)
                         if (!list_empty (&fsscan->queued))
                                 wait_for_scrubbing (this, fsscan);
 
-                        /* log finish time */
-                        br_fsscanner_log_time (this, child, "finished");
+                        /* scrub exit criteria */
+                        br_fsscanner_exit_control (this, child);
                 }
-                br_fsscan_reschedule (this, child, fsscan, fsscrub, _gf_false);
         }
 
         return NULL;
 }
 
+/**
+ * Keep this routine extremely simple and never try to acquire
+ * child->lock here: doing so may lead to a deadlock. Scrubber state
+ * is modified in br_fsscanner(). If a pause request arrives in the
+ * meantime, the pause path picks the _correct_ state by checking
+ * whether the timer is still pending.
+ */
 void
 br_kickstart_scanner (struct gf_tw_timer_list *timer,
                       void *data, unsigned long calltime)
@@ -666,28 +728,38 @@ br_fsscan_calculate_timeout (uint32_t boot, uint32_t now, scrub_freq_t freq)
                 break;
         case BR_FSSCRUB_FREQ_MONTHLY:
                 timo = br_fsscan_calculate_delta (boot, now, BR_SCRUB_MONTHLY);
+                break;
+        default:
+                timo = 0;
         }
 
         return timo;
 }
 
 int32_t
-br_fsscan_schedule (xlator_t *this, br_child_t *child,
-                    struct br_scanfs *fsscan, struct br_scrubber *fsscrub)
+br_fsscan_schedule (xlator_t *this, br_child_t *child)
 {
         uint32_t timo = 0;
         br_private_t *priv = NULL;
         struct timeval tv = {0,};
         char timestr[1024] = {0,};
+        struct br_scanfs *fsscan = NULL;
+        struct br_scrubber *fsscrub = NULL;
         struct gf_tw_timer_list *timer = NULL;
 
         priv = this->private;
+        fsscan = &child->fsscan;
+        fsscrub = &priv->fsscrub;
 
         (void) gettimeofday (&tv, NULL);
         fsscan->boot = tv.tv_sec;
 
         timo = br_fsscan_calculate_timeout (fsscan->boot,
                                             fsscan->boot, fsscrub->frequency);
+        if (timo == 0) {
+                gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
+                goto error_return;
+        }
 
         fsscan->timer = GF_CALLOC (1, sizeof (*fsscan->timer),
                                    gf_br_stub_mt_br_scanner_freq_t);
@@ -700,7 +772,9 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child,
         timer->data = child;
         timer->expires = timo;
         timer->function = br_kickstart_scanner;
+
         gf_tw_add_timer (priv->timer_wheel, timer);
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
 
         gf_time_fmt (timestr, sizeof (timestr),
                      (fsscan->boot + timo), gf_timefmt_FT);
@@ -714,39 +788,76 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child,
 }
 
 int32_t
-br_fsscan_reschedule (xlator_t *this,
-                      br_child_t *child, struct br_scanfs *fsscan,
-                      struct br_scrubber *fsscrub, gf_boolean_t pendingcheck)
+br_fsscan_activate (xlator_t *this, br_child_t *child)
 {
-        int32_t ret = 0;
-        uint32_t timo = 0;
-        char timestr[1024] = {0,};
-        struct timeval now = {0,};
-        br_private_t *priv = NULL;
+        uint32_t            timo    = 0;
+        char timestr[1024]          = {0,};
+        struct timeval      now     = {0,};
+        br_private_t       *priv    = NULL;
+        struct br_scanfs   *fsscan  = NULL;
+        struct br_scrubber *fsscrub = NULL;
 
         priv = this->private;
+        fsscan = &child->fsscan;
+        fsscrub = &priv->fsscrub;
 
         (void) gettimeofday (&now, NULL);
         timo = br_fsscan_calculate_timeout (fsscan->boot,
                                             now.tv_sec, fsscrub->frequency);
+        if (timo == 0) {
+                gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
+                return -1;
+        }
 
+        fsscan->over = _gf_false;
         gf_time_fmt (timestr, sizeof (timestr),
                      (now.tv_sec + timo), gf_timefmt_FT);
+        (void) gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo);
 
-        if (pendingcheck)
-                ret = gf_tw_mod_timer_pending (priv->timer_wheel,
-                                               fsscan->timer, timo);
-        else
-                ret = gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo);
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
+        gf_log (this->name, GF_LOG_INFO, "Scrubbing for %s rescheduled to run "
+                "at %s", child->brick_path, timestr);
+
+        return 0;
+}
+
+int32_t
+br_fsscan_reschedule (xlator_t *this, br_child_t *child)
+{
+        int32_t             ret     = 0;
+        uint32_t            timo    = 0;
+        char timestr[1024]          = {0,};
+        struct timeval      now     = {0,};
+        br_private_t       *priv    = NULL;
+        struct br_scanfs   *fsscan  = NULL;
+        struct br_scrubber *fsscrub = NULL;
+
+        priv = this->private;
+        fsscan = &child->fsscan;
+        fsscrub = &priv->fsscrub;
+
+        (void) gettimeofday (&now, NULL);
+        timo = br_fsscan_calculate_timeout (fsscan->boot,
+                                            now.tv_sec, fsscrub->frequency);
+        if (timo == 0) {
+                gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
+                return -1;
+        }
+
+        gf_time_fmt (timestr, sizeof (timestr),
+                     (now.tv_sec + timo), gf_timefmt_FT);
 
-        if (!ret && pendingcheck)
-                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_RUNNING,
+        fsscan->over = _gf_false;
+        ret = gf_tw_mod_timer_pending (priv->timer_wheel, fsscan->timer, timo);
+        if (ret == 0)
+                gf_log (this->name, GF_LOG_INFO,
                         "Scrubber for %s is currently running and would be "
                         "rescheduled after completion", child->brick_path);
-        else
-                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_RESCHEDULED,
-                        "Scrubbing for %s rescheduled "
+        else {
+                _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
+                gf_log (this->name, GF_LOG_INFO, "Scrubbing for %s rescheduled "
                         "to run at %s", child->brick_path, timestr);
+        }
 
         return 0;
 }
@@ -1131,7 +1242,8 @@ br_scrubber_handle_stall (xlator_t *this, br_private_t *priv,
 }
 
 static int32_t
-br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
+br_scrubber_handle_freq (xlator_t *this, br_private_t *priv,
+                         dict_t *options, gf_boolean_t scrubstall)
 {
         int32_t ret  = -1;
         char *tmp = NULL;
@@ -1144,6 +1256,9 @@ br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
         if (ret)
                 goto error_return;
 
+        if (scrubstall)
+                tmp = BR_SCRUB_STALLED;
+
         if (strcasecmp (tmp, "hourly") == 0) {
                 frequency = BR_FSSCRUB_FREQ_HOURLY;
         } else if (strcasecmp (tmp, "daily") == 0) {
@@ -1154,6 +1269,8 @@ br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
                 frequency = BR_FSSCRUB_FREQ_BIWEEKLY;
         } else if (strcasecmp (tmp, "monthly") == 0) {
                 frequency = BR_FSSCRUB_FREQ_MONTHLY;
+        } else if (strcasecmp (tmp, BR_SCRUB_STALLED) == 0) {
+                frequency = BR_FSSCRUB_FREQ_STALLED;
         } else
                 goto error_return;
 
@@ -1205,7 +1322,7 @@ br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
         if (ret)
                 goto error_return;
 
-        ret = br_scrubber_handle_freq (this, priv, options);
+        ret = br_scrubber_handle_freq (this, priv, options, scrubstall);
         if (ret)
                 goto error_return;
 
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
index 6c4254a..427153c 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -16,11 +16,10 @@
 
 void *br_fsscanner (void *);
 
-int32_t br_fsscan_schedule (xlator_t *, br_child_t *,
-                            struct br_scanfs *, struct br_scrubber *);
-int32_t br_fsscan_reschedule (xlator_t *this,
-                              br_child_t *child, struct br_scanfs *,
-                              struct br_scrubber *, gf_boolean_t);
+int32_t br_fsscan_schedule (xlator_t *, br_child_t *);
+int32_t br_fsscan_reschedule (xlator_t *, br_child_t *);
+int32_t br_fsscan_activate (xlator_t *, br_child_t *);
+int32_t br_fsscan_deactivate (xlator_t *, br_child_t *);
 
 int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *);
 
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
new file mode 100644
index 0000000..c95e555
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
@@ -0,0 +1,91 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#include "bit-rot-ssm.h"
+#include "bit-rot-scrub.h"
+
+int br_scrub_ssm_noop (xlator_t *this, br_child_t *child)
+{
+        return 0;
+}
+
+int
+br_scrub_ssm_state_pause (xlator_t *this, br_child_t *child)
+{
+        gf_log (this->name, GF_LOG_INFO,
+                "Scrubber paused [Brick: %s]", child->brick_path);
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PAUSED);
+        return 0;
+}
+
+int
+br_scrub_ssm_state_ipause (xlator_t *this, br_child_t *child)
+{
+        gf_log (this->name, GF_LOG_INFO,
+                "Scrubber paused [Brick: %s]", child->brick_path);
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_IPAUSED);
+        return 0;
+}
+
+int
+br_scrub_ssm_state_active (xlator_t *this, br_child_t *child)
+{
+        struct br_scanfs *fsscan = &child->fsscan;
+
+        if (fsscan->over) {
+                (void) br_fsscan_activate (this, child);
+        } else {
+                gf_log (this->name, GF_LOG_INFO,
+                        "Scrubbing resumed [Brick %s]", child->brick_path);
+                _br_child_set_scrub_state (child, BR_SCRUB_STATE_ACTIVE);
+        }
+
+        return 0;
+}
+
+int
+br_scrub_ssm_state_stall (xlator_t *this, br_child_t *child)
+{
+        gf_log (this->name, GF_LOG_INFO, "Brick [%s] is under active "
+                "scrubbing. Pausing scrub..", child->brick_path);
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_STALLED);
+        return 0;
+}
+
+static br_scrub_ssm_call *
+br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = {
+        {br_fsscan_schedule, br_scrub_ssm_state_ipause},    /* INACTIVE */
+        {br_fsscan_reschedule, br_fsscan_deactivate},       /* PENDING  */
+        {br_scrub_ssm_noop, br_scrub_ssm_state_stall},      /* ACTIVE   */
+        {br_fsscan_activate, br_scrub_ssm_noop},            /* PAUSED   */
+        {br_fsscan_schedule, br_scrub_ssm_noop},            /* IPAUSED  */
+        {br_scrub_ssm_state_active, br_scrub_ssm_noop},     /* STALLED  */
+};
+
+int32_t
+br_scrub_state_machine (xlator_t *this, br_child_t *child)
+{
+        br_private_t       *priv      = NULL;
+        br_scrub_ssm_call  *call      = NULL;
+        struct br_scanfs   *fsscan    = NULL;
+        struct br_scrubber *fsscrub   = NULL;
+        br_scrub_state_t    currstate = 0;
+        br_scrub_event_t    event     = 0;
+
+        priv = this->private;
+        fsscan = &child->fsscan;
+        fsscrub = &priv->fsscrub;
+
+        currstate = fsscan->state;
+        event = _br_child_get_scrub_event (fsscrub);
+
+        call = br_scrub_ssm[currstate][event];
+        return call (this, child);
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
new file mode 100644
index 0000000..72fd62b
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
@@ -0,0 +1,36 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SSM_H__
+#define __BIT_ROT_SSM_H__
+
+#include "xlator.h"
+
+typedef enum br_scrub_state {
+        BR_SCRUB_STATE_INACTIVE = 0,
+        BR_SCRUB_STATE_PENDING,
+        BR_SCRUB_STATE_ACTIVE,
+        BR_SCRUB_STATE_PAUSED,
+        BR_SCRUB_STATE_IPAUSED,
+        BR_SCRUB_STATE_STALLED,
+        BR_SCRUB_MAXSTATES,
+} br_scrub_state_t;
+
+typedef enum br_scrub_event {
+        BR_SCRUB_EVENT_SCHEDULE = 0,
+        BR_SCRUB_EVENT_PAUSE,
+        BR_SCRUB_MAXEVENTS,
+} br_scrub_event_t;
+
+struct br_child;
+
+int32_t br_scrub_state_machine (xlator_t *, struct br_child *);
+
+#endif /* __BIT_ROT_SSM_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index f2cd1d8..3952f41 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -1092,6 +1092,16 @@ br_set_child_state (br_child_t *child, br_child_state_t state)
         UNLOCK (&child->lock);
 }
 
+static void
+br_set_scrub_state (br_child_t *child, br_scrub_state_t state)
+{
+        LOCK (&child->lock);
+        {
+                _br_child_set_scrub_state (child, state);
+        }
+        UNLOCK (&child->lock);
+}
+
 /**
  * At this point a thread is spawned to crawl the filesystem (in
  * tortoise pace) to sign objects that were not signed in previous run(s).
@@ -1155,6 +1165,7 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,
         priv = this->private;
 
         fsscan->kick = _gf_false;
+        fsscan->over = _gf_false;
         ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);
         if (ret != 0) {
                 gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
@@ -1166,7 +1177,7 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,
         /* this needs to be serialized with reconfigure() */
         pthread_mutex_lock (&priv->lock);
         {
-                ret = br_fsscan_schedule (this, child, fsscan, fsscrub);
+                ret = br_scrub_state_machine (this, child);
         }
         pthread_mutex_unlock (&priv->lock);
         if (ret)
@@ -1391,6 +1402,11 @@ br_cleanup_scrubber (xlator_t *this, br_child_t *child)
                 fsscan->timer = NULL;
         }
 
+        /**
+         * 0x3: reset scrubber state
+         */
+        _br_child_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE);
+
         gf_log (this->name, GF_LOG_INFO,
                 "Cleaned up scrubber for brick [%s]", child->brick_path);
 
@@ -1779,6 +1795,8 @@ br_init_children (xlator_t *this, br_private_t *priv)
 
                 LOCK_INIT (&child->lock);
                 child->witnessed = 0;
+
+                br_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE);
                 br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);
 
                 child->this = this;
@@ -1906,13 +1924,11 @@ fini (xlator_t *this)
 }
 
 static void
-br_reconfigure_child (xlator_t *this,
-                      br_child_t *child, struct br_scrubber *fsscrub)
+br_reconfigure_child (xlator_t *this, br_child_t *child)
 {
         int32_t ret = 0;
-        struct br_scanfs *fsscan = &child->fsscan;
 
-        ret = br_fsscan_reschedule (this, child, fsscan, fsscrub, _gf_true);
+        ret = br_scrub_state_machine (this, child);
         if (ret) {
                 gf_log (this->name, GF_LOG_ERROR,
                         "Could not reschedule scrubber for brick: %s. "
@@ -1924,14 +1940,12 @@ br_reconfigure_child (xlator_t *this,
 static int
 br_reconfigure_scrubber (xlator_t *this, dict_t *options)
 {
-        int                 i       = 0;
-        int32_t             ret     = -1;
-        br_child_t         *child   = NULL;
-        br_private_t       *priv    = NULL;
-        struct br_scrubber *fsscrub = NULL;
+        int           i     = 0;
+        int32_t       ret   = -1;
+        br_child_t   *child = NULL;
+        br_private_t *priv  = NULL;
 
         priv = this->private;
-        fsscrub = &priv->fsscrub;
 
         pthread_mutex_lock (&priv->lock);
         {
@@ -1942,7 +1956,7 @@ br_reconfigure_scrubber (xlator_t *this, dict_t *options)
         if (ret)
                 goto err;
 
-        /* reschedule all _up_ subvolume(s) */
+        /* change state for all _up_ subvolume(s) */
         for (; i < priv->child_count; i++) {
                 child = &priv->children[i];
 
@@ -1957,7 +1971,7 @@ br_reconfigure_scrubber (xlator_t *this, dict_t *options)
                         }
 
                         if (_br_is_child_connected (child))
-                                br_reconfigure_child (this, child, fsscrub);
+                                br_reconfigure_child (this, child);
 
                         /**
                          * for the rest.. either the child is in initialization
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index 9a55773..6cafd8b 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -7,6 +7,7 @@
    later), or the GNU General Public License, version 2 (GPLv2), in all
    cases as published by the Free Software Foundation.
 */
+
 #ifndef __BIT_ROT_H__
 #define __BIT_ROT_H__
 
@@ -26,6 +27,7 @@
 #include "timer-wheel.h"
 
 #include "bit-rot-tbf.h"
+#include "bit-rot-ssm.h"
 
 #include "bit-rot-common.h"
 #include "bit-rot-stub-mem-types.h"
@@ -52,6 +54,7 @@ typedef enum scrub_freq {
         BR_FSSCRUB_FREQ_WEEKLY,
         BR_FSSCRUB_FREQ_BIWEEKLY,
         BR_FSSCRUB_FREQ_MONTHLY,
+        BR_FSSCRUB_FREQ_STALLED,
 } scrub_freq_t;
 
 #define signature_size(hl) (sizeof (br_isignature_t) + hl + 1)
@@ -69,6 +72,9 @@ struct br_scanfs {
         /* scheduler */
         uint32_t boot;
         gf_boolean_t kick;
+        gf_boolean_t over;
+
+        br_scrub_state_t state;   /* current scrub state */
 
         pthread_mutex_t wakelock;
         pthread_cond_t  wakecond;
@@ -203,6 +209,7 @@ struct br_object {
 };
 
 typedef struct br_object br_object_t;
+typedef int32_t (br_scrub_ssm_call) (xlator_t *, br_child_t *);
 
 void
 br_log_object (xlator_t *, char *, uuid_t, int32_t);
@@ -244,4 +251,19 @@ _br_child_witnessed_connection (br_child_t *child)
         return (child->witnessed == 1);
 }
 
+/* scrub state */
+static inline void
+_br_child_set_scrub_state (br_child_t *child, br_scrub_state_t state)
+{
+        struct br_scanfs *fsscan = &child->fsscan;
+        fsscan->state = state;
+}
+
+static inline br_scrub_event_t
+_br_child_get_scrub_event (struct br_scrubber *fsscrub)
+{
+        return (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED)
+                ? BR_SCRUB_EVENT_PAUSE : BR_SCRUB_EVENT_SCHEDULE;
+}
+
 #endif /* __BIT_ROT_H__ */
-- 
1.7.1