Blob Blame History Raw
From 3cf65dfbd5dcbfc72117d236d1cfd132196a4bd4 Mon Sep 17 00:00:00 2001
From: Kotresh HR <khiremat@redhat.com>
Date: Fri, 29 Apr 2016 17:45:31 +0530
Subject: [PATCH 110/139] features/bitrot: Introduce scrubber monitor thread

This patch makes the following changes.

 1. Introduce a scrubber monitor thread.
 2. Move the scrub status related APIs to a separate file
    and make them part of the libbitrot library.

Problem:
     Earlier, each child of the scrubber maintained its own
state machine, so there was no way to track the start and
end time of scrubbing, as each brick had its own start and
end time. Each brick also maintained its own timer wheel
instance. It was also not possible to get the scrubbed files
count per session, as we could not identify the last child
to finish scrubbing in order to reset the count to zero.

Solution:
    Introduce a scrubber monitor thread. It does the following.

 1. Maintains the scrubber state machine. Earlier, each
    child had its own state machine. Now, only the monitor
    maintains it on behalf of all its children.
 2. Maintains the timer wheel instance. Earlier, each
    child had its own timer wheel instance. Now, only the
    monitor maintains it on behalf of all its children.

As a result, we can track the scrub statistics easily
and correctly.

Upstream:
master:
>BUG: 1329211
>http://review.gluster.org/14044
>BUG: 1332134
>http://review.gluster.org/#/c/14146
release-3.7:
>BUG: 1332072
>Reviewed-on: http://review.gluster.org/14140

NOTE: Patch #14146 addresses a compilation warning that is not
detected in the master branch and is detected only in the 3.7
branch. Since the compilation warning is introduced by patch
#14044, the above two backports are combined into this single
patch in release-3.7.

BUG: 1299737
Change-Id: I437585063ce0d27b8e2123e39f2e16bbc881552a
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/73573
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Tested-by: Venky Shankar <vshankar@redhat.com>
---
 xlators/features/bit-rot/src/bitd/Makefile.am      |    6 +-
 .../bit-rot/src/bitd/bit-rot-bitd-messages.h       |   18 +-
 .../bit-rot/src/bitd/bit-rot-scrub-status.c        |   73 +++
 .../bit-rot/src/bitd/bit-rot-scrub-status.h        |   46 ++
 xlators/features/bit-rot/src/bitd/bit-rot-scrub.c  |  545 ++++++++++++++------
 xlators/features/bit-rot/src/bitd/bit-rot-scrub.h  |   14 +-
 xlators/features/bit-rot/src/bitd/bit-rot-ssm.c    |   65 ++-
 xlators/features/bit-rot/src/bitd/bit-rot-ssm.h    |    4 +-
 xlators/features/bit-rot/src/bitd/bit-rot.c        |  178 +++----
 xlators/features/bit-rot/src/bitd/bit-rot.h        |   72 ++-
 10 files changed, 697 insertions(+), 324 deletions(-)
 create mode 100644 xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
 create mode 100644 xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h

diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am
index 154cdfb..cabdf3c 100644
--- a/xlators/features/bit-rot/src/bitd/Makefile.am
+++ b/xlators/features/bit-rot/src/bitd/Makefile.am
@@ -9,11 +9,13 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
                  -I$(CONTRIBDIR)/timer-wheel \
                  -I$(top_srcdir)/xlators/features/bit-rot/src/stub
 
-bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c
+bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c \
+		     bit-rot-scrub-status.c
 bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
                     $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la
 
-noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h
+noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h \
+		 bit-rot-scrub-status.h
 
 AM_CFLAGS = -Wall $(GF_CFLAGS)
 
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
index c0b83c6..c6b6a4a 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
@@ -40,7 +40,7 @@
  */
 
 #define GLFS_BITROT_BITD_BASE                   GLFS_MSGID_COMP_BITROT_BITD
-#define GLFS_BITROT_BITD_NUM_MESSAGES           53
+#define GLFS_BITROT_BITD_NUM_MESSAGES           55
 #define GLFS_MSGID_END                          (GLFS_BITROT_BITD_BASE + \
                                            GLFS_BITROT_BITD_NUM_MESSAGES + 1)
 /* Messaged with message IDs */
@@ -427,6 +427,22 @@
  *
  */
 /*------------*/
+#define BRB_MSG_SSM_FAILED                 (GLFS_BITROT_BITD_BASE + 54)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+#define BRB_MSG_SCRUB_WAIT_FAILED          (GLFS_BITROT_BITD_BASE + 55)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
 
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
 #endif /* !_BITROT_BITD_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
new file mode 100644
index 0000000..0afd7ea
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -0,0 +1,73 @@
+/*
+  Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+
+#include "bit-rot-scrub-status.h"
+
+void
+br_inc_unsigned_file_count (br_scrub_stats_t *scrub_stat)
+{
+        if (!scrub_stat)
+                return;
+
+        pthread_mutex_lock (&scrub_stat->lock);
+        {
+                scrub_stat->unsigned_files++;
+        }
+        pthread_mutex_unlock (&scrub_stat->lock);
+}
+
+void
+br_inc_scrubbed_file (br_scrub_stats_t *scrub_stat)
+{
+        if (!scrub_stat)
+                return;
+
+        pthread_mutex_lock (&scrub_stat->lock);
+        {
+                scrub_stat->scrubbed_files++;
+        }
+        pthread_mutex_unlock (&scrub_stat->lock);
+}
+
+void
+br_update_scrub_start_time (br_scrub_stats_t *scrub_stat, struct timeval *tv)
+{
+        if (!scrub_stat)
+                return;
+
+        pthread_mutex_lock (&scrub_stat->lock);
+        {
+                scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec;
+        }
+        pthread_mutex_unlock (&scrub_stat->lock);
+}
+
+void
+br_update_scrub_finish_time (br_scrub_stats_t *scrub_stat, char *timestr,
+                             struct timeval *tv)
+{
+        if (!scrub_stat)
+                return;
+
+        pthread_mutex_lock (&scrub_stat->lock);
+        {
+                scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec;
+
+                scrub_stat->scrub_duration =
+                                 scrub_stat->scrub_end_tv.tv_sec -
+                                 scrub_stat->scrub_start_tv.tv_sec;
+
+                strncpy (scrub_stat->last_scrub_time, timestr,
+                         sizeof (scrub_stat->last_scrub_time));
+        }
+        pthread_mutex_unlock (&scrub_stat->lock);
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
new file mode 100644
index 0000000..694ba0a
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -0,0 +1,46 @@
+/*
+   Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SCRUB_STATUS_H__
+#define __BIT_ROT_SCRUB_STATUS_H__
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+struct br_scrub_stats {
+        uint64_t       scrubbed_files;       /* Total number of scrubbed file */
+
+        uint64_t       unsigned_files;       /* Total number of unsigned file */
+
+        uint64_t       scrub_duration;            /* Duration of last scrub */
+
+        char           last_scrub_time[1024];    /*last scrub completion time */
+
+        struct         timeval scrub_start_tv;   /* Scrubbing starting time*/
+
+        struct         timeval scrub_end_tv;     /* Scrubbing finishing time */
+
+        pthread_mutex_t  lock;
+};
+
+typedef struct br_scrub_stats br_scrub_stats_t;
+
+void
+br_inc_unsigned_file_count (br_scrub_stats_t *scrub_stat);
+void
+br_inc_scrubbed_file (br_scrub_stats_t *scrub_stat);
+void
+br_update_scrub_start_time (br_scrub_stats_t *scrub_stat, struct timeval *tv);
+void
+br_update_scrub_finish_time (br_scrub_stats_t *scrub_stat, char *timestr,
+                             struct timeval *tv);
+
+#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index 47d1d26..e36762e 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -24,6 +24,7 @@
 #include "bit-rot-scrub.h"
 #include <pthread.h>
 #include "bit-rot-bitd-messages.h"
+#include "bit-rot-scrub-status.h"
 
 struct br_scrubbers {
         pthread_t scrubthread;
@@ -79,20 +80,6 @@ bitd_fetch_signature (xlator_t *this, br_child_t *child,
 
 }
 
-static void
-br_inc_unsigned_file_count (xlator_t *this)
-{
-        br_private_t   *priv = NULL;
-
-        priv = this->private;
-
-        pthread_mutex_lock (&priv->scrub_stat.lock);
-        {
-                priv->scrub_stat.unsigned_files++;
-        }
-        pthread_mutex_unlock (&priv->scrub_stat.lock);
-}
-
 /**
  * POST COMPUTE CHECK
  *
@@ -106,7 +93,8 @@ int32_t
 bitd_scrub_post_compute_check (xlator_t *this,
                                br_child_t *child,
                                fd_t *fd, unsigned long version,
-                               br_isignature_out_t **signature)
+                               br_isignature_out_t **signature,
+                               br_scrub_stats_t *scrub_stat)
 {
         int32_t              ret     = 0;
         size_t               signlen = 0;
@@ -114,8 +102,10 @@ bitd_scrub_post_compute_check (xlator_t *this,
         br_isignature_out_t *signptr = NULL;
 
         ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr);
-        if (ret < 0)
+        if (ret < 0) {
+                br_inc_unsigned_file_count (scrub_stat);
                 goto out;
+        }
 
         /**
          * Either the object got dirtied during the time the signature was
@@ -126,7 +116,7 @@ bitd_scrub_post_compute_check (xlator_t *this,
          * The log entry looks pretty ugly, but helps in debugging..
          */
         if (signptr->stale || (signptr->version != version)) {
-                br_inc_unsigned_file_count (this);
+                br_inc_unsigned_file_count (scrub_stat);
                 gf_msg_debug (this->name, 0, "<STAGE: POST> Object [GFID: %s] "
                               "either has a stale signature OR underwent "
                               "signing during checksumming {Stale: %d | "
@@ -154,15 +144,18 @@ bitd_scrub_post_compute_check (xlator_t *this,
 static int32_t
 bitd_signature_staleness (xlator_t *this,
                           br_child_t *child, fd_t *fd,
-                          int *stale, unsigned long *version)
+                          int *stale, unsigned long *version,
+                          br_scrub_stats_t *scrub_stat)
 {
         int32_t ret = -1;
         dict_t *xattr = NULL;
         br_isignature_out_t *signptr = NULL;
 
         ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr);
-        if (ret < 0)
+        if (ret < 0) {
+                br_inc_unsigned_file_count (scrub_stat);
                 goto out;
+        }
 
         /**
          * save verison for validation in post compute stage
@@ -187,7 +180,8 @@ bitd_signature_staleness (xlator_t *this,
  */
 int32_t
 bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child,
-                              fd_t *fd, unsigned long *version)
+                              fd_t *fd, unsigned long *version,
+                              br_scrub_stats_t *scrub_stat)
 {
         int     stale = 0;
         int32_t ret   = -1;
@@ -199,9 +193,10 @@ bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child,
                 goto out;
         }
 
-        ret = bitd_signature_staleness (this, child, fd, &stale, version);
+        ret = bitd_signature_staleness (this, child, fd, &stale, version,
+                                        scrub_stat);
         if (!ret && stale) {
-                br_inc_unsigned_file_count (this);
+                br_inc_unsigned_file_count (scrub_stat);
                 gf_msg_debug (this->name, 0, "<STAGE: PRE> Object [GFID: %s] "
                               "has stale signature",
                               uuid_utoa (fd->inode->gfid));
@@ -274,16 +269,6 @@ bitd_compare_ckum (xlator_t *this,
         return ret;
 }
 
-static void
-br_inc_scrubbed_file (br_private_t *priv)
-{
-        pthread_mutex_lock (&priv->scrub_stat.lock);
-        {
-                priv->scrub_stat.scrubbed_files++;
-        }
-        pthread_mutex_unlock (&priv->scrub_stat.lock);
-}
-
 /**
  * "The Scrubber"
  *
@@ -376,7 +361,8 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
          *  - presence of bad object
          *  - signature staleness
          */
-        ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion);
+        ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion,
+                                            &priv->scrub_stat);
         if (ret)
                 goto unrefd; /* skip this object */
 
@@ -399,8 +385,8 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
          * perform post compute checks as an object's signature may have
          * become stale while scrubber calculated checksum.
          */
-        ret = bitd_scrub_post_compute_check (this, child,
-                                             fd, signedversion, &sign);
+        ret = bitd_scrub_post_compute_check (this, child, fd, signedversion,
+                                             &sign, &priv->scrub_stat);
         if (ret)
                 goto free_md;
 
@@ -408,7 +394,7 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
                                  linked_inode, entry, fd, child, &loc);
 
         /* Increment of total number of scrubbed file counter */
-        br_inc_scrubbed_file (priv);
+        br_inc_scrubbed_file (&priv->scrub_stat);
 
         GF_FREE (sign); /* alloced on post-compute */
 
@@ -562,171 +548,215 @@ br_fsscanner_handle_entry (xlator_t *subvol,
 }
 
 int32_t
-br_fsscan_deactivate (xlator_t *this, br_child_t *child)
+br_fsscan_deactivate (xlator_t *this)
 {
         int ret = 0;
         br_private_t *priv = NULL;
         br_scrub_state_t nstate = 0;
-        struct br_scanfs *fsscan = NULL;
+        struct br_monitor *scrub_monitor = NULL;
 
         priv = this->private;
-        fsscan = &child->fsscan;
+        scrub_monitor = &priv->scrub_monitor;
 
-        ret = gf_tw_del_timer (priv->timer_wheel, fsscan->timer);
+        ret = gf_tw_del_timer (priv->timer_wheel, scrub_monitor->timer);
         if (ret == 0) {
                 nstate = BR_SCRUB_STATE_STALLED;
                 gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
-                        "Brick [%s] is under active scrubbing. Pausing scrub..",
-                        child->brick_path);
+                        "Volume is under active scrubbing. Pausing scrub..");
         } else {
                 nstate = BR_SCRUB_STATE_PAUSED;
                 gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
-                        "Scrubber paused [Brick: %s]", child->brick_path);
+                        "Scrubber paused");
         }
 
-        _br_child_set_scrub_state (child, nstate);
+        _br_monitor_set_scrub_state (scrub_monitor, nstate);
 
         return 0;
 }
+
 static void
-br_update_scrub_start_time (xlator_t *this, struct timeval *tv)
+br_scrubber_log_time (xlator_t *this, const char *sfx)
 {
-        br_private_t     *priv = NULL;
-        static int       child;
+        char           timestr[1024] = {0,};
+        struct         timeval tv    = {0,};
+        br_private_t  *priv          = NULL;
 
         priv = this->private;
 
+        gettimeofday (&tv, NULL);
+        gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT);
 
-        /* Setting scrubber starting time for first child only */
-        if (child == 0) {
-                pthread_mutex_lock (&priv->scrub_stat.lock);
-                {
-                        priv->scrub_stat.scrub_start_tv.tv_sec = tv->tv_sec;
-                }
-                pthread_mutex_unlock (&priv->scrub_stat.lock);
+        if (strcasecmp (sfx, "started") == 0) {
+                br_update_scrub_start_time (&priv->scrub_stat, &tv);
+                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
+                        "Scrubbing %s at %s", sfx, timestr);
+        } else {
+                br_update_scrub_finish_time (&priv->scrub_stat, timestr, &tv);
+                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
+                        "Scrubbing %s at %s", sfx, timestr);
         }
+}
 
-        if (++child == priv->up_children) {
-                child = 0;
+static void
+br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx)
+{
+        char           timestr[1024] = {0,};
+        struct         timeval tv    = {0,};
+
+        gettimeofday (&tv, NULL);
+        gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT);
+
+        if (strcasecmp (sfx, "started") == 0) {
+                gf_msg_debug (this->name, 0, "Scrubbing \"%s\" %s at %s",
+                              child->brick_path, sfx, timestr);
+        } else {
+                gf_msg_debug (this->name, 0, "Scrubbing \"%s\" %s at %s",
+                              child->brick_path, sfx, timestr);
         }
 }
 
+void
+br_child_set_scrub_state (br_child_t *child, gf_boolean_t state)
+{
+        child->active_scrubbing = state;
+}
+
 static void
-br_update_scrub_finish_time (xlator_t *this, char *timestr, struct timeval *tv)
+br_fsscanner_wait_until_kicked (xlator_t *this, br_child_t *child)
 {
-        br_private_t     *priv = NULL;
-        static int       child;
+        br_private_t      *priv          = NULL;
+        struct br_monitor *scrub_monitor = NULL;
 
         priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
 
-        /*Setting scrubber finishing time at time time of last child operation*/
-        if (++child == priv->up_children) {
-                pthread_mutex_lock (&priv->scrub_stat.lock);
-                {
-                        priv->scrub_stat.scrub_end_tv.tv_sec = tv->tv_sec;
-
-                        priv->scrub_stat.scrub_duration =
-                                         priv->scrub_stat.scrub_end_tv.tv_sec -
-                                         priv->scrub_stat.scrub_start_tv.tv_sec;
-
-                        strncpy (priv->scrub_stat.last_scrub_time, timestr,
-                                 sizeof (priv->scrub_stat.last_scrub_time));
+        pthread_cleanup_push (_br_lock_cleaner, &scrub_monitor->wakelock);
+        pthread_mutex_lock (&scrub_monitor->wakelock);
+        {
+                while (!scrub_monitor->kick)
+                        pthread_cond_wait (&scrub_monitor->wakecond,
+                                           &scrub_monitor->wakelock);
 
-                        child = 0;
+                /* Child lock is to synchronize with disconnect events */
+                pthread_cleanup_push (_br_lock_cleaner, &child->lock);
+                pthread_mutex_lock (&child->lock);
+                {
+                        scrub_monitor->active_child_count++;
+                        br_child_set_scrub_state (child, _gf_true);
                 }
-                pthread_mutex_unlock (&priv->scrub_stat.lock);
+                pthread_mutex_unlock (&child->lock);
+                pthread_cleanup_pop (0);
         }
+        pthread_mutex_unlock (&scrub_monitor->wakelock);
+        pthread_cleanup_pop (0);
 }
 
 static void
-br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx)
+br_scrubber_entry_control (xlator_t *this)
 {
-        char           timestr[1024] = {0,};
-        struct         timeval tv    = {0,};
+        br_private_t      *priv          = NULL;
+        struct br_monitor *scrub_monitor = NULL;
 
-        gettimeofday (&tv, NULL);
-        gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT);
+        priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
 
-        if (strcasecmp (sfx, "started") == 0) {
-                br_update_scrub_start_time (this, &tv);
-                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
-                        "Scrubbing \"%s\" %s at %s", child->brick_path, sfx,
-                        timestr);
-        } else {
-                br_update_scrub_finish_time (this, timestr, &tv);
-                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
-                        "Scrubbing \"%s\" %s at %s", child->brick_path, sfx,
-                        timestr);
+        LOCK (&scrub_monitor->lock);
+        {
+                /* Move the state to BR_SCRUB_STATE_ACTIVE */
+                if (scrub_monitor->state == BR_SCRUB_STATE_PENDING)
+                        scrub_monitor->state = BR_SCRUB_STATE_ACTIVE;
+                br_scrubber_log_time (this, "started");
         }
+        UNLOCK (&scrub_monitor->lock);
 }
 
 static void
-br_fsscanner_wait_until_kicked (xlator_t *this, struct br_scanfs *fsscan)
+br_scrubber_exit_control (xlator_t *this)
 {
-        static int            i;
-        br_private_t         *priv    = NULL;
+        br_private_t      *priv          = NULL;
+        struct br_monitor *scrub_monitor = NULL;
 
         priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
 
-        pthread_cleanup_push (_br_lock_cleaner, &fsscan->wakelock);
-        pthread_mutex_lock (&fsscan->wakelock);
+        LOCK (&scrub_monitor->lock);
         {
-                while (!fsscan->kick)
-                        pthread_cond_wait (&fsscan->wakecond,
-                                           &fsscan->wakelock);
-
-                /* resetting total number of scrubbed file when scrubbing
-                 * done for all of its children */
-                if (i == priv->up_children) {
-                        pthread_mutex_lock (&priv->scrub_stat.lock);
-                        {
-                                priv->scrub_stat.scrubbed_files = 0;
-                                priv->scrub_stat.unsigned_files = 0;
-                                i = 0;
-                        }
-                        pthread_mutex_unlock (&priv->scrub_stat.lock);
-                }
-                ++i;
+                br_scrubber_log_time (this, "finished");
 
-                fsscan->kick = _gf_false;
+                if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) {
+                        (void) br_fsscan_activate (this);
+                } else {
+                        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+                                "Volume waiting to get rescheduled..");
+                }
         }
-        pthread_mutex_unlock (&fsscan->wakelock);
-        pthread_cleanup_pop (0);
+        UNLOCK (&scrub_monitor->lock);
 }
 
 static void
 br_fsscanner_entry_control (xlator_t *this, br_child_t *child)
 {
-        struct br_scanfs *fsscan = &child->fsscan;
-
-        LOCK (&child->lock);
-        {
-                if (fsscan->state == BR_SCRUB_STATE_PENDING)
-                        fsscan->state = BR_SCRUB_STATE_ACTIVE;
                 br_fsscanner_log_time (this, child, "started");
-        }
-        UNLOCK (&child->lock);
 }
 
 static void
 br_fsscanner_exit_control (xlator_t *this, br_child_t *child)
 {
-        struct br_scanfs *fsscan = &child->fsscan;
+        br_private_t *priv = NULL;
+        struct br_monitor *scrub_monitor = NULL;
 
-        LOCK (&child->lock);
+        priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
+
+        if (!_br_is_child_connected (child)) {
+                gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SCRUB_INFO,
+                        "Brick [%s] disconnected while scrubbing. Scrubbing "
+                        "might be incomplete", child->brick_path);
+        }
+
+        br_fsscanner_log_time (this, child, "finished");
+
+        pthread_cleanup_push (_br_lock_cleaner, &scrub_monitor->wakelock);
+        pthread_mutex_lock (&scrub_monitor->wakelock);
         {
-                fsscan->over = _gf_true;
-                br_fsscanner_log_time (this, child, "finished");
+                scrub_monitor->active_child_count--;
+                pthread_cleanup_push (_br_lock_cleaner, &child->lock);
+                pthread_mutex_lock (&child->lock);
+                {
+                        br_child_set_scrub_state (child, _gf_false);
+                }
+                pthread_mutex_unlock (&child->lock);
+                pthread_cleanup_pop (0);
 
-                if (fsscan->state == BR_SCRUB_STATE_ACTIVE) {
-                        (void) br_fsscan_activate (this, child);
+                if (scrub_monitor->active_child_count == 0) {
+                        /* The last child has finished scrubbing.
+                         * Set the kick to false and  wake up other
+                         * children who are waiting for the last
+                         * child to complete scrubbing.
+                         */
+                        scrub_monitor->kick = _gf_false;
+                        pthread_cond_broadcast (&scrub_monitor->wakecond);
+
+                        /* Signal the monitor thread waiting for all
+                         * the children to finish scrubbing.
+                         */
+                        pthread_cleanup_push (_br_lock_cleaner,
+                                              &scrub_monitor->donelock);
+                        pthread_mutex_lock (&scrub_monitor->donelock);
+                        {
+                                scrub_monitor->done = _gf_true;
+                                pthread_cond_signal (&scrub_monitor->donecond);
+                        }
+                        pthread_mutex_unlock (&scrub_monitor->donelock);
+                        pthread_cleanup_pop (0);
                 } else {
-                        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
-                                "Brick [%s] waiting to get rescheduled..",
-                                child->brick_path);
+                        while (scrub_monitor->active_child_count)
+                                pthread_cond_wait (&scrub_monitor->wakecond,
+                                                   &scrub_monitor->wakelock);
                 }
         }
-        UNLOCK (&child->lock);
+        pthread_mutex_unlock (&scrub_monitor->wakelock);
+        pthread_cleanup_pop (0);
 }
 
 void *
@@ -745,7 +775,7 @@ br_fsscanner (void *arg)
         loc.inode = child->table->root;
 
         while (1) {
-                br_fsscanner_wait_until_kicked (this, fsscan);
+                br_fsscanner_wait_until_kicked (this, child);
                 {
                         /* precursor for scrub */
                         br_fsscanner_entry_control (this, child);
@@ -777,22 +807,29 @@ br_kickstart_scanner (struct gf_tw_timer_list *timer,
                       void *data, unsigned long calltime)
 {
         xlator_t *this = NULL;
-        br_child_t *child = data;
-        struct br_scanfs *fsscan = NULL;
+        struct br_monitor *scrub_monitor = data;
+        br_private_t *priv = NULL;
 
-        THIS = this = child->this;
-        fsscan = &child->fsscan;
+        THIS = this = scrub_monitor->this;
+        priv = this->private;
+
+        /* Reset scrub statistics */
+        priv->scrub_stat.scrubbed_files = 0;
+        priv->scrub_stat.unsigned_files = 0;
+
+        /* Moves state from PENDING to ACTIVE */
+        (void) br_scrubber_entry_control (this);
 
         /* kickstart scanning.. */
-        pthread_mutex_lock (&fsscan->wakelock);
+        pthread_mutex_lock (&scrub_monitor->wakelock);
         {
-                fsscan->kick = _gf_true;
-                pthread_cond_signal (&fsscan->wakecond);
+                scrub_monitor->kick = _gf_true;
+                GF_ASSERT (scrub_monitor->active_child_count == 0);
+                pthread_cond_broadcast (&scrub_monitor->wakecond);
         }
-        pthread_mutex_unlock (&fsscan->wakelock);
+        pthread_mutex_unlock (&scrub_monitor->wakelock);
 
         return;
-
 }
 
 static uint32_t
@@ -836,22 +873,22 @@ br_fsscan_calculate_timeout (scrub_freq_t freq)
 }
 
 int32_t
-br_fsscan_schedule (xlator_t *this, br_child_t *child)
+br_fsscan_schedule (xlator_t *this)
 {
         uint32_t timo = 0;
         br_private_t *priv = NULL;
         struct timeval tv = {0,};
         char timestr[1024] = {0,};
-        struct br_scanfs *fsscan = NULL;
         struct br_scrubber *fsscrub = NULL;
         struct gf_tw_timer_list *timer = NULL;
+        struct br_monitor *scrub_monitor = NULL;
 
         priv = this->private;
-        fsscan = &child->fsscan;
         fsscrub = &priv->fsscrub;
+        scrub_monitor = &priv->scrub_monitor;
 
         (void) gettimeofday (&tv, NULL);
-        fsscan->boot = tv.tv_sec;
+        scrub_monitor->boot = tv.tv_sec;
 
         timo = br_fsscan_calculate_timeout (fsscrub->frequency);
         if (timo == 0) {
@@ -860,25 +897,25 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child)
                 goto error_return;
         }
 
-        fsscan->timer = GF_CALLOC (1, sizeof (*fsscan->timer),
+        scrub_monitor->timer = GF_CALLOC (1, sizeof (*scrub_monitor->timer),
                                    gf_br_stub_mt_br_scanner_freq_t);
-        if (!fsscan->timer)
+        if (!scrub_monitor->timer)
                 goto error_return;
 
-        timer = fsscan->timer;
+        timer = scrub_monitor->timer;
         INIT_LIST_HEAD (&timer->entry);
 
-        timer->data = child;
+        timer->data = scrub_monitor;
         timer->expires = timo;
         timer->function = br_kickstart_scanner;
 
         gf_tw_add_timer (priv->timer_wheel, timer);
-        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
+        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING);
 
         gf_time_fmt (timestr, sizeof (timestr),
-                     (fsscan->boot + timo), gf_timefmt_FT);
-        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing for "
-                "%s scheduled to run at %s", child->brick_path, timestr);
+                     (scrub_monitor->boot + timo), gf_timefmt_FT);
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing is "
+                "scheduled to run at %s", timestr);
 
         return 0;
 
@@ -887,18 +924,18 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child)
 }
 
 int32_t
-br_fsscan_activate (xlator_t *this, br_child_t *child)
+br_fsscan_activate (xlator_t *this)
 {
         uint32_t            timo    = 0;
         char timestr[1024]          = {0,};
         struct timeval      now     = {0,};
         br_private_t       *priv    = NULL;
-        struct br_scanfs   *fsscan  = NULL;
         struct br_scrubber *fsscrub = NULL;
+        struct br_monitor  *scrub_monitor = NULL;
 
         priv = this->private;
-        fsscan = &child->fsscan;
         fsscrub = &priv->fsscrub;
+        scrub_monitor = &priv->scrub_monitor;
 
         (void) gettimeofday (&now, NULL);
         timo = br_fsscan_calculate_timeout (fsscrub->frequency);
@@ -908,32 +945,37 @@ br_fsscan_activate (xlator_t *this, br_child_t *child)
                 return -1;
         }
 
-        fsscan->over = _gf_false;
+        pthread_mutex_lock (&scrub_monitor->donelock);
+        {
+                scrub_monitor->done = _gf_false;
+        }
+        pthread_mutex_unlock (&scrub_monitor->donelock);
+
         gf_time_fmt (timestr, sizeof (timestr),
                      (now.tv_sec + timo), gf_timefmt_FT);
-        (void) gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo);
+        (void) gf_tw_mod_timer (priv->timer_wheel, scrub_monitor->timer, timo);
 
-        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
-        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing for "
-                "%s rescheduled to run at %s", child->brick_path, timestr);
+        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING);
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing is "
+                "rescheduled to run at %s", timestr);
 
         return 0;
 }
 
 int32_t
-br_fsscan_reschedule (xlator_t *this, br_child_t *child)
+br_fsscan_reschedule (xlator_t *this)
 {
         int32_t             ret     = 0;
         uint32_t            timo    = 0;
         char timestr[1024]          = {0,};
         struct timeval      now     = {0,};
         br_private_t       *priv    = NULL;
-        struct br_scanfs   *fsscan  = NULL;
         struct br_scrubber *fsscrub = NULL;
+        struct br_monitor  *scrub_monitor = NULL;
 
         priv = this->private;
-        fsscan = &child->fsscan;
         fsscrub = &priv->fsscrub;
+        scrub_monitor = &priv->scrub_monitor;
 
         if (!fsscrub->frequency_reconf)
                 return 0;
@@ -949,17 +991,21 @@ br_fsscan_reschedule (xlator_t *this, br_child_t *child)
         gf_time_fmt (timestr, sizeof (timestr),
                      (now.tv_sec + timo), gf_timefmt_FT);
 
-        fsscan->over = _gf_false;
-        ret = gf_tw_mod_timer_pending (priv->timer_wheel, fsscan->timer, timo);
+        pthread_mutex_lock (&scrub_monitor->donelock);
+        {
+                scrub_monitor->done = _gf_false;
+        }
+        pthread_mutex_unlock (&scrub_monitor->donelock);
+
+        ret = gf_tw_mod_timer_pending (priv->timer_wheel, scrub_monitor->timer, timo);
         if (ret == 0)
                 gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
-                        "Scrubber for %s is currently running and would be "
-                        "rescheduled after completion", child->brick_path);
+                        "Scrubber is currently running and would be "
+                        "rescheduled after completion");
         else {
-                _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
+                _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING);
                 gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
-                        "Scrubbing for %s rescheduled to run at %s",
-                        child->brick_path, timestr);
+                        "Scrubbing rescheduled to run at %s", timestr);
         }
 
         return 0;
@@ -1725,15 +1771,174 @@ out:
         return ret;
 }
 
+/* Block until the scrub is flagged done; -1 if 'this'/private is invalid */
+static int
+wait_for_scrub_to_finish (xlator_t *this)
+{
+        int                  ret               = -1;
+        br_private_t         *priv             = NULL;
+        struct br_monitor    *scrub_monitor    = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO ("bit-rot", priv, out);
+        scrub_monitor = &priv->scrub_monitor;
+
+        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+                "Waiting for all children to start and finish scrub");
+
+        pthread_mutex_lock (&scrub_monitor->donelock);
+        {
+                while (!scrub_monitor->done)
+                        pthread_cond_wait (&scrub_monitor->donecond,
+                                           &scrub_monitor->donelock);
+        }
+        pthread_mutex_unlock (&scrub_monitor->donelock);
+        ret = 0;
+out:
+        return ret;
+}
+
+/**
+ * Scrubber monitor thread: waits until 'inited' is set, runs the scrub state
+ * machine once under priv->lock, then loops waiting for each scrub to finish.
+ */
+void *
+br_monitor_thread (void *arg)
+{
+        int32_t              ret               = 0;
+        xlator_t            *this              = NULL;
+        br_private_t        *priv              = NULL;
+        struct br_monitor   *scrub_monitor     = NULL;
+
+        this = arg;
+        priv = this->private;
+
+        /*
+         * Since, this is the topmost xlator, THIS has to be set by bit-rot
+         * xlator itself (STACK_WIND wont help in this case). Also it has
+         * to be done for each thread that gets spawned. Otherwise, a new
+         * thread will get global_xlator's pointer when it does "THIS".
+         */
+        THIS = this;
+
+        scrub_monitor = &priv->scrub_monitor;
+
+        pthread_mutex_lock (&scrub_monitor->mutex);
+        {
+                while (!scrub_monitor->inited)
+                        pthread_cond_wait (&scrub_monitor->cond,
+                                           &scrub_monitor->mutex);
+        }
+        pthread_mutex_unlock (&scrub_monitor->mutex);
+
+        /* this needs to be serialized with reconfigure() */
+        pthread_mutex_lock (&priv->lock);
+        {
+                ret = br_scrub_state_machine (this);
+        }
+        pthread_mutex_unlock (&priv->lock);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                        BRB_MSG_SSM_FAILED,
+                        "Scrub state machine failed");
+                goto out;
+        }
+
+        while (1) {
+                /* Wait for all children to finish scrubbing */
+                ret = wait_for_scrub_to_finish (this);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                BRB_MSG_SCRUB_WAIT_FAILED,
+                                "Scrub wait failed");
+                        goto out;
+                }
+
+                /* scrub exit criteria: Move the state to PENDING */
+                br_scrubber_exit_control (this);
+        }
+
+out:
+        return NULL;
+}
+
+static void
+br_set_scrub_state (struct br_monitor *scrub_monitor, br_scrub_state_t state)
+{
+        LOCK (&scrub_monitor->lock);  /* state is updated under the lock */
+        {
+                _br_monitor_set_scrub_state (scrub_monitor, state);
+        }
+        UNLOCK (&scrub_monitor->lock);
+}
+
+/* Set up the monitor's locks, condition variables and flags, mark the scrub
+ * state INACTIVE and spawn the monitor thread; returns 0, or -1 on failure. */
+int32_t
+br_scrubber_monitor_init (xlator_t *this, br_private_t *priv)
+{
+        struct br_monitor *scrub_monitor = NULL;
+        int                ret           = 0;
+
+        scrub_monitor = &priv->scrub_monitor;
+
+        LOCK_INIT (&scrub_monitor->lock);
+        scrub_monitor->this = this;
+
+        scrub_monitor->inited = _gf_false;
+        pthread_mutex_init (&scrub_monitor->mutex, NULL);
+        pthread_cond_init (&scrub_monitor->cond, NULL);
+
+        scrub_monitor->kick = _gf_false;
+        scrub_monitor->active_child_count = 0;
+        pthread_mutex_init (&scrub_monitor->wakelock, NULL);
+        pthread_cond_init (&scrub_monitor->wakecond, NULL);
+
+        scrub_monitor->done = _gf_false;
+        pthread_mutex_init (&scrub_monitor->donelock, NULL);
+        pthread_cond_init (&scrub_monitor->donecond, NULL);
+
+        /* Set the state to INACTIVE */
+        br_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_INACTIVE);
+
+        /* Start the monitor thread (failure yields a positive error number) */
+        ret = gf_thread_create (&scrub_monitor->thread, NULL, br_monitor_thread, this);
+        if (ret != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, ret,
+                        BRB_MSG_SPAWN_FAILED, "monitor thread creation failed");
+                ret = -1;
+                goto err;
+        }
+
+        return 0;
+err:
+        pthread_mutex_destroy (&scrub_monitor->mutex);
+        pthread_cond_destroy (&scrub_monitor->cond);
+        pthread_mutex_destroy (&scrub_monitor->wakelock);
+        pthread_cond_destroy (&scrub_monitor->wakecond);
+        pthread_mutex_destroy (&scrub_monitor->donelock);
+        pthread_cond_destroy (&scrub_monitor->donecond);
+
+        LOCK_DESTROY (&scrub_monitor->lock);
+
+        return ret;
+}
+
 int32_t
 br_scrubber_init (xlator_t *this, br_private_t *priv)
 {
         struct br_scrubber *fsscrub = NULL;
+        int                 ret     = 0;
 
         priv->tbf = br_tbf_init (NULL, 0);
         if (!priv->tbf)
                 return -1;
 
+        ret = br_scrubber_monitor_init (this, priv);
+        if (ret)
+                return -1;
+
         fsscrub = &priv->fsscrub;
 
         fsscrub->this = this;
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
index e730582..93bb296 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -16,15 +16,21 @@
 
 void *br_fsscanner (void *);
 
-int32_t br_fsscan_schedule (xlator_t *, br_child_t *);
-int32_t br_fsscan_reschedule (xlator_t *, br_child_t *);
-int32_t br_fsscan_activate (xlator_t *, br_child_t *);
-int32_t br_fsscan_deactivate (xlator_t *, br_child_t *);
+int32_t br_fsscan_schedule (xlator_t *);
+int32_t br_fsscan_reschedule (xlator_t *);
+int32_t br_fsscan_activate (xlator_t *);
+int32_t br_fsscan_deactivate (xlator_t *);
 
 int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *);
 
+int32_t
+br_scrubber_monitor_init (xlator_t *, br_private_t *);
+
 int32_t br_scrubber_init (xlator_t *, br_private_t *);
 
 int32_t br_collect_bad_objects_from_children (xlator_t *this, dict_t *dict);
 
+void
+br_child_set_scrub_state (br_child_t *, gf_boolean_t);
+
 #endif /* __BIT_ROT_SCRUB_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
index fcffc04..d304fc8 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
@@ -12,52 +12,73 @@
 #include "bit-rot-scrub.h"
 #include "bit-rot-bitd-messages.h"
 
-int br_scrub_ssm_noop (xlator_t *this, br_child_t *child)
+int br_scrub_ssm_noop (xlator_t *this)
 {
         return 0;
 }
 
 int
-br_scrub_ssm_state_pause (xlator_t *this, br_child_t *child)
+br_scrub_ssm_state_pause (xlator_t *this)
 {
+        br_private_t        *priv               = NULL;
+        struct br_monitor   *scrub_monitor      = NULL;
+
+        priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
+
         gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
-                "Scrubber paused [Brick: %s]", child->brick_path);
-        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PAUSED);
+                "Scrubber paused");
+        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PAUSED);
         return 0;
 }
 
 int
-br_scrub_ssm_state_ipause (xlator_t *this, br_child_t *child)
+br_scrub_ssm_state_ipause (xlator_t *this)
 {
+        br_private_t        *priv               = NULL;
+        struct br_monitor   *scrub_monitor      = NULL;
+
+        priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
+
         gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
-                "Scrubber paused [Brick: %s]", child->brick_path);
-        _br_child_set_scrub_state (child, BR_SCRUB_STATE_IPAUSED);
+                "Scrubber paused");
+        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_IPAUSED);
         return 0;
 }
 
 int
-br_scrub_ssm_state_active (xlator_t *this, br_child_t *child)
+br_scrub_ssm_state_active (xlator_t *this)
 {
-        struct br_scanfs *fsscan = &child->fsscan;
+        br_private_t        *priv               = NULL;
+        struct br_monitor   *scrub_monitor      = NULL;
 
-        if (fsscan->over) {
-                (void) br_fsscan_activate (this, child);
+        priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
+
+        if (scrub_monitor->done) {
+                (void) br_fsscan_activate (this);
         } else {
                 gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
-                        "Scrubbing resumed [Brick %s]", child->brick_path);
-                _br_child_set_scrub_state (child, BR_SCRUB_STATE_ACTIVE);
+                        "Scrubbing resumed");
+                _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_ACTIVE);
         }
 
         return 0;
 }
 
 int
-br_scrub_ssm_state_stall (xlator_t *this, br_child_t *child)
+br_scrub_ssm_state_stall (xlator_t *this)
 {
+        br_private_t        *priv               = NULL;
+        struct br_monitor   *scrub_monitor      = NULL;
+
+        priv = this->private;
+        scrub_monitor = &priv->scrub_monitor;
+
         gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
-                "Brick [%s] is under active scrubbing. Pausing scrub..",
-                child->brick_path);
-        _br_child_set_scrub_state (child, BR_SCRUB_STATE_STALLED);
+                "Volume is under active scrubbing. Pausing scrub..");
+        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_STALLED);
         return 0;
 }
 
@@ -72,22 +93,22 @@ br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = {
 };
 
 int32_t
-br_scrub_state_machine (xlator_t *this, br_child_t *child)
+br_scrub_state_machine (xlator_t *this)
 {
         br_private_t       *priv      = NULL;
         br_scrub_ssm_call  *call      = NULL;
-        struct br_scanfs   *fsscan    = NULL;
         struct br_scrubber *fsscrub   = NULL;
         br_scrub_state_t    currstate = 0;
         br_scrub_event_t    event     = 0;
+        struct br_monitor  *scrub_monitor = NULL;
 
         priv = this->private;
-        fsscan = &child->fsscan;
         fsscrub = &priv->fsscrub;
+        scrub_monitor = &priv->scrub_monitor;
 
-        currstate = fsscan->state;
+        currstate = scrub_monitor->state;
         event = _br_child_get_scrub_event (fsscrub);
 
         call = br_scrub_ssm[currstate][event];
-        return call (this, child);
+        return call (this);
 }
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
index 72fd62b..936ee4d 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
@@ -29,8 +29,8 @@ typedef enum br_scrub_event {
         BR_SCRUB_MAXEVENTS,
 } br_scrub_event_t;
 
-struct br_child;
+struct br_monitor;
 
-int32_t br_scrub_state_machine (xlator_t *, struct br_child *);
+int32_t br_scrub_state_machine (xlator_t *);
 
 #endif /* __BIT_ROT_SSM_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 0eba447..45f8d1d 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -1097,21 +1097,11 @@ br_oneshot_signer (void *arg)
 static void
 br_set_child_state (br_child_t *child, br_child_state_t state)
 {
-        LOCK (&child->lock);
+        pthread_mutex_lock (&child->lock);
         {
                 _br_set_child_state (child, state);
         }
-        UNLOCK (&child->lock);
-}
-
-static void
-br_set_scrub_state (br_child_t *child, br_scrub_state_t state)
-{
-        LOCK (&child->lock);
-        {
-                _br_child_set_scrub_state (child, state);
-        }
-        UNLOCK (&child->lock);
+        pthread_mutex_unlock (&child->lock);
 }
 
 /**
@@ -1173,11 +1163,11 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,
 {
         int32_t ret = -1;
         br_private_t *priv = NULL;
+        struct br_monitor *scrub_monitor = NULL;
 
         priv = this->private;
 
-        fsscan->kick = _gf_false;
-        fsscan->over = _gf_false;
+        scrub_monitor = &priv->scrub_monitor;
         ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);
         if (ret != 0) {
                 gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
@@ -1186,14 +1176,14 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,
                 goto error_return;
         }
 
-        /* this needs to be serialized with reconfigure() */
-        pthread_mutex_lock (&priv->lock);
+        /* Signal monitor to kick off state machine*/
+        pthread_mutex_lock (&scrub_monitor->mutex);
         {
-                ret = br_scrub_state_machine (this, child);
+                if (!scrub_monitor->inited)
+                        pthread_cond_signal (&scrub_monitor->cond);
+                scrub_monitor->inited = _gf_true;
         }
-        pthread_mutex_unlock (&priv->lock);
-        if (ret)
-                goto cleanup_thread;
+        pthread_mutex_unlock (&scrub_monitor->mutex);
 
         /**
          * Everything has been setup.. add this subvolume to scrubbers
@@ -1208,8 +1198,6 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,
 
         return 0;
 
- cleanup_thread:
-        (void) gf_thread_cleanup_xint (child->thread);
  error_return:
         return -1;
 }
@@ -1242,10 +1230,6 @@ br_enact_scrubber (xlator_t *this, br_child_t *child)
         INIT_LIST_HEAD (&fsscan->queued);
         INIT_LIST_HEAD (&fsscan->ready);
 
-        /* init scheduler related variables */
-        pthread_mutex_init (&fsscan->wakelock, NULL);
-        pthread_cond_init (&fsscan->wakecond, NULL);
-
         ret = br_launch_scrubber (this, child, fsscan, fsscrub);
         if (ret)
                 goto error_return;
@@ -1266,7 +1250,7 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
         int32_t ret = -1;
         br_private_t *priv = this->private;
 
-        LOCK (&child->lock);
+        pthread_mutex_lock (&child->lock);
         {
                 if (priv->iamscrubber)
                         ret = br_enact_scrubber (this, child);
@@ -1281,7 +1265,7 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
                                 "Connected to brick %s..", child->brick_path);
                 }
         }
-        UNLOCK (&child->lock);
+        pthread_mutex_unlock (&child->lock);
 
         return ret;
 }
@@ -1308,6 +1292,7 @@ br_brick_connect (xlator_t *this, br_child_t *child)
         GF_VALIDATE_OR_GOTO (this->name, child, out);
         GF_VALIDATE_OR_GOTO (this->name, this->private, out);
 
+        br_child_set_scrub_state (child, _gf_false);
         br_set_child_state (child, BR_CHILD_STATE_INITIALIZING);
 
         loc.inode = inode_ref (child->table->root);
@@ -1369,12 +1354,17 @@ br_cleanup_scrubber (xlator_t *this, br_child_t *child)
 {
         int32_t ret = 0;
         br_private_t *priv = NULL;
-        struct br_scanfs *fsscan = NULL;
         struct br_scrubber *fsscrub = NULL;
+        struct br_monitor *scrub_monitor = NULL;
 
         priv    = this->private;
-        fsscan  = &child->fsscan;
         fsscrub = &priv->fsscrub;
+        scrub_monitor = &priv->scrub_monitor;
+
+        if (_br_is_child_scrub_active (child)) {
+                scrub_monitor->active_child_count--;
+                br_child_set_scrub_state (child, _gf_false);
+        }
 
         /**
          * 0x0: child (brick) goes out of rotation
@@ -1406,21 +1396,6 @@ br_cleanup_scrubber (xlator_t *this, br_child_t *child)
                         0, BRB_MSG_SCRUB_THREAD_CLEANUP,
                         "Error cleaning up scanner thread");
 
-        /**
-         * 0x2: free()up resources
-         */
-        if (fsscan->timer) {
-                (void) gf_tw_del_timer (priv->timer_wheel, fsscan->timer);
-
-                GF_FREE (fsscan->timer);
-                fsscan->timer = NULL;
-        }
-
-        /**
-         * 0x3: reset scrubber state
-         */
-        _br_child_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE);
-
         gf_msg (this->name, GF_LOG_INFO,
                 0, BRB_MSG_SCRUBBER_CLEANED,
                 "Cleaned up scrubber for brick [%s]", child->brick_path);
@@ -1437,23 +1412,33 @@ int32_t
 br_brick_disconnect (xlator_t *this, br_child_t *child)
 {
         int32_t ret = 0;
+        struct br_monitor *scrub_monitor = NULL;
         br_private_t *priv = this->private;
 
-        LOCK (&child->lock);
+        scrub_monitor = &priv->scrub_monitor;
+
+        /* Lock order must be wakelock first and then the child lock, to
+         * avoid deadlocks.
+         */
+        pthread_mutex_lock (&scrub_monitor->wakelock);
         {
-                if (!_br_is_child_connected (child))
-                        goto unblock;
+                pthread_mutex_lock (&child->lock);
+                {
+                        if (!_br_is_child_connected (child))
+                                goto unblock;
 
-                /* child is on death row.. */
-                _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);
+                        /* child is on death row.. */
+                        _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);
 
-                if (priv->iamscrubber)
-                        ret = br_cleanup_scrubber (this, child);
-                else
-                        ret = br_cleanup_signer (this, child);
-        }
+                        if (priv->iamscrubber)
+                                ret = br_cleanup_scrubber (this, child);
+                        else
+                                ret = br_cleanup_signer (this, child);
+                }
  unblock:
-        UNLOCK (&child->lock);
+                pthread_mutex_unlock (&child->lock);
+        }
+        pthread_mutex_unlock (&scrub_monitor->wakelock);
 
          return ret;
 }
@@ -1574,7 +1559,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict)
 
         memset (key, 0, 256);
         snprintf (key, 256, "scrubbed-files");
-        ret = dict_set_uint32 (*dict, key, scrub_stats->scrubbed_files);
+        ret = dict_set_uint64 (*dict, key, scrub_stats->scrubbed_files);
         if (ret) {
                 gf_msg_debug (this->name, 0, "Failed to setting scrubbed file "
                               "entry to the dictionary");
@@ -1582,7 +1567,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict)
 
         memset (key, 0, 256);
         snprintf (key, 256, "unsigned-files");
-        ret = dict_set_uint32 (*dict, key, scrub_stats->unsigned_files);
+        ret = dict_set_uint64 (*dict, key, scrub_stats->unsigned_files);
         if (ret) {
                 gf_msg_debug (this->name, 0, "Failed to set unsigned file count"
                               " entry to the dictionary");
@@ -1590,7 +1575,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict)
 
         memset (key, 0, 256);
         snprintf (key, 256, "scrub-duration");
-        ret = dict_set_uint32 (*dict, key, scrub_stats->scrub_duration);
+        ret = dict_set_uint64 (*dict, key, scrub_stats->scrub_duration);
         if (ret) {
                 gf_msg_debug (this->name, 0, "Failed to set scrub duration"
                               " entry to the dictionary");
@@ -1848,6 +1833,33 @@ br_signer_init (xlator_t *this, br_private_t *priv)
 }
 
 static void
+br_free_scrubber_monitor (xlator_t *this, br_private_t *priv)
+{
+        struct br_monitor *scrub_monitor = &priv->scrub_monitor;
+        /* Release the monitor's timer, thread and synchronisation objects. */
+        if (scrub_monitor->timer) {
+                (void) gf_tw_del_timer (priv->timer_wheel, scrub_monitor->timer);
+
+                GF_FREE (scrub_monitor->timer);
+                scrub_monitor->timer = NULL;
+        }
+        /* NOTE(review): timer is freed before the thread stops -- confirm safe */
+        (void) gf_thread_cleanup_xint (scrub_monitor->thread);
+
+        /* Destroy the mutex/cond pairs set up by br_scrubber_monitor_init() */
+        pthread_mutex_destroy (&scrub_monitor->mutex);
+        pthread_cond_destroy (&scrub_monitor->cond);
+
+        pthread_mutex_destroy (&scrub_monitor->wakelock);
+        pthread_cond_destroy (&scrub_monitor->wakecond);
+
+        pthread_mutex_destroy (&scrub_monitor->donelock);
+        pthread_cond_destroy (&scrub_monitor->donecond);
+
+        LOCK_DESTROY (&scrub_monitor->lock);
+}
+
+static void
 br_free_children (xlator_t *this, br_private_t *priv, int count)
 {
         br_child_t *child = NULL;
@@ -1855,7 +1867,7 @@ br_free_children (xlator_t *this, br_private_t *priv, int count)
         for (--count; count >= 0; count--) {
                 child = &priv->children[count];
                 mem_pool_destroy (child->timer_pool);
-                LOCK_DESTROY (&child->lock);
+                pthread_mutex_destroy (&child->lock);
         }
 
         GF_FREE (priv->children);
@@ -1879,10 +1891,9 @@ br_init_children (xlator_t *this, br_private_t *priv)
         while (trav) {
                 child = &priv->children[i];
 
-                LOCK_INIT (&child->lock);
+                pthread_mutex_init (&child->lock, NULL);
                 child->witnessed = 0;
 
-                br_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE);
                 br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);
 
                 child->this = this;
@@ -2003,6 +2014,9 @@ fini (xlator_t *this)
 
         if (!priv->iamscrubber)
                 br_fini_signer (this, priv);
+        else
+                (void) br_free_scrubber_monitor (this, priv);
+
         br_free_children (this, priv, priv->child_count);
 
         this->private = NULL;
@@ -2012,26 +2026,23 @@ fini (xlator_t *this)
 }
 
 static void
-br_reconfigure_child (xlator_t *this, br_child_t *child)
+br_reconfigure_monitor (xlator_t *this)
 {
         int32_t ret = 0;
 
-        ret = br_scrub_state_machine (this, child);
+        ret = br_scrub_state_machine (this);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
                         BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
-                        "Could not reschedule scrubber for brick: %s. Scubbing "
-                        "will continue according to old frequency.",
-                        child->brick_path);
+                        "Could not reschedule scrubber for the volume. Scrubbing "
+                        "will continue according to old frequency.");
         }
 }
 
 static int
 br_reconfigure_scrubber (xlator_t *this, dict_t *options)
 {
-        int           i     = 0;
         int32_t       ret   = -1;
-        br_child_t   *child = NULL;
         br_private_t *priv  = NULL;
 
         priv = this->private;
@@ -2046,32 +2057,11 @@ br_reconfigure_scrubber (xlator_t *this, dict_t *options)
                 goto err;
 
         /* change state for all _up_ subvolume(s) */
-        for (; i < priv->child_count; i++) {
-                child = &priv->children[i];
-
-                LOCK (&child->lock);
-                {
-                        if (_br_child_failed_conn (child)) {
-                                gf_msg (this->name, GF_LOG_INFO,
-                                        0, BRB_MSG_BRICK_INFO,
-                                        "Scrubber for brick [%s] failed "
-                                        "initialization, rescheduling is "
-                                        "skipped", child->brick_path);
-                                goto unblock;
-                        }
-
-                        if (_br_is_child_connected (child))
-                                br_reconfigure_child (this, child);
-
-                        /**
-                         * for the rest.. either the child is in initialization
-                         * phase or is disconnected. either way, updated values
-                         * would be reflected on successful connection.
-                         */
-                }
-        unblock:
-                UNLOCK (&child->lock);
+        pthread_mutex_lock (&priv->lock);
+        {
+                br_reconfigure_monitor (this);
         }
+        pthread_mutex_unlock (&priv->lock);
 
  err:
         return ret;
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index 39ce790..835b9ca 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -31,6 +31,7 @@
 
 #include "bit-rot-common.h"
 #include "bit-rot-stub-mem-types.h"
+#include "bit-rot-scrub-status.h"
 
 #include <openssl/sha.h>
 
@@ -68,18 +69,6 @@ struct br_scanfs {
         unsigned int     entries;
         struct list_head queued;
         struct list_head ready;
-
-        /* scheduler */
-        uint32_t boot;
-        gf_boolean_t kick;
-        gf_boolean_t over;
-
-        br_scrub_state_t state;   /* current scrub state */
-
-        pthread_mutex_t wakelock;
-        pthread_cond_t  wakecond;
-
-        struct gf_tw_timer_list *timer;
 };
 
 /* just need three states to track child status */
@@ -91,7 +80,7 @@ typedef enum br_child_state {
 } br_child_state_t;
 
 struct br_child {
-        gf_lock_t lock;               /* protects child state */
+        pthread_mutex_t lock;         /* protects child state */
         char witnessed;               /* witnessed at least one succesfull
                                          connection */
         br_child_state_t c_state;     /* current state of this child */
@@ -116,6 +105,8 @@ struct br_child {
         struct timeval tv;
 
         struct br_scanfs fsscan;      /* per subvolume FS scanner */
+
+        gf_boolean_t active_scrubbing; /* Actively scrubbing or not */
 };
 
 typedef struct br_child br_child_t;
@@ -157,27 +148,42 @@ struct br_scrubber {
         struct list_head scrublist;
 };
 
-typedef struct br_obj_n_workers br_obj_n_workers_t;
+struct br_monitor {
+        gf_lock_t lock;
+        pthread_t thread;         /* Monitor thread */
 
-typedef struct br_private br_private_t;
+        gf_boolean_t  inited;
+        pthread_mutex_t mutex;
+        pthread_cond_t cond;      /* Thread starts and will be waiting on cond.
+                                     First child which is up wakes this up */
 
-typedef void (*br_scrubbed_file_update) (br_private_t *priv);
+        xlator_t *this;
+        /* scheduler */
+        uint32_t boot;
 
-struct br_scrub_stats {
-        uint32_t       scrubbed_files;       /* Total number of scrubbed file */
+        int32_t active_child_count; /* Number of children currently scrubbing */
+        gf_boolean_t kick;          /* This variable tracks the scrubber is
+                                     * kicked or not. Both 'kick' and
+                                     * 'active_child_count' uses the same pair
+                                     * of mutex-cond variable, i.e, wakelock and
+                                     * wakecond. */
 
-        uint32_t       unsigned_files;       /* Total number of unsigned file */
+        pthread_mutex_t wakelock;
+        pthread_cond_t  wakecond;
 
-        uint32_t       scrub_duration;            /* Duration of last scrub */
+        gf_boolean_t done;
+        pthread_mutex_t donelock;
+        pthread_cond_t  donecond;
 
-        char           last_scrub_time[1024];    /*last scrub completion time */
+        struct gf_tw_timer_list *timer;
+        br_scrub_state_t state;   /* current scrub state */
+};
 
-        struct         timeval scrub_start_tv;   /* Scrubbing starting time*/
+typedef struct br_obj_n_workers br_obj_n_workers_t;
 
-        struct         timeval scrub_end_tv;     /* Scrubbing finishing time */
+typedef struct br_private br_private_t;
 
-        pthread_mutex_t  lock;
-};
+typedef void (*br_scrubbed_file_update) (br_private_t *priv);
 
 struct br_private {
         pthread_mutex_t lock;
@@ -214,6 +220,8 @@ struct br_private {
         struct br_scrub_stats scrub_stat; /* statistics of scrub*/
 
         struct br_scrubber fsscrub;       /* scrubbers for this subvolume */
+
+        struct br_monitor scrub_monitor;  /* scrubber monitor */
 };
 
 struct br_object {
@@ -233,7 +241,7 @@ struct br_object {
 };
 
 typedef struct br_object br_object_t;
-typedef int32_t (br_scrub_ssm_call) (xlator_t *, br_child_t *);
+typedef int32_t (br_scrub_ssm_call) (xlator_t *);
 
 void
 br_log_object (xlator_t *, char *, uuid_t, int32_t);
@@ -264,6 +272,12 @@ _br_is_child_connected (br_child_t *child)
 }
 
 static inline int
+_br_is_child_scrub_active (br_child_t *child)
+{
+        return child->active_scrubbing; /* gf_boolean_t flag on the child */
+}
+
+static inline int
 _br_child_failed_conn (br_child_t *child)
 {
         return (child->c_state == BR_CHILD_STATE_CONNFAILED);
@@ -277,10 +291,10 @@ _br_child_witnessed_connection (br_child_t *child)
 
 /* scrub state */
 static inline void
-_br_child_set_scrub_state (br_child_t *child, br_scrub_state_t state)
+_br_monitor_set_scrub_state (struct br_monitor *scrub_monitor,
+                           br_scrub_state_t state)
 {
-        struct br_scanfs *fsscan = &child->fsscan;
-        fsscan->state = state;
+        scrub_monitor->state = state;
 }
 
 static inline br_scrub_event_t
-- 
1.7.1