From 3cf65dfbd5dcbfc72117d236d1cfd132196a4bd4 Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Fri, 29 Apr 2016 17:45:31 +0530 Subject: [PATCH 110/139] features/bitrot: Introduce scrubber monitor thread The patch makes the following changes. 1. Introduce scrubber monitor thread. 2. Move scrub-status-related APIs to a separate file and make them part of the libbitrot library. Problem: Earlier, each child of the scrubber maintained its own state machine, and hence there was no way to track the start and end time of scrubbing, as each brick has its own start and end time. Also, each brick maintained its own timer wheel instance. It was also not possible to get the scrubbed files count per session, as we could not identify the last child to finish scrubbing in order to reset the count to zero. Solution: Introduce scrubber monitor thread. It does the following. 1. Maintains the scrubber state machine. Earlier, each child had its own state machine. Now, only the monitor maintains it on behalf of all its children. 2. Maintains the timer wheel instance. Earlier, each child had its own timer wheel instance. Now, only the monitor maintains it on behalf of all its children. As a result, we can track the scrub statistics easily and correctly. Upstream: master: >BUG: 1329211 >http://review.gluster.org/14044 >BUG: 1332134 >http://review.gluster.org/#/c/14146 release-3.7: >BUG: 1332072 >Reviewed-on: http://review.gluster.org/14140 NOTE: Patch #14146 addresses a compilation warning that was not detected in the master branch and was detected only in the 3.7 branch. Since the compilation warning was introduced by patch #14044, the above two backports are combined into this single patch in release-3.7. 
BUG: 1299737 Change-Id: I437585063ce0d27b8e2123e39f2e16bbc881552a Signed-off-by: Kotresh HR Reviewed-on: https://code.engineering.redhat.com/gerrit/73573 Reviewed-by: Venky Shankar Tested-by: Venky Shankar --- xlators/features/bit-rot/src/bitd/Makefile.am | 6 +- .../bit-rot/src/bitd/bit-rot-bitd-messages.h | 18 +- .../bit-rot/src/bitd/bit-rot-scrub-status.c | 73 +++ .../bit-rot/src/bitd/bit-rot-scrub-status.h | 46 ++ xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 545 ++++++++++++++------ xlators/features/bit-rot/src/bitd/bit-rot-scrub.h | 14 +- xlators/features/bit-rot/src/bitd/bit-rot-ssm.c | 65 ++- xlators/features/bit-rot/src/bitd/bit-rot-ssm.h | 4 +- xlators/features/bit-rot/src/bitd/bit-rot.c | 178 +++---- xlators/features/bit-rot/src/bitd/bit-rot.h | 72 ++- 10 files changed, 697 insertions(+), 324 deletions(-) create mode 100644 xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c create mode 100644 xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am index 154cdfb..cabdf3c 100644 --- a/xlators/features/bit-rot/src/bitd/Makefile.am +++ b/xlators/features/bit-rot/src/bitd/Makefile.am @@ -9,11 +9,13 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(CONTRIBDIR)/timer-wheel \ -I$(top_srcdir)/xlators/features/bit-rot/src/stub -bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c +bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c \ + bit-rot-scrub-status.c bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la -noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h +noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h \ + bit-rot-scrub-status.h AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git 
a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h index c0b83c6..c6b6a4a 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_BITROT_BITD_BASE GLFS_MSGID_COMP_BITROT_BITD -#define GLFS_BITROT_BITD_NUM_MESSAGES 53 +#define GLFS_BITROT_BITD_NUM_MESSAGES 55 #define GLFS_MSGID_END (GLFS_BITROT_BITD_BASE + \ GLFS_BITROT_BITD_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -427,6 +427,22 @@ * */ /*------------*/ +#define BRB_MSG_SSM_FAILED (GLFS_BITROT_BITD_BASE + 54) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +/*------------*/ +#define BRB_MSG_SCRUB_WAIT_FAILED (GLFS_BITROT_BITD_BASE + 55) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +/*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_BITROT_BITD_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c new file mode 100644 index 0000000..0afd7ea --- /dev/null +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c @@ -0,0 +1,73 @@ +/* + Copyright (c) 2016 Red Hat, Inc. + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include + +#include "bit-rot-scrub-status.h" + +void +br_inc_unsigned_file_count (br_scrub_stats_t *scrub_stat) +{ + if (!scrub_stat) + return; + + pthread_mutex_lock (&scrub_stat->lock); + { + scrub_stat->unsigned_files++; + } + pthread_mutex_unlock (&scrub_stat->lock); +} + +void +br_inc_scrubbed_file (br_scrub_stats_t *scrub_stat) +{ + if (!scrub_stat) + return; + + pthread_mutex_lock (&scrub_stat->lock); + { + scrub_stat->scrubbed_files++; + } + pthread_mutex_unlock (&scrub_stat->lock); +} + +void +br_update_scrub_start_time (br_scrub_stats_t *scrub_stat, struct timeval *tv) +{ + if (!scrub_stat) + return; + + pthread_mutex_lock (&scrub_stat->lock); + { + scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec; + } + pthread_mutex_unlock (&scrub_stat->lock); +} + +void +br_update_scrub_finish_time (br_scrub_stats_t *scrub_stat, char *timestr, + struct timeval *tv) +{ + if (!scrub_stat) + return; + + pthread_mutex_lock (&scrub_stat->lock); + { + scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec; + + scrub_stat->scrub_duration = + scrub_stat->scrub_end_tv.tv_sec - + scrub_stat->scrub_start_tv.tv_sec; + + strncpy (scrub_stat->last_scrub_time, timestr, + sizeof (scrub_stat->last_scrub_time)); + } + pthread_mutex_unlock (&scrub_stat->lock); +} diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h new file mode 100644 index 0000000..694ba0a --- /dev/null +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h @@ -0,0 +1,46 @@ +/* + Copyright (c) 2016 Red Hat, Inc. + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __BIT_ROT_SCRUB_STATUS_H__ +#define __BIT_ROT_SCRUB_STATUS_H__ + +#include +#include +#include + +struct br_scrub_stats { + uint64_t scrubbed_files; /* Total number of scrubbed file */ + + uint64_t unsigned_files; /* Total number of unsigned file */ + + uint64_t scrub_duration; /* Duration of last scrub */ + + char last_scrub_time[1024]; /*last scrub completion time */ + + struct timeval scrub_start_tv; /* Scrubbing starting time*/ + + struct timeval scrub_end_tv; /* Scrubbing finishing time */ + + pthread_mutex_t lock; +}; + +typedef struct br_scrub_stats br_scrub_stats_t; + +void +br_inc_unsigned_file_count (br_scrub_stats_t *scrub_stat); +void +br_inc_scrubbed_file (br_scrub_stats_t *scrub_stat); +void +br_update_scrub_start_time (br_scrub_stats_t *scrub_stat, struct timeval *tv); +void +br_update_scrub_finish_time (br_scrub_stats_t *scrub_stat, char *timestr, + struct timeval *tv); + +#endif /* __BIT_ROT_SCRUB_STATUS_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 47d1d26..e36762e 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -24,6 +24,7 @@ #include "bit-rot-scrub.h" #include #include "bit-rot-bitd-messages.h" +#include "bit-rot-scrub-status.h" struct br_scrubbers { pthread_t scrubthread; @@ -79,20 +80,6 @@ bitd_fetch_signature (xlator_t *this, br_child_t *child, } -static void -br_inc_unsigned_file_count (xlator_t *this) -{ - br_private_t *priv = NULL; - - priv = this->private; - - pthread_mutex_lock (&priv->scrub_stat.lock); - { - priv->scrub_stat.unsigned_files++; - } - pthread_mutex_unlock (&priv->scrub_stat.lock); -} - /** * POST COMPUTE CHECK * @@ -106,7 +93,8 @@ int32_t bitd_scrub_post_compute_check (xlator_t *this, br_child_t *child, fd_t *fd, unsigned long version, - br_isignature_out_t **signature) + br_isignature_out_t **signature, + br_scrub_stats_t *scrub_stat) { int32_t ret = 0; 
size_t signlen = 0; @@ -114,8 +102,10 @@ bitd_scrub_post_compute_check (xlator_t *this, br_isignature_out_t *signptr = NULL; ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); - if (ret < 0) + if (ret < 0) { + br_inc_unsigned_file_count (scrub_stat); goto out; + } /** * Either the object got dirtied during the time the signature was @@ -126,7 +116,7 @@ bitd_scrub_post_compute_check (xlator_t *this, * The log entry looks pretty ugly, but helps in debugging.. */ if (signptr->stale || (signptr->version != version)) { - br_inc_unsigned_file_count (this); + br_inc_unsigned_file_count (scrub_stat); gf_msg_debug (this->name, 0, " Object [GFID: %s] " "either has a stale signature OR underwent " "signing during checksumming {Stale: %d | " @@ -154,15 +144,18 @@ bitd_scrub_post_compute_check (xlator_t *this, static int32_t bitd_signature_staleness (xlator_t *this, br_child_t *child, fd_t *fd, - int *stale, unsigned long *version) + int *stale, unsigned long *version, + br_scrub_stats_t *scrub_stat) { int32_t ret = -1; dict_t *xattr = NULL; br_isignature_out_t *signptr = NULL; ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); - if (ret < 0) + if (ret < 0) { + br_inc_unsigned_file_count (scrub_stat); goto out; + } /** * save verison for validation in post compute stage @@ -187,7 +180,8 @@ bitd_signature_staleness (xlator_t *this, */ int32_t bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, - fd_t *fd, unsigned long *version) + fd_t *fd, unsigned long *version, + br_scrub_stats_t *scrub_stat) { int stale = 0; int32_t ret = -1; @@ -199,9 +193,10 @@ bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, goto out; } - ret = bitd_signature_staleness (this, child, fd, &stale, version); + ret = bitd_signature_staleness (this, child, fd, &stale, version, + scrub_stat); if (!ret && stale) { - br_inc_unsigned_file_count (this); + br_inc_unsigned_file_count (scrub_stat); gf_msg_debug (this->name, 0, " Object [GFID: %s] " "has stale 
signature", uuid_utoa (fd->inode->gfid)); @@ -274,16 +269,6 @@ bitd_compare_ckum (xlator_t *this, return ret; } -static void -br_inc_scrubbed_file (br_private_t *priv) -{ - pthread_mutex_lock (&priv->scrub_stat.lock); - { - priv->scrub_stat.scrubbed_files++; - } - pthread_mutex_unlock (&priv->scrub_stat.lock); -} - /** * "The Scrubber" * @@ -376,7 +361,8 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry) * - presence of bad object * - signature staleness */ - ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion); + ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion, + &priv->scrub_stat); if (ret) goto unrefd; /* skip this object */ @@ -399,8 +385,8 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry) * perform post compute checks as an object's signature may have * become stale while scrubber calculated checksum. */ - ret = bitd_scrub_post_compute_check (this, child, - fd, signedversion, &sign); + ret = bitd_scrub_post_compute_check (this, child, fd, signedversion, + &sign, &priv->scrub_stat); if (ret) goto free_md; @@ -408,7 +394,7 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry) linked_inode, entry, fd, child, &loc); /* Increment of total number of scrubbed file counter */ - br_inc_scrubbed_file (priv); + br_inc_scrubbed_file (&priv->scrub_stat); GF_FREE (sign); /* alloced on post-compute */ @@ -562,171 +548,215 @@ br_fsscanner_handle_entry (xlator_t *subvol, } int32_t -br_fsscan_deactivate (xlator_t *this, br_child_t *child) +br_fsscan_deactivate (xlator_t *this) { int ret = 0; br_private_t *priv = NULL; br_scrub_state_t nstate = 0; - struct br_scanfs *fsscan = NULL; + struct br_monitor *scrub_monitor = NULL; priv = this->private; - fsscan = &child->fsscan; + scrub_monitor = &priv->scrub_monitor; - ret = gf_tw_del_timer (priv->timer_wheel, fsscan->timer); + ret = gf_tw_del_timer (priv->timer_wheel, scrub_monitor->timer); if (ret == 0) { nstate = 
BR_SCRUB_STATE_STALLED; gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Brick [%s] is under active scrubbing. Pausing scrub..", - child->brick_path); + "Volume is under active scrubbing. Pausing scrub.."); } else { nstate = BR_SCRUB_STATE_PAUSED; gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Scrubber paused [Brick: %s]", child->brick_path); + "Scrubber paused"); } - _br_child_set_scrub_state (child, nstate); + _br_monitor_set_scrub_state (scrub_monitor, nstate); return 0; } + static void -br_update_scrub_start_time (xlator_t *this, struct timeval *tv) +br_scrubber_log_time (xlator_t *this, const char *sfx) { - br_private_t *priv = NULL; - static int child; + char timestr[1024] = {0,}; + struct timeval tv = {0,}; + br_private_t *priv = NULL; priv = this->private; + gettimeofday (&tv, NULL); + gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT); - /* Setting scrubber starting time for first child only */ - if (child == 0) { - pthread_mutex_lock (&priv->scrub_stat.lock); - { - priv->scrub_stat.scrub_start_tv.tv_sec = tv->tv_sec; - } - pthread_mutex_unlock (&priv->scrub_stat.lock); + if (strcasecmp (sfx, "started") == 0) { + br_update_scrub_start_time (&priv->scrub_stat, &tv); + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, + "Scrubbing %s at %s", sfx, timestr); + } else { + br_update_scrub_finish_time (&priv->scrub_stat, timestr, &tv); + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, + "Scrubbing %s at %s", sfx, timestr); } +} - if (++child == priv->up_children) { - child = 0; +static void +br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx) +{ + char timestr[1024] = {0,}; + struct timeval tv = {0,}; + + gettimeofday (&tv, NULL); + gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT); + + if (strcasecmp (sfx, "started") == 0) { + gf_msg_debug (this->name, 0, "Scrubbing \"%s\" %s at %s", + child->brick_path, sfx, timestr); + } else { + gf_msg_debug (this->name, 0, "Scrubbing 
\"%s\" %s at %s", + child->brick_path, sfx, timestr); } } +void +br_child_set_scrub_state (br_child_t *child, gf_boolean_t state) +{ + child->active_scrubbing = state; +} + static void -br_update_scrub_finish_time (xlator_t *this, char *timestr, struct timeval *tv) +br_fsscanner_wait_until_kicked (xlator_t *this, br_child_t *child) { - br_private_t *priv = NULL; - static int child; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; priv = this->private; + scrub_monitor = &priv->scrub_monitor; - /*Setting scrubber finishing time at time time of last child operation*/ - if (++child == priv->up_children) { - pthread_mutex_lock (&priv->scrub_stat.lock); - { - priv->scrub_stat.scrub_end_tv.tv_sec = tv->tv_sec; - - priv->scrub_stat.scrub_duration = - priv->scrub_stat.scrub_end_tv.tv_sec - - priv->scrub_stat.scrub_start_tv.tv_sec; - - strncpy (priv->scrub_stat.last_scrub_time, timestr, - sizeof (priv->scrub_stat.last_scrub_time)); + pthread_cleanup_push (_br_lock_cleaner, &scrub_monitor->wakelock); + pthread_mutex_lock (&scrub_monitor->wakelock); + { + while (!scrub_monitor->kick) + pthread_cond_wait (&scrub_monitor->wakecond, + &scrub_monitor->wakelock); - child = 0; + /* Child lock is to synchronize with disconnect events */ + pthread_cleanup_push (_br_lock_cleaner, &child->lock); + pthread_mutex_lock (&child->lock); + { + scrub_monitor->active_child_count++; + br_child_set_scrub_state (child, _gf_true); } - pthread_mutex_unlock (&priv->scrub_stat.lock); + pthread_mutex_unlock (&child->lock); + pthread_cleanup_pop (0); } + pthread_mutex_unlock (&scrub_monitor->wakelock); + pthread_cleanup_pop (0); } static void -br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx) +br_scrubber_entry_control (xlator_t *this) { - char timestr[1024] = {0,}; - struct timeval tv = {0,}; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; - gettimeofday (&tv, NULL); - gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, 
gf_timefmt_FT); + priv = this->private; + scrub_monitor = &priv->scrub_monitor; - if (strcasecmp (sfx, "started") == 0) { - br_update_scrub_start_time (this, &tv); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, - "Scrubbing \"%s\" %s at %s", child->brick_path, sfx, - timestr); - } else { - br_update_scrub_finish_time (this, timestr, &tv); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, - "Scrubbing \"%s\" %s at %s", child->brick_path, sfx, - timestr); + LOCK (&scrub_monitor->lock); + { + /* Move the state to BR_SCRUB_STATE_ACTIVE */ + if (scrub_monitor->state == BR_SCRUB_STATE_PENDING) + scrub_monitor->state = BR_SCRUB_STATE_ACTIVE; + br_scrubber_log_time (this, "started"); } + UNLOCK (&scrub_monitor->lock); } static void -br_fsscanner_wait_until_kicked (xlator_t *this, struct br_scanfs *fsscan) +br_scrubber_exit_control (xlator_t *this) { - static int i; - br_private_t *priv = NULL; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; priv = this->private; + scrub_monitor = &priv->scrub_monitor; - pthread_cleanup_push (_br_lock_cleaner, &fsscan->wakelock); - pthread_mutex_lock (&fsscan->wakelock); + LOCK (&scrub_monitor->lock); { - while (!fsscan->kick) - pthread_cond_wait (&fsscan->wakecond, - &fsscan->wakelock); - - /* resetting total number of scrubbed file when scrubbing - * done for all of its children */ - if (i == priv->up_children) { - pthread_mutex_lock (&priv->scrub_stat.lock); - { - priv->scrub_stat.scrubbed_files = 0; - priv->scrub_stat.unsigned_files = 0; - i = 0; - } - pthread_mutex_unlock (&priv->scrub_stat.lock); - } - ++i; + br_scrubber_log_time (this, "finished"); - fsscan->kick = _gf_false; + if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) { + (void) br_fsscan_activate (this); + } else { + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Volume waiting to get rescheduled.."); + } } - pthread_mutex_unlock (&fsscan->wakelock); - pthread_cleanup_pop (0); + UNLOCK (&scrub_monitor->lock); } 
static void br_fsscanner_entry_control (xlator_t *this, br_child_t *child) { - struct br_scanfs *fsscan = &child->fsscan; - - LOCK (&child->lock); - { - if (fsscan->state == BR_SCRUB_STATE_PENDING) - fsscan->state = BR_SCRUB_STATE_ACTIVE; br_fsscanner_log_time (this, child, "started"); - } - UNLOCK (&child->lock); } static void br_fsscanner_exit_control (xlator_t *this, br_child_t *child) { - struct br_scanfs *fsscan = &child->fsscan; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; - LOCK (&child->lock); + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + if (!_br_is_child_connected (child)) { + gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SCRUB_INFO, + "Brick [%s] disconnected while scrubbing. Scrubbing " + "might be incomplete", child->brick_path); + } + + br_fsscanner_log_time (this, child, "finished"); + + pthread_cleanup_push (_br_lock_cleaner, &scrub_monitor->wakelock); + pthread_mutex_lock (&scrub_monitor->wakelock); { - fsscan->over = _gf_true; - br_fsscanner_log_time (this, child, "finished"); + scrub_monitor->active_child_count--; + pthread_cleanup_push (_br_lock_cleaner, &child->lock); + pthread_mutex_lock (&child->lock); + { + br_child_set_scrub_state (child, _gf_false); + } + pthread_mutex_unlock (&child->lock); + pthread_cleanup_pop (0); - if (fsscan->state == BR_SCRUB_STATE_ACTIVE) { - (void) br_fsscan_activate (this, child); + if (scrub_monitor->active_child_count == 0) { + /* The last child has finished scrubbing. + * Set the kick to false and wake up other + * children who are waiting for the last + * child to complete scrubbing. + */ + scrub_monitor->kick = _gf_false; + pthread_cond_broadcast (&scrub_monitor->wakecond); + + /* Signal monitor thread waiting for the all + * the children to finish scrubbing. 
+ */ + pthread_cleanup_push (_br_lock_cleaner, + &scrub_monitor->donelock); + pthread_mutex_lock (&scrub_monitor->donelock); + { + scrub_monitor->done = _gf_true; + pthread_cond_signal (&scrub_monitor->donecond); + } + pthread_mutex_unlock (&scrub_monitor->donelock); + pthread_cleanup_pop (0); } else { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Brick [%s] waiting to get rescheduled..", - child->brick_path); + while (scrub_monitor->active_child_count) + pthread_cond_wait (&scrub_monitor->wakecond, + &scrub_monitor->wakelock); } } - UNLOCK (&child->lock); + pthread_mutex_unlock (&scrub_monitor->wakelock); + pthread_cleanup_pop (0); } void * @@ -745,7 +775,7 @@ br_fsscanner (void *arg) loc.inode = child->table->root; while (1) { - br_fsscanner_wait_until_kicked (this, fsscan); + br_fsscanner_wait_until_kicked (this, child); { /* precursor for scrub */ br_fsscanner_entry_control (this, child); @@ -777,22 +807,29 @@ br_kickstart_scanner (struct gf_tw_timer_list *timer, void *data, unsigned long calltime) { xlator_t *this = NULL; - br_child_t *child = data; - struct br_scanfs *fsscan = NULL; + struct br_monitor *scrub_monitor = data; + br_private_t *priv = NULL; - THIS = this = child->this; - fsscan = &child->fsscan; + THIS = this = scrub_monitor->this; + priv = this->private; + + /* Reset scrub statistics */ + priv->scrub_stat.scrubbed_files = 0; + priv->scrub_stat.unsigned_files = 0; + + /* Moves state from PENDING to ACTIVE */ + (void) br_scrubber_entry_control (this); /* kickstart scanning.. 
*/ - pthread_mutex_lock (&fsscan->wakelock); + pthread_mutex_lock (&scrub_monitor->wakelock); { - fsscan->kick = _gf_true; - pthread_cond_signal (&fsscan->wakecond); + scrub_monitor->kick = _gf_true; + GF_ASSERT (scrub_monitor->active_child_count == 0); + pthread_cond_broadcast (&scrub_monitor->wakecond); } - pthread_mutex_unlock (&fsscan->wakelock); + pthread_mutex_unlock (&scrub_monitor->wakelock); return; - } static uint32_t @@ -836,22 +873,22 @@ br_fsscan_calculate_timeout (scrub_freq_t freq) } int32_t -br_fsscan_schedule (xlator_t *this, br_child_t *child) +br_fsscan_schedule (xlator_t *this) { uint32_t timo = 0; br_private_t *priv = NULL; struct timeval tv = {0,}; char timestr[1024] = {0,}; - struct br_scanfs *fsscan = NULL; struct br_scrubber *fsscrub = NULL; struct gf_tw_timer_list *timer = NULL; + struct br_monitor *scrub_monitor = NULL; priv = this->private; - fsscan = &child->fsscan; fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; (void) gettimeofday (&tv, NULL); - fsscan->boot = tv.tv_sec; + scrub_monitor->boot = tv.tv_sec; timo = br_fsscan_calculate_timeout (fsscrub->frequency); if (timo == 0) { @@ -860,25 +897,25 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child) goto error_return; } - fsscan->timer = GF_CALLOC (1, sizeof (*fsscan->timer), + scrub_monitor->timer = GF_CALLOC (1, sizeof (*scrub_monitor->timer), gf_br_stub_mt_br_scanner_freq_t); - if (!fsscan->timer) + if (!scrub_monitor->timer) goto error_return; - timer = fsscan->timer; + timer = scrub_monitor->timer; INIT_LIST_HEAD (&timer->entry); - timer->data = child; + timer->data = scrub_monitor; timer->expires = timo; timer->function = br_kickstart_scanner; gf_tw_add_timer (priv->timer_wheel, timer); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); + _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING); gf_time_fmt (timestr, sizeof (timestr), - (fsscan->boot + timo), gf_timefmt_FT); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, 
"Scrubbing for " - "%s scheduled to run at %s", child->brick_path, timestr); + (scrub_monitor->boot + timo), gf_timefmt_FT); + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing is " + "scheduled to run at %s", timestr); return 0; @@ -887,18 +924,18 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child) } int32_t -br_fsscan_activate (xlator_t *this, br_child_t *child) +br_fsscan_activate (xlator_t *this) { uint32_t timo = 0; char timestr[1024] = {0,}; struct timeval now = {0,}; br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; struct br_scrubber *fsscrub = NULL; + struct br_monitor *scrub_monitor = NULL; priv = this->private; - fsscan = &child->fsscan; fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; (void) gettimeofday (&now, NULL); timo = br_fsscan_calculate_timeout (fsscrub->frequency); @@ -908,32 +945,37 @@ br_fsscan_activate (xlator_t *this, br_child_t *child) return -1; } - fsscan->over = _gf_false; + pthread_mutex_lock (&scrub_monitor->donelock); + { + scrub_monitor->done = _gf_false; + } + pthread_mutex_unlock (&scrub_monitor->donelock); + gf_time_fmt (timestr, sizeof (timestr), (now.tv_sec + timo), gf_timefmt_FT); - (void) gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo); + (void) gf_tw_mod_timer (priv->timer_wheel, scrub_monitor->timer, timo); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing for " - "%s rescheduled to run at %s", child->brick_path, timestr); + _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING); + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing is " + "rescheduled to run at %s", timestr); return 0; } int32_t -br_fsscan_reschedule (xlator_t *this, br_child_t *child) +br_fsscan_reschedule (xlator_t *this) { int32_t ret = 0; uint32_t timo = 0; char timestr[1024] = {0,}; struct timeval now = {0,}; br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; struct 
br_scrubber *fsscrub = NULL; + struct br_monitor *scrub_monitor = NULL; priv = this->private; - fsscan = &child->fsscan; fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; if (!fsscrub->frequency_reconf) return 0; @@ -949,17 +991,21 @@ br_fsscan_reschedule (xlator_t *this, br_child_t *child) gf_time_fmt (timestr, sizeof (timestr), (now.tv_sec + timo), gf_timefmt_FT); - fsscan->over = _gf_false; - ret = gf_tw_mod_timer_pending (priv->timer_wheel, fsscan->timer, timo); + pthread_mutex_lock (&scrub_monitor->donelock); + { + scrub_monitor->done = _gf_false; + } + pthread_mutex_unlock (&scrub_monitor->donelock); + + ret = gf_tw_mod_timer_pending (priv->timer_wheel, scrub_monitor->timer, timo); if (ret == 0) gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Scrubber for %s is currently running and would be " - "rescheduled after completion", child->brick_path); + "Scrubber is currently running and would be " + "rescheduled after completion"); else { - _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); + _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING); gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Scrubbing for %s rescheduled to run at %s", - child->brick_path, timestr); + "Scrubbing rescheduled to run at %s", timestr); } return 0; @@ -1725,15 +1771,174 @@ out: return ret; } +static int +wait_for_scrub_to_finish (xlator_t *this) +{ + int ret = -1; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + GF_VALIDATE_OR_GOTO ("bit-rot", scrub_monitor, out); + GF_VALIDATE_OR_GOTO ("bit-rot", this, out); + + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Waiting for all children to start and finish scrub"); + + pthread_mutex_lock (&scrub_monitor->donelock); + { + while (!scrub_monitor->done) + pthread_cond_wait (&scrub_monitor->donecond, + &scrub_monitor->donelock); + } + pthread_mutex_unlock 
(&scrub_monitor->donelock); + ret = 0; +out: + return ret; +} + +/** + * This function is executed in a separate thread. This is scrubber monitor + * thread that takes care of state machine. + */ +void * +br_monitor_thread (void *arg) +{ + int32_t ret = 0; + xlator_t *this = NULL; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + this = arg; + priv = this->private; + + /* + * Since, this is the topmost xlator, THIS has to be set by bit-rot + * xlator itself (STACK_WIND wont help in this case). Also it has + * to be done for each thread that gets spawned. Otherwise, a new + * thread will get global_xlator's pointer when it does "THIS". + */ + THIS = this; + + scrub_monitor = &priv->scrub_monitor; + + pthread_mutex_lock (&scrub_monitor->mutex); + { + while (!scrub_monitor->inited) + pthread_cond_wait (&scrub_monitor->cond, + &scrub_monitor->mutex); + } + pthread_mutex_unlock (&scrub_monitor->mutex); + + /* this needs to be serialized with reconfigure() */ + pthread_mutex_lock (&priv->lock); + { + ret = br_scrub_state_machine (this); + } + pthread_mutex_unlock (&priv->lock); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, -ret, + BRB_MSG_SSM_FAILED, + "Scrub state machine failed"); + goto out; + } + + while (1) { + /* Wait for all children to finish scrubbing */ + ret = wait_for_scrub_to_finish (this); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, -ret, + BRB_MSG_SCRUB_WAIT_FAILED, + "Scrub wait failed"); + goto out; + } + + /* scrub exit criteria: Move the state to PENDING */ + br_scrubber_exit_control (this); + } + +out: + return NULL; +} + +static void +br_set_scrub_state (struct br_monitor *scrub_monitor, br_scrub_state_t state) +{ + LOCK (&scrub_monitor->lock); + { + _br_monitor_set_scrub_state (scrub_monitor, state); + } + UNLOCK (&scrub_monitor->lock); +} + +int32_t +br_scrubber_monitor_init (xlator_t *this, br_private_t *priv) +{ + struct br_monitor *scrub_monitor = NULL; + int ret = 0; + + scrub_monitor = &priv->scrub_monitor; + 
+ LOCK_INIT (&scrub_monitor->lock); + scrub_monitor->this = this; + + scrub_monitor->inited = _gf_false; + pthread_mutex_init (&scrub_monitor->mutex, NULL); + pthread_cond_init (&scrub_monitor->cond, NULL); + + scrub_monitor->kick = _gf_false; + scrub_monitor->active_child_count = 0; + pthread_mutex_init (&scrub_monitor->wakelock, NULL); + pthread_cond_init (&scrub_monitor->wakecond, NULL); + + scrub_monitor->done = _gf_false; + pthread_mutex_init (&scrub_monitor->donelock, NULL); + pthread_cond_init (&scrub_monitor->donecond, NULL); + + /* Set the state to INACTIVE */ + br_set_scrub_state (&priv->scrub_monitor, BR_SCRUB_STATE_INACTIVE); + + /* Start the monitor thread */ + ret = gf_thread_create (&scrub_monitor->thread, NULL, br_monitor_thread, this); + if (ret != 0) { + gf_msg (this->name, GF_LOG_ERROR, -ret, + BRB_MSG_SPAWN_FAILED, "monitor thread creation failed"); + ret = -1; + goto err; + } + + return 0; +err: + pthread_mutex_destroy (&scrub_monitor->mutex); + pthread_cond_destroy (&scrub_monitor->cond); + + pthread_mutex_destroy (&scrub_monitor->wakelock); + pthread_cond_destroy (&scrub_monitor->wakecond); + + pthread_mutex_destroy (&scrub_monitor->donelock); + pthread_cond_destroy (&scrub_monitor->donecond); + + LOCK_DESTROY (&scrub_monitor->lock); + + return ret; +} + int32_t br_scrubber_init (xlator_t *this, br_private_t *priv) { struct br_scrubber *fsscrub = NULL; + int ret = 0; priv->tbf = br_tbf_init (NULL, 0); if (!priv->tbf) return -1; + ret = br_scrubber_monitor_init (this, priv); + if (ret) + return -1; + fsscrub = &priv->fsscrub; fsscrub->this = this; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h index e730582..93bb296 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h @@ -16,15 +16,21 @@ void *br_fsscanner (void *); -int32_t br_fsscan_schedule (xlator_t *, br_child_t *); -int32_t br_fsscan_reschedule (xlator_t 
*, br_child_t *); -int32_t br_fsscan_activate (xlator_t *, br_child_t *); -int32_t br_fsscan_deactivate (xlator_t *, br_child_t *); +int32_t br_fsscan_schedule (xlator_t *); +int32_t br_fsscan_reschedule (xlator_t *); +int32_t br_fsscan_activate (xlator_t *); +int32_t br_fsscan_deactivate (xlator_t *); int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *); +int32_t +br_scrubber_monitor_init (xlator_t *, br_private_t *); + int32_t br_scrubber_init (xlator_t *, br_private_t *); int32_t br_collect_bad_objects_from_children (xlator_t *this, dict_t *dict); +void +br_child_set_scrub_state (br_child_t *, gf_boolean_t); + #endif /* __BIT_ROT_SCRUB_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c index fcffc04..d304fc8 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c @@ -12,52 +12,73 @@ #include "bit-rot-scrub.h" #include "bit-rot-bitd-messages.h" -int br_scrub_ssm_noop (xlator_t *this, br_child_t *child) +int br_scrub_ssm_noop (xlator_t *this) { return 0; } int -br_scrub_ssm_state_pause (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_pause (xlator_t *this) { + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, - "Scrubber paused [Brick: %s]", child->brick_path); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_PAUSED); + "Scrubber paused"); + _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PAUSED); return 0; } int -br_scrub_ssm_state_ipause (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_ipause (xlator_t *this) { + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, - "Scrubber paused [Brick: %s]", 
child->brick_path); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_IPAUSED); + "Scrubber paused"); + _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_IPAUSED); return 0; } int -br_scrub_ssm_state_active (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_active (xlator_t *this) { - struct br_scanfs *fsscan = &child->fsscan; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; - if (fsscan->over) { - (void) br_fsscan_activate (this, child); + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + if (scrub_monitor->done) { + (void) br_fsscan_activate (this); } else { gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, - "Scrubbing resumed [Brick %s]", child->brick_path); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_ACTIVE); + "Scrubbing resumed"); + _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_ACTIVE); } return 0; } int -br_scrub_ssm_state_stall (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_stall (xlator_t *this) { + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, - "Brick [%s] is under active scrubbing. Pausing scrub..", - child->brick_path); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_STALLED); + "Volume is under active scrubbing. 
Pausing scrub.."); + _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_STALLED); return 0; } @@ -72,22 +93,22 @@ br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = { }; int32_t -br_scrub_state_machine (xlator_t *this, br_child_t *child) +br_scrub_state_machine (xlator_t *this) { br_private_t *priv = NULL; br_scrub_ssm_call *call = NULL; - struct br_scanfs *fsscan = NULL; struct br_scrubber *fsscrub = NULL; br_scrub_state_t currstate = 0; br_scrub_event_t event = 0; + struct br_monitor *scrub_monitor = NULL; priv = this->private; - fsscan = &child->fsscan; fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; - currstate = fsscan->state; + currstate = scrub_monitor->state; event = _br_child_get_scrub_event (fsscrub); call = br_scrub_ssm[currstate][event]; - return call (this, child); + return call (this); } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h index 72fd62b..936ee4d 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h @@ -29,8 +29,8 @@ typedef enum br_scrub_event { BR_SCRUB_MAXEVENTS, } br_scrub_event_t; -struct br_child; +struct br_monitor; -int32_t br_scrub_state_machine (xlator_t *, struct br_child *); +int32_t br_scrub_state_machine (xlator_t *); #endif /* __BIT_ROT_SSM_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 0eba447..45f8d1d 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -1097,21 +1097,11 @@ br_oneshot_signer (void *arg) static void br_set_child_state (br_child_t *child, br_child_state_t state) { - LOCK (&child->lock); + pthread_mutex_lock (&child->lock); { _br_set_child_state (child, state); } - UNLOCK (&child->lock); -} - -static void -br_set_scrub_state (br_child_t *child, br_scrub_state_t state) -{ - LOCK (&child->lock); - { - _br_child_set_scrub_state (child, 
state); - } - UNLOCK (&child->lock); + pthread_mutex_unlock (&child->lock); } /** @@ -1173,11 +1163,11 @@ br_launch_scrubber (xlator_t *this, br_child_t *child, { int32_t ret = -1; br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; priv = this->private; - fsscan->kick = _gf_false; - fsscan->over = _gf_false; + scrub_monitor = &priv->scrub_monitor; ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child); if (ret != 0) { gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, @@ -1186,14 +1176,14 @@ br_launch_scrubber (xlator_t *this, br_child_t *child, goto error_return; } - /* this needs to be serialized with reconfigure() */ - pthread_mutex_lock (&priv->lock); + /* Signal monitor to kick off state machine*/ + pthread_mutex_lock (&scrub_monitor->mutex); { - ret = br_scrub_state_machine (this, child); + if (!scrub_monitor->inited) + pthread_cond_signal (&scrub_monitor->cond); + scrub_monitor->inited = _gf_true; } - pthread_mutex_unlock (&priv->lock); - if (ret) - goto cleanup_thread; + pthread_mutex_unlock (&scrub_monitor->mutex); /** * Everything has been setup.. 
add this subvolume to scrubbers @@ -1208,8 +1198,6 @@ br_launch_scrubber (xlator_t *this, br_child_t *child, return 0; - cleanup_thread: - (void) gf_thread_cleanup_xint (child->thread); error_return: return -1; } @@ -1242,10 +1230,6 @@ br_enact_scrubber (xlator_t *this, br_child_t *child) INIT_LIST_HEAD (&fsscan->queued); INIT_LIST_HEAD (&fsscan->ready); - /* init scheduler related variables */ - pthread_mutex_init (&fsscan->wakelock, NULL); - pthread_cond_init (&fsscan->wakecond, NULL); - ret = br_launch_scrubber (this, child, fsscan, fsscrub); if (ret) goto error_return; @@ -1266,7 +1250,7 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub) int32_t ret = -1; br_private_t *priv = this->private; - LOCK (&child->lock); + pthread_mutex_lock (&child->lock); { if (priv->iamscrubber) ret = br_enact_scrubber (this, child); @@ -1281,7 +1265,7 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub) "Connected to brick %s..", child->brick_path); } } - UNLOCK (&child->lock); + pthread_mutex_unlock (&child->lock); return ret; } @@ -1308,6 +1292,7 @@ br_brick_connect (xlator_t *this, br_child_t *child) GF_VALIDATE_OR_GOTO (this->name, child, out); GF_VALIDATE_OR_GOTO (this->name, this->private, out); + br_child_set_scrub_state (child, _gf_false); br_set_child_state (child, BR_CHILD_STATE_INITIALIZING); loc.inode = inode_ref (child->table->root); @@ -1369,12 +1354,17 @@ br_cleanup_scrubber (xlator_t *this, br_child_t *child) { int32_t ret = 0; br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; struct br_scrubber *fsscrub = NULL; + struct br_monitor *scrub_monitor = NULL; priv = this->private; - fsscan = &child->fsscan; fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; + + if (_br_is_child_scrub_active (child)) { + scrub_monitor->active_child_count--; + br_child_set_scrub_state (child, _gf_false); + } /** * 0x0: child (brick) goes out of rotation @@ -1406,21 +1396,6 @@ br_cleanup_scrubber (xlator_t 
*this, br_child_t *child) 0, BRB_MSG_SCRUB_THREAD_CLEANUP, "Error cleaning up scanner thread"); - /** - * 0x2: free()up resources - */ - if (fsscan->timer) { - (void) gf_tw_del_timer (priv->timer_wheel, fsscan->timer); - - GF_FREE (fsscan->timer); - fsscan->timer = NULL; - } - - /** - * 0x3: reset scrubber state - */ - _br_child_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED, "Cleaned up scrubber for brick [%s]", child->brick_path); @@ -1437,23 +1412,33 @@ int32_t br_brick_disconnect (xlator_t *this, br_child_t *child) { int32_t ret = 0; + struct br_monitor *scrub_monitor = NULL; br_private_t *priv = this->private; - LOCK (&child->lock); + scrub_monitor = &priv->scrub_monitor; + + /* Lock order should be wakelock and then child lock to + * avoid dead locks. + */ + pthread_mutex_lock (&scrub_monitor->wakelock); { - if (!_br_is_child_connected (child)) - goto unblock; + pthread_mutex_lock (&child->lock); + { + if (!_br_is_child_connected (child)) + goto unblock; - /* child is on death row.. 
*/ + _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); - if (priv->iamscrubber) - ret = br_cleanup_scrubber (this, child); - else - ret = br_cleanup_signer (this, child); - } + if (priv->iamscrubber) + ret = br_cleanup_scrubber (this, child); + else + ret = br_cleanup_signer (this, child); + } unblock: - UNLOCK (&child->lock); + pthread_mutex_unlock (&child->lock); + } + pthread_mutex_unlock (&scrub_monitor->wakelock); return ret; } @@ -1574,7 +1559,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict) memset (key, 0, 256); snprintf (key, 256, "scrubbed-files"); - ret = dict_set_uint32 (*dict, key, scrub_stats->scrubbed_files); + ret = dict_set_uint64 (*dict, key, scrub_stats->scrubbed_files); if (ret) { gf_msg_debug (this->name, 0, "Failed to setting scrubbed file " "entry to the dictionary"); @@ -1582,7 +1567,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict) memset (key, 0, 256); snprintf (key, 256, "unsigned-files"); - ret = dict_set_uint32 (*dict, key, scrub_stats->unsigned_files); + ret = dict_set_uint64 (*dict, key, scrub_stats->unsigned_files); if (ret) { gf_msg_debug (this->name, 0, "Failed to set unsigned file count" " entry to the dictionary"); @@ -1590,7 +1575,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict) memset (key, 0, 256); snprintf (key, 256, "scrub-duration"); - ret = dict_set_uint32 (*dict, key, scrub_stats->scrub_duration); + ret = dict_set_uint64 (*dict, key, scrub_stats->scrub_duration); if (ret) { gf_msg_debug (this->name, 0, "Failed to set scrub duration" " entry to the dictionary"); @@ -1848,6 +1833,33 @@ br_signer_init (xlator_t *this, br_private_t *priv) } static void +br_free_scrubber_monitor (xlator_t *this, br_private_t *priv) +{ + struct br_monitor *scrub_monitor = &priv->scrub_monitor; + + if (scrub_monitor->timer) { + (void) gf_tw_del_timer (priv->timer_wheel, scrub_monitor->timer); + + GF_FREE (scrub_monitor->timer); + scrub_monitor->timer = NULL; + } + + (void) gf_thread_cleanup_xint 
(scrub_monitor->thread); + + /* Clean up cond and mutex variables */ + pthread_mutex_destroy (&scrub_monitor->mutex); + pthread_cond_destroy (&scrub_monitor->cond); + + pthread_mutex_destroy (&scrub_monitor->wakelock); + pthread_cond_destroy (&scrub_monitor->wakecond); + + pthread_mutex_destroy (&scrub_monitor->donelock); + pthread_cond_destroy (&scrub_monitor->donecond); + + LOCK_DESTROY (&scrub_monitor->lock); +} + +static void br_free_children (xlator_t *this, br_private_t *priv, int count) { br_child_t *child = NULL; @@ -1855,7 +1867,7 @@ br_free_children (xlator_t *this, br_private_t *priv, int count) for (--count; count >= 0; count--) { child = &priv->children[count]; mem_pool_destroy (child->timer_pool); - LOCK_DESTROY (&child->lock); + pthread_mutex_destroy (&child->lock); } GF_FREE (priv->children); @@ -1879,10 +1891,9 @@ br_init_children (xlator_t *this, br_private_t *priv) while (trav) { child = &priv->children[i]; - LOCK_INIT (&child->lock); + pthread_mutex_init (&child->lock, NULL); child->witnessed = 0; - br_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE); br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); child->this = this; @@ -2003,6 +2014,9 @@ fini (xlator_t *this) if (!priv->iamscrubber) br_fini_signer (this, priv); + else + (void) br_free_scrubber_monitor (this, priv); + br_free_children (this, priv, priv->child_count); this->private = NULL; @@ -2012,26 +2026,23 @@ fini (xlator_t *this) } static void -br_reconfigure_child (xlator_t *this, br_child_t *child) +br_reconfigure_monitor (xlator_t *this) { int32_t ret = 0; - ret = br_scrub_state_machine (this, child); + ret = br_scrub_state_machine (this); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, - "Could not reschedule scrubber for brick: %s. Scubbing " - "will continue according to old frequency.", - child->brick_path); + "Could not reschedule scrubber for the volume. 
Scrubbing " + "will continue according to old frequency."); } } static int br_reconfigure_scrubber (xlator_t *this, dict_t *options) { - int i = 0; int32_t ret = -1; - br_child_t *child = NULL; br_private_t *priv = NULL; priv = this->private; @@ -2046,32 +2057,11 @@ br_reconfigure_scrubber (xlator_t *this, dict_t *options) goto err; /* change state for all _up_ subvolume(s) */ - for (; i < priv->child_count; i++) { - child = &priv->children[i]; - - LOCK (&child->lock); - { - if (_br_child_failed_conn (child)) { - gf_msg (this->name, GF_LOG_INFO, - 0, BRB_MSG_BRICK_INFO, - "Scrubber for brick [%s] failed " - "initialization, rescheduling is " - "skipped", child->brick_path); - goto unblock; - } - - if (_br_is_child_connected (child)) - br_reconfigure_child (this, child); - - /** - * for the rest.. either the child is in initialization - * phase or is disconnected. either way, updated values - * would be reflected on successful connection. - */ - } - unblock: - UNLOCK (&child->lock); + pthread_mutex_lock (&priv->lock); + { + br_reconfigure_monitor (this); } + pthread_mutex_unlock (&priv->lock); err: return ret; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 39ce790..835b9ca 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -31,6 +31,7 @@ #include "bit-rot-common.h" #include "bit-rot-stub-mem-types.h" +#include "bit-rot-scrub-status.h" #include @@ -68,18 +69,6 @@ struct br_scanfs { unsigned int entries; struct list_head queued; struct list_head ready; - - /* scheduler */ - uint32_t boot; - gf_boolean_t kick; - gf_boolean_t over; - - br_scrub_state_t state; /* current scrub state */ - - pthread_mutex_t wakelock; - pthread_cond_t wakecond; - - struct gf_tw_timer_list *timer; }; /* just need three states to track child status */ @@ -91,7 +80,7 @@ typedef enum br_child_state { } br_child_state_t; struct br_child { - gf_lock_t lock; /* protects child 
state */ + pthread_mutex_t lock; /* protects child state */ char witnessed; /* witnessed at least one succesfull connection */ br_child_state_t c_state; /* current state of this child */ @@ -116,6 +105,8 @@ struct br_child { struct timeval tv; struct br_scanfs fsscan; /* per subvolume FS scanner */ + + gf_boolean_t active_scrubbing; /* Actively scrubbing or not */ }; typedef struct br_child br_child_t; @@ -157,27 +148,42 @@ struct br_scrubber { struct list_head scrublist; }; -typedef struct br_obj_n_workers br_obj_n_workers_t; +struct br_monitor { + gf_lock_t lock; + pthread_t thread; /* Monitor thread */ -typedef struct br_private br_private_t; + gf_boolean_t inited; + pthread_mutex_t mutex; + pthread_cond_t cond; /* Thread starts and will be waiting on cond. + First child which is up wakes this up */ -typedef void (*br_scrubbed_file_update) (br_private_t *priv); + xlator_t *this; + /* scheduler */ + uint32_t boot; -struct br_scrub_stats { - uint32_t scrubbed_files; /* Total number of scrubbed file */ + int32_t active_child_count; /* Number of children currently scrubbing */ + gf_boolean_t kick; /* This variable tracks the scrubber is + * kicked or not. Both 'kick' and + * 'active_child_count' uses the same pair + * of mutex-cond variable, i.e, wakelock and + * wakecond. 
*/ - uint32_t unsigned_files; /* Total number of unsigned file */ + pthread_mutex_t wakelock; + pthread_cond_t wakecond; - uint32_t scrub_duration; /* Duration of last scrub */ + gf_boolean_t done; + pthread_mutex_t donelock; + pthread_cond_t donecond; - char last_scrub_time[1024]; /*last scrub completion time */ + struct gf_tw_timer_list *timer; + br_scrub_state_t state; /* current scrub state */ +}; - struct timeval scrub_start_tv; /* Scrubbing starting time*/ +typedef struct br_obj_n_workers br_obj_n_workers_t; - struct timeval scrub_end_tv; /* Scrubbing finishing time */ +typedef struct br_private br_private_t; - pthread_mutex_t lock; -}; +typedef void (*br_scrubbed_file_update) (br_private_t *priv); struct br_private { pthread_mutex_t lock; @@ -214,6 +220,8 @@ struct br_private { struct br_scrub_stats scrub_stat; /* statistics of scrub*/ struct br_scrubber fsscrub; /* scrubbers for this subvolume */ + + struct br_monitor scrub_monitor; /* scrubber monitor */ }; struct br_object { @@ -233,7 +241,7 @@ struct br_object { }; typedef struct br_object br_object_t; -typedef int32_t (br_scrub_ssm_call) (xlator_t *, br_child_t *); +typedef int32_t (br_scrub_ssm_call) (xlator_t *); void br_log_object (xlator_t *, char *, uuid_t, int32_t); @@ -264,6 +272,12 @@ _br_is_child_connected (br_child_t *child) } static inline int +_br_is_child_scrub_active (br_child_t *child) +{ + return child->active_scrubbing; +} + +static inline int _br_child_failed_conn (br_child_t *child) { return (child->c_state == BR_CHILD_STATE_CONNFAILED); @@ -277,10 +291,10 @@ _br_child_witnessed_connection (br_child_t *child) /* scrub state */ static inline void -_br_child_set_scrub_state (br_child_t *child, br_scrub_state_t state) +_br_monitor_set_scrub_state (struct br_monitor *scrub_monitor, + br_scrub_state_t state) { - struct br_scanfs *fsscan = &child->fsscan; - fsscan->state = state; + scrub_monitor->state = state; } static inline br_scrub_event_t -- 1.7.1