From 11dd368eae316c3cb9a1e82e7b7213d0ee4224c6 Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Thu, 4 Jun 2015 08:50:48 +0530 Subject: [PATCH 159/190] features/bitrot: cleanup, v2 Backport of http://review.gluster.org/11148 This patch uses "cleanup, v1" infrastrcuture to cleanup scrubber (data structures, threads, timers, etc..) on brick disconnection. Signer is not cleaned up yet: probably would be done as part of another patch. Change-Id: I78a92b8a7f02b2f39078aa9a5a6b101fc499fd70 BUG: 1232309 Signed-off-by: Venky Shankar Reviewed-on: https://code.engineering.redhat.com/gerrit/51744 Reviewed-by: Raghavendra Bhat Tested-by: Raghavendra Bhat --- contrib/timer-wheel/timer-wheel.c | 10 +- contrib/timer-wheel/timer-wheel.h | 2 +- libglusterfs/src/common-utils.c | 11 ++ libglusterfs/src/common-utils.h | 3 + xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 27 ++- xlators/features/bit-rot/src/bitd/bit-rot.c | 186 +++++++++++++++++--- xlators/features/bit-rot/src/bitd/bit-rot.h | 12 +- xlators/features/changelog/src/changelog-helpers.c | 30 +--- 8 files changed, 214 insertions(+), 67 deletions(-) diff --git a/contrib/timer-wheel/timer-wheel.c b/contrib/timer-wheel/timer-wheel.c index d9b2ccd..013c0f2 100644 --- a/contrib/timer-wheel/timer-wheel.c +++ b/contrib/timer-wheel/timer-wheel.c @@ -218,14 +218,20 @@ void gf_tw_add_timer (struct tvec_base *base, struct gf_tw_timer_list *timer) /** * Remove a timer from the timer wheel */ -void gf_tw_del_timer (struct tvec_base *base, struct gf_tw_timer_list *timer) +int gf_tw_del_timer (struct tvec_base *base, struct gf_tw_timer_list *timer) { + int ret = 0; + pthread_spin_lock (&base->lock); { - if (timer_pending (timer)) + if (timer_pending (timer)) { + ret = 1; __gf_tw_detach_timer (timer); + } } pthread_spin_unlock (&base->lock); + + return ret; } int gf_tw_mod_timer_pending (struct tvec_base *base, diff --git a/contrib/timer-wheel/timer-wheel.h b/contrib/timer-wheel/timer-wheel.h index c52f2fc..faa941a 100644 --- a/contrib/timer-wheel/timer-wheel.h +++ b/contrib/timer-wheel/timer-wheel.h @@ -66,7 +66,7 @@ struct gf_tw_timer_list { struct tvec_base *gf_tw_init_timers (); int gf_tw_cleanup_timers (struct tvec_base *); void gf_tw_add_timer (struct tvec_base *, struct gf_tw_timer_list *); -void gf_tw_del_timer (struct tvec_base *, struct gf_tw_timer_list *); +int gf_tw_del_timer (struct tvec_base *, struct gf_tw_timer_list *); int gf_tw_mod_timer_pending (struct tvec_base *, struct gf_tw_timer_list *, unsigned long); diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index be5d897..cbc5230 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -4026,3 +4026,14 @@ out: return ret; } +void +_mask_cancellation (void) +{ + (void) pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); +} + +void +_unmask_cancellation (void) +{ + (void) pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); +} diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h index ad87559..a64917f 100644 --- a/libglusterfs/src/common-utils.h +++ b/libglusterfs/src/common-utils.h @@ -762,4 +762,7 @@ gf_nread (int fd, void *buf, size_t count); ssize_t gf_nwrite (int fd, const void *buf, size_t count); +void _mask_cancellation (void); +void _unmask_cancellation (void); + #endif /* _COMMON_UTILS_H */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 45499de..d6ee413 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -393,6 +393,14 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry) } static void +_br_lock_cleaner (void *arg) +{ + pthread_mutex_t *mutex = arg; + + pthread_mutex_unlock (mutex); +} + +static void wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) { br_private_t *priv = NULL; @@ -401,8 +409,10 @@ wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) priv = this->private; fsscrub = &priv->fsscrub; + pthread_cleanup_push (_br_lock_cleaner, &fsscan->waitlock); pthread_mutex_lock (&fsscan->waitlock); { + pthread_cleanup_push (_br_lock_cleaner, &fsscrub->mutex); pthread_mutex_lock (&fsscrub->mutex); { list_replace_init (&fsscan->queued, &fsscan->ready); @@ -411,12 +421,14 @@ wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) pthread_cond_broadcast (&fsscrub->cond); } pthread_mutex_unlock (&fsscrub->mutex); + pthread_cleanup_pop (0); while (fsscan->entries != 0) pthread_cond_wait (&fsscan->waitcond, &fsscan->waitlock); } pthread_mutex_unlock (&fsscan->waitlock); + pthread_cleanup_pop (0); } static inline void @@ -465,6 +477,8 @@ br_fsscanner_handle_entry (xlator_t *subvol, this = child->this; fsscan = &child->fsscan; + _mask_cancellation (); + fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t); if (!fsentry) goto error_return; @@ -499,6 +513,8 @@ br_fsscanner_handle_entry (xlator_t *subvol, } UNLOCK (&fsscan->entrylock); + _unmask_cancellation (); + if (scrub) wait_for_scrubbing (this, fsscan); @@ -535,6 +551,7 @@ br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx) static void br_fsscanner_wait_until_kicked (struct br_scanfs *fsscan) { + pthread_cleanup_push (_br_lock_cleaner, &fsscan->wakelock); pthread_mutex_lock (&fsscan->wakelock); { while (!fsscan->kick) @@ -543,6 +560,7 @@ br_fsscanner_wait_until_kicked (struct br_scanfs *fsscan) fsscan->kick = _gf_false; } pthread_mutex_unlock (&fsscan->wakelock); + pthread_cleanup_pop (0); } void * @@ -778,13 +796,6 @@ br_scrubber_calc_scale (xlator_t *this, } -static void -br_scrubber_cleanup_handler (void *arg) -{ - struct br_scrubber *fsscrub = arg; - pthread_mutex_unlock (&fsscrub->mutex); -} - static inline br_child_t * _br_scrubber_get_next_child (struct br_scrubber *fsscrub) { @@ -844,7 +855,7 @@ static void br_scrubber_pick_entry (struct br_scrubber *fsscrub, struct br_fsscan_entry **fsentry) { - pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub); + pthread_cleanup_push (_br_lock_cleaner, &fsscrub->mutex); pthread_mutex_lock (&fsscrub->mutex); { diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index b855cd7..f2cd1d8 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -1103,7 +1103,7 @@ br_set_child_state (br_child_t *child, br_child_state_t state) * Also, we register to the changelog library to subscribe for event * notifications. */ -static inline int32_t +static int32_t br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) { int32_t ret = 0; @@ -1145,32 +1145,16 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) return -1; } -static inline int32_t -br_enact_scrubber (xlator_t *this, br_child_t *child) +static int32_t +br_launch_scrubber (xlator_t *this, br_child_t *child, + struct br_scanfs *fsscan, struct br_scrubber *fsscrub) { - int32_t ret = 0; + int32_t ret = -1; br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; - struct br_scrubber *fsscrub = NULL; priv = this->private; - fsscan = &child->fsscan; - fsscrub = &priv->fsscrub; - - LOCK_INIT (&fsscan->entrylock); - pthread_mutex_init (&fsscan->waitlock, NULL); - pthread_cond_init (&fsscan->waitcond, NULL); - - fsscan->entries = 0; - INIT_LIST_HEAD (&fsscan->queued); - INIT_LIST_HEAD (&fsscan->ready); - - /* init scheduler related variables */ fsscan->kick = _gf_false; - pthread_mutex_init (&fsscan->wakelock, NULL); - pthread_cond_init (&fsscan->wakecond, NULL); - ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child); if (ret != 0) { gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, @@ -1186,7 +1170,7 @@ br_enact_scrubber (xlator_t *this, br_child_t *child) } pthread_mutex_unlock (&priv->lock); if (ret) - goto error_return; + goto cleanup_thread; /** * Everything has been setup.. add this subvolume to scrubbers @@ -1201,6 +1185,50 @@ br_enact_scrubber (xlator_t *this, br_child_t *child) return 0; + cleanup_thread: + (void) gf_thread_cleanup_xint (child->thread); + error_return: + return -1; +} + +static int32_t +br_enact_scrubber (xlator_t *this, br_child_t *child) +{ + int32_t ret = 0; + br_private_t *priv = NULL; + struct br_scanfs *fsscan = NULL; + struct br_scrubber *fsscrub = NULL; + + priv = this->private; + + fsscan = &child->fsscan; + fsscrub = &priv->fsscrub; + + /** + * if this child already witnesses a successfull connection earlier + * there's no need to initialize mutexes, condvars, etc.. + */ + if (_br_child_witnessed_connection (child)) + return br_launch_scrubber (this, child, fsscan, fsscrub); + + LOCK_INIT (&fsscan->entrylock); + pthread_mutex_init (&fsscan->waitlock, NULL); + pthread_cond_init (&fsscan->waitcond, NULL); + + fsscan->entries = 0; + INIT_LIST_HEAD (&fsscan->queued); + INIT_LIST_HEAD (&fsscan->ready); + + /* init scheduler related variables */ + pthread_mutex_init (&fsscan->wakelock, NULL); + pthread_cond_init (&fsscan->wakecond, NULL); + + ret = br_launch_scrubber (this, child, fsscan, fsscrub); + if (ret) + goto error_return; + + return 0; + error_return: LOCK_DESTROY (&fsscan->entrylock); pthread_mutex_destroy (&fsscan->waitlock); @@ -1223,6 +1251,7 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub) ret = br_enact_signer (this, child, stub); if (!ret) { + child->witnessed = 1; _br_set_child_state (child, BR_CHILD_STATE_CONNECTED); gf_log (this->name, GF_LOG_INFO, "Connected to brick %s..", child->brick_path); @@ -1304,6 +1333,100 @@ br_brick_connect (xlator_t *this, br_child_t *child) return ret; } +/* TODO: cleanup signer */ +static int32_t +br_cleanup_signer (xlator_t *this, br_child_t *child) +{ + return 0; +} + +static int32_t +br_cleanup_scrubber (xlator_t *this, br_child_t *child) +{ + int32_t ret = 0; + br_private_t *priv = NULL; + struct br_scanfs *fsscan = NULL; + struct br_scrubber *fsscrub = NULL; + + priv = this->private; + fsscan = &child->fsscan; + fsscrub = &priv->fsscrub; + + /** + * 0x0: child (brick) goes out of rotation + * + * This is fully safe w.r.t. entries for this child being actively + * scrubbed. Each of the scrubber thread(s) would finish scrubbing + * the entry (probably failing due to disconnection) and either + * putting the entry back into the queue or continuing further. + * Either way, pending entries for this child's queue need not be + * drained; entries just sit there in the queued/ready list to be + * consumed later upon re-connection. + */ + pthread_mutex_lock (&fsscrub->mutex); + { + list_del_init (&child->list); + } + pthread_mutex_unlock (&fsscrub->mutex); + + /** + * 0x1: cleanup scanner thread + * + * The pending timer needs to be removed _after_ cleaning up the + * filesystem scanner (scheduling the next scrub time is not a + * cancellation point). + */ + ret = gf_thread_cleanup_xint (child->thread); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Error cleaning up scanner thread"); + + /** + * 0x2: free()up resources + */ + if (fsscan->timer) { + (void) gf_tw_del_timer (priv->timer_wheel, fsscan->timer); + + GF_FREE (fsscan->timer); + fsscan->timer = NULL; + } + + gf_log (this->name, GF_LOG_INFO, + "Cleaned up scrubber for brick [%s]", child->brick_path); + + return 0; +} + +/** + * OK.. this child has made it's mind to go down the drain. So, + * let's clean up what it touched. (NOTE: there's no need to clean + * the inode table, it's just reused taking care of stale inodes) + */ +int32_t +br_brick_disconnect (xlator_t *this, br_child_t *child) +{ + int32_t ret = 0; + br_private_t *priv = this->private; + + LOCK (&child->lock); + { + if (!_br_is_child_connected (child)) + goto unblock; + + /* child is on death row.. */ + _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); + + if (priv->iamscrubber) + ret = br_cleanup_scrubber (this, child); + else + ret = br_cleanup_signer (this, child); + } + unblock: + UNLOCK (&child->lock); + + return ret; +} + /** * This function is executed in a separate thread. The thread gets the * brick from where CHILD_UP has received from the queue and gets the @@ -1424,7 +1547,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) { child = &priv->children[idx]; if (child->child_up == 1) - goto unblock; + goto unblock_0; priv->up_children++; child->child_up = 1; @@ -1435,7 +1558,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) _br_qchild_event (this, child, br_brick_connect); pthread_cond_signal (&priv->cond); } - unblock: + unblock_0: pthread_mutex_unlock (&priv->lock); if (priv->up_children == priv->child_count) @@ -1452,11 +1575,17 @@ notify (xlator_t *this, int32_t event, void *data, ...) pthread_mutex_lock (&priv->lock); { - if (priv->children[idx].child_up == 1) { - priv->children[idx].child_up = 0; - priv->up_children--; - } + child = &priv->children[idx]; + if (child->child_up == 0) + goto unblock_1; + + child->child_up = 0; + priv->up_children--; + + _br_qchild_event (this, child, br_brick_disconnect); + pthread_cond_signal (&priv->cond); } + unblock_1: pthread_mutex_unlock (&priv->lock); if (priv->up_children == 0) @@ -1649,6 +1778,7 @@ br_init_children (xlator_t *this, br_private_t *priv) child = &priv->children[i]; LOCK_INIT (&child->lock); + child->witnessed = 0; br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); child->this = this; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index b8d9e3b..9a55773 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -85,8 +85,10 @@ typedef enum br_child_state { } br_child_state_t; struct br_child { - gf_lock_t lock; - br_child_state_t c_state; + gf_lock_t lock; /* protects child state */ + char witnessed; /* witnessed at least one succesfull + connection */ + br_child_state_t c_state; /* current state of this child */ char child_up; /* Indicates whether this child is up or not */ @@ -236,4 +238,10 @@ _br_child_failed_conn (br_child_t *child) return (child->c_state == BR_CHILD_STATE_CONNFAILED); } +static inline int +_br_child_witnessed_connection (br_child_t *child) +{ + return (child->witnessed == 1); +} + #endif /* __BIT_ROT_H__ */ diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index 2c0a645..144bf54 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -27,28 +27,6 @@ #include "changelog-rpc-common.h" #include -static inline void -__mask_cancellation (xlator_t *this) -{ - int ret = 0; - - ret = pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); - if (ret) - gf_log (this->name, GF_LOG_WARNING, - "failed to disable thread cancellation"); -} - -static inline void -__unmask_cancellation (xlator_t *this) -{ - int ret = 0; - - ret = pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); - if (ret) - gf_log (this->name, GF_LOG_WARNING, - "failed to enable thread cancellation"); -} - static void changelog_cleanup_free_mutex (void *arg_mutex) { @@ -1338,7 +1316,7 @@ changelog_rollover (void *data) continue; } - __mask_cancellation (this); + _mask_cancellation (); LOCK (&priv->lock); { @@ -1348,7 +1326,7 @@ changelog_rollover (void *data) } UNLOCK (&priv->lock); - __unmask_cancellation (this); + _unmask_cancellation (); } return NULL; @@ -1376,14 +1354,14 @@ changelog_fsync_thread (void *data) if (ret) continue; - __mask_cancellation (this); + _mask_cancellation (); ret = changelog_inject_single_event (this, priv, &cld); if (ret) gf_log (this->name, GF_LOG_ERROR, "failed to inject fsync event"); - __unmask_cancellation (this); + _unmask_cancellation (); } return NULL; -- 1.7.1