From 68b385605f52503f8c80ebf0aa15fd278b5db6d0 Mon Sep 17 00:00:00 2001 From: N Balachandran Date: Tue, 17 Jan 2017 15:43:47 +0530 Subject: [PATCH 314/361] dht/rebalance Estimate time to complete rebalance The estimates will be logged to the rebalance log on running gluster v rebalance status mainline: > BUG: 1396004 > Reviewed-on: http://review.gluster.org/15893 > Smoke: Gluster Build System > NetBSD-regression: NetBSD Build System > CentOS-regression: Gluster Build System > Reviewed-by: Raghavendra G (cherry picked from commit 310405d689760fc1abe3075c4bcf6cf7351e60bc) BUG: 1380598 Change-Id: I9d51b139cd4c8dfde1ff2c2050720ae606c13fc6 Signed-off-by: N Balachandran Reviewed-on: https://code.engineering.redhat.com/gerrit/101294 Tested-by: Milind Changire Reviewed-by: Atin Mukherjee --- cli/src/cli-rpc-ops.c | 28 +++- xlators/cluster/dht/src/dht-rebalance.c | 102 +++++++++++- xlators/mgmt/glusterd/src/glusterd-utils.c | 251 ++++++++++++++++++++++++++++- xlators/mgmt/glusterd/src/glusterd.h | 1 + 4 files changed, 376 insertions(+), 6 deletions(-) diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 5a0cfdf..339d91b 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -1611,6 +1611,9 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, int sec = 0; gf_boolean_t down = _gf_false; gf_boolean_t fix_layout = _gf_false; + uint64_t max_time = 0; + uint64_t time_left = 0; + ret = dict_get_int32 (dict, "count", &count); if (ret) { @@ -1663,6 +1666,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, skipped = 0; status_str = NULL; elapsed = 0; + time_left = 0; /* Check if status is NOT_STARTED, and continue early */ memset (key, 0, 256); @@ -1740,6 +1744,15 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, if (ret) gf_log ("cli", GF_LOG_TRACE, "failed to get run-time"); + memset (key, 0, 256); + snprintf (key, 256, "time-left-%d", i); + ret = dict_get_uint64 (dict, key, 
&time_left);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get time left");
+                if (time_left > max_time)
+                        max_time = time_left;
+
                 /* Check for array bound */
                 if (status_rcd >= GF_DEFRAG_STATUS_MAX)
                         status_rcd = GF_DEFRAG_STATUS_MAX;
@@ -1757,15 +1770,15 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
                         if (size_str) {
                                 cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"
                                          PRIu64" %13"PRIu64 " %20s "
-                                         "%8d:%d:%d", node_name, files,
+                                         "%8d:%02d:%02d", node_name, files,
                                          size_str, lookup, failures, skipped,
                                          status_str, hrs, min, sec);
                         } else {
                                 cli_out ("%40s %16"PRIu64 " %13"PRIu64 " %13"
                                          PRIu64 " %13"PRIu64" %13"PRIu64 " %20s"
-                                         " %8d:%d:%d", node_name, files, size,
-                                         lookup, failures, skipped, status_str,
-                                         hrs, min, sec);
+                                         " %8d:%02d:%02d", node_name, files,
+                                         size, lookup, failures, skipped,
+                                         status_str, hrs, min, sec);
                         }
                 }
                 GF_FREE(size_str);
@@ -1775,6 +1788,13 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
                          " Please check the nodes that are down using \'gluster"
                          " peer status\' and start the glusterd on those nodes,"
                          " else tier detach commit might fail!");
+        if (max_time) {
+                hrs = max_time / 3600;
+                min = ((int) max_time % 3600) / 60;
+                sec = ((int) max_time % 3600) % 60;
+                cli_out ("Estimated time left for rebalance to complete :"
+                         " %8d:%02d:%02d", hrs, min, sec);
+        }
 out:
         return ret;
 }
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 39a7bb6..76778de 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -46,6 +46,9 @@
                 }                                                               \
         }                                                                       \
 
+/* Estimated file count on the local subvols; drives the time-left estimate */
+uint64_t g_totalfiles = 0;
+
 void
 gf_defrag_free_container (struct dht_container *container)
 {
@@ -3701,6 +3704,58 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
 
 
 /******************Tier background Fix layout functions END********************/
+/* Count of inodes in use on one subvol (f_files - f_ffree); 0 on failure. */
+uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
+{
+        int ret = -1;
+        struct statvfs buf = {0,};
+
+        if (!this)
+                return 0;
+
+        ret = syncop_statfs (this, root_loc, &buf, NULL, NULL);
+        if (ret) {
+                /* statfs failed: contribute nothing to the estimate */
+                return 0;
+        }
+        return (buf.f_files - buf.f_ffree);
+}
+
+/* Add every local subvol's count to g_totalfiles; 0, or -1 without conf. */
+int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
+{
+        dht_conf_t *conf = NULL;
+        int ret = -1;
+        int i = 0;
+        uint64_t num_files = 0;
+
+        conf = this->private;
+        if (!conf) {
+                return ret;
+        }
+
+        for (i = 0 ; i < conf->local_subvols_cnt; i++) {
+                num_files = gf_defrag_subvol_file_cnt (conf->local_subvols[i],
+                                                       root_loc);
+                g_totalfiles += num_files;
+                gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s,"
+                        "cnt = %"PRIu64, conf->local_subvols[i]->name,
+                        num_files);
+        }
+
+        /* FIXFIXFIX: halve the number of files to negate .glusterfs contents
+           We need a better way to figure this out */
+
+        g_totalfiles = g_totalfiles/2;
+        if (g_totalfiles > 20000)
+                g_totalfiles += 10000;
+
+        gf_msg (this->name, GF_LOG_INFO, 0, 0,
+                "Total number of files = %"PRIu64, g_totalfiles);
+
+        return 0;
+}
+
 
 int
 gf_defrag_start_crawl (void *data)
@@ -3725,6 +3780,7 @@ gf_defrag_start_crawl (void *data)
         pthread_t              *tid             = NULL;
         gf_boolean_t            is_tier_detach  = _gf_false;
 
+
         this = data;
         if (!this)
                 goto exit;
@@ -3854,6 +3910,13 @@ gf_defrag_start_crawl (void *data)
                                 "are %s", conf->local_subvols[i]->name);
                 }
 
+                ret = gf_defrag_total_file_cnt (this, &loc);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
+                                "the total number of files. Unable to estimate "
+                                "time to complete rebalance.");
+                }
+
                 /* Initialize global entry queue */
                 defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),
                                            gf_dht_mt_container_t);
@@ -4090,8 +4153,11 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
         uint64_t skipped = 0;
         uint64_t promoted = 0;
         uint64_t demoted = 0;
-        char    *status = "";
+        char     *status = "";
         double   elapsed = 0;
+        uint64_t time_left = 0;
+        uint64_t time_to_complete = 0;
+        double   rate_lookedup = 0;
         struct timeval end = {0,};
 
 
@@ -4114,6 +4180,34 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
 
         elapsed = end.tv_sec - defrag->start_time.tv_sec;
 
+        /* Estimate the time left from the lookup rate so far. Wait until time
+         * has elapsed and a file has been looked up: a zero, infinite or NaN
+         * rate would make the integer conversions below undefined. */
+        if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER)
+                && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)
+                && g_totalfiles && (elapsed > 0)
+                && defrag->num_files_lookedup) {
+
+                rate_lookedup = (defrag->num_files_lookedup)/elapsed;
+                if (defrag->num_files_lookedup > g_totalfiles)
+                        g_totalfiles = defrag->num_files_lookedup + 10000;
+                time_to_complete = (g_totalfiles)/rate_lookedup;
+                /* time_left is unsigned: leave it at 0 rather than wrap */
+                if (time_to_complete > elapsed)
+                        time_left = time_to_complete - elapsed;
+
+                gf_log (THIS->name, GF_LOG_INFO,
+                        "TIME: num_files_lookedup=%"PRIu64",elapsed time = %f,"
+                        "rate_lookedup=%f", defrag->num_files_lookedup, elapsed,
+                        rate_lookedup);
+                gf_log (THIS->name, GF_LOG_INFO,
+                        "TIME: Estimated total time to complete = %"PRIu64
+                        " seconds", time_to_complete);
+
+                gf_log (THIS->name, GF_LOG_INFO,
+                        "TIME: Seconds left = %"PRIu64" seconds", time_left);
+        }
+
         if (!dict)
                 goto log;
 
@@ -4162,6 +4256,12 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
         if (ret)
                 gf_log (THIS->name, GF_LOG_WARNING,
                         "failed to set skipped file count");
+
+        ret = dict_set_uint64 (dict, "time-left", time_left);
+        if (ret)
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "failed to set time-left");
+
 log:
         switch (defrag->defrag_status) {
         case GF_DEFRAG_STATUS_NOT_STARTED:
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 
d849690..e976ba2 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -7586,6 +7586,7 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
                                       dict_t *rsp_dict)
 {
         int                             ret = 0;
+        int                             ret2 = 0;
         uint64_t                        files = 0;
         uint64_t                        size = 0;
         uint64_t                        lookup = 0;
@@ -7596,6 +7597,7 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
         double                          run_time = 0;
         uint64_t                        promoted = 0;
         uint64_t                        demoted = 0;
+        uint64_t                        time_left = 0;
 
         this = THIS;
 
@@ -7658,6 +7660,14 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
                 volinfo->rebal.skipped_files = skipped;
         if (run_time)
                 volinfo->rebal.rebalance_time = run_time;
+        /* Cache the estimate; node status replies read rebal.time_left */
+        ret2 = dict_get_uint64 (rsp_dict, "time-left", &time_left);
+        if (ret2)
+                gf_msg_trace (this->name, 0,
+                              "failed to get time left");
+        if (!ret2)
+                volinfo->rebal.time_left = time_left;
+
         if (promoted)
                 volinfo->tier_info.promoted = promoted;
         if (demoted)
@@ -9254,7 +9264,7 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
                 ret = dict_set_uint64 (ctx_dict, key, value);
                 if (ret) {
                         gf_msg_debug (THIS->name, 0,
-                                "failed to set lookuped file count");
+                                "failed to set looked up file count");
                 }
         }
 
@@ -9310,6 +9320,18 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
         }
 
         memset (key, 0, 256);
+        snprintf (key, 256, "time-left-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "time-left-%d", current_index);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (THIS->name, 0,
+                                      "failed to set time-left");
+                }
+        }
+        memset (key, 0, 256);
         snprintf (key, 256, "demoted-%d", index);
         ret = dict_get_uint64 (rsp_dict, key, &value);
         if (!ret) {
@@ -9341,6 +9363,225 @@ out:
 }
 
 int
+glusterd_volume_tier_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+        char                 key[256]      = {0,};
+        char                *node_uuid     = NULL;
+        char                *node_uuid_str = NULL;
+        char                *volname       = NULL;
+        dict_t              *ctx_dict      = NULL;
+        double               elapsed_time  = 0;
+        glusterd_volinfo_t  *volinfo       = NULL;
+        int                  ret           = 0;
+        int32_t              index         = 0;
+        int32_t              count         = 0;
+        int32_t              value32       = 0;
+        uint64_t             value         = 0;
+        xlator_t            *this          = NULL;
+        char                *task_id_str   = NULL;
+
+        this = THIS;
+        GF_VALIDATE_OR_GOTO (this->name, this, out);
+        GF_VALIDATE_OR_GOTO (this->name, rsp_dict, out);
+
+        if (aggr) {
+                ctx_dict = aggr;
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_OPCTX_GET_FAIL,
+                        "Operation Context is not present");
+                goto out;
+        }
+
+        if (!ctx_dict)
+                goto out;
+
+        ret = dict_get_str (ctx_dict, "volname", &volname);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "Unable to get volume name");
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+
+        if (ret)
+                goto out;
+
+        /* Copy the responding node's "<field>-<index>" values from rsp_dict
+         * into ctx_dict under a new "<field>-<count>" slot. */
+        ret = dict_get_int32 (rsp_dict, "count", &index);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_GET_FAILED,
+                        "failed to get index");
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "node-uuid-%d", index);
+        ret = dict_get_str (rsp_dict, key, &node_uuid);
+        if (!ret) {
+                node_uuid_str = gf_strdup (node_uuid);
+        }
+        ret = dict_get_int32 (ctx_dict, "count", &count);
+        count++;
+        ret = dict_set_int32 (ctx_dict, "count", count);
+        if (ret)
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_DICT_SET_FAILED,
+                        "Failed to set count");
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "node-uuid-%d", count);
+        ret = dict_set_dynstr (ctx_dict, key, node_uuid_str);
+        if (ret) {
+                gf_msg_debug (this->name, 0,
+                              "failed to set node-uuid");
+        }
+
+        snprintf (key, 256, "files-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "files-%d", count);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set the file count");
+                }
+        }
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "size-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "size-%d", count);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set the size of migration");
+                }
+        }
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "lookups-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "lookups-%d", count);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set looked up file count");
+                }
+        }
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "status-%d", index);
+        ret = dict_get_int32 (rsp_dict, key, &value32);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "status-%d", count);
+                ret = dict_set_int32 (ctx_dict, key, value32);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set status");
+                }
+        }
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "failures-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "failures-%d", count);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set failure count");
+                }
+        }
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "skipped-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "skipped-%d", count);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set skipped count");
+                }
+        }
+        memset (key, 0, 256);
+        snprintf (key, 256, "run-time-%d", index);
+        ret = dict_get_double (rsp_dict, key, &elapsed_time);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "run-time-%d", count);
+                ret = dict_set_double (ctx_dict, key, elapsed_time);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set run-time");
+                }
+        }
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "demoted-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "demoted-%d", count);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set demoted count");
+                }
+        }
+        memset (key, 0, 256);
+        snprintf (key, 256, "promoted-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "promoted-%d", count);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set promoted count");
+                }
+        }
+
+        memset (key, 0, 256);
+        snprintf (key, 256, "time-left-%d", index);
+        ret = dict_get_uint64 (rsp_dict, key, &value);
+        if (!ret) {
+                memset (key, 0, 256);
+                snprintf (key, 256, "time-left-%d", count);
+                ret = dict_set_uint64 (ctx_dict, key, value);
+                if (ret) {
+                        gf_msg_debug (this->name, 0,
+                                      "failed to set time-left");
+                }
+        }
+
+        ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY,
+                            &task_id_str);
+        if (ret) {
+                gf_msg_debug (this->name, errno,
+                              "Missing remove-brick-id");
+        } else
+                ret = dict_set_str (ctx_dict, GF_REMOVE_BRICK_TID_KEY,
+                                    task_id_str);
+
+        ret = 0;
+
+out:
+        return ret;
+}
+
+int
 glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src)
 {
         char           output_name[PATH_MAX] = "";
@@ -9976,6 +10217,14 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
                         "failed to set run-time");
 
         memset (key, 0 , 256);
+        snprintf (key, 256, "time-left-%d", i);
+        ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.time_left);
+        if (ret)
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DICT_SET_FAILED,
+                        "failed to set time left");
+
+        memset (key, 0 , 256);
         snprintf (key, 256, "promoted-%d", i);
         ret = dict_set_uint64 (op_ctx, key, volinfo->tier_info.promoted);
         if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index a21b0a1..c84f019 100644
--- 
a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -308,6 +308,7 @@ struct glusterd_rebalance_ { uint64_t rebalance_failures; uuid_t rebalance_id; double rebalance_time; + uint64_t time_left; glusterd_op_t op; dict_t *dict; /* Dict to store misc information * like list of bricks being removed */ -- 1.8.3.1