From 3f88e480aedac049b90d94321a5e0a0eddecef4c Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Fri, 2 Dec 2016 11:14:32 +0530 Subject: [PATCH 214/227] cluster/tier: handle fast demotions Demote files on priority if hi-watermark has been breached and continue to demote until the watermark drops below hi-watermark. Monitor watermark more frequently. Trigger demotion as soon as hi-watermark is breached. Add cluster.tier-query-limit option to limit number of files returned from the database query for every iteration of tier_migrate_using_query_file(). If watermark hasn't dropped below hi-watermark during the first iteration, the next iteration will be triggered approximately 1 second after tier_demote() returns to the main tiering loop. Update changetimerecorder xlator to handle query for emergency demote mode. Add tier-ctr-interface.h: Move tier and ctr interface specific macros and struct definition from libglusterfs/src/gfdb/gfdb_data_store.h to new header libglusterfs/src/tier-ctr-interface.h Fix op-version for tier-query-limit option. 
> Reviewed-on: http://review.gluster.org/15158 > Smoke: Gluster Build System > CentOS-regression: Gluster Build System > NetBSD-regression: NetBSD Build System > Reviewed-by: Dan Lambright (cherry picked from commit 460016428cf27484c333227f534c2e2f73a37fb1) > BUG: 1394482 > Reviewed-on: http://review.gluster.org/15835 > Smoke: Gluster Build System > NetBSD-regression: NetBSD Build System > CentOS-regression: Gluster Build System > Reviewed-by: Dan Lambright (cherry picked from commit 1001a4f7705f991f3ae1611997cf01b341ac453a) > BUG: 1394482 > Reviewed-on: http://review.gluster.org/16000 (merged commit from release-3.9 branch) Change-Id: If56af78c6c81d37529b9b6e65ae606ba5c99a811 BUG: 1361759 Signed-off-by: Milind Changire Reviewed-on: https://code.engineering.redhat.com/gerrit/91861 Reviewed-by: Atin Mukherjee --- libglusterfs/src/Makefile.am | 3 +- libglusterfs/src/gfdb/gfdb_data_store.c | 8 +- libglusterfs/src/gfdb/gfdb_data_store.h | 48 +++-------- libglusterfs/src/gfdb/gfdb_data_store_types.h | 8 +- libglusterfs/src/gfdb/gfdb_sqlite3.c | 37 +++++++-- libglusterfs/src/gfdb/gfdb_sqlite3.h | 3 +- libglusterfs/src/globals.h | 4 +- libglusterfs/src/tier-ctr-interface.h | 44 ++++++++++ xlators/cluster/dht/src/dht-common.h | 1 + xlators/cluster/dht/src/dht-shared.c | 4 + xlators/cluster/dht/src/tier.c | 93 ++++++++++++++++++---- xlators/cluster/dht/src/tier.h | 3 +- .../changetimerecorder/src/changetimerecorder.c | 17 +++- xlators/mgmt/glusterd/src/glusterd-volume-set.c | 15 +++- 14 files changed, 218 insertions(+), 70 deletions(-) create mode 100644 libglusterfs/src/tier-ctr-interface.h diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 849cb97..b16b87a 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -60,7 +60,8 @@ noinst_HEADERS = unittest/unittest.h \ $(CONTRIBDIR)/rbtree/rb.h \ $(CONTRIBDIR)/mount/mntent_compat.h \ $(CONTRIBDIR)/libexecinfo/execinfo_compat.h \ - $(CONTRIBDIR)/timer-wheel/timer-wheel.h + 
$(CONTRIBDIR)/timer-wheel/timer-wheel.h \ + tier-ctr-interface.h if !HAVE_LIBUUID # FIXME: unbundle libuuid, see compat-uuid.h. diff --git a/libglusterfs/src/gfdb/gfdb_data_store.c b/libglusterfs/src/gfdb/gfdb_data_store.c index 9c042f9..7b346ae 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store.c +++ b/libglusterfs/src/gfdb/gfdb_data_store.c @@ -444,12 +444,14 @@ delete_record (gfdb_conn_node_t *_conn_node, * for every record found * _query_cbk_args : Custom argument passed for the call back * function query_callback + * query_limit : number to limit number of rows returned by the query * Returns : if successful return 0 or * -ve value in case of failure*/ int find_all (gfdb_conn_node_t *_conn_node, gf_query_callback_t query_callback, - void *_query_cbk_args) + void *_query_cbk_args, + int query_limit) { int ret = 0; gfdb_db_operations_t *db_operations_t = NULL; @@ -463,7 +465,8 @@ find_all (gfdb_conn_node_t *_conn_node, if (db_operations_t->find_all_op) { ret = db_operations_t->find_all_op (gf_db_connection, query_callback, - _query_cbk_args); + _query_cbk_args, + query_limit); if (ret) { gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED, "Find all operation " @@ -814,6 +817,7 @@ void get_gfdb_methods (gfdb_methods_t *methods) { methods->init_db = init_db; methods->fini_db = fini_db; + methods->find_all = find_all; methods->find_unchanged_for_time = find_unchanged_for_time; methods->find_recently_changed_files = find_recently_changed_files; methods->find_unchanged_for_time_freq = find_unchanged_for_time_freq; diff --git a/libglusterfs/src/gfdb/gfdb_data_store.h b/libglusterfs/src/gfdb/gfdb_data_store.h index eacb852..085df69 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store.h +++ b/libglusterfs/src/gfdb/gfdb_data_store.h @@ -20,42 +20,6 @@ #include "gfdb_data_store_types.h" -#define GFDB_IPC_CTR_KEY "gfdb.ipc-ctr-op" - -/* - * CTR IPC OPERATIONS - * - * - */ -#define GFDB_IPC_CTR_QUERY_OPS "gfdb.ipc-ctr-query-op" -#define GFDB_IPC_CTR_CLEAR_OPS 
"gfdb.ipc-ctr-clear-op" -#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm" -#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version" - -/* - * CTR IPC INPUT/OUTPUT - * - * - */ -#define GFDB_IPC_CTR_GET_QFILE_PATH "gfdb.ipc-ctr-get-qfile-path" -#define GFDB_IPC_CTR_GET_QUERY_PARAMS "gfdb.ipc-ctr-get-query-parms" -#define GFDB_IPC_CTR_RET_QUERY_COUNT "gfdb.ipc-ctr-ret-rec-count" -#define GFDB_IPC_CTR_GET_DB_KEY "gfdb.ipc-ctr-get-params-key" -#define GFDB_IPC_CTR_RET_DB_VERSION "gfdb.ipc-ctr-ret-db-version" - -/* - * gfdb ipc ctr params for query - * - * - */ -typedef struct gfdb_ipc_ctr_params { - gf_boolean_t is_promote; - int write_freq_threshold; - int read_freq_threshold; - gfdb_time_t time_stamp; -} gfdb_ipc_ctr_params_t; - - /* GFDB Connection Node: * ~~~~~~~~~~~~~~~~~~~~ * Represents the connection to the database while using libgfdb @@ -146,11 +110,20 @@ delete_record(gfdb_conn_node_t *, gfdb_db_record_t *gfdb_db_record); * for every record found * _query_cbk_args : Custom argument passed for the call back * function query_callback + * query_limit : 0 - unlimited, + * any positive value - adds the LIMIT clause + * to the SQL query + * * Returns : if successful return 0 or * -ve value in case of failure*/ int find_all(gfdb_conn_node_t *, gf_query_callback_t query_callback, - void *_query_cbk_args); + void *_query_cbk_args, + int query_limit); +typedef int (*find_all_t) (gfdb_conn_node_t *, + gf_query_callback_t query_callback, + void *_query_cbk_args, + int query_limit); @@ -353,6 +326,7 @@ typedef int (*set_db_params_t)(gfdb_conn_node_t *db_conn, typedef struct gfdb_methods_s { init_db_t init_db; fini_db_t fini_db; + find_all_t find_all; find_unchanged_for_time_t find_unchanged_for_time; find_recently_changed_files_t find_recently_changed_files; find_unchanged_for_time_freq_t find_unchanged_for_time_freq; diff --git a/libglusterfs/src/gfdb/gfdb_data_store_types.h b/libglusterfs/src/gfdb/gfdb_data_store_types.h index 
1acbdf2..5341f51 100644 --- a/libglusterfs/src/gfdb/gfdb_data_store_types.h +++ b/libglusterfs/src/gfdb/gfdb_data_store_types.h @@ -381,12 +381,18 @@ typedef int * for every record found * _query_cbk_args : Custom argument passed for the call back * function query_callback + * query_limit : 0 - list all files + * positive value - add the LIMIT clause to + * the SQL query to limit the number of records + * returned + * * Returns : if successful return 0 or * -ve value in case of failure*/ typedef int (*gfdb_find_all_t)(void *db_conn, gf_query_callback_t query_callback, - void *_cbk_args); + void *_cbk_args, + int query_limit); diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.c b/libglusterfs/src/gfdb/gfdb_sqlite3.c index 04781be..ec7fe39 100644 --- a/libglusterfs/src/gfdb/gfdb_sqlite3.c +++ b/libglusterfs/src/gfdb/gfdb_sqlite3.c @@ -631,12 +631,15 @@ gf_get_basic_query_stmt (char **out_stmt) * */ int gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback, - void *query_cbk_args) + void *query_cbk_args, + int query_limit) { int ret = -1; char *query_str = NULL; gf_sql_connection_t *sql_conn = db_conn; sqlite3_stmt *prep_stmt = NULL; + char *limit_query = NULL; + char *query = NULL; CHECK_SQL_CONN (sql_conn, out); GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out); @@ -646,12 +649,28 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback, goto out; } - ret = sqlite3_prepare (sql_conn->sqlite3_db_conn, query_str, -1, + query = query_str; + + if (query_limit > 0) { + ret = gf_asprintf (&limit_query, "%s LIMIT %d", + query, query_limit); + if (ret < 0) { + gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, + LG_MSG_QUERY_FAILED, + "Failed creating limit query statement"); + limit_query = NULL; + goto out; + } + + query = limit_query; + } + + ret = sqlite3_prepare (sql_conn->sqlite3_db_conn, query, -1, &prep_stmt, 0); if (ret != SQLITE_OK) { gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, - LG_MSG_PREPARE_FAILED, "Failed to prepare statement 
%s :" - "%s", query_str, + LG_MSG_PREPARE_FAILED, + "Failed to prepare statement %s: %s", query, sqlite3_errmsg (sql_conn->sqlite3_db_conn)); ret = -1; goto out; @@ -660,7 +679,7 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback, ret = gf_sql_query_function (prep_stmt, query_callback, query_cbk_args); if (ret) { gf_msg (GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED, - "Failed Query %s", query_str); + "Failed Query %s", query); goto out; } @@ -668,6 +687,10 @@ gf_sqlite3_find_all (void *db_conn, gf_query_callback_t query_callback, out: sqlite3_finalize (prep_stmt); GF_FREE (query_str); + + if (limit_query) + GF_FREE (limit_query); + return ret; } @@ -1069,10 +1092,10 @@ gf_sqlite3_find_unchanged_for_time_freq (void *db_conn, GF_COL_TB_WMSEC ") >= ? ) ) )" " AND " /*Second condition: For Reads - * Files that have reaASCd wind time smaller than for_time + * Files that have read wind time smaller than for_time * OR * File that have read wind time greater than for_time, - * but write_frequency less than freq_write_cnt*/ + * but read_frequency less than freq_read_cnt*/ "( ((" GF_COL_TB_RWSEC " * " TOSTRING(GFDB_MICROSEC) " + " GF_COL_TB_RWMSEC ") < ? 
)" " OR " diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.h b/libglusterfs/src/gfdb/gfdb_sqlite3.h index e69251c..96301a4 100644 --- a/libglusterfs/src/gfdb/gfdb_sqlite3.h +++ b/libglusterfs/src/gfdb/gfdb_sqlite3.h @@ -254,7 +254,8 @@ int gf_sqlite3_delete (void *db_conn, gfdb_db_record_t *); /*querying modules*/ int gf_sqlite3_find_all (void *db_conn, gf_query_callback_t, - void *_query_cbk_args); + void *_query_cbk_args, + int query_limit); int gf_sqlite3_find_unchanged_for_time (void *db_conn, gf_query_callback_t query_callback, void *_query_cbk_args, diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index b12b0ad..f6164c6 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -43,7 +43,7 @@ */ #define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly should not change */ -#define GD_OP_VERSION_MAX GD_OP_VERSION_3_9_0 /* MAX VERSION is the maximum +#define GD_OP_VERSION_MAX GD_OP_VERSION_3_9_1 /* MAX VERSION is the maximum count in VME table, should keep changing with introduction of newer @@ -83,6 +83,8 @@ #define GD_OP_VERSION_3_9_0 30900 /* Op-version for GlusterFS 3.9.0 */ +#define GD_OP_VERSION_3_9_1 30901 /* Op-version for GlusterFS 3.9.1 */ + #include "xlator.h" /* THIS */ diff --git a/libglusterfs/src/tier-ctr-interface.h b/libglusterfs/src/tier-ctr-interface.h new file mode 100644 index 0000000..cfd3f8a --- /dev/null +++ b/libglusterfs/src/tier-ctr-interface.h @@ -0,0 +1,44 @@ +#ifndef _TIER_CTR_INTERFACE_H_ +#define _TIER_CTR_INTERFACE_H_ + +#include "common-utils.h" +#include "gfdb_data_store_types.h" + +#define GFDB_IPC_CTR_KEY "gfdb.ipc-ctr-op" + +/* + * CTR IPC OPERATIONS + * + * + */ +#define GFDB_IPC_CTR_QUERY_OPS "gfdb.ipc-ctr-query-op" +#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op" +#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm" +#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version" +#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA 
"gfdb.ipc-ctr-set-compact-pragma" +/* + * CTR IPC INPUT/OUTPUT + * + * + */ +#define GFDB_IPC_CTR_GET_QFILE_PATH "gfdb.ipc-ctr-get-qfile-path" +#define GFDB_IPC_CTR_GET_QUERY_PARAMS "gfdb.ipc-ctr-get-query-parms" +#define GFDB_IPC_CTR_RET_QUERY_COUNT "gfdb.ipc-ctr-ret-rec-count" +#define GFDB_IPC_CTR_GET_DB_KEY "gfdb.ipc-ctr-get-params-key" +#define GFDB_IPC_CTR_RET_DB_VERSION "gfdb.ipc-ctr-ret-db-version" + +/* + * gfdb ipc ctr params for query + * + * + */ +typedef struct gfdb_ipc_ctr_params { + gf_boolean_t is_promote; + int write_freq_threshold; + int read_freq_threshold; + gfdb_time_t time_stamp; + int query_limit; + gf_boolean_t emergency_demote; +} gfdb_ipc_ctr_params_t; + +#endif diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index a4285c5..227dc08 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -384,6 +384,7 @@ typedef struct gf_tier_conf { int percent_full; uint64_t max_migrate_bytes; int max_migrate_files; + int query_limit; tier_mode_t mode; int tier_max_promote_size; int tier_promote_frequency; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 0fea1d5..48ec9ff 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -1050,6 +1050,10 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_INT, .default_value = "10000", }, + { .key = {"tier-query-limit"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "100", + }, /* switch option */ { .key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 83903e1..abd8925 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -15,6 +15,7 @@ #include "tier-common.h" #include "syscall.h" #include "events.h" +#include "tier-ctr-interface.h" /*Hard coded DB info*/ static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3; @@ -193,6 
+194,7 @@ out: /* Check and update the watermark every WM_INTERVAL seconds */ #define WM_INTERVAL 5 +#define WM_INTERVAL_EMERG 1 static int tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) @@ -524,7 +526,7 @@ tier_can_promote_file (xlator_t *this, char const *file_name, defrag->tier_conf.blocks_used; /* test if the estimated block usage goes above HI watermark */ - if (GF_PERCENTAGE (estimated_usage, defrag->tier_conf.blocks_total) > + if (GF_PERCENTAGE (estimated_usage, defrag->tier_conf.blocks_total) >= defrag->tier_conf.watermark_hi) { gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, @@ -576,6 +578,7 @@ tier_migrate_using_query_file (void *_args) gfdb_time_t current_time = { 0 }; int total_time = 0; int max_time = 0; + gf_boolean_t emergency_demote_mode = _gf_false; GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out); @@ -592,6 +595,9 @@ tier_migrate_using_query_file (void *_args) if (!migrate_data) goto out; + emergency_demote_mode = (!query_cbk_args->is_promotion && + is_hot_tier_full(&defrag->tier_conf)); + xdata_request = dict_new (); if (!xdata_request) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -1014,6 +1020,18 @@ per_file_out: gfdb_methods.gfdb_query_record_free (query_record); query_record = NULL; + + /* If we are demoting and the entry watermark was HI, then + * we are done with emergency demotions if the current + * watermark has fallen below hi-watermark level + */ + if (emergency_demote_mode) { + if (tier_check_watermark (this) == 0) { + if (!is_hot_tier_full (&defrag->tier_conf)) { + break; + } + } + } } out: @@ -1127,14 +1145,23 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args) goto out; } if (!gfdb_brick_info->_gfdb_promote) { - if (query_cbk_args->defrag->write_freq_threshold == 0 && - query_cbk_args->defrag->read_freq_threshold == 0) { - ret = gfdb_methods.find_unchanged_for_time ( - conn_node, - tier_gf_query_callback, - (void *)query_cbk_args, - gfdb_brick_info->time_stamp); + if 
(query_cbk_args->defrag->tier_conf.watermark_last == + TIER_WM_HI) { + /* emergency demotion mode */ + ret = gfdb_methods.find_all (conn_node, + tier_gf_query_callback, + (void *)query_cbk_args, + query_cbk_args->defrag->tier_conf. + query_limit); } else { + if (query_cbk_args->defrag->write_freq_threshold == 0 && + query_cbk_args->defrag->read_freq_threshold == 0) { + ret = gfdb_methods.find_unchanged_for_time ( + conn_node, + tier_gf_query_callback, + (void *)query_cbk_args, + gfdb_brick_info->time_stamp); + } else { ret = gfdb_methods.find_unchanged_for_time_freq ( conn_node, tier_gf_query_callback, @@ -1145,6 +1172,7 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args) query_cbk_args->defrag-> read_freq_threshold, _gf_false); + } } } else { if (query_cbk_args->defrag->write_freq_threshold == 0 && @@ -1160,8 +1188,7 @@ tier_process_self_query (tier_brick_list_t *local_brick, void *args) tier_gf_query_callback, (void *)query_cbk_args, gfdb_brick_info->time_stamp, - query_cbk_args->defrag-> - write_freq_threshold, + query_cbk_args->defrag->write_freq_threshold, query_cbk_args->defrag->read_freq_threshold, _gf_false); } @@ -1268,10 +1295,21 @@ tier_process_ctr_query (tier_brick_list_t *local_brick, void *args) /* set all the query params*/ ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote; - ipc_ctr_params->write_freq_threshold = query_cbk_args-> - defrag->write_freq_threshold; - ipc_ctr_params->read_freq_threshold = query_cbk_args-> - defrag->read_freq_threshold; + + ipc_ctr_params->write_freq_threshold = + query_cbk_args->defrag->write_freq_threshold; + + ipc_ctr_params->read_freq_threshold = + query_cbk_args->defrag->read_freq_threshold; + + ipc_ctr_params->query_limit = + query_cbk_args->defrag->tier_conf.query_limit; + + ipc_ctr_params->emergency_demote = + (!gfdb_brick_info->_gfdb_promote && + query_cbk_args->defrag->tier_conf.watermark_last == + TIER_WM_HI); + memcpy (&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp, 
sizeof (gfdb_time_t)); @@ -1860,6 +1898,15 @@ out: return; } +static int +tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm) +{ + if (mode == TIER_MODE_WM && wm == TIER_WM_HI) + return WM_INTERVAL_EMERG; + + return WM_INTERVAL; +} + /* * Main tiering loop. This is called from the promotion and the * demotion threads spawned in tier_start(). @@ -1968,7 +2015,10 @@ static void check_watermark++; - if (check_watermark >= WM_INTERVAL) { + /* emergency demotion requires frequent watermark monitoring */ + if (check_watermark >= + tier_get_wm_interval(tier_conf->mode, + tier_conf->watermark_last)) { check_watermark = 0; if (tier_conf->mode == TIER_MODE_WM) { ret = tier_get_fs_stat (this, &root_loc); @@ -2396,6 +2446,15 @@ tier_init (xlator_t *this) defrag->tier_conf.max_migrate_files = freq; + + ret = dict_get_int32 (this->options, + "tier-query-limit", + &(defrag->tier_conf.query_limit)); + if (ret) { + defrag->tier_conf.query_limit = + DEFAULT_TIER_QUERY_LIMIT; + } + ret = dict_get_str (this->options, "tier-mode", &mode); if (ret) { @@ -2564,6 +2623,10 @@ tier_reconfigure (xlator_t *this, dict_t *options) defrag->tier_conf.max_migrate_files, options, int32, out); + GF_OPTION_RECONF ("tier-query-limit", + defrag->tier_conf.query_limit, + options, int32, out); + GF_OPTION_RECONF ("tier-pause", req_pause, options, bool, out); diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h index 0807608..5745609 100644 --- a/xlators/cluster/dht/src/tier.h +++ b/xlators/cluster/dht/src/tier.h @@ -93,7 +93,7 @@ typedef enum tier_watermark_op_ { #define DEFAULT_PROMOTE_FREQ_SEC 120 #define DEFAULT_DEMOTE_FREQ_SEC 120 -#define DEFAULT_DEMOTE_DEGRADED 10 +#define DEFAULT_DEMOTE_DEGRADED 1 #define DEFAULT_WRITE_FREQ_SEC 0 #define DEFAULT_READ_FREQ_SEC 0 #define DEFAULT_WM_LOW 75 @@ -101,5 +101,6 @@ typedef enum tier_watermark_op_ { #define DEFAULT_TIER_MODE TIER_MODE_TEST #define DEFAULT_TIER_MAX_MIGRATE_MB 1000 #define DEFAULT_TIER_MAX_MIGRATE_FILES 5000 
+#define DEFAULT_TIER_QUERY_LIMIT 100 #endif diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c index 3d2e78a..dba4265 100644 --- a/xlators/features/changetimerecorder/src/changetimerecorder.c +++ b/xlators/features/changetimerecorder/src/changetimerecorder.c @@ -15,6 +15,9 @@ #include "ctr-messages.h" #include "syscall.h" +#include "changetimerecorder.h" +#include "tier-ctr-interface.h" + /*******************************inode forget***********************************/ int @@ -1724,14 +1727,21 @@ ctr_db_query (xlator_t *this, goto out; } if (!ipc_ctr_params->is_promote) { - if (ipc_ctr_params->write_freq_threshold == 0 && - ipc_ctr_params->read_freq_threshold == 0) { + if (ipc_ctr_params->emergency_demote) { + /* emergency demotion mode */ + ret = find_all (conn_node, + ctr_db_query_callback, + (void *)&query_cbk_args, + ipc_ctr_params->query_limit); + } else { + if (ipc_ctr_params->write_freq_threshold == 0 && + ipc_ctr_params->read_freq_threshold == 0) { ret = find_unchanged_for_time ( conn_node, ctr_db_query_callback, (void *)&query_cbk_args, &ipc_ctr_params->time_stamp); - } else { + } else { ret = find_unchanged_for_time_freq ( conn_node, ctr_db_query_callback, @@ -1740,6 +1750,7 @@ ctr_db_query (xlator_t *this, ipc_ctr_params->write_freq_threshold, ipc_ctr_params->read_freq_threshold, _gf_false); + } } } else { if (ipc_ctr_params->write_freq_threshold == 0 && diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index a293021..93ed1c8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -454,7 +454,8 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, strstr (key, "tier-max-mb") || strstr (key, "tier-max-promote-file-size") || strstr (key, "tier-max-files") || - strstr (key, "tier-demote-frequency")) { + strstr (key, 
"tier-demote-frequency") || + strstr (key, "tier-query-limit")) { if (origin_val < 1) { snprintf (errstr, sizeof (errstr), "%s is not a " " compatible value. %s expects a positive " @@ -2736,6 +2737,18 @@ struct volopt_map_entry glusterd_volopt_map[] = { .description = "The maximum number of files that may be migrated" " in any direction in a given cycle by a single node." }, + { .key = "cluster.tier-query-limit", + .voltype = "cluster/tier", + .option = "tier-query-limit", + .value = "100", + .op_version = GD_OP_VERSION_3_9_1, + .flags = OPT_FLAG_CLIENT_OPT, + .validate_fn = validate_tier, + .type = NO_DOC, + .description = "The maximum number of files that may be migrated " + "during an emergency demote. An emergency condition " + "is flagged when writes breach the hi-watermark." + }, { .key = "features.ctr-enabled", .voltype = "features/changetimerecorder", .value = "off", -- 2.9.3