From d4c02a65c2cb3cba552fbba2df278ca9d0692ba1 Mon Sep 17 00:00:00 2001
From: Samikshan Bairagya
Date: Fri, 2 Jun 2017 10:12:12 +0530
Subject: [PATCH 552/557] glusterd: Introduce option to limit no. of muxed bricks per process

This commit introduces a new global option that can be set to limit the number of multiplexed bricks in one process.

Usage: `# gluster volume set all cluster.max-bricks-per-process <value>`

If this option is not set, multiplexing happens with no limit; i.e. a brick process will have as many bricks multiplexed to it as possible. In other words, the current multiplexing behaviour won't change if this option isn't set to any value.

This commit also introduces a brick process instance that contains information about each brick process: the number of bricks handled by the process (which is 1 in non-multiplexing cases), the list of bricks, and the port number, which also serves as a unique identifier for each brick process instance. The brick process list is maintained in 'glusterd_conf_t'.

> Updates: #151
> Signed-off-by: Samikshan Bairagya
> Reviewed-on: https://review.gluster.org/17469
> Smoke: Gluster Build System
> CentOS-regression: Gluster Build System
> Reviewed-by: Atin Mukherjee

Change-Id: I74f10a43e601ed41bc3aca145c6107abc7404149
BUG: 1468950
Signed-off-by: Samikshan Bairagya
Reviewed-on: https://code.engineering.redhat.com/gerrit/111799
Reviewed-by: Atin Mukherjee
---
libglusterfs/src/globals.h | 4 +- tests/basic/multiplex.t | 2 + tests/bugs/core/multiplex-limit-issue-151.t | 57 ++++ xlators/mgmt/glusterd/src/glusterd-mem-types.h | 3 +- xlators/mgmt/glusterd/src/glusterd-messages.h | 34 +- xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 + xlators/mgmt/glusterd/src/glusterd-pmap.c | 19 +- xlators/mgmt/glusterd/src/glusterd-syncop.c | 19 +- xlators/mgmt/glusterd/src/glusterd-utils.c | 394 ++++++++++++++++++++---- xlators/mgmt/glusterd/src/glusterd-utils.h | 13 + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 +- xlators/mgmt/glusterd/src/glusterd-volume-set.c | 39 +++ xlators/mgmt/glusterd/src/glusterd.c | 1 + xlators/mgmt/glusterd/src/glusterd.h | 12 +
14 files changed, 546 insertions(+), 59 deletions(-)
create mode 100644 tests/bugs/core/multiplex-limit-issue-151.t

diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index caa7f64..01c52d3 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -43,7 +43,7 @@ */ #define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly should not change */ -#define GD_OP_VERSION_MAX GD_OP_VERSION_3_11_1 /* MAX VERSION is the maximum +#define GD_OP_VERSION_MAX GD_OP_VERSION_3_12_0 /* MAX VERSION is the maximum count in VME table, should keep changing with introduction of newer @@ -93,6 +93,8 @@ #define GD_OP_VERSION_3_11_1 31101 /* Op-version for GlusterFS 3.11.1 */ +#define GD_OP_VERSION_3_12_0 31200 /* Op-version for GlusterFS 3.12.0 */ + #include "xlator.h" /* THIS */

diff --git a/tests/basic/multiplex.t b/tests/basic/multiplex.t index 0a67240..4e18edd 100644 --- a/tests/basic/multiplex.t +++ b/tests/basic/multiplex.t @@ -13,6 +13,8 @@ function count_brick_pids { | grep -v "N/A" | sort | uniq | wc -l } +cleanup + TEST glusterd TEST $CLI volume set all cluster.brick-multiplex on push_trapfunc "$CLI volume set all cluster.brick-multiplex off"

diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/bugs/core/multiplex-limit-issue-151.t new file mode 100644 index 0000000..9511756 --- /dev/null +++ b/tests/bugs/core/multiplex-limit-issue-151.t @@ -0,0 +1,57 @@
+#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../traps.rc +. $(dirname $0)/../../volume.rc + +function count_up_bricks { + $CLI --xml volume status all | grep '<status>1' | wc -l +} + +function count_brick_processes { + pgrep glusterfsd | wc -l +} + +function count_brick_pids { + $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ + | grep -v "N/A" | sort | uniq | wc -l +} + +cleanup; + +TEST glusterd + +TEST $CLI volume set all cluster.brick-multiplex on +TEST ! $CLI volume set all cluster.max-bricks-per-process -1 +TEST ! $CLI volume set all cluster.max-bricks-per-process foobar +TEST $CLI volume set all cluster.max-bricks-per-process 3 + +push_trapfunc "$CLI volume set all cluster.brick-multiplex off" +push_trapfunc "cleanup" + +TEST $CLI volume create $V0 $H0:$B0/brick{0..5} +TEST $CLI volume start $V0 + +EXPECT 2 count_brick_processes +EXPECT 2 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks + +pkill gluster +TEST glusterd + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks + +TEST $CLI volume add-brick $V0 $H0:$B0/brick6 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks + +TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start +TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 commit + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids +EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks

diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index ed171b6..33aac2f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -71,7 +71,8 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55, gf_gld_mt_snap_create_args_t = gf_common_mt_end + 56, gf_gld_mt_local_peers_t = gf_common_mt_end + 57, - gf_gld_mt_end = gf_common_mt_end + 58, + gf_gld_mt_glusterd_brick_proc_t = gf_common_mt_end + 58, + gf_gld_mt_end = gf_common_mt_end + 59, } gf_gld_mem_types_t; #endif

diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index 58208ec..36e6ed9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -41,7 +41,7 @@ #define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES 602 +#define GLFS_NUM_MESSAGES 606 #define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -4817,6 +4817,38 @@ */ #define GD_MSG_VOL_SET_VALIDATION_INFO (GLUSTERD_COMP_BASE + 602) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_NO_MUX_LIMIT (GLUSTERD_COMP_BASE + 603) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_REM_BRICK_FAILED (GLUSTERD_COMP_BASE + 604) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_ADD_BRICK_FAILED (GLUSTERD_COMP_BASE + 605) + +/*!
+ * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_BRICKPROC_NEW_FAILED (GLUSTERD_COMP_BASE + 606) + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 08c39ec..4c8fef7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -78,6 +78,10 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { * TBD: add a dynamic handler to set the appropriate value */ { GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"}, + /* Set this value to 1 by default implying non-multiplexed behaviour. + * TBD: Discuss the default value for this. Maybe this should be a + * dynamic value depending on the memory specifications per node */ + { GLUSTERD_BRICKMUX_LIMIT_KEY, "1"}, { NULL }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index f43324c..6fead59 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -518,8 +518,8 @@ __gluster_pmap_signin (rpcsvc_request_t *req) { pmap_signin_req args = {0,}; pmap_signin_rsp rsp = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; ret = xdr_to_generic (req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req); @@ -532,6 +532,7 @@ __gluster_pmap_signin (rpcsvc_request_t *req) GF_PMAP_PORT_BRICKSERVER, req->trans); ret = glusterd_get_brickinfo (THIS, args.brick, args.port, &brickinfo); + fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_pmap_signin_rsp); @@ -599,6 +600,22 @@ __gluster_pmap_signout (rpcsvc_request_t *req) * glusterd end when a brick is killed from the * backend */ brickinfo->status = GF_BRICK_STOPPED; + + /* Remove brick from brick process if not already + * removed in the brick op phase. 
This situation would + * arise when the brick is killed explicitly from the + * backend */ + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg_debug (this->name, 0, "Couldn't remove " + "brick %s:%s from brick process", + brickinfo->hostname, + brickinfo->path); + /* Ignore 'ret' here since the brick might + * have already been deleted in brick op phase + */ + ret = 0; + } } } diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index 6ecf122..38868ac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -950,6 +950,7 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, gd1_mgmt_brick_op_req *req = NULL; int ret = 0; xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; this = THIS; args.op_ret = -1; @@ -979,6 +980,23 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, GF_FREE (args.errstr); } + if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) { + if (args.op_ret == 0) { + brickinfo = pnode->node; + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_REM_BRICK_FAILED, + "Removing brick %s:%s from brick" + " process failed", + brickinfo->hostname, + brickinfo->path); + args.op_ret = ret; + goto out; + } + } + } + if (GD_OP_STATUS_VOLUME == op) { ret = dict_set_int32 (args.dict, "index", pnode->index); if (ret) { @@ -1016,7 +1034,6 @@ out: dict_unref (args.dict); gd_brick_op_req_free (req); return args.op_ret; - } int32_t diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 82034ad..0f5c148 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -116,6 +116,46 @@ is_brick_mx_enabled (void) return ret ? 
_gf_false: enabled; } +int +get_mux_limit_per_process (int *mux_limit) +{ + char *value = NULL; + int ret = -1; + int max_bricks_per_proc = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + if (!is_brick_mx_enabled()) { + max_bricks_per_proc = 1; + ret = 0; + goto out; + } + + ret = dict_get_str (priv->opts, GLUSTERD_BRICKMUX_LIMIT_KEY, &value); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Can't get limit for number of bricks per brick " + "process from dict"); + ret = 0; + } else { + ret = gf_string2int (value, &max_bricks_per_proc); + if (ret) + goto out; + } +out: + *mux_limit = max_bricks_per_proc; + + gf_msg_debug ("glusterd", 0, "Mux limit set to %d bricks per process", *mux_limit); + + return ret; +} + extern struct volopt_map_entry glusterd_volopt_map[]; extern glusterd_all_vol_opts valid_all_vol_opts[]; @@ -965,6 +1005,33 @@ out: } int32_t +glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess) +{ + glusterd_brick_proc_t *new_brickprocess = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO (THIS->name, brickprocess, out); + + new_brickprocess = GF_CALLOC (1, sizeof(*new_brickprocess), + gf_gld_mt_glusterd_brick_proc_t); + + if (!new_brickprocess) + goto out; + + CDS_INIT_LIST_HEAD (&new_brickprocess->bricks); + CDS_INIT_LIST_HEAD (&new_brickprocess->brick_proc_list); + + new_brickprocess->brick_count = 0; + *brickprocess = new_brickprocess; + + ret = 0; + +out: + gf_msg_debug (THIS->name, 0, "Returning %d", ret); + return ret; +} + +int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) { glusterd_brickinfo_t *new_brickinfo = NULL; @@ -2023,6 +2090,15 @@ retry: goto out; } + ret = glusterd_brick_process_add_brick (brickinfo, volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s " + "to brick process failed.", brickinfo->hostname, + brickinfo->path); + goto out; + } + connect: ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); if (ret) { @@ -2086,6 +2162,200 @@ glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) return 0; } +static gf_boolean_t +unsafe_option (dict_t *this, char *key, data_t *value, void *arg) +{ + /* + * Certain options are safe because they're already being handled other + * ways, such as being copied down to the bricks (all auth options) or + * being made irrelevant (event-threads). All others are suspect and + * must be checked in the next function. + */ + if (fnmatch ("*auth*", key, 0) == 0) { + return _gf_false; + } + + if (fnmatch ("*event-threads", key, 0) == 0) { + return _gf_false; + } + + return _gf_true; +} + +static int +opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2) +{ + data_t *value2 = dict_get (dict2, key); + int32_t min_len; + + /* + * If the option is only present on one, we can either look at the + * default or assume a mismatch. Looking at the default is pretty + * hard, because that's part of a structure within each translator and + * there's no dlopen interface to get at it, so we assume a mismatch. + * If the user really wants them to match (and for their bricks to be + * multiplexed, they can always reset the option). 
+ */ + if (!value2) { + gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key); + return -1; + } + + min_len = MIN (value1->len, value2->len); + if (strncmp (value1->data, value2->data, min_len) != 0) { + gf_log (THIS->name, GF_LOG_DEBUG, + "option mismatch, %s, %s != %s", + key, value1->data, value2->data); + return -1; + } + + return 0; +} + +int +glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc) +{ + cds_list_del_init (&brick_proc->brick_proc_list); + cds_list_del_init (&brick_proc->bricks); + + GF_FREE (brick_proc); + + return 0; +} + +int +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + glusterd_brickinfo_t *brickinfoiter = NULL; + glusterd_brick_proc_t *brick_proc_tmp = NULL; + glusterd_brickinfo_t *tmp = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + + cds_list_for_each_entry_safe (brick_proc, brick_proc_tmp, + &priv->brick_procs, brick_proc_list) { + if (brickinfo->port != brick_proc->port) { + continue; + } + + GF_VALIDATE_OR_GOTO (this->name, (brick_proc->brick_count > 0), out); + + cds_list_for_each_entry_safe (brickinfoiter, tmp, + &brick_proc->bricks, brick_list) { + if (strcmp (brickinfoiter->path, brickinfo->path) == 0) { + cds_list_del_init (&brickinfoiter->brick_list); + + GF_FREE (brickinfoiter->logfile); + GF_FREE (brickinfoiter); + brick_proc->brick_count--; + break; + } + } + + /* If all bricks have been removed, delete the brick process */ + if (brick_proc->brick_count == 0) { + ret = glusterd_brickprocess_delete (brick_proc); + if (ret) + goto out; + } + break; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + glusterd_brickinfo_t *brickinfo_dup = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + + ret = glusterd_brickinfo_new (&brickinfo_dup); + if (ret) { + gf_msg ("glusterd", GF_LOG_ERROR, 0, + GD_MSG_BRICK_NEW_INFO_FAIL, + "Failed to create new brickinfo"); + goto out; + } + + ret = glusterd_brickinfo_dup (brickinfo, brickinfo_dup); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_SET_INFO_FAIL, "Failed to dup brickinfo"); + goto out; + } + + ret = glusterd_brick_proc_for_port (brickinfo->port, &brick_proc); + if (ret) { + ret = glusterd_brickprocess_new (&brick_proc); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_NEW_FAILED, "Failed to create " + "new brick process instance"); + goto out; + } + + brick_proc->port = brickinfo->port; + + cds_list_add_tail (&brick_proc->brick_proc_list, &priv->brick_procs); + } + + cds_list_add_tail (&brickinfo_dup->brick_list, &brick_proc->bricks); + brick_proc->brick_count++; +out: + return ret; +} + +/* ret = 0 only when you get a brick process associated with the port + * ret = -1 otherwise + */ +int +glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + + this = THIS; + 
GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + + cds_list_for_each_entry (brick_proc, &priv->brick_procs, brick_proc_list) { + if (brick_proc->port == port) { + *brickprocess = brick_proc; + ret = 0; + break; + } + } +out: + return ret; +} + int32_t glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, @@ -2108,6 +2378,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, ret = 0; + ret = glusterd_brick_process_remove_brick (brickinfo); + if (ret) { + gf_msg_debug (this->name, 0, "Couldn't remove brick from" + " brick process"); + goto out; + } + if (del_brick) cds_list_del_init (&brickinfo->brick_list); @@ -2139,11 +2416,13 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, GF_FREE (op_errstr); } } + (void) glusterd_brick_disconnect (brickinfo); ret = 0; } GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); + gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile); (void) sys_unlink (pidfile); @@ -2151,7 +2430,6 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, if (del_brick) glusterd_delete_brick (volinfo, brickinfo); - out: return ret; } @@ -5079,6 +5357,7 @@ attach_brick (xlator_t *this, } (void) build_volfile_path (full_id, path, sizeof(path), NULL); + for (tries = 15; tries > 0; --tries) { rpc = rpc_clnt_ref (other_brick->rpc); if (rpc) { @@ -5094,6 +5373,23 @@ attach_brick (xlator_t *this, brickinfo->status = GF_BRICK_STARTED; brickinfo->rpc = rpc_clnt_ref (other_brick->rpc); + ret = glusterd_brick_process_add_brick (brickinfo, + volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + "Adding brick %s:%s to brick " + "process failed", brickinfo->hostname, + brickinfo->path); + return ret; + } + + if (ret) { + gf_msg_debug (this->name, 0, "Add brick" + " to brick process failed"); + return ret; + } + return 0; } } @@ -5115,56 +5411,6 @@ attach_brick (xlator_t *this, return ret; } -static gf_boolean_t -unsafe_option (dict_t *this, char *key, data_t *value, void *arg) -{ - /* - * Certain options are safe because they're already being handled other - * ways, such as being copied down to the bricks (all auth options) or - * being made irrelevant (event-threads). All others are suspect and - * must be checked in the next function. - */ - if (fnmatch ("*auth*", key, 0) == 0) { - return _gf_false; - } - - if (fnmatch ("*event-threads", key, 0) == 0) { - return _gf_false; - } - - return _gf_true; -} - -static int -opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2) -{ - data_t *value2 = dict_get (dict2, key); - int32_t min_len; - - /* - * If the option is only present on one, we can either look at the - * default or assume a mismatch. Looking at the default is pretty - * hard, because that's part of a structure within each translator and - * there's no dlopen interface to get at it, so we assume a mismatch. - * If the user really wants them to match (and for their bricks to be - * multiplexed, they can always reset the option). - */ - if (!value2) { - gf_log (THIS->name, GF_LOG_DEBUG, "missing option %s", key); - return -1; - } - - min_len = MIN (value1->len, value2->len); - if (strncmp (value1->data, value2->data, min_len) != 0) { - gf_log (THIS->name, GF_LOG_DEBUG, - "option mismatch, %s, %s != %s", - key, value1->data, value2->data); - return -1; - } - - return 0; -} - /* This name was just getting too long, hence the abbreviations. 
*/ static glusterd_brickinfo_t * find_compat_brick_in_vol (glusterd_conf_t *conf, @@ -5173,10 +5419,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, glusterd_brickinfo_t *brickinfo) { xlator_t *this = THIS; - glusterd_brickinfo_t *other_brick; + glusterd_brickinfo_t *other_brick = NULL; + glusterd_brick_proc_t *brick_proc = NULL; char pidfile2[PATH_MAX] = {0}; int32_t pid2 = -1; int16_t retries = 15; + int mux_limit = -1; + int ret = -1; /* * If comp_vol is provided, we have to check *volume* compatibility @@ -5208,6 +5457,13 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, gf_log (THIS->name, GF_LOG_DEBUG, "all options match"); } + ret = get_mux_limit_per_process (&mux_limit); + if (ret) { + gf_msg_debug (THIS->name, 0, "Retrieving brick mux " + "limit failed. Returning NULL"); + return NULL; + } + cds_list_for_each_entry (other_brick, &srch_vol->bricks, brick_list) { if (other_brick == brickinfo) { @@ -5221,6 +5477,30 @@ find_compat_brick_in_vol (glusterd_conf_t *conf, continue; } + ret = glusterd_brick_proc_for_port (other_brick->port, + &brick_proc); + if (ret) { + gf_msg_debug (THIS->name, 0, "Couldn't get brick " + "process corresponding to brick %s:%s", + other_brick->hostname, other_brick->path); + continue; + } + + if (mux_limit != -1) { + if (brick_proc->brick_count >= mux_limit) + continue; + } else { + /* This means that the "cluster.max-bricks-per-process" + * option hasn't yet been explicitly set. Continue + * as if there's no limit set. + */ + gf_msg (THIS->name, GF_LOG_WARNING, 0, + GD_MSG_NO_MUX_LIMIT, + "cluster.max-bricks-per-process option isn't " + "set. Continuing with no limit set for " + "brick multiplexing."); + } + GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick, conf); @@ -5497,6 +5777,16 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, (void) glusterd_brick_connect (volinfo, brickinfo, socketpath); + + ret = glusterd_brick_process_add_brick (brickinfo, volinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + "Adding brick %s:%s to brick process " + "failed.", brickinfo->hostname, + brickinfo->path); + goto out; + } } return 0; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index e717c40..adc3cb1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -145,6 +145,9 @@ gf_boolean_t glusterd_check_volume_exists (char *volname); int32_t +glusterd_brickprocess_new (glusterd_brick_proc_t **brickprocess); + +int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo); int32_t @@ -173,6 +176,16 @@ glusterd_get_next_available_brickid (glusterd_volinfo_t *volinfo); int32_t glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo); +int +glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t *volinfo); + +int +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo); + +int +glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess); + int32_t glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index cd9c9d9..0c985db 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -2222,8 +2222,6 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) goto out; } - count = volinfo->brick_count; - ret = dict_get_str
(dict, "bricks", &bricks); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2372,6 +2370,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) free_ptr = brick_list; } + count = volinfo->brick_count; + if (count) brick = strtok_r (brick_list+1, " \n", &saveptr); caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index de4cfb7..03a220b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -965,6 +965,39 @@ out: } static int +validate_mux_limit (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + xlator_t *this = NULL; + uint val = 0; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + + if (!is_brick_mx_enabled()) { + gf_asprintf (op_errstr, "Brick-multiplexing is not enabled. " + "Please enable brick multiplexing before trying " + "to set this option."); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_WRONG_OPTS_SETTING, "%s", *op_errstr); + goto out; + } + + ret = gf_string2uint (value, &val); + if (ret) { + gf_asprintf (op_errstr, "%s is not a valid count. " + "%s expects an unsigned integer.", value, key); + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_INVALID_ENTRY, "%s", *op_errstr); + } +out: + gf_msg_debug ("glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int validate_boolean (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, char **op_errstr) { @@ -3235,6 +3268,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_10_0, .validate_fn = validate_boolean }, + { .key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", + .value = "1", + .op_version = GD_OP_VERSION_3_12_0, + .validate_fn = validate_mux_limit + }, { .key = NULL } }; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index e49b186..e8292cd 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1752,6 +1752,7 @@ init (xlator_t *this) CDS_INIT_LIST_HEAD (&conf->volumes); CDS_INIT_LIST_HEAD (&conf->snapshots); CDS_INIT_LIST_HEAD (&conf->missed_snaps_list); + CDS_INIT_LIST_HEAD (&conf->brick_procs); pthread_mutex_init (&conf->mutex, NULL); conf->rpc = rpc; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 7b40196..c493773 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -52,9 +52,11 @@ #define GLUSTER_SHARED_STORAGE "gluster_shared_storage" #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage" #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" +#define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process" #define GANESHA_HA_CONF CONFDIR "/ganesha-ha.conf" #define GANESHA_EXPORT_DIRECTORY CONFDIR"/exports" + #define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 #define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 #define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 @@ -148,6 +150,7 @@ typedef struct { struct pmap_registry *pmap; struct cds_list_head volumes; struct cds_list_head snapshots; /*List of snap volumes */ + struct cds_list_head brick_procs; /* List of brick processes */ pthread_mutex_t xprt_lock; struct list_head xprt_list; gf_store_handle_t *handle; @@ -227,6 +230,15 @@ struct glusterd_brickinfo { typedef struct glusterd_brickinfo glusterd_brickinfo_t; +struct glusterd_brick_proc { + 
int port; + uint32_t brick_count; + struct cds_list_head brick_proc_list; + struct cds_list_head bricks; +}; + +typedef struct glusterd_brick_proc glusterd_brick_proc_t; + struct gf_defrag_brickinfo_ { char *name; int files; -- 1.8.3.1
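
A quick usage sketch of the new option, mirroring what the regression test added above exercises; the hostname, volume name, and brick paths here are hypothetical, everything else comes from the patch itself:

    # gluster volume set all cluster.brick-multiplex on
    # gluster volume set all cluster.max-bricks-per-process 3
    # gluster volume create testvol myhost:/bricks/brick{0..5}
    # gluster volume start testvol
    # pgrep glusterfsd | wc -l
    2

The limit can only be set while brick multiplexing is enabled (validate_mux_limit rejects the set operation otherwise), and values that don't parse as unsigned integers, such as -1 or foobar, fail validation. With six bricks and a per-process limit of three, the bricks are spread across two glusterfsd processes, which is exactly what tests/bugs/core/multiplex-limit-issue-151.t asserts.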