From b78e56b839cb48bd923e07f286977e1b09fd6609 Mon Sep 17 00:00:00 2001 From: Mohammed Rafi KC Date: Tue, 13 Jun 2017 12:58:31 +0530 Subject: [PATCH 507/509] protocol/server: make listen backlog value as configurable problem: When we call listen from protocol/server, we are giving a hard coded valie of 10 if it is not manually given. With multiplexing, especially when glusterd restarts all clients may try to connect to the server at a time. Which will result in overflowing the queue, and kernel will complain about the errors. Solution: This patch will introduce a volume set command to make backlog value as a configurable. This patch also changes the default values for backlog from 10 to 128. This changes is only applicable for sockets listening from protocol. Example: gluster volume set transport.listen-backlog 1024 Note: 1 Brick has to be restarted to get this value in effect 2 This changes won't be reflected in glusterd, or other xlators which calls listen. If you need, you have to add this option to the volfile. Backport of> >Change-Id: I0c5a2bbf28b5db612f9979e7560e05dd82b41477 >BUG: 1456405 >Signed-off-by: Mohammed Rafi KC >Reviewed-on: https://review.gluster.org/17411 >Smoke: Gluster Build System >NetBSD-regression: NetBSD Build System >CentOS-regression: Gluster Build System >Reviewed-by: Raghavendra G >Reviewed-by: Raghavendra Talur >Reviewed-by: Atin Mukherjee >Reviewed-by: Niels de Vos >Reviewed-by: Jeff Darcy Change-Id: I0c5a2bbf28b5db612f9979e7560e05dd82b41477 BUG: 1453145 Signed-off-by: Mohammed Rafi KC Reviewed-on: https://code.engineering.redhat.com/gerrit/108900 Reviewed-by: Atin Mukherjee --- libglusterfs/src/globals.h | 2 + libglusterfs/src/glusterfs.h | 1 + rpc/rpc-transport/rdma/src/rdma.c | 32 +++++++++++++- rpc/rpc-transport/rdma/src/rdma.h | 2 + rpc/rpc-transport/socket/src/socket.c | 25 ++++++----- tests/basic/multiplex.t | 3 ++ xlators/mgmt/glusterd/src/glusterd-messages.h | 10 ++++- xlators/mgmt/glusterd/src/glusterd-volume-set.c | 56 +++++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd.c | 8 ++-- xlators/mgmt/glusterd/src/glusterd.h | 1 - xlators/protocol/server/src/server.c | 4 ++ 11 files changed, 126 insertions(+), 18 deletions(-) diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index 08031a7..c7a5c16 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -91,6 +91,8 @@ #define GD_OP_VERSION_3_11_0 31100 /* Op-version for GlusterFS 3.11.0 */ +#define GD_OP_VERSION_3_11_1 31101 /* Op-version for GlusterFS 3.11.1 */ + #include "xlator.h" /* THIS */ diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 3544aab..86519b0 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -71,6 +71,7 @@ #endif #define GLUSTERD_MAX_SNAP_NAME 255 +#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 10 #define ZR_MOUNTPOINT_OPT "mountpoint" #define ZR_ATTR_TIMEOUT_OPT "attribute-timeout" #define ZR_ENTRY_TIMEOUT_OPT "entry-timeout" diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c index d2f04bd..4a55cce 100644 --- a/rpc/rpc-transport/rdma/src/rdma.c +++ b/rpc/rpc-transport/rdma/src/rdma.c @@ -4498,6 +4498,12 @@ gf_rdma_options_init (rpc_transport_t *this) options->attr_retry_cnt = GF_RDMA_RETRY_CNT; options->attr_rnr_retry = GF_RDMA_RNR_RETRY; + temp = dict_get (this->options, "transport.listen-backlog"); + if (temp) + options->backlog = data_to_uint32 (temp); + else + options->backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG; + temp = dict_get (this->options, "transport.rdma.work-request-send-count"); if (temp) @@ -4634,6 +4640,7 @@ gf_rdma_init (rpc_transport_t *this) priv->peer.recv_count = options->recv_count; priv->peer.send_size = options->send_size; priv->peer.recv_size = options->recv_size; + priv->backlog = options->backlog; priv->peer.trans = this; INIT_LIST_HEAD (&priv->peer.ioq); @@ -4847,7 +4854,8 @@ gf_rdma_listen (rpc_transport_t *this) goto err; } - ret = rdma_listen (peer->cm_id, 10); + ret = rdma_listen (peer->cm_id, priv->backlog); + if (ret != 0) { gf_msg (this->name, GF_LOG_WARNING, errno, RDMA_MSG_LISTEN_FAILED, @@ -4918,6 +4926,28 @@ init (rpc_transport_t *this) return 0; } +int +reconfigure (rpc_transport_t *this, dict_t *options) +{ + gf_rdma_private_t *priv = NULL; + uint32_t backlog = 0; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("rdma", this, out); + GF_VALIDATE_OR_GOTO ("rdma", this->private, out); + + priv = this->private; + + if (dict_get_uint32 (options, "transport.listen-backlog", + &backlog) == 0) { + priv->backlog = backlog; + gf_log (this->name, GF_LOG_DEBUG, "Reconfigued " + "transport.listen-backlog=%d", priv->backlog); + } + ret = 0; +out: + return ret; +} void fini (struct rpc_transport *this) { diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h index 449861f..6bcf6bc 100644 --- a/rpc/rpc-transport/rdma/src/rdma.h +++ b/rpc/rpc-transport/rdma/src/rdma.h @@ -144,6 +144,7 @@ struct __gf_rdma_options { uint8_t attr_timeout; uint8_t attr_retry_cnt; uint8_t attr_rnr_retry; + uint32_t backlog; }; typedef struct __gf_rdma_options gf_rdma_options_t; @@ -383,6 +384,7 @@ struct __gf_rdma_private { pthread_mutex_t recv_mutex; pthread_cond_t recv_cond; gf_rdma_transport_entity_t entity; + uint32_t backlog; }; typedef struct __gf_rdma_private gf_rdma_private_t; diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index 16cba78..4560d2f 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -3524,10 +3524,7 @@ socket_listen (rpc_transport_t *this) goto unlock; } - if (priv->backlog) - ret = listen (priv->sock, priv->backlog); - else - ret = listen (priv->sock, 10); + ret = listen (priv->sock, priv->backlog); if (ret == -1) { gf_log (this->name, GF_LOG_ERROR, @@ -3843,6 +3840,7 @@ reconfigure (rpc_transport_t *this, dict_t *options) int ret = 0; uint64_t windowsize = 0; uint32_t timeout = 0; + uint32_t backlog = 0; GF_VALIDATE_OR_GOTO ("socket", this, out); GF_VALIDATE_OR_GOTO ("socket", this->private, out); @@ -3878,6 +3876,13 @@ reconfigure (rpc_transport_t *this, dict_t *options) "transport.tcp-user-timeout=%d", timeout); } + if (dict_get_uint32 (options, "transport.listen-backlog", + &backlog) == 0) { + priv->backlog = backlog; + gf_log (this->name, GF_LOG_DEBUG, "Reconfigued " + "transport.listen-backlog=%d", priv->backlog); + } + optstr = NULL; if (dict_get_str (options, "tcp-window-size", &optstr) == 0) { if (gf_string2uint64 (optstr, &windowsize) != 0) { @@ -4144,14 +4149,17 @@ socket_init (rpc_transport_t *this) &timeout) == 0) { priv->timeout = timeout; } + gf_log (this->name, GF_LOG_DEBUG, "Configued " "transport.tcp-user-timeout=%d", priv->timeout); if (dict_get_uint32 (this->options, - "transport.socket.listen-backlog", - &backlog) == 0) { - priv->backlog = backlog; + "transport.listen-backlog", + &backlog) != 0) { + + backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG; } + priv->backlog = backlog; optstr = NULL; @@ -4561,9 +4569,6 @@ struct volume_options options[] = { { .key = {"transport.socket.keepalive-time"}, .type = GF_OPTION_TYPE_INT }, - { .key = {"transport.socket.listen-backlog"}, - .type = GF_OPTION_TYPE_INT - }, { .key = {"transport.socket.read-fail-log"}, .type = GF_OPTION_TYPE_BOOL }, diff --git a/tests/basic/multiplex.t b/tests/basic/multiplex.t index 819ff40..b69fe57 100644 --- a/tests/basic/multiplex.t +++ b/tests/basic/multiplex.t @@ -27,6 +27,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks EXPECT 1 online_brick_count TEST $CLI volume stop $V0 +#Testing the volume set command introduced for protocol/server +TEST $CLI volume set $V0 transport.listen-backlog 1024 + EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 online_brick_count TEST $CLI volume start $V0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index 3d0f48c..58208ec 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -41,7 +41,7 @@ #define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD -#define GLFS_NUM_MESSAGES 601 +#define GLFS_NUM_MESSAGES 602 #define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ @@ -4809,6 +4809,14 @@ */ #define GD_MSG_PIDFILE_UNLINKING (GLUSTERD_COMP_BASE + 601) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_VOL_SET_VALIDATION_INFO (GLUSTERD_COMP_BASE + 602) + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index d5fa23b..de4cfb7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -707,6 +707,52 @@ out: } static int +validate_server_options (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + char errstr[2048] = ""; + xlator_t *this = NULL; + int ret = -1; + int origin_val = 0; + + this = THIS; + GF_ASSERT (this); + + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + gf_msg (this->name, GF_LOG_INFO, 0, + GD_MSG_VOL_SET_VALIDATION_INFO, "Please note that " + "volume %s is started. This option will only get " + "effected after a brick restart.", volinfo->volname); + } + + ret = gf_string2int (value, &origin_val); + if (ret) { + snprintf (errstr, sizeof (errstr), "%s is not a compatible " + "value. %s expects an integer value.", value, key); + ret = -1; + goto out; + } + + if (origin_val < 0) { + snprintf (errstr, sizeof (errstr), "%s is not a " + "compatible value. %s expects a positive" + "integer value.", value, key); + ret = -1; + goto out; + } + + ret = 0; +out: + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr); + *op_errstr = gf_strdup (errstr); + } + + return ret; +} + +static int validate_stripe (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, char **op_errstr) { @@ -1919,6 +1965,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_7_4, .type = NO_DOC, }, + { .key = "transport.listen-backlog", + .voltype = "protocol/server", + .option = "transport.listen-backlog", + .op_version = GD_OP_VERSION_3_11_1, + .validate_fn = validate_server_options, + .description = "This option uses the value of backlog argument that " + "defines the maximum length to which the queue of " + "pending connections for socket fd may grow.", + .value = "10", + }, /* Performance xlators enable/disbable options */ { .key = "performance.write-behind", diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index f718d36..e49b186 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -461,14 +461,12 @@ glusterd_rpcsvc_options_build (dict_t *options) int ret = 0; uint32_t backlog = 0; - ret = dict_get_uint32 (options, "transport.socket.listen-backlog", - &backlog); + ret = dict_get_uint32 (options, "transport.listen-backlog", &backlog); if (ret) { - backlog = GLUSTERD_SOCKET_LISTEN_BACKLOG; + backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG; ret = dict_set_uint32 (options, - "transport.socket.listen-backlog", - backlog); + "transport.listen-backlog", backlog); if (ret) goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index b5a0a3b..7b40196 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -39,7 +39,6 @@ #include "events.h" #define GLUSTERD_TR_LOG_SIZE 50 -#define GLUSTERD_SOCKET_LISTEN_BACKLOG 128 #define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type" #define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" #define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 6acf256..5d4d860 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -1622,6 +1622,10 @@ struct volume_options options[] = { "socket*([ \t]),*([ \t])rdma"}, .type = GF_OPTION_TYPE_STR }, + { .key = {"transport.listen-backlog"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "10", + }, { .key = {"volume-filename.*"}, .type = GF_OPTION_TYPE_PATH, }, -- 1.8.3.1