Blob Blame History Raw
From b78e56b839cb48bd923e07f286977e1b09fd6609 Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Tue, 13 Jun 2017 12:58:31 +0530
Subject: [PATCH 507/509] protocol/server: make listen backlog value as
 configurable

problem:

When we call listen from protocol/server, we are giving a
hard coded valie of 10 if it is not manually given.
With multiplexing, especially when glusterd restarts all
clients may try to connect to the server at a time.
Which will result in overflowing the queue, and kernel
will complain about the errors.

Solution:

This patch will introduce a volume set command to make backlog
value as a configurable. This patch also changes the default
values for backlog from 10 to 128. This changes is only applicable
for sockets listening from protocol.

Example:

gluster volume set <volname> transport.listen-backlog 1024

Note: 1 Brick has to be restarted to get this value in effect
      2 This changes won't be reflected in glusterd, or other
        xlators which calls listen. If you need, you have to
        add this option to the volfile.

Backport of>
>Change-Id: I0c5a2bbf28b5db612f9979e7560e05dd82b41477
>BUG: 1456405
>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
>Reviewed-on: https://review.gluster.org/17411
>Smoke: Gluster Build System <jenkins@build.gluster.org>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
>Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
>Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
>Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
>Reviewed-by: Niels de Vos <ndevos@redhat.com>
>Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>

Change-Id: I0c5a2bbf28b5db612f9979e7560e05dd82b41477
BUG: 1453145
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/108900
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 libglusterfs/src/globals.h                      |  2 +
 libglusterfs/src/glusterfs.h                    |  1 +
 rpc/rpc-transport/rdma/src/rdma.c               | 32 +++++++++++++-
 rpc/rpc-transport/rdma/src/rdma.h               |  2 +
 rpc/rpc-transport/socket/src/socket.c           | 25 ++++++-----
 tests/basic/multiplex.t                         |  3 ++
 xlators/mgmt/glusterd/src/glusterd-messages.h   | 10 ++++-
 xlators/mgmt/glusterd/src/glusterd-volume-set.c | 56 +++++++++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd.c            |  8 ++--
 xlators/mgmt/glusterd/src/glusterd.h            |  1 -
 xlators/protocol/server/src/server.c            |  4 ++
 11 files changed, 126 insertions(+), 18 deletions(-)

diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 08031a7..c7a5c16 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -91,6 +91,8 @@
 
 #define GD_OP_VERSION_3_11_0   31100 /* Op-version for GlusterFS 3.11.0 */
 
+#define GD_OP_VERSION_3_11_1   31101 /* Op-version for GlusterFS 3.11.1 */
+
 #include "xlator.h"
 
 /* THIS */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 3544aab..86519b0 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -71,6 +71,7 @@
 #endif
 
 #define GLUSTERD_MAX_SNAP_NAME  255
+#define GLUSTERFS_SOCKET_LISTEN_BACKLOG  10
 #define ZR_MOUNTPOINT_OPT       "mountpoint"
 #define ZR_ATTR_TIMEOUT_OPT     "attribute-timeout"
 #define ZR_ENTRY_TIMEOUT_OPT    "entry-timeout"
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
index d2f04bd..4a55cce 100644
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -4498,6 +4498,12 @@ gf_rdma_options_init (rpc_transport_t *this)
         options->attr_retry_cnt = GF_RDMA_RETRY_CNT;
         options->attr_rnr_retry = GF_RDMA_RNR_RETRY;
 
+        temp = dict_get (this->options, "transport.listen-backlog");
+        if (temp)
+                options->backlog = data_to_uint32 (temp);
+        else
+                options->backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
+
         temp = dict_get (this->options,
                          "transport.rdma.work-request-send-count");
         if (temp)
@@ -4634,6 +4640,7 @@ gf_rdma_init (rpc_transport_t *this)
         priv->peer.recv_count = options->recv_count;
         priv->peer.send_size = options->send_size;
         priv->peer.recv_size = options->recv_size;
+        priv->backlog = options->backlog;
 
         priv->peer.trans = this;
         INIT_LIST_HEAD (&priv->peer.ioq);
@@ -4847,7 +4854,8 @@ gf_rdma_listen (rpc_transport_t *this)
                 goto err;
         }
 
-        ret = rdma_listen (peer->cm_id, 10);
+        ret = rdma_listen (peer->cm_id, priv->backlog);
+
         if (ret != 0) {
                 gf_msg (this->name, GF_LOG_WARNING, errno,
                         RDMA_MSG_LISTEN_FAILED,
@@ -4918,6 +4926,28 @@ init (rpc_transport_t *this)
         return 0;
 }
 
+int
+reconfigure (rpc_transport_t *this, dict_t *options)
+{
+        gf_rdma_private_t *priv          = NULL;
+        uint32_t          backlog        = 0;
+        int               ret            = -1;
+
+        GF_VALIDATE_OR_GOTO ("rdma", this, out);
+        GF_VALIDATE_OR_GOTO ("rdma", this->private, out);
+
+        priv = this->private;
+
+        if (dict_get_uint32 (options, "transport.listen-backlog",
+                             &backlog) == 0) {
+                priv->backlog = backlog;
+                gf_log (this->name, GF_LOG_DEBUG, "Reconfigued "
+                        "transport.listen-backlog=%d", priv->backlog);
+        }
+        ret = 0;
+out:
+        return ret;
+}
 void
 fini (struct rpc_transport *this)
 {
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
index 449861f..6bcf6bc 100644
--- a/rpc/rpc-transport/rdma/src/rdma.h
+++ b/rpc/rpc-transport/rdma/src/rdma.h
@@ -144,6 +144,7 @@ struct __gf_rdma_options {
 	uint8_t  attr_timeout;
 	uint8_t  attr_retry_cnt;
 	uint8_t  attr_rnr_retry;
+        uint32_t backlog;
 };
 typedef struct __gf_rdma_options gf_rdma_options_t;
 
@@ -383,6 +384,7 @@ struct __gf_rdma_private {
         pthread_mutex_t             recv_mutex;
         pthread_cond_t              recv_cond;
         gf_rdma_transport_entity_t  entity;
+        uint32_t                    backlog;
 };
 typedef struct __gf_rdma_private    gf_rdma_private_t;
 
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 16cba78..4560d2f 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -3524,10 +3524,7 @@ socket_listen (rpc_transport_t *this)
                         goto unlock;
                 }
 
-                if (priv->backlog)
-                        ret = listen (priv->sock, priv->backlog);
-                else
-                        ret = listen (priv->sock, 10);
+                ret = listen (priv->sock, priv->backlog);
 
                 if (ret == -1) {
                         gf_log (this->name, GF_LOG_ERROR,
@@ -3843,6 +3840,7 @@ reconfigure (rpc_transport_t *this, dict_t *options)
         int               ret           = 0;
         uint64_t          windowsize    = 0;
         uint32_t          timeout       = 0;
+        uint32_t          backlog       = 0;
 
         GF_VALIDATE_OR_GOTO ("socket", this, out);
         GF_VALIDATE_OR_GOTO ("socket", this->private, out);
@@ -3878,6 +3876,13 @@ reconfigure (rpc_transport_t *this, dict_t *options)
                         "transport.tcp-user-timeout=%d", timeout);
         }
 
+        if (dict_get_uint32 (options, "transport.listen-backlog",
+                             &backlog) == 0) {
+                priv->backlog = backlog;
+                gf_log (this->name, GF_LOG_DEBUG, "Reconfigued "
+                        "transport.listen-backlog=%d", priv->backlog);
+        }
+
         optstr = NULL;
         if (dict_get_str (options, "tcp-window-size", &optstr) == 0) {
                 if (gf_string2uint64 (optstr, &windowsize) != 0) {
@@ -4144,14 +4149,17 @@ socket_init (rpc_transport_t *this)
                              &timeout) == 0) {
                 priv->timeout = timeout;
         }
+
         gf_log (this->name, GF_LOG_DEBUG, "Configued "
                 "transport.tcp-user-timeout=%d", priv->timeout);
 
         if (dict_get_uint32 (this->options,
-                             "transport.socket.listen-backlog",
-                             &backlog) == 0) {
-                priv->backlog = backlog;
+                             "transport.listen-backlog",
+                             &backlog) != 0) {
+
+                backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
         }
+        priv->backlog = backlog;
 
         optstr = NULL;
 
@@ -4561,9 +4569,6 @@ struct volume_options options[] = {
         { .key   = {"transport.socket.keepalive-time"},
           .type  = GF_OPTION_TYPE_INT
         },
-        { .key   = {"transport.socket.listen-backlog"},
-          .type  = GF_OPTION_TYPE_INT
-        },
         { .key   = {"transport.socket.read-fail-log"},
           .type  = GF_OPTION_TYPE_BOOL
         },
diff --git a/tests/basic/multiplex.t b/tests/basic/multiplex.t
index 819ff40..b69fe57 100644
--- a/tests/basic/multiplex.t
+++ b/tests/basic/multiplex.t
@@ -27,6 +27,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
 EXPECT 1 online_brick_count
 
 TEST $CLI volume stop $V0
+#Testing the volume set command introduced for protocol/server
+TEST $CLI volume set $V0 transport.listen-backlog 1024
+
 EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 online_brick_count
 TEST $CLI volume start $V0
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index 3d0f48c..58208ec 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -41,7 +41,7 @@
 
 #define GLUSTERD_COMP_BASE      GLFS_MSGID_GLUSTERD
 
-#define GLFS_NUM_MESSAGES       601
+#define GLFS_NUM_MESSAGES       602
 
 #define GLFS_MSGID_END          (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1)
 /* Messaged with message IDs */
@@ -4809,6 +4809,14 @@
  */
 #define GD_MSG_PIDFILE_UNLINKING                   (GLUSTERD_COMP_BASE + 601)
 
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_VOL_SET_VALIDATION_INFO             (GLUSTERD_COMP_BASE + 602)
+
 /*------------*/
 
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index d5fa23b..de4cfb7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -707,6 +707,52 @@ out:
 }
 
 static int
+validate_server_options (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+                         char *value, char **op_errstr)
+{
+        char                 errstr[2048]  = "";
+        xlator_t            *this          = NULL;
+        int                  ret           = -1;
+        int                  origin_val    = 0;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+                gf_msg (this->name, GF_LOG_INFO, 0,
+                        GD_MSG_VOL_SET_VALIDATION_INFO, "Please note that "
+                        "volume %s is started. This option will only get "
+                        "effected after a brick restart.", volinfo->volname);
+        }
+
+        ret = gf_string2int (value, &origin_val);
+        if (ret) {
+                snprintf (errstr, sizeof (errstr), "%s is not a compatible "
+                          "value. %s expects an integer value.", value, key);
+                ret = -1;
+                goto out;
+        }
+
+        if (origin_val < 0) {
+                snprintf (errstr, sizeof (errstr), "%s is not a "
+                          "compatible value. %s expects a positive"
+                          "integer value.", value, key);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                        GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+        }
+
+        return ret;
+}
+
+static int
 validate_stripe (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
                  char *value, char **op_errstr)
 {
@@ -1919,6 +1965,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version  = GD_OP_VERSION_3_7_4,
           .type        = NO_DOC,
         },
+        { .key         = "transport.listen-backlog",
+          .voltype     = "protocol/server",
+          .option      = "transport.listen-backlog",
+          .op_version  = GD_OP_VERSION_3_11_1,
+          .validate_fn = validate_server_options,
+          .description = "This option uses the value of backlog argument that "
+                         "defines the maximum length to which the queue of "
+                         "pending connections for socket fd may grow.",
+          .value       = "10",
+        },
 
         /* Performance xlators enable/disbable options */
         { .key         = "performance.write-behind",
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index f718d36..e49b186 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -461,14 +461,12 @@ glusterd_rpcsvc_options_build (dict_t *options)
         int             ret = 0;
         uint32_t        backlog = 0;
 
-        ret = dict_get_uint32 (options, "transport.socket.listen-backlog",
-                               &backlog);
+        ret = dict_get_uint32 (options, "transport.listen-backlog", &backlog);
 
         if (ret) {
-                backlog = GLUSTERD_SOCKET_LISTEN_BACKLOG;
+                backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
                 ret = dict_set_uint32 (options,
-                                      "transport.socket.listen-backlog",
-                                      backlog);
+                                      "transport.listen-backlog", backlog);
                 if (ret)
                         goto out;
         }
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index b5a0a3b..7b40196 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -39,7 +39,6 @@
 #include "events.h"
 
 #define GLUSTERD_TR_LOG_SIZE            50
-#define GLUSTERD_SOCKET_LISTEN_BACKLOG  128
 #define GLUSTERD_QUORUM_TYPE_KEY        "cluster.server-quorum-type"
 #define GLUSTERD_QUORUM_RATIO_KEY       "cluster.server-quorum-ratio"
 #define GLUSTERD_GLOBAL_OPT_VERSION     "global-option-version"
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 6acf256..5d4d860 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -1622,6 +1622,10 @@ struct volume_options options[] = {
                     "socket*([ \t]),*([ \t])rdma"},
           .type  = GF_OPTION_TYPE_STR
         },
+        { .key   = {"transport.listen-backlog"},
+          .type  = GF_OPTION_TYPE_INT,
+          .default_value = "10",
+        },
         { .key   = {"volume-filename.*"},
           .type  = GF_OPTION_TYPE_PATH,
         },
-- 
1.8.3.1