e3c68b
From 86eee7e829bb33cac9b611da511ecbd2f03fab25 Mon Sep 17 00:00:00 2001
e3c68b
From: Mohit Agrawal <moagrawal@redhat.com>
e3c68b
Date: Fri, 17 May 2019 19:26:48 +0530
e3c68b
Subject: [PATCH 149/169] glusterd: Optimize code to copy dictionary in
e3c68b
 handshake code path
e3c68b
e3c68b
Problem: When a large number of volumes (around 2000) is configured,
e3c68b
         glusterd hits a bottleneck during handshake while
e3c68b
         copying the dictionary
e3c68b
e3c68b
Solution: To avoid the bottleneck, serialize the dictionary instead
e3c68b
          of copying key-value pairs one by one
e3c68b
e3c68b
> Change-Id: I9fb332f432e4f915bc3af8dcab38bed26bda2b9a
e3c68b
> fixes: bz#1711297
e3c68b
> Cherry picked from commit f8f09178bb890924a8050b466cc2e7a0a30e35a7
e3c68b
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22742/)
e3c68b
e3c68b
BUG: 1711296
e3c68b
Change-Id: I9fb332f432e4f915bc3af8dcab38bed26bda2b9a
e3c68b
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
e3c68b
Reviewed-on: https://code.engineering.redhat.com/gerrit/172255
e3c68b
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
e3c68b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e3c68b
---
e3c68b
 libglusterfs/src/dict.c                      |   6 +-
e3c68b
 libglusterfs/src/glusterfs/dict.h            |   6 +
e3c68b
 libglusterfs/src/libglusterfs.sym            |   1 +
e3c68b
 xlators/mgmt/glusterd/src/glusterd-rpc-ops.c |  27 ++--
e3c68b
 xlators/mgmt/glusterd/src/glusterd-utils.c   | 187 +++++++++++++++++++++++----
e3c68b
 xlators/mgmt/glusterd/src/glusterd-utils.h   |   3 +-
e3c68b
 xlators/mgmt/glusterd/src/glusterd.h         |   5 +
e3c68b
 7 files changed, 194 insertions(+), 41 deletions(-)
e3c68b
e3c68b
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
e3c68b
index 4cd1fcf..6917df9 100644
e3c68b
--- a/libglusterfs/src/dict.c
e3c68b
+++ b/libglusterfs/src/dict.c
e3c68b
@@ -2799,10 +2799,6 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
e3c68b
  *     4        4         4       <key len>   <value len>
e3c68b
  */
e3c68b
 
e3c68b
-#define DICT_HDR_LEN 4
e3c68b
-#define DICT_DATA_HDR_KEY_LEN 4
e3c68b
-#define DICT_DATA_HDR_VAL_LEN 4
e3c68b
-
e3c68b
 /**
e3c68b
  * dict_serialized_length_lk - return the length of serialized dict. This
e3c68b
  *                             procedure has to be called with this->lock held.
e3c68b
@@ -2812,7 +2808,7 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
e3c68b
  *        : failure: -errno
e3c68b
  */
e3c68b
 
e3c68b
-static int
e3c68b
+int
e3c68b
 dict_serialized_length_lk(dict_t *this)
e3c68b
 {
e3c68b
     int ret = -EINVAL;
e3c68b
diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h
e3c68b
index 52b833f..022f564 100644
e3c68b
--- a/libglusterfs/src/glusterfs/dict.h
e3c68b
+++ b/libglusterfs/src/glusterfs/dict.h
e3c68b
@@ -91,6 +91,9 @@ typedef struct _data_pair data_pair_t;
e3c68b
 #define DICT_MAX_FLAGS 256
e3c68b
 #define DICT_FLAG_SET 1
e3c68b
 #define DICT_FLAG_CLEAR 0
e3c68b
+#define DICT_HDR_LEN 4
e3c68b
+#define DICT_DATA_HDR_KEY_LEN 4
e3c68b
+#define DICT_DATA_HDR_VAL_LEN 4
e3c68b
 
e3c68b
 struct _data {
e3c68b
     char *data;
e3c68b
@@ -412,4 +415,7 @@ are_dicts_equal(dict_t *one, dict_t *two,
e3c68b
                 gf_boolean_t (*value_ignore)(char *k));
e3c68b
 int
e3c68b
 dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result);
e3c68b
+
e3c68b
+int
e3c68b
+dict_serialized_length_lk(dict_t *this);
e3c68b
 #endif
e3c68b
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
e3c68b
index cf5757c..ec474e7 100644
e3c68b
--- a/libglusterfs/src/libglusterfs.sym
e3c68b
+++ b/libglusterfs/src/libglusterfs.sym
e3c68b
@@ -405,6 +405,7 @@ dict_rename_key
e3c68b
 dict_reset
e3c68b
 dict_serialize
e3c68b
 dict_serialized_length
e3c68b
+dict_serialized_length_lk
e3c68b
 dict_serialize_value_with_delim
e3c68b
 dict_set
e3c68b
 dict_setn
e3c68b
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
e3c68b
index 4ec9700..45f8f17 100644
e3c68b
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
e3c68b
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
e3c68b
@@ -1528,11 +1528,9 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
e3c68b
 
e3c68b
     RCU_READ_UNLOCK;
e3c68b
 
e3c68b
-    ret = glusterd_add_volumes_to_export_dict(&peer_data);
e3c68b
-    if (ret) {
e3c68b
-        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
e3c68b
-               "Unable to add list of volumes "
e3c68b
-               "in the peer_data dict for handshake");
e3c68b
+    peer_data = dict_new();
e3c68b
+    if (!peer_data) {
e3c68b
+        errno = ENOMEM;
e3c68b
         goto out;
e3c68b
     }
e3c68b
 
e3c68b
@@ -1563,10 +1561,23 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
e3c68b
         }
e3c68b
     }
e3c68b
 
e3c68b
-    ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val,
e3c68b
-                                      &req.vols.vols_len);
e3c68b
-    if (ret)
e3c68b
+    /* Do not add any key-value pair to peer_data after calling this function
e3c68b
+     */
e3c68b
+    ret = glusterd_add_volumes_to_export_dict(peer_data, &req.vols.vols_val,
e3c68b
+                                              &req.vols.vols_len);
e3c68b
+    if (ret) {
e3c68b
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
e3c68b
+               "Unable to add list of volumes "
e3c68b
+               "in the peer_data dict for handshake");
e3c68b
         goto out;
e3c68b
+    }
e3c68b
+
e3c68b
+    if (!req.vols.vols_len) {
e3c68b
+        ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val,
e3c68b
+                                          &req.vols.vols_len);
e3c68b
+        if (ret)
e3c68b
+            goto out;
e3c68b
+    }
e3c68b
 
e3c68b
     ret = glusterd_submit_request(
e3c68b
         peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_FRIEND_ADD, NULL,
e3c68b
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
e3c68b
index 8f1525e..2bc4836 100644
e3c68b
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
e3c68b
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
e3c68b
@@ -3466,11 +3466,118 @@ out:
e3c68b
     return NULL;
e3c68b
 }
e3c68b
 
e3c68b
+int
e3c68b
+glusterd_dict_searialize(dict_t *dict_arr[], int count, int totcount, char *buf)
e3c68b
+{
e3c68b
+    int i = 0;
e3c68b
+    int32_t keylen = 0;
e3c68b
+    int64_t netword = 0;
e3c68b
+    data_pair_t *pair = NULL;
e3c68b
+    int dict_count = 0;
e3c68b
+    int ret = 0;
e3c68b
+
e3c68b
+    netword = hton32(totcount);
e3c68b
+    memcpy(buf, &netword, sizeof(netword));
e3c68b
+    buf += DICT_HDR_LEN;
e3c68b
+
e3c68b
+    for (i = 0; i < count; i++) {
e3c68b
+        if (dict_arr[i]) {
e3c68b
+            dict_count = dict_arr[i]->count;
e3c68b
+            pair = dict_arr[i]->members_list;
e3c68b
+            while (dict_count) {
e3c68b
+                if (!pair) {
e3c68b
+                    gf_msg("glusterd", GF_LOG_ERROR, 0,
e3c68b
+                           LG_MSG_PAIRS_LESS_THAN_COUNT,
e3c68b
+                           "less than count data pairs found!");
e3c68b
+                    ret = -1;
e3c68b
+                    goto out;
e3c68b
+                }
e3c68b
+
e3c68b
+                if (!pair->key) {
e3c68b
+                    gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR,
e3c68b
+                           "pair->key is null!");
e3c68b
+                    ret = -1;
e3c68b
+                    goto out;
e3c68b
+                }
e3c68b
+
e3c68b
+                keylen = strlen(pair->key);
e3c68b
+                netword = hton32(keylen);
e3c68b
+                memcpy(buf, &netword, sizeof(netword));
e3c68b
+                buf += DICT_DATA_HDR_KEY_LEN;
e3c68b
+                if (!pair->value) {
e3c68b
+                    gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR,
e3c68b
+                           "pair->value is null!");
e3c68b
+                    ret = -1;
e3c68b
+                    goto out;
e3c68b
+                }
e3c68b
+
e3c68b
+                netword = hton32(pair->value->len);
e3c68b
+                memcpy(buf, &netword, sizeof(netword));
e3c68b
+                buf += DICT_DATA_HDR_VAL_LEN;
e3c68b
+
e3c68b
+                memcpy(buf, pair->key, keylen);
e3c68b
+                buf += keylen;
e3c68b
+                *buf++ = '\0';
e3c68b
+
e3c68b
+                if (pair->value->data) {
e3c68b
+                    memcpy(buf, pair->value->data, pair->value->len);
e3c68b
+                    buf += pair->value->len;
e3c68b
+                }
e3c68b
+
e3c68b
+                pair = pair->next;
e3c68b
+                dict_count--;
e3c68b
+            }
e3c68b
+        }
e3c68b
+    }
e3c68b
+
e3c68b
+out:
e3c68b
+    for (i = 0; i < count; i++) {
e3c68b
+        if (dict_arr[i])
e3c68b
+            dict_unref(dict_arr[i]);
e3c68b
+    }
e3c68b
+    return ret;
e3c68b
+}
e3c68b
+
e3c68b
+int
e3c68b
+glusterd_dict_arr_serialize(dict_t *dict_arr[], int count, char **buf,
e3c68b
+                            u_int *length)
e3c68b
+{
e3c68b
+    ssize_t len = 0;
e3c68b
+    int i = 0;
e3c68b
+    int totcount = 0;
e3c68b
+    int ret = 0;
e3c68b
+
e3c68b
+    for (i = 0; i < count; i++) {
e3c68b
+        if (dict_arr[i]) {
e3c68b
+            len += dict_serialized_length_lk(dict_arr[i]);
e3c68b
+            totcount += dict_arr[i]->count;
e3c68b
+        }
e3c68b
+    }
e3c68b
+
e3c68b
+    // Subtract the DICT_HDR_LEN of every dictionary except one
e3c68b
+    len = len - ((count - 1) * DICT_HDR_LEN);
e3c68b
+
e3c68b
+    *buf = GF_MALLOC(len, gf_common_mt_char);
e3c68b
+    if (*buf == NULL) {
e3c68b
+        ret = -ENOMEM;
e3c68b
+        goto out;
e3c68b
+    }
e3c68b
+
e3c68b
+    if (length != NULL) {
e3c68b
+        *length = len;
e3c68b
+    }
e3c68b
+
e3c68b
+    ret = glusterd_dict_searialize(dict_arr, count, totcount, *buf);
e3c68b
+
e3c68b
+out:
e3c68b
+    return ret;
e3c68b
+}
e3c68b
+
e3c68b
 int32_t
e3c68b
-glusterd_add_volumes_to_export_dict(dict_t **peer_data)
e3c68b
+glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf,
e3c68b
+                                    u_int *length)
e3c68b
 {
e3c68b
     int32_t ret = -1;
e3c68b
-    dict_t *dict = NULL;
e3c68b
     dict_t *dict_arr[128] = {
e3c68b
         0,
e3c68b
     };
e3c68b
@@ -3496,10 +3603,6 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
e3c68b
     priv = this->private;
e3c68b
     GF_ASSERT(priv);
e3c68b
 
e3c68b
-    dict = dict_new();
e3c68b
-    if (!dict)
e3c68b
-        goto out;
e3c68b
-
e3c68b
     /* Count the total number of volumes */
e3c68b
     cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++;
e3c68b
 
e3c68b
@@ -3520,14 +3623,15 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
e3c68b
         cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
e3c68b
         {
e3c68b
             count++;
e3c68b
-            ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
e3c68b
+            ret = glusterd_add_volume_to_dict(volinfo, peer_data, count,
e3c68b
+                                              "volume");
e3c68b
             if (ret)
e3c68b
                 goto out;
e3c68b
 
e3c68b
             if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
e3c68b
                 continue;
e3c68b
 
e3c68b
-            ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
e3c68b
+            ret = glusterd_vol_add_quota_conf_to_dict(volinfo, peer_data, count,
e3c68b
                                                       "volume");
e3c68b
             if (ret)
e3c68b
                 goto out;
e3c68b
@@ -3569,34 +3673,34 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
e3c68b
 
e3c68b
         gf_log(this->name, GF_LOG_INFO,
e3c68b
                "Finished dictionary popluation in all threads");
e3c68b
-        for (i = 0; i < totthread; i++) {
e3c68b
-            dict_copy_with_ref(dict_arr[i], dict);
e3c68b
-            dict_unref(dict_arr[i]);
e3c68b
-        }
e3c68b
-        gf_log(this->name, GF_LOG_INFO,
e3c68b
-               "Finished merger of all dictionraies into single one");
e3c68b
     }
e3c68b
 
e3c68b
-    ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt);
e3c68b
+    ret = dict_set_int32n(peer_data, "count", SLEN("count"), volcnt);
e3c68b
     if (ret)
e3c68b
         goto out;
e3c68b
 
e3c68b
-    ctx.dict = dict;
e3c68b
+    ctx.dict = peer_data;
e3c68b
     ctx.prefix = "global";
e3c68b
     ctx.opt_count = 1;
e3c68b
     ctx.key_name = "key";
e3c68b
     ctx.val_name = "val";
e3c68b
     dict_foreach(priv->opts, _add_dict_to_prdict, &ctx);
e3c68b
     ctx.opt_count--;
e3c68b
-    ret = dict_set_int32n(dict, "global-opt-count", SLEN("global-opt-count"),
e3c68b
-                          ctx.opt_count);
e3c68b
+    ret = dict_set_int32n(peer_data, "global-opt-count",
e3c68b
+                          SLEN("global-opt-count"), ctx.opt_count);
e3c68b
     if (ret)
e3c68b
         goto out;
e3c68b
 
e3c68b
-    *peer_data = dict;
e3c68b
+    if (totthread) {
e3c68b
+        gf_log(this->name, GF_LOG_INFO,
e3c68b
+               "Finished merger of all dictionraies into single one");
e3c68b
+        dict_arr[totthread++] = peer_data;
e3c68b
+        ret = glusterd_dict_arr_serialize(dict_arr, totthread, buf, length);
e3c68b
+        gf_log(this->name, GF_LOG_INFO,
e3c68b
+               "Serialize dictionary data return is %d", ret);
e3c68b
+    }
e3c68b
+
e3c68b
 out:
e3c68b
-    if (ret)
e3c68b
-        dict_unref(dict);
e3c68b
 
e3c68b
     gf_msg_trace(this->name, 0, "Returning %d", ret);
e3c68b
     return ret;
e3c68b
@@ -4940,6 +5044,7 @@ glusterd_import_friend_volumes_synctask(void *opaque)
e3c68b
     xlator_t *this = NULL;
e3c68b
     glusterd_conf_t *conf = NULL;
e3c68b
     dict_t *peer_data = NULL;
e3c68b
+    glusterd_friend_synctask_args_t *arg = NULL;
e3c68b
 
e3c68b
     this = THIS;
e3c68b
     GF_ASSERT(this);
e3c68b
@@ -4947,8 +5052,20 @@ glusterd_import_friend_volumes_synctask(void *opaque)
e3c68b
     conf = this->private;
e3c68b
     GF_ASSERT(conf);
e3c68b
 
e3c68b
-    peer_data = (dict_t *)opaque;
e3c68b
-    GF_ASSERT(peer_data);
e3c68b
+    arg = opaque;
e3c68b
+    if (!arg)
e3c68b
+        goto out;
e3c68b
+
e3c68b
+    peer_data = dict_new();
e3c68b
+    if (!peer_data) {
e3c68b
+        goto out;
e3c68b
+    }
e3c68b
+
e3c68b
+    ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data);
e3c68b
+    if (ret) {
e3c68b
+        errno = ENOMEM;
e3c68b
+        goto out;
e3c68b
+    }
e3c68b
 
e3c68b
     ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count);
e3c68b
     if (ret)
e3c68b
@@ -4980,6 +5097,11 @@ glusterd_import_friend_volumes_synctask(void *opaque)
e3c68b
 out:
e3c68b
     if (peer_data)
e3c68b
         dict_unref(peer_data);
e3c68b
+    if (arg) {
e3c68b
+        if (arg->dict_buf)
e3c68b
+            GF_FREE(arg->dict_buf);
e3c68b
+        GF_FREE(arg);
e3c68b
+    }
e3c68b
 
e3c68b
     gf_msg_debug("glusterd", 0, "Returning with %d", ret);
e3c68b
     return ret;
e3c68b
@@ -5146,7 +5268,7 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname)
e3c68b
     gf_boolean_t update = _gf_false;
e3c68b
     xlator_t *this = NULL;
e3c68b
     glusterd_conf_t *priv = NULL;
e3c68b
-    dict_t *peer_data_copy = NULL;
e3c68b
+    glusterd_friend_synctask_args_t *arg = NULL;
e3c68b
 
e3c68b
     this = THIS;
e3c68b
     GF_ASSERT(this);
e3c68b
@@ -5188,12 +5310,23 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname)
e3c68b
          * first brick to come up before attaching the subsequent bricks
e3c68b
          * in case brick multiplexing is enabled
e3c68b
          */
e3c68b
-        peer_data_copy = dict_copy_with_ref(peer_data, NULL);
e3c68b
-        glusterd_launch_synctask(glusterd_import_friend_volumes_synctask,
e3c68b
-                                 peer_data_copy);
e3c68b
+        arg = GF_CALLOC(1, sizeof(*arg), gf_common_mt_char);
e3c68b
+        ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf,
e3c68b
+                                          &arg->dictlen);
e3c68b
+        if (ret < 0) {
e3c68b
+            gf_log(this->name, GF_LOG_ERROR,
e3c68b
+                   "dict_serialize failed while handling "
e3c68b
+                   " import friend volume request");
e3c68b
+            goto out;
e3c68b
+        }
e3c68b
+
e3c68b
+        glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg);
e3c68b
     }
e3c68b
 
e3c68b
 out:
e3c68b
+    if (ret && arg) {
e3c68b
+        GF_FREE(arg);
e3c68b
+    }
e3c68b
     gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret,
e3c68b
                  *status);
e3c68b
     return ret;
e3c68b
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
e3c68b
index 3647c34..6ad8062 100644
e3c68b
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
e3c68b
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
e3c68b
@@ -227,7 +227,8 @@ glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo,
e3c68b
                                        gf_boolean_t construct_real_path);
e3c68b
 
e3c68b
 int32_t
e3c68b
-glusterd_add_volumes_to_export_dict(dict_t **peer_data);
e3c68b
+glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf,
e3c68b
+                                    u_int *length);
e3c68b
 
e3c68b
 int32_t
e3c68b
 glusterd_compare_friend_data(dict_t *peer_data, int32_t *status,
e3c68b
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
e3c68b
index 2ea8560..f96bca3 100644
e3c68b
--- a/xlators/mgmt/glusterd/src/glusterd.h
e3c68b
+++ b/xlators/mgmt/glusterd/src/glusterd.h
e3c68b
@@ -240,6 +240,11 @@ typedef struct glusterd_add_dict_args {
e3c68b
     int end;
e3c68b
 } glusterd_add_dict_args_t;
e3c68b
 
e3c68b
+typedef struct glusterd_friend_synctask_args {
e3c68b
+    char *dict_buf;
e3c68b
+    u_int dictlen;
e3c68b
+} glusterd_friend_synctask_args_t;
e3c68b
+
e3c68b
 typedef enum gf_brick_status {
e3c68b
     GF_BRICK_STOPPED,
e3c68b
     GF_BRICK_STARTED,
e3c68b
-- 
e3c68b
1.8.3.1
e3c68b