|
|
256ebe |
From aff18f761ef64d55635daa9a1d2140fe35632820 Mon Sep 17 00:00:00 2001
|
|
|
256ebe |
From: Mohit Agrawal <moagrawal@redhat.com>
|
|
|
256ebe |
Date: Fri, 29 Mar 2019 11:48:32 +0530
|
|
|
256ebe |
Subject: [PATCH 109/124] glusterd: Optimize glusterd handshaking code path
|
|
|
256ebe |
|
|
|
256ebe |
Problem: At the time of handshaking glusterd populates volume
|
|
|
256ebe |
data in a dictionary. When the no. of volumes configured is
|
|
|
256ebe |
more than 1500, glusterd takes more than 10 min to generate
|
|
|
256ebe |
the data. Because this takes so long, the rpc request times out and
|
|
|
256ebe |
rpc starts bailing out call frames.
|
|
|
256ebe |
|
|
|
256ebe |
Solution: To optimize the code, the changes below were made
|
|
|
256ebe |
1) Spawn multiple threads to populate volumes data in bulk
|
|
|
256ebe |
in separate dictionary and introduce an option
|
|
|
256ebe |
glusterd.vol_count_per_thread to configure the no. of volumes each thread handles
|
|
|
256ebe |
to populate volume data.
|
|
|
256ebe |
2) Populate tier data only when the volume type is tier
|
|
|
256ebe |
3) Compare snap data only when snap_count is non-zero
|
|
|
256ebe |
|
|
|
256ebe |
> Fixes: bz#1699339
|
|
|
256ebe |
> Change-Id: I38dc71970c049217f9d1a06fc0aaf4c26eab18f5
|
|
|
256ebe |
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
|
|
|
256ebe |
> (Cherry picked from commit 26a19d9da3ab5604db02d4ca02ce868fb57193a4)
|
|
|
256ebe |
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22556/)
|
|
|
256ebe |
|
|
|
256ebe |
Bug: 1652461
|
|
|
256ebe |
Change-Id: Ia81671a7e1f173bcb32da9dc439be9e61c18bde1
|
|
|
256ebe |
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
|
|
|
256ebe |
Reviewed-on: https://code.engineering.redhat.com/gerrit/167981
|
|
|
256ebe |
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
|
|
|
256ebe |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
256ebe |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
256ebe |
---
|
|
|
256ebe |
libglusterfs/src/glusterfs/globals.h | 4 +-
|
|
|
256ebe |
tests/bugs/glusterd/bug-1699339.t | 69 ++++++
|
|
|
256ebe |
xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 +
|
|
|
256ebe |
.../mgmt/glusterd/src/glusterd-snapshot-utils.c | 3 +
|
|
|
256ebe |
xlators/mgmt/glusterd/src/glusterd-utils.c | 269 +++++++++++++++++----
|
|
|
256ebe |
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 55 +++++
|
|
|
256ebe |
xlators/mgmt/glusterd/src/glusterd.h | 10 +
|
|
|
256ebe |
7 files changed, 362 insertions(+), 49 deletions(-)
|
|
|
256ebe |
create mode 100644 tests/bugs/glusterd/bug-1699339.t
|
|
|
256ebe |
|
|
|
256ebe |
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
|
|
|
256ebe |
index 6642ba0..e45db14 100644
|
|
|
256ebe |
--- a/libglusterfs/src/glusterfs/globals.h
|
|
|
256ebe |
+++ b/libglusterfs/src/glusterfs/globals.h
|
|
|
256ebe |
@@ -50,7 +50,7 @@
|
|
|
256ebe |
1 /* MIN is the fresh start op-version, mostly \
|
|
|
256ebe |
should not change */
|
|
|
256ebe |
#define GD_OP_VERSION_MAX \
|
|
|
256ebe |
- GD_OP_VERSION_6_0 /* MAX VERSION is the maximum \
|
|
|
256ebe |
+ GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \
|
|
|
256ebe |
count in VME table, should \
|
|
|
256ebe |
keep changing with \
|
|
|
256ebe |
introduction of newer \
|
|
|
256ebe |
@@ -134,6 +134,8 @@
|
|
|
256ebe |
|
|
|
256ebe |
#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */
|
|
|
256ebe |
|
|
|
256ebe |
+#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
|
|
|
256ebe |
+
|
|
|
256ebe |
#include "glusterfs/xlator.h"
|
|
|
256ebe |
#include "glusterfs/options.h"
|
|
|
256ebe |
|
|
|
256ebe |
diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t
|
|
|
256ebe |
new file mode 100644
|
|
|
256ebe |
index 0000000..3e950f4
|
|
|
256ebe |
--- /dev/null
|
|
|
256ebe |
+++ b/tests/bugs/glusterd/bug-1699339.t
|
|
|
256ebe |
@@ -0,0 +1,69 @@
|
|
|
256ebe |
+#!/bin/bash
|
|
|
256ebe |
+
|
|
|
256ebe |
+. $(dirname $0)/../../include.rc
|
|
|
256ebe |
+. $(dirname $0)/../../volume.rc
|
|
|
256ebe |
+. $(dirname $0)/../../cluster.rc
|
|
|
256ebe |
+
|
|
|
256ebe |
+cleanup;
|
|
|
256ebe |
+
|
|
|
256ebe |
+NUM_VOLS=15
|
|
|
256ebe |
+
|
|
|
256ebe |
+
|
|
|
256ebe |
+get_brick_base () {
|
|
|
256ebe |
+ printf "%s/vol%02d" $B0 $1
|
|
|
256ebe |
+}
|
|
|
256ebe |
+
|
|
|
256ebe |
+function count_up_bricks {
|
|
|
256ebe |
+ vol=$1;
|
|
|
256ebe |
+ $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l
|
|
|
256ebe |
+}
|
|
|
256ebe |
+
|
|
|
256ebe |
+create_volume () {
|
|
|
256ebe |
+
|
|
|
256ebe |
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
|
|
|
256ebe |
+
|
|
|
256ebe |
+ TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name}
|
|
|
256ebe |
+ TEST $CLI_1 volume start $vol_name
|
|
|
256ebe |
+}
|
|
|
256ebe |
+
|
|
|
256ebe |
+TEST launch_cluster 3
|
|
|
256ebe |
+TEST $CLI_1 volume set all cluster.brick-multiplex on
|
|
|
256ebe |
+
|
|
|
256ebe |
+# The option accepts the value in the range from 5 to 200
|
|
|
256ebe |
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210
|
|
|
256ebe |
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4
|
|
|
256ebe |
+
|
|
|
256ebe |
+TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5
|
|
|
256ebe |
+
|
|
|
256ebe |
+TEST $CLI_1 peer probe $H2;
|
|
|
256ebe |
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
|
|
|
256ebe |
+
|
|
|
256ebe |
+TEST $CLI_1 peer probe $H3;
|
|
|
256ebe |
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
|
|
|
256ebe |
+
|
|
|
256ebe |
+# Our infrastructure can't handle an arithmetic expression here. The formula
|
|
|
256ebe |
+# is (NUM_VOLS-1)*2 because it sees each TEST/EXPECT once but needs the other
|
|
|
256ebe |
+# NUM_VOLS-1 and there are 2 such statements in each iteration.
|
|
|
256ebe |
+TESTS_EXPECTED_IN_LOOP=28
|
|
|
256ebe |
+for i in $(seq 1 $NUM_VOLS); do
|
|
|
256ebe |
+ starttime="$(date +%s)";
|
|
|
256ebe |
+ create_volume $i
|
|
|
256ebe |
+done
|
|
|
256ebe |
+
|
|
|
256ebe |
+TEST kill_glusterd 1
|
|
|
256ebe |
+
|
|
|
256ebe |
+vol1=$(printf "%s-vol%02d" $V0 1)
|
|
|
256ebe |
+TEST $CLI_2 volume set $vol1 performance.readdir-ahead on
|
|
|
256ebe |
+vol2=$(printf "%s-vol%02d" $V0 2)
|
|
|
256ebe |
+TEST $CLI_2 volume set $vol2 performance.readdir-ahead on
|
|
|
256ebe |
+
|
|
|
256ebe |
+# Bring back 1st glusterd
|
|
|
256ebe |
+TEST $glusterd_1
|
|
|
256ebe |
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
|
|
|
256ebe |
+
|
|
|
256ebe |
+EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead
|
|
|
256ebe |
+
|
|
|
256ebe |
+vol_name=$(printf "%s-vol%02d" $V0 2)
|
|
|
256ebe |
+EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead
|
|
|
256ebe |
+
|
|
|
256ebe |
+cleanup
|
|
|
256ebe |
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
|
|
|
256ebe |
index 95f9707..94a5e1f 100644
|
|
|
256ebe |
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
|
|
|
256ebe |
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
|
|
|
256ebe |
@@ -87,6 +87,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {
|
|
|
256ebe |
* TBD: Discuss the default value for this. Maybe this should be a
|
|
|
256ebe |
* dynamic value depending on the memory specifications per node */
|
|
|
256ebe |
{GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE},
|
|
|
256ebe |
+ {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE},
|
|
|
256ebe |
/*{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/
|
|
|
256ebe |
{GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"},
|
|
|
256ebe |
{NULL},
|
|
|
256ebe |
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
|
|
|
256ebe |
index b3c4158..d225854 100644
|
|
|
256ebe |
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
|
|
|
256ebe |
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
|
|
|
256ebe |
@@ -2099,6 +2099,9 @@ glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername,
|
|
|
256ebe |
goto out;
|
|
|
256ebe |
}
|
|
|
256ebe |
|
|
|
256ebe |
+ if (!snap_count)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+
|
|
|
256ebe |
for (i = 1; i <= snap_count; i++) {
|
|
|
256ebe |
/* Compare one snapshot from peer_data at a time */
|
|
|
256ebe |
ret = glusterd_compare_snap(peer_data, i, peername, peerid);
|
|
|
256ebe |
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
256ebe |
index fdd7d91..ff6102b 100644
|
|
|
256ebe |
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
256ebe |
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
256ebe |
@@ -155,6 +155,47 @@ out:
|
|
|
256ebe |
return ret;
|
|
|
256ebe |
}
|
|
|
256ebe |
|
|
|
256ebe |
+int
|
|
|
256ebe |
+get_gd_vol_thread_limit(int *thread_limit)
|
|
|
256ebe |
+{
|
|
|
256ebe |
+ char *value = NULL;
|
|
|
256ebe |
+ int ret = -1;
|
|
|
256ebe |
+ int vol_per_thread_limit = 0;
|
|
|
256ebe |
+ xlator_t *this = NULL;
|
|
|
256ebe |
+ glusterd_conf_t *priv = NULL;
|
|
|
256ebe |
+
|
|
|
256ebe |
+ this = THIS;
|
|
|
256ebe |
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
|
|
|
256ebe |
+
|
|
|
256ebe |
+ priv = this->private;
|
|
|
256ebe |
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
|
|
|
256ebe |
+
|
|
|
256ebe |
+ if (!is_brick_mx_enabled()) {
|
|
|
256ebe |
+ vol_per_thread_limit = 1;
|
|
|
256ebe |
+ ret = 0;
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+
|
|
|
256ebe |
+ ret = dict_get_strn(priv->opts, GLUSTERD_VOL_CNT_PER_THRD,
|
|
|
256ebe |
+ SLEN(GLUSTERD_VOL_CNT_PER_THRD), &value);
|
|
|
256ebe |
+ if (ret) {
|
|
|
256ebe |
+ value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE;
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+ ret = gf_string2int(value, &vol_per_thread_limit);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+
|
|
|
256ebe |
+out:
|
|
|
256ebe |
+ *thread_limit = vol_per_thread_limit;
|
|
|
256ebe |
+
|
|
|
256ebe |
+ gf_msg_debug("glusterd", 0,
|
|
|
256ebe |
+ "Per Thread volume limit set to %d glusterd to populate dict "
|
|
|
256ebe |
+ "data parallel",
|
|
|
256ebe |
+ *thread_limit);
|
|
|
256ebe |
+
|
|
|
256ebe |
+ return ret;
|
|
|
256ebe |
+}
|
|
|
256ebe |
+
|
|
|
256ebe |
extern struct volopt_map_entry glusterd_volopt_map[];
|
|
|
256ebe |
extern glusterd_all_vol_opts valid_all_vol_opts[];
|
|
|
256ebe |
|
|
|
256ebe |
@@ -3070,50 +3111,55 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
|
|
|
256ebe |
|
|
|
256ebe |
/* tiering related variables */
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_disperse_count);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key,
|
|
|
256ebe |
+ volinfo->tier_info.cold_disperse_count);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_redundancy_count);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key,
|
|
|
256ebe |
+ volinfo->tier_info.cold_redundancy_count);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_dist_leaf_count);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key,
|
|
|
256ebe |
+ volinfo->tier_info.cold_dist_leaf_count);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
|
|
|
256ebe |
- snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count);
|
|
|
256ebe |
- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count);
|
|
|
256ebe |
- if (ret)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
+ snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count);
|
|
|
256ebe |
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count);
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+ }
|
|
|
256ebe |
|
|
|
256ebe |
snprintf(key, sizeof(key), "%s%d", prefix, count);
|
|
|
256ebe |
ret = gd_add_vol_snap_details_to_dict(dict, key, volinfo);
|
|
|
256ebe |
@@ -3363,33 +3409,40 @@ out:
|
|
|
256ebe |
return ret;
|
|
|
256ebe |
}
|
|
|
256ebe |
|
|
|
256ebe |
-int32_t
|
|
|
256ebe |
-glusterd_add_volumes_to_export_dict(dict_t **peer_data)
|
|
|
256ebe |
+void *
|
|
|
256ebe |
+glusterd_add_bulk_volumes_create_thread(void *data)
|
|
|
256ebe |
{
|
|
|
256ebe |
int32_t ret = -1;
|
|
|
256ebe |
- dict_t *dict = NULL;
|
|
|
256ebe |
glusterd_conf_t *priv = NULL;
|
|
|
256ebe |
glusterd_volinfo_t *volinfo = NULL;
|
|
|
256ebe |
int32_t count = 0;
|
|
|
256ebe |
- glusterd_dict_ctx_t ctx = {0};
|
|
|
256ebe |
xlator_t *this = NULL;
|
|
|
256ebe |
+ glusterd_add_dict_args_t *arg = NULL;
|
|
|
256ebe |
+ dict_t *dict = NULL;
|
|
|
256ebe |
+ int start = 0;
|
|
|
256ebe |
+ int end = 0;
|
|
|
256ebe |
|
|
|
256ebe |
- this = THIS;
|
|
|
256ebe |
- GF_ASSERT(this);
|
|
|
256ebe |
+ GF_ASSERT(data);
|
|
|
256ebe |
+
|
|
|
256ebe |
+ arg = data;
|
|
|
256ebe |
+ dict = arg->voldict;
|
|
|
256ebe |
+ start = arg->start;
|
|
|
256ebe |
+ end = arg->end;
|
|
|
256ebe |
+ this = arg->this;
|
|
|
256ebe |
+ THIS = arg->this;
|
|
|
256ebe |
priv = this->private;
|
|
|
256ebe |
GF_ASSERT(priv);
|
|
|
256ebe |
|
|
|
256ebe |
- dict = dict_new();
|
|
|
256ebe |
- if (!dict)
|
|
|
256ebe |
- goto out;
|
|
|
256ebe |
-
|
|
|
256ebe |
cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
|
|
|
256ebe |
{
|
|
|
256ebe |
count++;
|
|
|
256ebe |
+ if ((count < start) || (count > end))
|
|
|
256ebe |
+ continue;
|
|
|
256ebe |
+
|
|
|
256ebe |
ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
|
|
|
256ebe |
if (ret)
|
|
|
256ebe |
goto out;
|
|
|
256ebe |
- if (!glusterd_is_volume_quota_enabled(volinfo))
|
|
|
256ebe |
+ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
|
|
|
256ebe |
continue;
|
|
|
256ebe |
ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
|
|
|
256ebe |
"volume");
|
|
|
256ebe |
@@ -3397,7 +3450,122 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
|
|
|
256ebe |
goto out;
|
|
|
256ebe |
}
|
|
|
256ebe |
|
|
|
256ebe |
- ret = dict_set_int32n(dict, "count", SLEN("count"), count);
|
|
|
256ebe |
+out:
|
|
|
256ebe |
+ GF_ATOMIC_DEC(priv->thread_count);
|
|
|
256ebe |
+ free(arg);
|
|
|
256ebe |
+ return NULL;
|
|
|
256ebe |
+}
|
|
|
256ebe |
+
|
|
|
256ebe |
+int32_t
|
|
|
256ebe |
+glusterd_add_volumes_to_export_dict(dict_t **peer_data)
|
|
|
256ebe |
+{
|
|
|
256ebe |
+ int32_t ret = -1;
|
|
|
256ebe |
+ dict_t *dict = NULL;
|
|
|
256ebe |
+ dict_t *dict_arr[128] = {
|
|
|
256ebe |
+ 0,
|
|
|
256ebe |
+ };
|
|
|
256ebe |
+ glusterd_conf_t *priv = NULL;
|
|
|
256ebe |
+ glusterd_volinfo_t *volinfo = NULL;
|
|
|
256ebe |
+ int32_t count = 0;
|
|
|
256ebe |
+ glusterd_dict_ctx_t ctx = {0};
|
|
|
256ebe |
+ xlator_t *this = NULL;
|
|
|
256ebe |
+ int totthread = 0;
|
|
|
256ebe |
+ int volcnt = 0;
|
|
|
256ebe |
+ int start = 1;
|
|
|
256ebe |
+ int endindex = 0;
|
|
|
256ebe |
+ int vol_per_thread_limit = 0;
|
|
|
256ebe |
+ glusterd_add_dict_args_t *arg = NULL;
|
|
|
256ebe |
+ pthread_t th_id = {
|
|
|
256ebe |
+ 0,
|
|
|
256ebe |
+ };
|
|
|
256ebe |
+ int th_ret = 0;
|
|
|
256ebe |
+ int i = 0;
|
|
|
256ebe |
+
|
|
|
256ebe |
+ this = THIS;
|
|
|
256ebe |
+ GF_ASSERT(this);
|
|
|
256ebe |
+ priv = this->private;
|
|
|
256ebe |
+ GF_ASSERT(priv);
|
|
|
256ebe |
+
|
|
|
256ebe |
+ dict = dict_new();
|
|
|
256ebe |
+ if (!dict)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+
|
|
|
256ebe |
+ /* Count the total number of volumes */
|
|
|
256ebe |
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++;
|
|
|
256ebe |
+
|
|
|
256ebe |
+ get_gd_vol_thread_limit(&vol_per_thread_limit);
|
|
|
256ebe |
+
|
|
|
256ebe |
+ if ((vol_per_thread_limit == 1) || (vol_per_thread_limit > 100)) {
|
|
|
256ebe |
+ totthread = 0;
|
|
|
256ebe |
+ } else {
|
|
|
256ebe |
+ totthread = volcnt / vol_per_thread_limit;
|
|
|
256ebe |
+ endindex = volcnt % vol_per_thread_limit;
|
|
|
256ebe |
+ if (endindex)
|
|
|
256ebe |
+ totthread++;
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+
|
|
|
256ebe |
+ if (totthread == 0) {
|
|
|
256ebe |
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
|
|
|
256ebe |
+ {
|
|
|
256ebe |
+ count++;
|
|
|
256ebe |
+ ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+
|
|
|
256ebe |
+ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
|
|
|
256ebe |
+ continue;
|
|
|
256ebe |
+
|
|
|
256ebe |
+ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
|
|
|
256ebe |
+ "volume");
|
|
|
256ebe |
+ if (ret)
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+ } else {
|
|
|
256ebe |
+ for (i = 0; i < totthread; i++) {
|
|
|
256ebe |
+ arg = calloc(1, sizeof(*arg));
|
|
|
256ebe |
+ dict_arr[i] = dict_new();
|
|
|
256ebe |
+ arg->this = this;
|
|
|
256ebe |
+ arg->voldict = dict_arr[i];
|
|
|
256ebe |
+ arg->start = start;
|
|
|
256ebe |
+ if (!endindex) {
|
|
|
256ebe |
+ arg->end = ((i + 1) * vol_per_thread_limit);
|
|
|
256ebe |
+ } else {
|
|
|
256ebe |
+ arg->end = (start + endindex);
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+ th_ret = gf_thread_create_detached(
|
|
|
256ebe |
+ &th_id, glusterd_add_bulk_volumes_create_thread, arg,
|
|
|
256ebe |
+ "bulkvoldict");
|
|
|
256ebe |
+ if (th_ret) {
|
|
|
256ebe |
+ gf_log(this->name, GF_LOG_ERROR,
|
|
|
256ebe |
+ "glusterd_add_bulk_volume %s"
|
|
|
256ebe |
+ " thread creation failed",
|
|
|
256ebe |
+ "bulkvoldict");
|
|
|
256ebe |
+ free(arg);
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+
|
|
|
256ebe |
+ start = start + vol_per_thread_limit;
|
|
|
256ebe |
+ GF_ATOMIC_INC(priv->thread_count);
|
|
|
256ebe |
+ gf_log(this->name, GF_LOG_INFO,
|
|
|
256ebe |
+ "Create thread %d to populate dict data for volume"
|
|
|
256ebe |
+ " start index is %d end index is %d",
|
|
|
256ebe |
+ (i + 1), arg->start, arg->end);
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+ while (GF_ATOMIC_GET(priv->thread_count)) {
|
|
|
256ebe |
+ sleep(1);
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+
|
|
|
256ebe |
+ gf_log(this->name, GF_LOG_INFO,
|
|
|
256ebe |
+ "Finished dictionary popluation in all threads");
|
|
|
256ebe |
+ for (i = 0; i < totthread; i++) {
|
|
|
256ebe |
+ dict_copy_with_ref(dict_arr[i], dict);
|
|
|
256ebe |
+ dict_unref(dict_arr[i]);
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+ gf_log(this->name, GF_LOG_INFO,
|
|
|
256ebe |
+ "Finished merger of all dictionraies into single one");
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+
|
|
|
256ebe |
+ ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt);
|
|
|
256ebe |
if (ret)
|
|
|
256ebe |
goto out;
|
|
|
256ebe |
|
|
|
256ebe |
@@ -3499,6 +3667,9 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
|
|
|
256ebe |
goto out;
|
|
|
256ebe |
}
|
|
|
256ebe |
|
|
|
256ebe |
+ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
|
|
|
256ebe |
+ goto skip_quota;
|
|
|
256ebe |
+
|
|
|
256ebe |
snprintf(key, sizeof(key), "volume%d.quota-version", count);
|
|
|
256ebe |
ret = dict_get_uint32(peer_data, key, &quota_version);
|
|
|
256ebe |
if (ret) {
|
|
|
256ebe |
@@ -3550,6 +3721,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
|
|
|
256ebe |
goto out;
|
|
|
256ebe |
}
|
|
|
256ebe |
}
|
|
|
256ebe |
+
|
|
|
256ebe |
+skip_quota:
|
|
|
256ebe |
*status = GLUSTERD_VOL_COMP_SCS;
|
|
|
256ebe |
|
|
|
256ebe |
out:
|
|
|
256ebe |
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
256ebe |
index 42ca9bb..10aa2ae 100644
|
|
|
256ebe |
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
256ebe |
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
256ebe |
@@ -1058,6 +1058,51 @@ out:
|
|
|
256ebe |
}
|
|
|
256ebe |
|
|
|
256ebe |
static int
|
|
|
256ebe |
+validate_volume_per_thread_limit(glusterd_volinfo_t *volinfo, dict_t *dict,
|
|
|
256ebe |
+ char *key, char *value, char **op_errstr)
|
|
|
256ebe |
+{
|
|
|
256ebe |
+ xlator_t *this = NULL;
|
|
|
256ebe |
+ uint val = 0;
|
|
|
256ebe |
+ int ret = -1;
|
|
|
256ebe |
+
|
|
|
256ebe |
+ this = THIS;
|
|
|
256ebe |
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
|
|
|
256ebe |
+
|
|
|
256ebe |
+ if (!is_brick_mx_enabled()) {
|
|
|
256ebe |
+ gf_asprintf(op_errstr,
|
|
|
256ebe |
+ "Brick-multiplexing is not enabled. "
|
|
|
256ebe |
+ "Please enable brick multiplexing before trying "
|
|
|
256ebe |
+ "to set this option.");
|
|
|
256ebe |
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s",
|
|
|
256ebe |
+ *op_errstr);
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+
|
|
|
256ebe |
+ ret = gf_string2uint(value, &val);
|
|
|
256ebe |
+ if (ret) {
|
|
|
256ebe |
+ gf_asprintf(op_errstr,
|
|
|
256ebe |
+ "%s is not a valid count. "
|
|
|
256ebe |
+ "%s expects an unsigned integer.",
|
|
|
256ebe |
+ value, key);
|
|
|
256ebe |
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s",
|
|
|
256ebe |
+ *op_errstr);
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+
|
|
|
256ebe |
+ if ((val < 5) || (val > 200)) {
|
|
|
256ebe |
+ gf_asprintf(
|
|
|
256ebe |
+ op_errstr,
|
|
|
256ebe |
+ "Please set this option to a greater than 5 or less than 200 "
|
|
|
256ebe |
+ "to optimize dict generated while no. of volumes are more");
|
|
|
256ebe |
+ ret = -1;
|
|
|
256ebe |
+ goto out;
|
|
|
256ebe |
+ }
|
|
|
256ebe |
+out:
|
|
|
256ebe |
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
|
|
|
256ebe |
+
|
|
|
256ebe |
+ return ret;
|
|
|
256ebe |
+}
|
|
|
256ebe |
+
|
|
|
256ebe |
+static int
|
|
|
256ebe |
validate_boolean(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
|
|
|
256ebe |
char *value, char **op_errstr)
|
|
|
256ebe |
{
|
|
|
256ebe |
@@ -3520,6 +3565,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
|
|
256ebe |
"brick multiplexing. Brick multiplexing ensures that "
|
|
|
256ebe |
"compatible brick instances can share one single "
|
|
|
256ebe |
"brick process."},
|
|
|
256ebe |
+ {.key = GLUSTERD_VOL_CNT_PER_THRD,
|
|
|
256ebe |
+ .voltype = "mgmt/glusterd",
|
|
|
256ebe |
+ .value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE,
|
|
|
256ebe |
+ .op_version = GD_OP_VERSION_7_0,
|
|
|
256ebe |
+ .validate_fn = validate_volume_per_thread_limit,
|
|
|
256ebe |
+ .type = GLOBAL_NO_DOC,
|
|
|
256ebe |
+ .description =
|
|
|
256ebe |
+ "This option can be used to limit the number of volumes "
|
|
|
256ebe |
+ "handled by per thread to populate peer data.The option accepts "
|
|
|
256ebe |
+ " the value in the range of 5 to 200"},
|
|
|
256ebe |
{.key = GLUSTERD_BRICKMUX_LIMIT_KEY,
|
|
|
256ebe |
.voltype = "mgmt/glusterd",
|
|
|
256ebe |
.value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE,
|
|
|
256ebe |
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
256ebe |
index 0ac6e63..bd9f509 100644
|
|
|
256ebe |
--- a/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
256ebe |
+++ b/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
256ebe |
@@ -57,8 +57,10 @@
|
|
|
256ebe |
#define GLUSTER_SHARED_STORAGE "gluster_shared_storage"
|
|
|
256ebe |
#define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage"
|
|
|
256ebe |
#define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex"
|
|
|
256ebe |
+#define GLUSTERD_VOL_CNT_PER_THRD "glusterd.vol_count_per_thread"
|
|
|
256ebe |
#define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process"
|
|
|
256ebe |
#define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250"
|
|
|
256ebe |
+#define GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE "100"
|
|
|
256ebe |
#define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging"
|
|
|
256ebe |
#define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level"
|
|
|
256ebe |
|
|
|
256ebe |
@@ -225,8 +227,16 @@ typedef struct {
|
|
|
256ebe |
which might lead the modification of volinfo
|
|
|
256ebe |
list.
|
|
|
256ebe |
*/
|
|
|
256ebe |
+ gf_atomic_t thread_count;
|
|
|
256ebe |
} glusterd_conf_t;
|
|
|
256ebe |
|
|
|
256ebe |
+typedef struct glusterd_add_dict_args {
|
|
|
256ebe |
+ xlator_t *this;
|
|
|
256ebe |
+ dict_t *voldict;
|
|
|
256ebe |
+ int start;
|
|
|
256ebe |
+ int end;
|
|
|
256ebe |
+} glusterd_add_dict_args_t;
|
|
|
256ebe |
+
|
|
|
256ebe |
typedef enum gf_brick_status {
|
|
|
256ebe |
GF_BRICK_STOPPED,
|
|
|
256ebe |
GF_BRICK_STARTED,
|
|
|
256ebe |
--
|
|
|
256ebe |
1.8.3.1
|
|
|
256ebe |
|